searchsocket 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -5,12 +5,12 @@ var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
7
  var child_process = require('child_process');
8
- var pLimit2 = require('p-limit');
9
8
  var crypto = require('crypto');
10
9
  var cheerio = require('cheerio');
11
10
  var matter = require('gray-matter');
12
- var fs4 = require('fs/promises');
13
11
  var fg = require('fast-glob');
12
+ var pLimit = require('p-limit');
13
+ var fs3 = require('fs/promises');
14
14
  var net = require('net');
15
15
  var zlib = require('zlib');
16
16
  var mcp_js = require('@modelcontextprotocol/sdk/server/mcp.js');
@@ -22,10 +22,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
22
22
 
23
23
  var fs__default = /*#__PURE__*/_interopDefault(fs);
24
24
  var path__default = /*#__PURE__*/_interopDefault(path);
25
- var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
26
25
  var matter__default = /*#__PURE__*/_interopDefault(matter);
27
- var fs4__default = /*#__PURE__*/_interopDefault(fs4);
28
26
  var fg__default = /*#__PURE__*/_interopDefault(fg);
27
+ var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
28
+ var fs3__default = /*#__PURE__*/_interopDefault(fs3);
29
29
  var net__default = /*#__PURE__*/_interopDefault(net);
30
30
 
31
31
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -2771,12 +2771,12 @@ var require_ChildNode = __commonJS({
2771
2771
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
2772
2772
  var Node2 = require_Node();
2773
2773
  var LinkedList = require_LinkedList();
2774
- var createDocumentFragmentFromArguments = function(document, args) {
2775
- var docFrag = document.createDocumentFragment();
2774
+ var createDocumentFragmentFromArguments = function(document2, args) {
2775
+ var docFrag = document2.createDocumentFragment();
2776
2776
  for (var i = 0; i < args.length; i++) {
2777
2777
  var argItem = args[i];
2778
2778
  var isNode = argItem instanceof Node2;
2779
- docFrag.appendChild(isNode ? argItem : document.createTextNode(String(argItem)));
2779
+ docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
2780
2780
  }
2781
2781
  return docFrag;
2782
2782
  };
@@ -2934,7 +2934,7 @@ var require_NamedNodeMap = __commonJS({
2934
2934
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
2935
2935
  var require_Element = __commonJS({
2936
2936
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
2937
- module.exports = Element;
2937
+ module.exports = Element2;
2938
2938
  var xml = require_xmlnames();
2939
2939
  var utils = require_utils();
2940
2940
  var NAMESPACE = utils.NAMESPACE;
@@ -2951,7 +2951,7 @@ var require_Element = __commonJS({
2951
2951
  var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
2952
2952
  var NamedNodeMap = require_NamedNodeMap();
2953
2953
  var uppercaseCache = /* @__PURE__ */ Object.create(null);
2954
- function Element(doc, localName, namespaceURI, prefix) {
2954
+ function Element2(doc, localName, namespaceURI, prefix) {
2955
2955
  ContainerNode.call(this);
2956
2956
  this.nodeType = Node2.ELEMENT_NODE;
2957
2957
  this.ownerDocument = doc;
@@ -2971,7 +2971,7 @@ var require_Element = __commonJS({
2971
2971
  recursiveGetText(node.childNodes[i], a);
2972
2972
  }
2973
2973
  }
2974
- Element.prototype = Object.create(ContainerNode.prototype, {
2974
+ Element2.prototype = Object.create(ContainerNode.prototype, {
2975
2975
  isHTML: { get: function isHTML() {
2976
2976
  return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
2977
2977
  } },
@@ -3041,7 +3041,7 @@ var require_Element = __commonJS({
3041
3041
  return NodeUtils.serializeOne(this, { nodeType: 0 });
3042
3042
  },
3043
3043
  set: function(v) {
3044
- var document = this.ownerDocument;
3044
+ var document2 = this.ownerDocument;
3045
3045
  var parent = this.parentNode;
3046
3046
  if (parent === null) {
3047
3047
  return;
@@ -3052,8 +3052,8 @@ var require_Element = __commonJS({
3052
3052
  if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
3053
3053
  parent = parent.ownerDocument.createElement("body");
3054
3054
  }
3055
- var parser = document.implementation.mozHTMLParser(
3056
- document._address,
3055
+ var parser = document2.implementation.mozHTMLParser(
3056
+ document2._address,
3057
3057
  parent
3058
3058
  );
3059
3059
  parser.parse(v === null ? "" : String(v), true);
@@ -3112,7 +3112,7 @@ var require_Element = __commonJS({
3112
3112
  default:
3113
3113
  utils.SyntaxError();
3114
3114
  }
3115
- if (!(context instanceof Element) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3115
+ if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3116
3116
  context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
3117
3117
  }
3118
3118
  var parser = this.ownerDocument.implementation.mozHTMLParser(
@@ -3720,10 +3720,10 @@ var require_Element = __commonJS({
3720
3720
  return nodes.item ? nodes : new NodeList(nodes);
3721
3721
  } }
3722
3722
  });
3723
- Object.defineProperties(Element.prototype, ChildNode);
3724
- Object.defineProperties(Element.prototype, NonDocumentTypeChildNode);
3723
+ Object.defineProperties(Element2.prototype, ChildNode);
3724
+ Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
3725
3725
  attributes.registerChangeHandler(
3726
- Element,
3726
+ Element2,
3727
3727
  "id",
3728
3728
  function(element, lname, oldval, newval) {
3729
3729
  if (element.rooted) {
@@ -3737,7 +3737,7 @@ var require_Element = __commonJS({
3737
3737
  }
3738
3738
  );
3739
3739
  attributes.registerChangeHandler(
3740
- Element,
3740
+ Element2,
3741
3741
  "class",
3742
3742
  function(element, lname, oldval, newval) {
3743
3743
  if (element._classList) {
@@ -3836,7 +3836,7 @@ var require_Element = __commonJS({
3836
3836
  }
3837
3837
  }
3838
3838
  });
3839
- Element._Attr = Attr;
3839
+ Element2._Attr = Attr;
3840
3840
  function AttributesArray(elt) {
3841
3841
  NamedNodeMap.call(this, elt);
3842
3842
  for (var name in elt._attrsByQName) {
@@ -4238,7 +4238,7 @@ var require_DocumentFragment = __commonJS({
4238
4238
  var Node2 = require_Node();
4239
4239
  var NodeList = require_NodeList();
4240
4240
  var ContainerNode = require_ContainerNode();
4241
- var Element = require_Element();
4241
+ var Element2 = require_Element();
4242
4242
  var select = require_select();
4243
4243
  var utils = require_utils();
4244
4244
  function DocumentFragment(doc) {
@@ -4256,9 +4256,9 @@ var require_DocumentFragment = __commonJS({
4256
4256
  }
4257
4257
  },
4258
4258
  // Copy the text content getter/setter from Element
4259
- textContent: Object.getOwnPropertyDescriptor(Element.prototype, "textContent"),
4259
+ textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
4260
4260
  // Copy the text content getter/setter from Element
4261
- innerText: Object.getOwnPropertyDescriptor(Element.prototype, "innerText"),
4261
+ innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
4262
4262
  querySelector: { value: function(selector) {
4263
4263
  var nodes = this.querySelectorAll(selector);
4264
4264
  return nodes.length ? nodes[0] : null;
@@ -4266,8 +4266,8 @@ var require_DocumentFragment = __commonJS({
4266
4266
  querySelectorAll: { value: function(selector) {
4267
4267
  var context = Object.create(this);
4268
4268
  context.isHTML = true;
4269
- context.getElementsByTagName = Element.prototype.getElementsByTagName;
4270
- context.nextElement = Object.getOwnPropertyDescriptor(Element.prototype, "firstElementChild").get;
4269
+ context.getElementsByTagName = Element2.prototype.getElementsByTagName;
4270
+ context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
4271
4271
  var nodes = select(selector, context);
4272
4272
  return nodes.item ? nodes : new NodeList(nodes);
4273
4273
  } },
@@ -4349,7 +4349,7 @@ var require_ProcessingInstruction = __commonJS({
4349
4349
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
4350
4350
  var require_NodeFilter = __commonJS({
4351
4351
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
4352
- var NodeFilter = {
4352
+ var NodeFilter2 = {
4353
4353
  // Constants for acceptNode()
4354
4354
  FILTER_ACCEPT: 1,
4355
4355
  FILTER_REJECT: 2,
@@ -4374,7 +4374,7 @@ var require_NodeFilter = __commonJS({
4374
4374
  SHOW_NOTATION: 2048
4375
4375
  // historical
4376
4376
  };
4377
- module.exports = NodeFilter.constructor = NodeFilter.prototype = NodeFilter;
4377
+ module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
4378
4378
  }
4379
4379
  });
4380
4380
 
@@ -4449,7 +4449,7 @@ var require_TreeWalker = __commonJS({
4449
4449
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
4450
4450
  module.exports = TreeWalker;
4451
4451
  var Node2 = require_Node();
4452
- var NodeFilter = require_NodeFilter();
4452
+ var NodeFilter2 = require_NodeFilter();
4453
4453
  var NodeTraversal = require_NodeTraversal();
4454
4454
  var utils = require_utils();
4455
4455
  var mapChild = {
@@ -4469,11 +4469,11 @@ var require_TreeWalker = __commonJS({
4469
4469
  node = tw._currentNode[mapChild[type]];
4470
4470
  while (node !== null) {
4471
4471
  result = tw._internalFilter(node);
4472
- if (result === NodeFilter.FILTER_ACCEPT) {
4472
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4473
4473
  tw._currentNode = node;
4474
4474
  return node;
4475
4475
  }
4476
- if (result === NodeFilter.FILTER_SKIP) {
4476
+ if (result === NodeFilter2.FILTER_SKIP) {
4477
4477
  child = node[mapChild[type]];
4478
4478
  if (child !== null) {
4479
4479
  node = child;
@@ -4507,12 +4507,12 @@ var require_TreeWalker = __commonJS({
4507
4507
  while (sibling !== null) {
4508
4508
  node = sibling;
4509
4509
  result = tw._internalFilter(node);
4510
- if (result === NodeFilter.FILTER_ACCEPT) {
4510
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4511
4511
  tw._currentNode = node;
4512
4512
  return node;
4513
4513
  }
4514
4514
  sibling = node[mapChild[type]];
4515
- if (result === NodeFilter.FILTER_REJECT || sibling === null) {
4515
+ if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
4516
4516
  sibling = node[mapSibling[type]];
4517
4517
  }
4518
4518
  }
@@ -4520,7 +4520,7 @@ var require_TreeWalker = __commonJS({
4520
4520
  if (node === null || node === tw.root) {
4521
4521
  return null;
4522
4522
  }
4523
- if (tw._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4523
+ if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4524
4524
  return null;
4525
4525
  }
4526
4526
  }
@@ -4568,11 +4568,11 @@ var require_TreeWalker = __commonJS({
4568
4568
  utils.InvalidStateError();
4569
4569
  }
4570
4570
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4571
- return NodeFilter.FILTER_SKIP;
4571
+ return NodeFilter2.FILTER_SKIP;
4572
4572
  }
4573
4573
  filter = this._filter;
4574
4574
  if (filter === null) {
4575
- result = NodeFilter.FILTER_ACCEPT;
4575
+ result = NodeFilter2.FILTER_ACCEPT;
4576
4576
  } else {
4577
4577
  this._active = true;
4578
4578
  try {
@@ -4601,7 +4601,7 @@ var require_TreeWalker = __commonJS({
4601
4601
  if (node === null) {
4602
4602
  return null;
4603
4603
  }
4604
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4604
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4605
4605
  this._currentNode = node;
4606
4606
  return node;
4607
4607
  }
@@ -4654,17 +4654,17 @@ var require_TreeWalker = __commonJS({
4654
4654
  for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
4655
4655
  node = previousSibling;
4656
4656
  result = this._internalFilter(node);
4657
- if (result === NodeFilter.FILTER_REJECT) {
4657
+ if (result === NodeFilter2.FILTER_REJECT) {
4658
4658
  continue;
4659
4659
  }
4660
4660
  for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
4661
4661
  node = lastChild;
4662
4662
  result = this._internalFilter(node);
4663
- if (result === NodeFilter.FILTER_REJECT) {
4663
+ if (result === NodeFilter2.FILTER_REJECT) {
4664
4664
  break;
4665
4665
  }
4666
4666
  }
4667
- if (result === NodeFilter.FILTER_ACCEPT) {
4667
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4668
4668
  this._currentNode = node;
4669
4669
  return node;
4670
4670
  }
@@ -4673,7 +4673,7 @@ var require_TreeWalker = __commonJS({
4673
4673
  return null;
4674
4674
  }
4675
4675
  node = node.parentNode;
4676
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4676
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4677
4677
  this._currentNode = node;
4678
4678
  return node;
4679
4679
  }
@@ -4690,26 +4690,26 @@ var require_TreeWalker = __commonJS({
4690
4690
  nextNode: { value: function nextNode() {
4691
4691
  var node, result, firstChild, nextSibling;
4692
4692
  node = this._currentNode;
4693
- result = NodeFilter.FILTER_ACCEPT;
4693
+ result = NodeFilter2.FILTER_ACCEPT;
4694
4694
  CHILDREN:
4695
4695
  while (true) {
4696
4696
  for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
4697
4697
  node = firstChild;
4698
4698
  result = this._internalFilter(node);
4699
- if (result === NodeFilter.FILTER_ACCEPT) {
4699
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4700
4700
  this._currentNode = node;
4701
4701
  return node;
4702
- } else if (result === NodeFilter.FILTER_REJECT) {
4702
+ } else if (result === NodeFilter2.FILTER_REJECT) {
4703
4703
  break;
4704
4704
  }
4705
4705
  }
4706
4706
  for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
4707
4707
  node = nextSibling;
4708
4708
  result = this._internalFilter(node);
4709
- if (result === NodeFilter.FILTER_ACCEPT) {
4709
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4710
4710
  this._currentNode = node;
4711
4711
  return node;
4712
- } else if (result === NodeFilter.FILTER_SKIP) {
4712
+ } else if (result === NodeFilter2.FILTER_SKIP) {
4713
4713
  continue CHILDREN;
4714
4714
  }
4715
4715
  }
@@ -4728,7 +4728,7 @@ var require_TreeWalker = __commonJS({
4728
4728
  var require_NodeIterator = __commonJS({
4729
4729
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
4730
4730
  module.exports = NodeIterator;
4731
- var NodeFilter = require_NodeFilter();
4731
+ var NodeFilter2 = require_NodeFilter();
4732
4732
  var NodeTraversal = require_NodeTraversal();
4733
4733
  var utils = require_utils();
4734
4734
  function move(node, stayWithin, directionIsNext) {
@@ -4763,7 +4763,7 @@ var require_NodeIterator = __commonJS({
4763
4763
  }
4764
4764
  }
4765
4765
  var result = ni._internalFilter(node);
4766
- if (result === NodeFilter.FILTER_ACCEPT) {
4766
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4767
4767
  break;
4768
4768
  }
4769
4769
  }
@@ -4811,11 +4811,11 @@ var require_NodeIterator = __commonJS({
4811
4811
  utils.InvalidStateError();
4812
4812
  }
4813
4813
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4814
- return NodeFilter.FILTER_SKIP;
4814
+ return NodeFilter2.FILTER_SKIP;
4815
4815
  }
4816
4816
  filter = this._filter;
4817
4817
  if (filter === null) {
4818
- result = NodeFilter.FILTER_ACCEPT;
4818
+ result = NodeFilter2.FILTER_ACCEPT;
4819
4819
  } else {
4820
4820
  this._active = true;
4821
4821
  try {
@@ -5025,32 +5025,32 @@ var require_URL = __commonJS({
5025
5025
  else
5026
5026
  return basepath.substring(0, lastslash + 1) + refpath;
5027
5027
  }
5028
- function remove_dot_segments(path15) {
5029
- if (!path15) return path15;
5028
+ function remove_dot_segments(path13) {
5029
+ if (!path13) return path13;
5030
5030
  var output = "";
5031
- while (path15.length > 0) {
5032
- if (path15 === "." || path15 === "..") {
5033
- path15 = "";
5031
+ while (path13.length > 0) {
5032
+ if (path13 === "." || path13 === "..") {
5033
+ path13 = "";
5034
5034
  break;
5035
5035
  }
5036
- var twochars = path15.substring(0, 2);
5037
- var threechars = path15.substring(0, 3);
5038
- var fourchars = path15.substring(0, 4);
5036
+ var twochars = path13.substring(0, 2);
5037
+ var threechars = path13.substring(0, 3);
5038
+ var fourchars = path13.substring(0, 4);
5039
5039
  if (threechars === "../") {
5040
- path15 = path15.substring(3);
5040
+ path13 = path13.substring(3);
5041
5041
  } else if (twochars === "./") {
5042
- path15 = path15.substring(2);
5042
+ path13 = path13.substring(2);
5043
5043
  } else if (threechars === "/./") {
5044
- path15 = "/" + path15.substring(3);
5045
- } else if (twochars === "/." && path15.length === 2) {
5046
- path15 = "/";
5047
- } else if (fourchars === "/../" || threechars === "/.." && path15.length === 3) {
5048
- path15 = "/" + path15.substring(4);
5044
+ path13 = "/" + path13.substring(3);
5045
+ } else if (twochars === "/." && path13.length === 2) {
5046
+ path13 = "/";
5047
+ } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5048
+ path13 = "/" + path13.substring(4);
5049
5049
  output = output.replace(/\/?[^\/]*$/, "");
5050
5050
  } else {
5051
- var segment = path15.match(/(\/?([^\/]*))/)[0];
5051
+ var segment = path13.match(/(\/?([^\/]*))/)[0];
5052
5052
  output += segment;
5053
- path15 = path15.substring(segment.length);
5053
+ path13 = path13.substring(segment.length);
5054
5054
  }
5055
5055
  }
5056
5056
  return output;
@@ -5615,9 +5615,9 @@ var require_defineElement = __commonJS({
5615
5615
  });
5616
5616
  return c;
5617
5617
  };
5618
- function EventHandlerBuilder(body, document, form, element) {
5618
+ function EventHandlerBuilder(body, document2, form, element) {
5619
5619
  this.body = body;
5620
- this.document = document;
5620
+ this.document = document2;
5621
5621
  this.form = form;
5622
5622
  this.element = element;
5623
5623
  }
@@ -5651,7 +5651,7 @@ var require_defineElement = __commonJS({
5651
5651
  var require_htmlelts = __commonJS({
5652
5652
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
5653
5653
  var Node2 = require_Node();
5654
- var Element = require_Element();
5654
+ var Element2 = require_Element();
5655
5655
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
5656
5656
  var utils = require_utils();
5657
5657
  var URLUtils = require_URLUtils();
@@ -5719,10 +5719,10 @@ var require_htmlelts = __commonJS({
5719
5719
  this._form = null;
5720
5720
  };
5721
5721
  var HTMLElement = exports$1.HTMLElement = define({
5722
- superclass: Element,
5722
+ superclass: Element2,
5723
5723
  name: "HTMLElement",
5724
5724
  ctor: function HTMLElement2(doc, localName, prefix) {
5725
- Element.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5725
+ Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5726
5726
  },
5727
5727
  props: {
5728
5728
  dangerouslySetInnerHTML: {
@@ -7204,7 +7204,7 @@ var require_htmlelts = __commonJS({
7204
7204
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
7205
7205
  var require_svg = __commonJS({
7206
7206
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
7207
- var Element = require_Element();
7207
+ var Element2 = require_Element();
7208
7208
  var defineElement = require_defineElement();
7209
7209
  var utils = require_utils();
7210
7210
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
@@ -7218,10 +7218,10 @@ var require_svg = __commonJS({
7218
7218
  return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
7219
7219
  }
7220
7220
  var SVGElement = define({
7221
- superclass: Element,
7221
+ superclass: Element2,
7222
7222
  name: "SVGElement",
7223
7223
  ctor: function SVGElement2(doc, localName, prefix) {
7224
- Element.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7224
+ Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7225
7225
  },
7226
7226
  props: {
7227
7227
  style: { get: function() {
@@ -7356,7 +7356,7 @@ var require_Document = __commonJS({
7356
7356
  var Node2 = require_Node();
7357
7357
  var NodeList = require_NodeList();
7358
7358
  var ContainerNode = require_ContainerNode();
7359
- var Element = require_Element();
7359
+ var Element2 = require_Element();
7360
7360
  var Text = require_Text();
7361
7361
  var Comment = require_Comment();
7362
7362
  var Event = require_Event();
@@ -7365,7 +7365,7 @@ var require_Document = __commonJS({
7365
7365
  var DOMImplementation = require_DOMImplementation();
7366
7366
  var TreeWalker = require_TreeWalker();
7367
7367
  var NodeIterator = require_NodeIterator();
7368
- var NodeFilter = require_NodeFilter();
7368
+ var NodeFilter2 = require_NodeFilter();
7369
7369
  var URL2 = require_URL();
7370
7370
  var select = require_select();
7371
7371
  var events = require_events();
@@ -7504,13 +7504,13 @@ var require_Document = __commonJS({
7504
7504
  if (this.isHTML) {
7505
7505
  localName = utils.toASCIILowerCase(localName);
7506
7506
  }
7507
- return new Element._Attr(null, localName, null, null, "");
7507
+ return new Element2._Attr(null, localName, null, null, "");
7508
7508
  } },
7509
7509
  createAttributeNS: { value: function(namespace, qualifiedName) {
7510
7510
  namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
7511
7511
  qualifiedName = String(qualifiedName);
7512
7512
  var ve = validateAndExtract(namespace, qualifiedName);
7513
- return new Element._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7513
+ return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7514
7514
  } },
7515
7515
  createElement: { value: function(localName) {
7516
7516
  localName = String(localName);
@@ -7522,7 +7522,7 @@ var require_Document = __commonJS({
7522
7522
  } else if (this.contentType === "application/xhtml+xml") {
7523
7523
  return html.createElement(this, localName, null);
7524
7524
  } else {
7525
- return new Element(this, localName, null, null);
7525
+ return new Element2(this, localName, null, null);
7526
7526
  }
7527
7527
  }, writable: isApiWritable },
7528
7528
  createElementNS: { value: function(namespace, qualifiedName) {
@@ -7539,7 +7539,7 @@ var require_Document = __commonJS({
7539
7539
  } else if (namespace === NAMESPACE.SVG) {
7540
7540
  return svg.createElement(this, localName, prefix);
7541
7541
  }
7542
- return new Element(this, localName, namespace, prefix);
7542
+ return new Element2(this, localName, namespace, prefix);
7543
7543
  } },
7544
7544
  createEvent: { value: function createEvent(interfaceName) {
7545
7545
  interfaceName = interfaceName.toLowerCase();
@@ -7561,7 +7561,7 @@ var require_Document = __commonJS({
7561
7561
  if (!(root3 instanceof Node2)) {
7562
7562
  throw new TypeError("root not a node");
7563
7563
  }
7564
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7564
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7565
7565
  filter = filter === void 0 ? null : filter;
7566
7566
  return new TreeWalker(root3, whatToShow, filter);
7567
7567
  } },
@@ -7573,7 +7573,7 @@ var require_Document = __commonJS({
7573
7573
  if (!(root3 instanceof Node2)) {
7574
7574
  throw new TypeError("root not a node");
7575
7575
  }
7576
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7576
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7577
7577
  filter = filter === void 0 ? null : filter;
7578
7578
  return new NodeIterator(root3, whatToShow, filter);
7579
7579
  } },
@@ -7634,10 +7634,10 @@ var require_Document = __commonJS({
7634
7634
  return this.byId[id] instanceof MultiId;
7635
7635
  } },
7636
7636
  // Just copy this method from the Element prototype
7637
- getElementsByName: { value: Element.prototype.getElementsByName },
7638
- getElementsByTagName: { value: Element.prototype.getElementsByTagName },
7639
- getElementsByTagNameNS: { value: Element.prototype.getElementsByTagNameNS },
7640
- getElementsByClassName: { value: Element.prototype.getElementsByClassName },
7637
+ getElementsByName: { value: Element2.prototype.getElementsByName },
7638
+ getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
7639
+ getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
7640
+ getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
7641
7641
  adoptNode: { value: function adoptNode(node) {
7642
7642
  if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
7643
7643
  if (node.nodeType === Node2.ATTRIBUTE_NODE) {
@@ -16463,8 +16463,8 @@ var require_Window = __commonJS({
16463
16463
  var Location = require_Location();
16464
16464
  var utils = require_utils();
16465
16465
  module.exports = Window;
16466
- function Window(document) {
16467
- this.document = document || new DOMImplementation(null).createHTMLDocument("");
16466
+ function Window(document2) {
16467
+ this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
16468
16468
  this.document._scripting_enabled = true;
16469
16469
  this.document.defaultView = this;
16470
16470
  this.location = new Location(this, this.document._address || "about:blank");
@@ -16594,11 +16594,11 @@ var require_lib = __commonJS({
16594
16594
  };
16595
16595
  };
16596
16596
  exports$1.createWindow = function(html, address) {
16597
- var document = exports$1.createDocument(html);
16597
+ var document2 = exports$1.createDocument(html);
16598
16598
  if (address !== void 0) {
16599
- document._address = address;
16599
+ document2._address = address;
16600
16600
  }
16601
- return new impl.Window(document);
16601
+ return new impl.Window(document2);
16602
16602
  };
16603
16603
  exports$1.impl = impl;
16604
16604
  }
@@ -16663,29 +16663,18 @@ var searchSocketConfigSchema = zod.z.object({
16663
16663
  prependTitle: zod.z.boolean().optional(),
16664
16664
  pageSummaryChunk: zod.z.boolean().optional()
16665
16665
  }).optional(),
16666
- embeddings: zod.z.object({
16667
- provider: zod.z.literal("jina").optional(),
16668
- model: zod.z.string().min(1).optional(),
16669
- apiKey: zod.z.string().min(1).optional(),
16670
- apiKeyEnv: zod.z.string().min(1).optional(),
16671
- batchSize: zod.z.number().int().positive().optional(),
16672
- concurrency: zod.z.number().int().positive().optional(),
16673
- pricePer1kTokens: zod.z.number().positive().optional()
16666
+ upstash: zod.z.object({
16667
+ url: zod.z.string().url().optional(),
16668
+ token: zod.z.string().min(1).optional(),
16669
+ urlEnv: zod.z.string().min(1).optional(),
16670
+ tokenEnv: zod.z.string().min(1).optional()
16674
16671
  }).optional(),
16675
- vector: zod.z.object({
16676
- dimension: zod.z.number().int().positive().optional(),
16677
- turso: zod.z.object({
16678
- url: zod.z.string().url().optional(),
16679
- authToken: zod.z.string().min(1).optional(),
16680
- urlEnv: zod.z.string().optional(),
16681
- authTokenEnv: zod.z.string().optional(),
16682
- localPath: zod.z.string().optional()
16683
- }).optional()
16684
- }).optional(),
16685
- rerank: zod.z.object({
16686
- enabled: zod.z.boolean().optional(),
16687
- topN: zod.z.number().int().positive().optional(),
16688
- model: zod.z.string().optional()
16672
+ search: zod.z.object({
16673
+ semanticWeight: zod.z.number().min(0).max(1).optional(),
16674
+ inputEnrichment: zod.z.boolean().optional(),
16675
+ reranking: zod.z.boolean().optional(),
16676
+ dualSearch: zod.z.boolean().optional(),
16677
+ pageSearchWeight: zod.z.number().min(0).max(1).optional()
16689
16678
  }).optional(),
16690
16679
  ranking: zod.z.object({
16691
16680
  enableIncomingLinkBoost: zod.z.boolean().optional(),
@@ -16695,11 +16684,12 @@ var searchSocketConfigSchema = zod.z.object({
16695
16684
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16696
16685
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16697
16686
  minScore: zod.z.number().min(0).max(1).optional(),
16687
+ scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
16698
16688
  weights: zod.z.object({
16699
16689
  incomingLinks: zod.z.number().optional(),
16700
16690
  depth: zod.z.number().optional(),
16701
- rerank: zod.z.number().optional(),
16702
- aggregation: zod.z.number().optional()
16691
+ aggregation: zod.z.number().optional(),
16692
+ titleMatch: zod.z.number().optional()
16703
16693
  }).optional()
16704
16694
  }).optional(),
16705
16695
  api: zod.z.object({
@@ -16721,8 +16711,7 @@ var searchSocketConfigSchema = zod.z.object({
16721
16711
  }).optional()
16722
16712
  }).optional(),
16723
16713
  state: zod.z.object({
16724
- dir: zod.z.string().optional(),
16725
- writeMirror: zod.z.boolean().optional()
16714
+ dir: zod.z.string().optional()
16726
16715
  }).optional()
16727
16716
  });
16728
16717
 
@@ -16776,24 +16765,16 @@ function createDefaultConfig(projectId) {
16776
16765
  prependTitle: true,
16777
16766
  pageSummaryChunk: true
16778
16767
  },
16779
- embeddings: {
16780
- provider: "jina",
16781
- model: "jina-embeddings-v5-text-small",
16782
- apiKeyEnv: "JINA_API_KEY",
16783
- batchSize: 64,
16784
- concurrency: 4
16785
- },
16786
- vector: {
16787
- turso: {
16788
- urlEnv: "TURSO_DATABASE_URL",
16789
- authTokenEnv: "TURSO_AUTH_TOKEN",
16790
- localPath: ".searchsocket/vectors.db"
16791
- }
16768
+ upstash: {
16769
+ urlEnv: "UPSTASH_SEARCH_REST_URL",
16770
+ tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16792
16771
  },
16793
- rerank: {
16794
- enabled: true,
16795
- topN: 20,
16796
- model: "jina-reranker-v3"
16772
+ search: {
16773
+ semanticWeight: 0.75,
16774
+ inputEnrichment: true,
16775
+ reranking: true,
16776
+ dualSearch: true,
16777
+ pageSearchWeight: 0.3
16797
16778
  },
16798
16779
  ranking: {
16799
16780
  enableIncomingLinkBoost: true,
@@ -16802,12 +16783,13 @@ function createDefaultConfig(projectId) {
16802
16783
  aggregationCap: 5,
16803
16784
  aggregationDecay: 0.5,
16804
16785
  minChunkScoreRatio: 0.5,
16805
- minScore: 0,
16786
+ minScore: 0.3,
16787
+ scoreGapThreshold: 0.4,
16806
16788
  weights: {
16807
16789
  incomingLinks: 0.05,
16808
16790
  depth: 0.03,
16809
- rerank: 1,
16810
- aggregation: 0.1
16791
+ aggregation: 0.1,
16792
+ titleMatch: 0.15
16811
16793
  }
16812
16794
  },
16813
16795
  api: {
@@ -16825,8 +16807,7 @@ function createDefaultConfig(projectId) {
16825
16807
  }
16826
16808
  },
16827
16809
  state: {
16828
- dir: ".searchsocket",
16829
- writeMirror: false
16810
+ dir: ".searchsocket"
16830
16811
  }
16831
16812
  };
16832
16813
  }
@@ -16950,21 +16931,13 @@ ${issues}`
16950
16931
  ...defaults.chunking,
16951
16932
  ...parsed.chunking
16952
16933
  },
16953
- embeddings: {
16954
- ...defaults.embeddings,
16955
- ...parsed.embeddings
16934
+ upstash: {
16935
+ ...defaults.upstash,
16936
+ ...parsed.upstash
16956
16937
  },
16957
- vector: {
16958
- ...defaults.vector,
16959
- ...parsed.vector,
16960
- turso: {
16961
- ...defaults.vector.turso,
16962
- ...parsed.vector?.turso
16963
- }
16964
- },
16965
- rerank: {
16966
- ...defaults.rerank,
16967
- ...parsed.rerank
16938
+ search: {
16939
+ ...defaults.search,
16940
+ ...parsed.search
16968
16941
  },
16969
16942
  ranking: {
16970
16943
  ...defaults.ranking,
@@ -17143,660 +17116,245 @@ function resolveScope(config, override) {
17143
17116
  scopeId: `${config.project.id}:${scopeName}`
17144
17117
  };
17145
17118
  }
17146
- function sleep(ms) {
17147
- return new Promise((resolve) => {
17148
- setTimeout(resolve, ms);
17149
- });
17150
- }
17151
- var JinaEmbeddingsProvider = class {
17152
- apiKey;
17153
- batchSize;
17154
- concurrency;
17155
- defaultTask;
17156
- constructor(options) {
17157
- if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17158
- throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
17159
- }
17160
- if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17161
- throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17162
- }
17163
- this.apiKey = options.apiKey;
17164
- this.batchSize = options.batchSize;
17165
- this.concurrency = options.concurrency;
17166
- this.defaultTask = options.task ?? "retrieval.passage";
17167
- }
17168
- estimateTokens(text) {
17169
- const normalized = text.trim();
17170
- if (!normalized) {
17171
- return 0;
17172
- }
17173
- const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
17174
- const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
17175
- const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
17176
- const charEstimate = Math.ceil(normalized.length / 4);
17177
- const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17178
- return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17179
- }
17180
- async embedTexts(texts, modelId, task) {
17181
- if (texts.length === 0) {
17182
- return [];
17183
- }
17184
- const batches = [];
17185
- for (let i = 0; i < texts.length; i += this.batchSize) {
17186
- batches.push({
17187
- index: i,
17188
- values: texts.slice(i, i + this.batchSize)
17189
- });
17190
- }
17191
- const outputs = new Array(batches.length);
17192
- const limit = pLimit2__default.default(this.concurrency);
17193
- await Promise.all(
17194
- batches.map(
17195
- (batch, position) => limit(async () => {
17196
- outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17197
- })
17198
- )
17199
- );
17200
- return outputs.flat();
17201
- }
17202
- async embedWithRetry(texts, modelId, task) {
17203
- const maxAttempts = 5;
17204
- let attempt = 0;
17205
- while (attempt < maxAttempts) {
17206
- attempt += 1;
17207
- let response;
17208
- try {
17209
- response = await fetch("https://api.jina.ai/v1/embeddings", {
17210
- method: "POST",
17211
- headers: {
17212
- "content-type": "application/json",
17213
- authorization: `Bearer ${this.apiKey}`
17214
- },
17215
- body: JSON.stringify({
17216
- model: modelId,
17217
- input: texts,
17218
- task
17219
- })
17220
- });
17221
- } catch (error) {
17222
- if (attempt >= maxAttempts) {
17223
- throw error;
17224
- }
17225
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17226
- continue;
17227
- }
17228
- if (!response.ok) {
17229
- const retryable = response.status === 429 || response.status >= 500;
17230
- if (!retryable || attempt >= maxAttempts) {
17231
- const errorBody = await response.text();
17232
- throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17233
- }
17234
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17235
- continue;
17236
- }
17237
- const payload = await response.json();
17238
- if (!payload.data || !Array.isArray(payload.data)) {
17239
- throw new Error("Invalid Jina embeddings response format");
17240
- }
17241
- return payload.data.map((entry) => entry.embedding);
17242
- }
17243
- throw new Error("Unreachable retry state");
17244
- }
17245
- };
17246
-
17247
- // src/embeddings/factory.ts
17248
- function createEmbeddingsProvider(config) {
17249
- if (config.embeddings.provider !== "jina") {
17250
- throw new SearchSocketError(
17251
- "CONFIG_MISSING",
17252
- `Unsupported embeddings provider ${config.embeddings.provider}`
17253
- );
17254
- }
17255
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17256
- if (!apiKey) {
17257
- throw new SearchSocketError(
17258
- "CONFIG_MISSING",
17259
- `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17260
- );
17261
- }
17262
- return new JinaEmbeddingsProvider({
17263
- apiKey,
17264
- batchSize: config.embeddings.batchSize,
17265
- concurrency: config.embeddings.concurrency
17266
- });
17267
- }
17268
-
17269
- // src/rerank/jina.ts
17270
- function sleep2(ms) {
17271
- return new Promise((resolve) => {
17272
- setTimeout(resolve, ms);
17273
- });
17274
- }
17275
- var JinaReranker = class {
17276
- apiKey;
17277
- model;
17278
- maxRetries;
17279
- constructor(options) {
17280
- this.apiKey = options.apiKey;
17281
- this.model = options.model;
17282
- this.maxRetries = options.maxRetries ?? 2;
17283
- }
17284
- async rerank(query, candidates, topN) {
17285
- if (candidates.length === 0) {
17286
- return [];
17287
- }
17288
- const body = {
17289
- model: this.model,
17290
- query,
17291
- documents: candidates.map((candidate) => candidate.text),
17292
- top_n: topN ?? candidates.length,
17293
- return_documents: false
17294
- };
17295
- let attempt = 0;
17296
- while (attempt <= this.maxRetries) {
17297
- attempt += 1;
17298
- let response;
17299
- try {
17300
- response = await fetch("https://api.jina.ai/v1/rerank", {
17301
- method: "POST",
17302
- headers: {
17303
- "content-type": "application/json",
17304
- authorization: `Bearer ${this.apiKey}`
17305
- },
17306
- body: JSON.stringify(body)
17307
- });
17308
- } catch (error) {
17309
- if (attempt <= this.maxRetries) {
17310
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17311
- continue;
17312
- }
17313
- throw error;
17314
- }
17315
- if (!response.ok) {
17316
- const retryable = response.status === 429 || response.status >= 500;
17317
- if (retryable && attempt <= this.maxRetries) {
17318
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17319
- continue;
17320
- }
17321
- const errorBody = await response.text();
17322
- throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
17323
- }
17324
- const payload = await response.json();
17325
- const rawResults = payload.results ?? payload.data ?? [];
17326
- if (!Array.isArray(rawResults)) {
17327
- throw new Error("Invalid Jina rerank response format");
17328
- }
17329
- return rawResults.flatMap((item) => {
17330
- const index = item.index;
17331
- if (typeof index !== "number" || index < 0 || index >= candidates.length) {
17332
- return [];
17333
- }
17334
- const candidate = candidates[index];
17335
- if (!candidate) {
17336
- return [];
17337
- }
17338
- const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
17339
- return [
17340
- {
17341
- id: candidate.id,
17342
- score
17343
- }
17344
- ];
17345
- }).sort((a, b) => b.score - a.score);
17346
- }
17347
- throw new Error("Jina rerank request failed after retries");
17348
- }
17349
- };
17350
-
17351
- // src/rerank/factory.ts
17352
- function createReranker(config) {
17353
- if (!config.rerank.enabled) {
17354
- return null;
17355
- }
17356
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17357
- if (!apiKey) {
17358
- return null;
17359
- }
17360
- return new JinaReranker({
17361
- apiKey,
17362
- model: config.rerank.model
17363
- });
17364
- }
17365
17119
  function ensureStateDirs(cwd, stateDir, scope) {
17366
17120
  const statePath = path__default.default.resolve(cwd, stateDir);
17367
- const pagesPath = path__default.default.join(statePath, "pages", scope.scopeName);
17368
- fs__default.default.mkdirSync(pagesPath, { recursive: true });
17369
- return { statePath, pagesPath };
17121
+ fs__default.default.mkdirSync(statePath, { recursive: true });
17122
+ return { statePath };
17370
17123
  }
17371
17124
 
17372
- // src/vector/turso.ts
17373
- var TursoVectorStore = class {
17125
+ // src/vector/upstash.ts
17126
+ function chunkIndexName(scope) {
17127
+ return `${scope.projectId}--${scope.scopeName}`;
17128
+ }
17129
+ function pageIndexName(scope) {
17130
+ return `${scope.projectId}--${scope.scopeName}--pages`;
17131
+ }
17132
+ var UpstashSearchStore = class {
17374
17133
  client;
17375
- dimension;
17376
- chunksReady = false;
17377
- registryReady = false;
17378
- pagesReady = false;
17379
17134
  constructor(opts) {
17380
17135
  this.client = opts.client;
17381
- this.dimension = opts.dimension;
17382
17136
  }
17383
- async ensureRegistry() {
17384
- if (this.registryReady) return;
17385
- await this.client.execute(`
17386
- CREATE TABLE IF NOT EXISTS registry (
17387
- scope_key TEXT PRIMARY KEY,
17388
- project_id TEXT NOT NULL,
17389
- scope_name TEXT NOT NULL,
17390
- model_id TEXT NOT NULL,
17391
- last_indexed_at TEXT NOT NULL,
17392
- vector_count INTEGER,
17393
- last_estimate_tokens INTEGER,
17394
- last_estimate_cost_usd REAL,
17395
- last_estimate_changed_chunks INTEGER
17396
- )
17397
- `);
17398
- const estimateCols = [
17399
- { name: "last_estimate_tokens", def: "INTEGER" },
17400
- { name: "last_estimate_cost_usd", def: "REAL" },
17401
- { name: "last_estimate_changed_chunks", def: "INTEGER" }
17402
- ];
17403
- for (const col of estimateCols) {
17404
- try {
17405
- await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
17406
- } catch (error) {
17407
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17408
- throw error;
17409
- }
17410
- }
17411
- }
17412
- this.registryReady = true;
17413
- }
17414
- async ensureChunks(dim) {
17415
- if (this.chunksReady) return;
17416
- const exists = await this.chunksTableExists();
17417
- if (exists) {
17418
- const currentDim = await this.getChunksDimension();
17419
- if (currentDim !== null && currentDim !== dim) {
17420
- await this.client.batch([
17421
- "DROP INDEX IF EXISTS idx",
17422
- "DROP TABLE IF EXISTS chunks"
17423
- ]);
17424
- }
17425
- }
17426
- await this.client.batch([
17427
- `CREATE TABLE IF NOT EXISTS chunks (
17428
- id TEXT PRIMARY KEY,
17429
- project_id TEXT NOT NULL,
17430
- scope_name TEXT NOT NULL,
17431
- url TEXT NOT NULL,
17432
- path TEXT NOT NULL,
17433
- title TEXT NOT NULL,
17434
- section_title TEXT NOT NULL DEFAULT '',
17435
- heading_path TEXT NOT NULL DEFAULT '[]',
17436
- snippet TEXT NOT NULL DEFAULT '',
17437
- chunk_text TEXT NOT NULL DEFAULT '',
17438
- ordinal INTEGER NOT NULL DEFAULT 0,
17439
- content_hash TEXT NOT NULL DEFAULT '',
17440
- model_id TEXT NOT NULL DEFAULT '',
17441
- depth INTEGER NOT NULL DEFAULT 0,
17442
- incoming_links INTEGER NOT NULL DEFAULT 0,
17443
- route_file TEXT NOT NULL DEFAULT '',
17444
- tags TEXT NOT NULL DEFAULT '[]',
17445
- description TEXT NOT NULL DEFAULT '',
17446
- keywords TEXT NOT NULL DEFAULT '[]',
17447
- embedding F32_BLOB(${dim})
17448
- )`,
17449
- `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17450
- ]);
17451
- this.chunksReady = true;
17137
+ chunkIndex(scope) {
17138
+ return this.client.index(chunkIndexName(scope));
17452
17139
  }
17453
- async ensurePages() {
17454
- if (this.pagesReady) return;
17455
- await this.client.execute(`
17456
- CREATE TABLE IF NOT EXISTS pages (
17457
- project_id TEXT NOT NULL,
17458
- scope_name TEXT NOT NULL,
17459
- url TEXT NOT NULL,
17460
- title TEXT NOT NULL,
17461
- markdown TEXT NOT NULL,
17462
- route_file TEXT NOT NULL DEFAULT '',
17463
- route_resolution TEXT NOT NULL DEFAULT 'exact',
17464
- incoming_links INTEGER NOT NULL DEFAULT 0,
17465
- outgoing_links INTEGER NOT NULL DEFAULT 0,
17466
- depth INTEGER NOT NULL DEFAULT 0,
17467
- tags TEXT NOT NULL DEFAULT '[]',
17468
- indexed_at TEXT NOT NULL,
17469
- PRIMARY KEY (project_id, scope_name, url)
17470
- )
17471
- `);
17472
- this.pagesReady = true;
17140
+ pageIndex(scope) {
17141
+ return this.client.index(pageIndexName(scope));
17473
17142
  }
17474
- async chunksTableExists() {
17475
- try {
17476
- await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
17477
- return true;
17478
- } catch (error) {
17479
- if (error instanceof Error && error.message.includes("no such table")) {
17480
- return false;
17481
- }
17482
- throw error;
17483
- }
17484
- }
17485
- /**
17486
- * Read the current F32_BLOB dimension from the chunks table schema.
17487
- * Returns null if the table doesn't exist or the dimension can't be parsed.
17488
- */
17489
- async getChunksDimension() {
17490
- try {
17491
- const rs = await this.client.execute(
17492
- "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17493
- );
17494
- if (rs.rows.length === 0) return null;
17495
- const sql = rs.rows[0].sql;
17496
- const match = sql.match(/F32_BLOB\((\d+)\)/i);
17497
- return match ? parseInt(match[1], 10) : null;
17498
- } catch {
17499
- return null;
17500
- }
17501
- }
17502
- /**
17503
- * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17504
- * Used by `clean --remote` for a full reset.
17505
- */
17506
- async dropAllTables() {
17507
- await this.client.batch([
17508
- "DROP INDEX IF EXISTS idx",
17509
- "DROP TABLE IF EXISTS chunks",
17510
- "DROP TABLE IF EXISTS registry",
17511
- "DROP TABLE IF EXISTS pages"
17512
- ]);
17513
- this.chunksReady = false;
17514
- this.registryReady = false;
17515
- this.pagesReady = false;
17516
- }
17517
- async upsert(records, _scope) {
17518
- if (records.length === 0) return;
17519
- const dim = this.dimension ?? records[0].vector.length;
17520
- await this.ensureChunks(dim);
17143
+ async upsertChunks(chunks, scope) {
17144
+ if (chunks.length === 0) return;
17145
+ const index = this.chunkIndex(scope);
17521
17146
  const BATCH_SIZE = 100;
17522
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
17523
- const batch = records.slice(i, i + BATCH_SIZE);
17524
- const stmts = batch.map((r) => ({
17525
- sql: `INSERT OR REPLACE INTO chunks
17526
- (id, project_id, scope_name, url, path, title, section_title,
17527
- heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17528
- incoming_links, route_file, tags, description, keywords, embedding)
17529
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17530
- args: [
17531
- r.id,
17532
- r.metadata.projectId,
17533
- r.metadata.scopeName,
17534
- r.metadata.url,
17535
- r.metadata.path,
17536
- r.metadata.title,
17537
- r.metadata.sectionTitle,
17538
- JSON.stringify(r.metadata.headingPath),
17539
- r.metadata.snippet,
17540
- r.metadata.chunkText,
17541
- r.metadata.ordinal,
17542
- r.metadata.contentHash,
17543
- r.metadata.modelId,
17544
- r.metadata.depth,
17545
- r.metadata.incomingLinks,
17546
- r.metadata.routeFile,
17547
- JSON.stringify(r.metadata.tags),
17548
- r.metadata.description ?? "",
17549
- JSON.stringify(r.metadata.keywords ?? []),
17550
- JSON.stringify(r.vector)
17551
- ]
17552
- }));
17553
- await this.client.batch(stmts);
17147
+ for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17148
+ const batch = chunks.slice(i, i + BATCH_SIZE);
17149
+ await index.upsert(batch);
17554
17150
  }
17555
17151
  }
17556
- async query(queryVector, opts, scope) {
17557
- const dim = this.dimension ?? queryVector.length;
17558
- await this.ensureChunks(dim);
17559
- const queryJson = JSON.stringify(queryVector);
17560
- const rs = await this.client.execute({
17561
- sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17562
- c.section_title, c.heading_path, c.snippet, c.chunk_text,
17563
- c.ordinal, c.content_hash,
17564
- c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17565
- c.description, c.keywords,
17566
- vector_distance_cos(c.embedding, vector(?)) AS distance
17567
- FROM vector_top_k('idx', vector(?), ?) AS v
17568
- JOIN chunks AS c ON c.rowid = v.id`,
17569
- args: [queryJson, queryJson, opts.topK]
17152
+ async search(query, opts, scope) {
17153
+ const index = this.chunkIndex(scope);
17154
+ const results = await index.search({
17155
+ query,
17156
+ limit: opts.limit,
17157
+ semanticWeight: opts.semanticWeight,
17158
+ inputEnrichment: opts.inputEnrichment,
17159
+ reranking: opts.reranking,
17160
+ filter: opts.filter
17570
17161
  });
17571
- let hits = [];
17572
- for (const row of rs.rows) {
17573
- const projectId = row.project_id;
17574
- const scopeName = row.scope_name;
17575
- if (projectId !== scope.projectId || scopeName !== scope.scopeName) {
17576
- continue;
17162
+ return results.map((doc) => ({
17163
+ id: doc.id,
17164
+ score: doc.score,
17165
+ metadata: {
17166
+ projectId: doc.metadata?.projectId ?? "",
17167
+ scopeName: doc.metadata?.scopeName ?? "",
17168
+ url: doc.content.url,
17169
+ path: doc.metadata?.path ?? "",
17170
+ title: doc.content.title,
17171
+ sectionTitle: doc.content.sectionTitle,
17172
+ headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17173
+ snippet: doc.metadata?.snippet ?? "",
17174
+ chunkText: doc.content.text,
17175
+ ordinal: doc.metadata?.ordinal ?? 0,
17176
+ contentHash: doc.metadata?.contentHash ?? "",
17177
+ depth: doc.metadata?.depth ?? 0,
17178
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17179
+ routeFile: doc.metadata?.routeFile ?? "",
17180
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17181
+ description: doc.metadata?.description || void 0,
17182
+ keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17577
17183
  }
17578
- const rowPath = row.path;
17579
- if (opts.pathPrefix) {
17580
- const rawPrefix = opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}`;
17581
- const prefix = rawPrefix.endsWith("/") ? rawPrefix : `${rawPrefix}/`;
17582
- const normalizedPath = rowPath.replace(/\/$/, "");
17583
- const normalizedPrefix = rawPrefix.replace(/\/$/, "");
17584
- if (normalizedPath !== normalizedPrefix && !rowPath.startsWith(prefix)) {
17585
- continue;
17586
- }
17587
- }
17588
- const tags = JSON.parse(row.tags || "[]");
17589
- if (opts.tags && opts.tags.length > 0) {
17590
- if (!opts.tags.every((t) => tags.includes(t))) {
17591
- continue;
17592
- }
17593
- }
17594
- const distance = row.distance;
17595
- const score = 1 - distance;
17596
- const description = row.description || void 0;
17597
- const keywords = (() => {
17598
- const raw = row.keywords || "[]";
17599
- const parsed = JSON.parse(raw);
17600
- return parsed.length > 0 ? parsed : void 0;
17601
- })();
17602
- hits.push({
17603
- id: row.id,
17604
- score,
17605
- metadata: {
17606
- projectId,
17607
- scopeName,
17608
- url: row.url,
17609
- path: rowPath,
17610
- title: row.title,
17611
- sectionTitle: row.section_title,
17612
- headingPath: JSON.parse(row.heading_path || "[]"),
17613
- snippet: row.snippet,
17614
- chunkText: row.chunk_text || "",
17615
- ordinal: row.ordinal || 0,
17616
- contentHash: row.content_hash,
17617
- modelId: row.model_id,
17618
- depth: row.depth,
17619
- incomingLinks: row.incoming_links,
17620
- routeFile: row.route_file,
17621
- tags,
17622
- description,
17623
- keywords
17624
- }
17184
+ }));
17185
+ }
17186
+ async searchPages(query, opts, scope) {
17187
+ const index = this.pageIndex(scope);
17188
+ let results;
17189
+ try {
17190
+ results = await index.search({
17191
+ query,
17192
+ limit: opts.limit,
17193
+ semanticWeight: opts.semanticWeight,
17194
+ inputEnrichment: opts.inputEnrichment,
17195
+ reranking: true,
17196
+ filter: opts.filter
17625
17197
  });
17198
+ } catch {
17199
+ return [];
17626
17200
  }
17627
- hits.sort((a, b) => b.score - a.score);
17628
- return hits;
17201
+ return results.map((doc) => ({
17202
+ id: doc.id,
17203
+ score: doc.score,
17204
+ title: doc.content.title,
17205
+ url: doc.content.url,
17206
+ description: doc.content.description ?? "",
17207
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17208
+ depth: doc.metadata?.depth ?? 0,
17209
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17210
+ routeFile: doc.metadata?.routeFile ?? ""
17211
+ }));
17629
17212
  }
17630
17213
  async deleteByIds(ids, scope) {
17631
17214
  if (ids.length === 0) return;
17215
+ const index = this.chunkIndex(scope);
17632
17216
  const BATCH_SIZE = 500;
17633
17217
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17634
17218
  const batch = ids.slice(i, i + BATCH_SIZE);
17635
- const placeholders = batch.map(() => "?").join(", ");
17636
- await this.client.execute({
17637
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
17638
- args: [scope.projectId, scope.scopeName, ...batch]
17639
- });
17219
+ await index.delete(batch);
17640
17220
  }
17641
17221
  }
17642
17222
  async deleteScope(scope) {
17643
- await this.ensureRegistry();
17644
17223
  try {
17645
- await this.client.execute({
17646
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ?`,
17647
- args: [scope.projectId, scope.scopeName]
17648
- });
17649
- } catch (error) {
17650
- if (error instanceof Error && !error.message.includes("no such table")) {
17651
- throw error;
17652
- }
17224
+ const chunkIdx = this.chunkIndex(scope);
17225
+ await chunkIdx.deleteIndex();
17226
+ } catch {
17653
17227
  }
17654
17228
  try {
17655
- await this.client.execute({
17656
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17657
- args: [scope.projectId, scope.scopeName]
17658
- });
17659
- } catch (error) {
17660
- if (error instanceof Error && !error.message.includes("no such table")) {
17661
- throw error;
17662
- }
17229
+ const pageIdx = this.pageIndex(scope);
17230
+ await pageIdx.deleteIndex();
17231
+ } catch {
17663
17232
  }
17664
- await this.client.execute({
17665
- sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
17666
- args: [scope.projectId, scope.scopeName]
17667
- });
17668
- }
17669
- async listScopes(scopeProjectId) {
17670
- await this.ensureRegistry();
17671
- const rs = await this.client.execute({
17672
- sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
17673
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
17674
- FROM registry WHERE project_id = ?`,
17675
- args: [scopeProjectId]
17676
- });
17677
- return rs.rows.map((row) => ({
17678
- projectId: row.project_id,
17679
- scopeName: row.scope_name,
17680
- modelId: row.model_id,
17681
- lastIndexedAt: row.last_indexed_at,
17682
- vectorCount: row.vector_count,
17683
- lastEstimateTokens: row.last_estimate_tokens,
17684
- lastEstimateCostUSD: row.last_estimate_cost_usd,
17685
- lastEstimateChangedChunks: row.last_estimate_changed_chunks
17686
- }));
17687
17233
  }
17688
- async recordScope(info) {
17689
- await this.ensureRegistry();
17690
- const key = `${info.projectId}:${info.scopeName}`;
17691
- await this.client.execute({
17692
- sql: `INSERT OR REPLACE INTO registry
17693
- (scope_key, project_id, scope_name, model_id, last_indexed_at, vector_count,
17694
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks)
17695
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17696
- args: [
17697
- key,
17698
- info.projectId,
17699
- info.scopeName,
17700
- info.modelId,
17701
- info.lastIndexedAt,
17702
- info.vectorCount ?? null,
17703
- info.lastEstimateTokens ?? null,
17704
- info.lastEstimateCostUSD ?? null,
17705
- info.lastEstimateChangedChunks ?? null
17706
- ]
17707
- });
17234
+ async listScopes(projectId) {
17235
+ const allIndexes = await this.client.listIndexes();
17236
+ const prefix = `${projectId}--`;
17237
+ const scopeNames = /* @__PURE__ */ new Set();
17238
+ for (const name of allIndexes) {
17239
+ if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17240
+ const scopeName = name.slice(prefix.length);
17241
+ scopeNames.add(scopeName);
17242
+ }
17243
+ }
17244
+ const scopes = [];
17245
+ for (const scopeName of scopeNames) {
17246
+ const scope = {
17247
+ projectId,
17248
+ scopeName,
17249
+ scopeId: `${projectId}:${scopeName}`
17250
+ };
17251
+ try {
17252
+ const info = await this.chunkIndex(scope).info();
17253
+ scopes.push({
17254
+ projectId,
17255
+ scopeName,
17256
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17257
+ documentCount: info.documentCount
17258
+ });
17259
+ } catch {
17260
+ scopes.push({
17261
+ projectId,
17262
+ scopeName,
17263
+ lastIndexedAt: "unknown",
17264
+ documentCount: 0
17265
+ });
17266
+ }
17267
+ }
17268
+ return scopes;
17708
17269
  }
17709
17270
  async getContentHashes(scope) {
17710
- const exists = await this.chunksTableExists();
17711
- if (!exists) return /* @__PURE__ */ new Map();
17712
- const rs = await this.client.execute({
17713
- sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
17714
- args: [scope.projectId, scope.scopeName]
17715
- });
17716
17271
  const map = /* @__PURE__ */ new Map();
17717
- for (const row of rs.rows) {
17718
- map.set(row.id, row.content_hash);
17272
+ const index = this.chunkIndex(scope);
17273
+ let cursor = "0";
17274
+ try {
17275
+ for (; ; ) {
17276
+ const result = await index.range({ cursor, limit: 100 });
17277
+ for (const doc of result.documents) {
17278
+ if (doc.metadata?.contentHash) {
17279
+ map.set(doc.id, doc.metadata.contentHash);
17280
+ }
17281
+ }
17282
+ if (!result.nextCursor || result.nextCursor === "0") break;
17283
+ cursor = result.nextCursor;
17284
+ }
17285
+ } catch {
17719
17286
  }
17720
17287
  return map;
17721
17288
  }
17722
17289
  async upsertPages(pages, scope) {
17723
17290
  if (pages.length === 0) return;
17724
- await this.ensurePages();
17725
- for (const page of pages) {
17726
- if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
17727
- throw new Error(
17728
- `Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
17729
- );
17730
- }
17731
- }
17732
- const BATCH_SIZE = 100;
17291
+ const index = this.pageIndex(scope);
17292
+ const BATCH_SIZE = 50;
17733
17293
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17734
17294
  const batch = pages.slice(i, i + BATCH_SIZE);
17735
- const stmts = batch.map((p) => ({
17736
- sql: `INSERT OR REPLACE INTO pages
17737
- (project_id, scope_name, url, title, markdown, route_file,
17738
- route_resolution, incoming_links, outgoing_links, depth, tags, indexed_at)
17739
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17740
- args: [
17741
- p.projectId,
17742
- p.scopeName,
17743
- p.url,
17744
- p.title,
17745
- p.markdown,
17746
- p.routeFile,
17747
- p.routeResolution,
17748
- p.incomingLinks,
17749
- p.outgoingLinks,
17750
- p.depth,
17751
- JSON.stringify(p.tags),
17752
- p.indexedAt
17753
- ]
17295
+ const docs = batch.map((p) => ({
17296
+ id: p.url,
17297
+ content: {
17298
+ title: p.title,
17299
+ url: p.url,
17300
+ type: "page",
17301
+ description: p.description ?? "",
17302
+ keywords: (p.keywords ?? []).join(","),
17303
+ summary: p.summary ?? "",
17304
+ tags: p.tags.join(",")
17305
+ },
17306
+ metadata: {
17307
+ markdown: p.markdown,
17308
+ projectId: p.projectId,
17309
+ scopeName: p.scopeName,
17310
+ routeFile: p.routeFile,
17311
+ routeResolution: p.routeResolution,
17312
+ incomingLinks: p.incomingLinks,
17313
+ outgoingLinks: p.outgoingLinks,
17314
+ depth: p.depth,
17315
+ indexedAt: p.indexedAt
17316
+ }
17754
17317
  }));
17755
- await this.client.batch(stmts);
17318
+ await index.upsert(docs);
17756
17319
  }
17757
17320
  }
17758
17321
  async getPage(url, scope) {
17759
- await this.ensurePages();
17760
- const rs = await this.client.execute({
17761
- sql: `SELECT * FROM pages WHERE project_id = ? AND scope_name = ? AND url = ?`,
17762
- args: [scope.projectId, scope.scopeName, url]
17763
- });
17764
- if (rs.rows.length === 0) return null;
17765
- const row = rs.rows[0];
17766
- return {
17767
- url: row.url,
17768
- title: row.title,
17769
- markdown: row.markdown,
17770
- projectId: row.project_id,
17771
- scopeName: row.scope_name,
17772
- routeFile: row.route_file,
17773
- routeResolution: row.route_resolution,
17774
- incomingLinks: row.incoming_links,
17775
- outgoingLinks: row.outgoing_links,
17776
- depth: row.depth,
17777
- tags: JSON.parse(row.tags || "[]"),
17778
- indexedAt: row.indexed_at
17779
- };
17322
+ const index = this.pageIndex(scope);
17323
+ try {
17324
+ const results = await index.fetch([url]);
17325
+ const doc = results[0];
17326
+ if (!doc) return null;
17327
+ return {
17328
+ url: doc.content.url,
17329
+ title: doc.content.title,
17330
+ markdown: doc.metadata.markdown,
17331
+ projectId: doc.metadata.projectId,
17332
+ scopeName: doc.metadata.scopeName,
17333
+ routeFile: doc.metadata.routeFile,
17334
+ routeResolution: doc.metadata.routeResolution,
17335
+ incomingLinks: doc.metadata.incomingLinks,
17336
+ outgoingLinks: doc.metadata.outgoingLinks,
17337
+ depth: doc.metadata.depth,
17338
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17339
+ indexedAt: doc.metadata.indexedAt,
17340
+ summary: doc.content.summary || void 0,
17341
+ description: doc.content.description || void 0,
17342
+ keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17343
+ };
17344
+ } catch {
17345
+ return null;
17346
+ }
17780
17347
  }
17781
17348
  async deletePages(scope) {
17782
- await this.ensurePages();
17783
- await this.client.execute({
17784
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17785
- args: [scope.projectId, scope.scopeName]
17786
- });
17787
- }
17788
- async getScopeModelId(scope) {
17789
- await this.ensureRegistry();
17790
- const rs = await this.client.execute({
17791
- sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
17792
- args: [scope.projectId, scope.scopeName]
17793
- });
17794
- if (rs.rows.length === 0) return null;
17795
- return rs.rows[0].model_id;
17349
+ try {
17350
+ const index = this.pageIndex(scope);
17351
+ await index.reset();
17352
+ } catch {
17353
+ }
17796
17354
  }
17797
17355
  async health() {
17798
17356
  try {
17799
- await this.client.execute("SELECT 1");
17357
+ await this.client.info();
17800
17358
  return { ok: true };
17801
17359
  } catch (error) {
17802
17360
  return {
@@ -17805,40 +17363,34 @@ var TursoVectorStore = class {
17805
17363
  };
17806
17364
  }
17807
17365
  }
17366
+ async dropAllIndexes(projectId) {
17367
+ const allIndexes = await this.client.listIndexes();
17368
+ const prefix = `${projectId}--`;
17369
+ for (const name of allIndexes) {
17370
+ if (name.startsWith(prefix)) {
17371
+ try {
17372
+ const index = this.client.index(name);
17373
+ await index.deleteIndex();
17374
+ } catch {
17375
+ }
17376
+ }
17377
+ }
17378
+ }
17808
17379
  };
17809
17380
 
17810
17381
  // src/vector/factory.ts
17811
- async function createVectorStore(config, cwd) {
17812
- const turso = config.vector.turso;
17813
- const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17814
- if (remoteUrl) {
17815
- const { createClient: createClient2 } = await import('@libsql/client/http');
17816
- const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17817
- const client2 = createClient2({
17818
- url: remoteUrl,
17819
- authToken
17820
- });
17821
- return new TursoVectorStore({
17822
- client: client2,
17823
- dimension: config.vector.dimension
17824
- });
17825
- }
17826
- if (isServerless()) {
17382
+ async function createUpstashStore(config) {
17383
+ const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
17384
+ const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
17385
+ if (!url || !token) {
17827
17386
  throw new SearchSocketError(
17828
17387
  "VECTOR_BACKEND_UNAVAILABLE",
17829
- `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
17388
+ `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17830
17389
  );
17831
17390
  }
17832
- const { createClient } = await import('@libsql/client');
17833
- const localPath = path__default.default.resolve(cwd, turso.localPath);
17834
- fs__default.default.mkdirSync(path__default.default.dirname(localPath), { recursive: true });
17835
- const client = createClient({
17836
- url: `file:${localPath}`
17837
- });
17838
- return new TursoVectorStore({
17839
- client,
17840
- dimension: config.vector.dimension
17841
- });
17391
+ const { Search } = await import('@upstash/search');
17392
+ const client = new Search({ url, token });
17393
+ return new UpstashSearchStore({ client });
17842
17394
  }
17843
17395
  function sha1(input) {
17844
17396
  return crypto.createHash("sha1").update(input).digest("hex");
@@ -17857,13 +17409,6 @@ function normalizeUrlPath(rawPath) {
17857
17409
  }
17858
17410
  return out;
17859
17411
  }
17860
- function urlPathToMirrorRelative(urlPath) {
17861
- const normalized = normalizeUrlPath(urlPath);
17862
- if (normalized === "/") {
17863
- return "index.md";
17864
- }
17865
- return `${normalized.slice(1)}.md`;
17866
- }
17867
17412
  function staticHtmlFileToUrl(filePath, rootDir) {
17868
17413
  const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
17869
17414
  if (relative === "index.html") {
@@ -18138,7 +17683,7 @@ function buildEmbeddingText(chunk, prependTitle) {
18138
17683
 
18139
17684
  ${chunk.chunkText}`;
18140
17685
  }
18141
- function chunkMirrorPage(page, config, scope) {
17686
+ function chunkPage(page, config, scope) {
18142
17687
  const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
18143
17688
  const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
18144
17689
  const chunks = [];
@@ -19169,53 +18714,6 @@ function extractFromMarkdown(url, markdown, title) {
19169
18714
  weight: mdWeight
19170
18715
  };
19171
18716
  }
19172
- function yamlString(value) {
19173
- return JSON.stringify(value);
19174
- }
19175
- function yamlArray(values) {
19176
- return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
19177
- }
19178
- function buildMirrorMarkdown(page) {
19179
- const frontmatterLines = [
19180
- "---",
19181
- `url: ${yamlString(page.url)}`,
19182
- `title: ${yamlString(page.title)}`,
19183
- `scope: ${yamlString(page.scope)}`,
19184
- `routeFile: ${yamlString(page.routeFile)}`,
19185
- `routeResolution: ${yamlString(page.routeResolution)}`,
19186
- `generatedAt: ${yamlString(page.generatedAt)}`,
19187
- `incomingLinks: ${page.incomingLinks}`,
19188
- `outgoingLinks: ${page.outgoingLinks}`,
19189
- `depth: ${page.depth}`,
19190
- `tags: ${yamlArray(page.tags)}`,
19191
- "---",
19192
- ""
19193
- ];
19194
- return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
19195
- }
19196
- function stripGeneratedAt(content) {
19197
- return content.replace(/^generatedAt: .*$/m, "");
19198
- }
19199
- async function writeMirrorPage(statePath, scope, page) {
19200
- const relative = urlPathToMirrorRelative(page.url);
19201
- const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
19202
- await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
19203
- const newContent = buildMirrorMarkdown(page);
19204
- try {
19205
- const existing = await fs4__default.default.readFile(outputPath, "utf8");
19206
- if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
19207
- return outputPath;
19208
- }
19209
- } catch {
19210
- }
19211
- await fs4__default.default.writeFile(outputPath, newContent, "utf8");
19212
- return outputPath;
19213
- }
19214
- async function cleanMirrorForScope(statePath, scope) {
19215
- const target = path__default.default.join(statePath, "pages", scope.scopeName);
19216
- await fs4__default.default.rm(target, { recursive: true, force: true });
19217
- await fs4__default.default.mkdir(target, { recursive: true });
19218
- }
19219
18717
  function segmentToRegex(segment) {
19220
18718
  if (segment.startsWith("(") && segment.endsWith(")")) {
19221
18719
  return { regex: "", score: 0 };
@@ -19408,7 +18906,7 @@ async function parseManifest(cwd, outputDir) {
19408
18906
  const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
19409
18907
  let content;
19410
18908
  try {
19411
- content = await fs4__default.default.readFile(manifestPath, "utf8");
18909
+ content = await fs3__default.default.readFile(manifestPath, "utf8");
19412
18910
  } catch {
19413
18911
  throw new SearchSocketError(
19414
18912
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19581,7 +19079,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
19581
19079
  const visited = /* @__PURE__ */ new Set();
19582
19080
  const pages = [];
19583
19081
  const queue = [];
19584
- const limit = pLimit2__default.default(8);
19082
+ const limit = pLimit__default.default(8);
19585
19083
  for (const seed of seedUrls) {
19586
19084
  const normalized = normalizeUrlPath(seed);
19587
19085
  if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
@@ -19663,7 +19161,7 @@ async function loadBuildPages(cwd, config, maxPages) {
19663
19161
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
19664
19162
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
19665
19163
  try {
19666
- const concurrencyLimit = pLimit2__default.default(8);
19164
+ const concurrencyLimit = pLimit__default.default(8);
19667
19165
  const results = await Promise.allSettled(
19668
19166
  selected.map(
19669
19167
  (route) => concurrencyLimit(async () => {
@@ -19737,7 +19235,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
19737
19235
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
19738
19236
  const pages = [];
19739
19237
  for (const filePath of selected) {
19740
- const raw = await fs4__default.default.readFile(filePath, "utf8");
19238
+ const raw = await fs3__default.default.readFile(filePath, "utf8");
19741
19239
  const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
19742
19240
  pages.push({
19743
19241
  url: filePathToUrl(filePath, baseDir),
@@ -19832,7 +19330,7 @@ async function loadCrawledPages(config, maxPages) {
19832
19330
  const routes = await resolveRoutes(config);
19833
19331
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
19834
19332
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
19835
- const concurrencyLimit = pLimit2__default.default(8);
19333
+ const concurrencyLimit = pLimit__default.default(8);
19836
19334
  const results = await Promise.allSettled(
19837
19335
  selected.map(
19838
19336
  (route) => concurrencyLimit(async () => {
@@ -19873,7 +19371,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
19873
19371
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
19874
19372
  const pages = [];
19875
19373
  for (const filePath of selected) {
19876
- const html = await fs4__default.default.readFile(filePath, "utf8");
19374
+ const html = await fs3__default.default.readFile(filePath, "utf8");
19877
19375
  pages.push({
19878
19376
  url: staticHtmlFileToUrl(filePath, outputDir),
19879
19377
  html,
@@ -19936,7 +19434,7 @@ function isBlockedByRobots(urlPath, rules3) {
19936
19434
  }
19937
19435
  async function loadRobotsTxtFromDir(dir) {
19938
19436
  try {
19939
- const content = await fs4__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
19437
+ const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
19940
19438
  return parseRobotsTxt(content);
19941
19439
  } catch {
19942
19440
  return null;
@@ -19961,7 +19459,12 @@ function nonNegativeOrZero(value) {
19961
19459
  }
19962
19460
  return Math.max(0, value);
19963
19461
  }
19964
- function rankHits(hits, config) {
19462
+ function normalizeForTitleMatch(text) {
19463
+ return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
19464
+ }
19465
+ function rankHits(hits, config, query) {
19466
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
19467
+ const titleMatchWeight = config.ranking.weights.titleMatch;
19965
19468
  return hits.map((hit) => {
19966
19469
  let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
19967
19470
  if (config.ranking.enableIncomingLinkBoost) {
@@ -19972,6 +19475,12 @@ function rankHits(hits, config) {
19972
19475
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
19973
19476
  score += depthBoost * config.ranking.weights.depth;
19974
19477
  }
19478
+ if (normalizedQuery && titleMatchWeight > 0) {
19479
+ const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
19480
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
19481
+ score += titleMatchWeight;
19482
+ }
19483
+ }
19975
19484
  return {
19976
19485
  hit,
19977
19486
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
@@ -19981,6 +19490,30 @@ function rankHits(hits, config) {
19981
19490
  return Number.isNaN(delta) ? 0 : delta;
19982
19491
  });
19983
19492
  }
19493
+ function trimByScoreGap(results, config) {
19494
+ if (results.length === 0) return results;
19495
+ const threshold = config.ranking.scoreGapThreshold;
19496
+ const minScore = config.ranking.minScore;
19497
+ if (minScore > 0 && results.length > 0) {
19498
+ const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
19499
+ const mid = Math.floor(sortedScores.length / 2);
19500
+ const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
19501
+ if (median < minScore) return [];
19502
+ }
19503
+ if (threshold > 0 && results.length > 1) {
19504
+ for (let i = 1; i < results.length; i++) {
19505
+ const prev = results[i - 1].pageScore;
19506
+ const current = results[i].pageScore;
19507
+ if (prev > 0) {
19508
+ const gap = (prev - current) / prev;
19509
+ if (gap >= threshold) {
19510
+ return results.slice(0, i);
19511
+ }
19512
+ }
19513
+ }
19514
+ }
19515
+ return results;
19516
+ }
19984
19517
  function findPageWeight(url, pageWeights) {
19985
19518
  let bestPattern = "";
19986
19519
  let bestWeight = 1;
@@ -20035,6 +19568,61 @@ function aggregateByPage(ranked, config) {
20035
19568
  return Number.isNaN(delta) ? 0 : delta;
20036
19569
  });
20037
19570
  }
19571
+ function mergePageAndChunkResults(pageHits, rankedChunks, config) {
19572
+ if (pageHits.length === 0) return rankedChunks;
19573
+ const w = config.search.pageSearchWeight;
19574
+ const pageScoreMap = /* @__PURE__ */ new Map();
19575
+ for (const ph of pageHits) {
19576
+ pageScoreMap.set(ph.url, ph);
19577
+ }
19578
+ const pagesWithChunks = /* @__PURE__ */ new Set();
19579
+ const merged = rankedChunks.map((ranked) => {
19580
+ const url = ranked.hit.metadata.url;
19581
+ const pageHit = pageScoreMap.get(url);
19582
+ if (pageHit) {
19583
+ pagesWithChunks.add(url);
19584
+ const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
19585
+ return {
19586
+ hit: ranked.hit,
19587
+ finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
19588
+ };
19589
+ }
19590
+ return ranked;
19591
+ });
19592
+ for (const [url, pageHit] of pageScoreMap) {
19593
+ if (pagesWithChunks.has(url)) continue;
19594
+ const syntheticScore = pageHit.score * w;
19595
+ const syntheticHit = {
19596
+ id: `page:${url}`,
19597
+ score: pageHit.score,
19598
+ metadata: {
19599
+ projectId: "",
19600
+ scopeName: "",
19601
+ url: pageHit.url,
19602
+ path: pageHit.url,
19603
+ title: pageHit.title,
19604
+ sectionTitle: "",
19605
+ headingPath: [],
19606
+ snippet: pageHit.description || pageHit.title,
19607
+ chunkText: pageHit.description || pageHit.title,
19608
+ ordinal: 0,
19609
+ contentHash: "",
19610
+ depth: pageHit.depth,
19611
+ incomingLinks: pageHit.incomingLinks,
19612
+ routeFile: pageHit.routeFile,
19613
+ tags: pageHit.tags
19614
+ }
19615
+ };
19616
+ merged.push({
19617
+ hit: syntheticHit,
19618
+ finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
19619
+ });
19620
+ }
19621
+ return merged.sort((a, b) => {
19622
+ const delta = b.finalScore - a.finalScore;
19623
+ return Number.isNaN(delta) ? 0 : delta;
19624
+ });
19625
+ }
20038
19626
 
20039
19627
  // src/utils/time.ts
20040
19628
  function nowIso() {
@@ -20045,34 +19633,41 @@ function hrTimeMs(start) {
20045
19633
  }
20046
19634
 
20047
19635
  // src/indexing/pipeline.ts
20048
- var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
20049
- "jina-embeddings-v3": 2e-5,
20050
- "jina-embeddings-v5-text-small": 5e-5
20051
- };
20052
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 5e-5;
19636
+ function buildPageSummary(page, maxChars = 3500) {
19637
+ const parts = [page.title];
19638
+ if (page.description) {
19639
+ parts.push(page.description);
19640
+ }
19641
+ if (page.keywords && page.keywords.length > 0) {
19642
+ parts.push(page.keywords.join(", "));
19643
+ }
19644
+ const plainBody = page.markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/[>*_|~\-]/g, " ").replace(/\s+/g, " ").trim();
19645
+ if (plainBody) {
19646
+ parts.push(plainBody);
19647
+ }
19648
+ const joined = parts.join("\n\n");
19649
+ if (joined.length <= maxChars) return joined;
19650
+ return joined.slice(0, maxChars).trim();
19651
+ }
20053
19652
  var IndexPipeline = class _IndexPipeline {
20054
19653
  cwd;
20055
19654
  config;
20056
- embeddings;
20057
- vectorStore;
19655
+ store;
20058
19656
  logger;
20059
19657
  constructor(options) {
20060
19658
  this.cwd = options.cwd;
20061
19659
  this.config = options.config;
20062
- this.embeddings = options.embeddings;
20063
- this.vectorStore = options.vectorStore;
19660
+ this.store = options.store;
20064
19661
  this.logger = options.logger;
20065
19662
  }
20066
19663
  static async create(options = {}) {
20067
19664
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
20068
19665
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20069
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20070
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
19666
+ const store = options.store ?? await createUpstashStore(config);
20071
19667
  return new _IndexPipeline({
20072
19668
  cwd,
20073
19669
  config,
20074
- embeddings,
20075
- vectorStore,
19670
+ store,
20076
19671
  logger: options.logger ?? new Logger()
20077
19672
  });
20078
19673
  }
@@ -20092,25 +19687,17 @@ var IndexPipeline = class _IndexPipeline {
20092
19687
  stageTimingsMs[name] = Math.round(hrTimeMs(start));
20093
19688
  };
20094
19689
  const scope = resolveScope(this.config, options.scopeOverride);
20095
- const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
19690
+ ensureStateDirs(this.cwd, this.config.state.dir);
20096
19691
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
20097
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
19692
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20098
19693
  if (options.force) {
20099
19694
  this.logger.info("Force mode enabled \u2014 full rebuild");
20100
- await cleanMirrorForScope(statePath, scope);
20101
19695
  }
20102
19696
  if (options.dryRun) {
20103
19697
  this.logger.info("Dry run \u2014 no writes will be performed");
20104
19698
  }
20105
19699
  const manifestStart = stageStart();
20106
- const existingHashes = await this.vectorStore.getContentHashes(scope);
20107
- const existingModelId = await this.vectorStore.getScopeModelId(scope);
20108
- if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
20109
- throw new SearchSocketError(
20110
- "EMBEDDING_MODEL_MISMATCH",
20111
- `Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
20112
- );
20113
- }
19700
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20114
19701
  stageEnd("manifest", manifestStart);
20115
19702
  this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20116
19703
  const sourceStart = stageStart();
@@ -20239,9 +19826,9 @@ var IndexPipeline = class _IndexPipeline {
20239
19826
  }
20240
19827
  stageEnd("links", linkStart);
20241
19828
  this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
20242
- const mirrorStart = stageStart();
20243
- this.logger.info("Writing mirror pages...");
20244
- const mirrorPages = [];
19829
+ const pagesStart = stageStart();
19830
+ this.logger.info("Building indexed pages...");
19831
+ const pages = [];
20245
19832
  let routeExact = 0;
20246
19833
  let routeBestEffort = 0;
20247
19834
  const precomputedRoutes = /* @__PURE__ */ new Map();
@@ -20270,7 +19857,7 @@ var IndexPipeline = class _IndexPipeline {
20270
19857
  } else {
20271
19858
  routeExact += 1;
20272
19859
  }
20273
- const mirror = {
19860
+ const indexedPage = {
20274
19861
  url: page.url,
20275
19862
  title: page.title,
20276
19863
  scope: scope.scopeName,
@@ -20285,35 +19872,38 @@ var IndexPipeline = class _IndexPipeline {
20285
19872
  description: page.description,
20286
19873
  keywords: page.keywords
20287
19874
  };
20288
- mirrorPages.push(mirror);
20289
- if (this.config.state.writeMirror) {
20290
- await writeMirrorPage(statePath, scope, mirror);
20291
- }
20292
- this.logger.event("markdown_written", { url: page.url });
19875
+ pages.push(indexedPage);
19876
+ this.logger.event("page_indexed", { url: page.url });
20293
19877
  }
20294
19878
  if (!options.dryRun) {
20295
- const pageRecords = mirrorPages.map((mp) => ({
20296
- url: mp.url,
20297
- title: mp.title,
20298
- markdown: mp.markdown,
20299
- projectId: scope.projectId,
20300
- scopeName: scope.scopeName,
20301
- routeFile: mp.routeFile,
20302
- routeResolution: mp.routeResolution,
20303
- incomingLinks: mp.incomingLinks,
20304
- outgoingLinks: mp.outgoingLinks,
20305
- depth: mp.depth,
20306
- tags: mp.tags,
20307
- indexedAt: mp.generatedAt
20308
- }));
20309
- await this.vectorStore.deletePages(scope);
20310
- await this.vectorStore.upsertPages(pageRecords, scope);
19879
+ const pageRecords = pages.map((p) => {
19880
+ const summary = buildPageSummary(p);
19881
+ return {
19882
+ url: p.url,
19883
+ title: p.title,
19884
+ markdown: p.markdown,
19885
+ projectId: scope.projectId,
19886
+ scopeName: scope.scopeName,
19887
+ routeFile: p.routeFile,
19888
+ routeResolution: p.routeResolution,
19889
+ incomingLinks: p.incomingLinks,
19890
+ outgoingLinks: p.outgoingLinks,
19891
+ depth: p.depth,
19892
+ tags: p.tags,
19893
+ indexedAt: p.generatedAt,
19894
+ summary,
19895
+ description: p.description,
19896
+ keywords: p.keywords
19897
+ };
19898
+ });
19899
+ await this.store.deletePages(scope);
19900
+ await this.store.upsertPages(pageRecords, scope);
20311
19901
  }
20312
- stageEnd("mirror", mirrorStart);
20313
- this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
19902
+ stageEnd("pages", pagesStart);
19903
+ this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
20314
19904
  const chunkStart = stageStart();
20315
19905
  this.logger.info("Chunking pages...");
20316
- let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
19906
+ let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
20317
19907
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
20318
19908
  if (typeof maxChunks === "number") {
20319
19909
  chunks = chunks.slice(0, maxChunks);
@@ -20345,125 +19935,59 @@ var IndexPipeline = class _IndexPipeline {
20345
19935
  });
20346
19936
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20347
19937
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20348
- const embedStart = stageStart();
20349
- const chunkTokenEstimates = /* @__PURE__ */ new Map();
20350
- for (const chunk of changedChunks) {
20351
- chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
20352
- }
20353
- const estimatedTokens = changedChunks.reduce(
20354
- (sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
20355
- 0
20356
- );
20357
- const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
20358
- const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
20359
- let newEmbeddings = 0;
20360
- const vectorsByChunk = /* @__PURE__ */ new Map();
19938
+ const upsertStart = stageStart();
19939
+ let documentsUpserted = 0;
20361
19940
  if (!options.dryRun && changedChunks.length > 0) {
20362
- this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
20363
- const embeddings = await this.embeddings.embedTexts(
20364
- changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
20365
- this.config.embeddings.model,
20366
- "retrieval.passage"
20367
- );
20368
- if (embeddings.length !== changedChunks.length) {
20369
- throw new SearchSocketError(
20370
- "VECTOR_BACKEND_UNAVAILABLE",
20371
- `Embedding provider returned ${embeddings.length} vectors for ${changedChunks.length} chunks.`
20372
- );
20373
- }
20374
- for (let i = 0; i < changedChunks.length; i += 1) {
20375
- const chunk = changedChunks[i];
20376
- const embedding = embeddings[i];
20377
- if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
20378
- throw new SearchSocketError(
20379
- "VECTOR_BACKEND_UNAVAILABLE",
20380
- `Embedding provider returned an invalid vector for chunk index ${i}.`
20381
- );
20382
- }
20383
- vectorsByChunk.set(chunk.chunkKey, embedding);
20384
- newEmbeddings += 1;
20385
- this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
20386
- }
20387
- }
20388
- stageEnd("embedding", embedStart);
20389
- if (changedChunks.length > 0) {
20390
- this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20391
- } else {
20392
- this.logger.info("No chunks to embed \u2014 all up to date");
20393
- }
20394
- const syncStart = stageStart();
20395
- if (!options.dryRun) {
20396
- this.logger.info("Syncing vectors...");
20397
- const upserts = [];
20398
- for (const chunk of changedChunks) {
20399
- const vector = vectorsByChunk.get(chunk.chunkKey);
20400
- if (!vector) {
20401
- continue;
20402
- }
20403
- upserts.push({
19941
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
19942
+ const UPSTASH_CONTENT_LIMIT = 4096;
19943
+ const docs = changedChunks.map((chunk) => {
19944
+ const title = chunk.title;
19945
+ const sectionTitle = chunk.sectionTitle ?? "";
19946
+ const url = chunk.url;
19947
+ const tags = chunk.tags.join(",");
19948
+ const headingPath = chunk.headingPath.join(" > ");
19949
+ const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
19950
+ const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
19951
+ const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
19952
+ return {
20404
19953
  id: chunk.chunkKey,
20405
- vector,
19954
+ content: { title, sectionTitle, text, url, tags, headingPath },
20406
19955
  metadata: {
20407
19956
  projectId: scope.projectId,
20408
19957
  scopeName: scope.scopeName,
20409
- url: chunk.url,
20410
19958
  path: chunk.path,
20411
- title: chunk.title,
20412
- sectionTitle: chunk.sectionTitle ?? "",
20413
- headingPath: chunk.headingPath,
20414
19959
  snippet: chunk.snippet,
20415
- chunkText: chunk.chunkText.slice(0, 4e3),
20416
19960
  ordinal: chunk.ordinal,
20417
19961
  contentHash: chunk.contentHash,
20418
- modelId: this.config.embeddings.model,
20419
19962
  depth: chunk.depth,
20420
19963
  incomingLinks: chunk.incomingLinks,
20421
19964
  routeFile: chunk.routeFile,
20422
- tags: chunk.tags,
20423
- description: chunk.description,
20424
- keywords: chunk.keywords
19965
+ description: chunk.description ?? "",
19966
+ keywords: (chunk.keywords ?? []).join(",")
20425
19967
  }
20426
- });
20427
- }
20428
- if (upserts.length > 0) {
20429
- await this.vectorStore.upsert(upserts, scope);
20430
- this.logger.event("upserted", { count: upserts.length });
20431
- }
20432
- if (deletes.length > 0) {
20433
- await this.vectorStore.deleteByIds(deletes, scope);
20434
- this.logger.event("deleted", { count: deletes.length });
20435
- }
20436
- }
20437
- stageEnd("sync", syncStart);
20438
- this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
20439
- const finalizeStart = stageStart();
20440
- if (!options.dryRun) {
20441
- const scopeInfo = {
20442
- projectId: scope.projectId,
20443
- scopeName: scope.scopeName,
20444
- modelId: this.config.embeddings.model,
20445
- lastIndexedAt: nowIso(),
20446
- vectorCount: chunks.length,
20447
- lastEstimateTokens: estimatedTokens,
20448
- lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
20449
- lastEstimateChangedChunks: changedChunks.length
20450
- };
20451
- await this.vectorStore.recordScope(scopeInfo);
20452
- this.logger.event("registry_updated", {
20453
- scope: scope.scopeName,
20454
- vectorCount: chunks.length
19968
+ };
20455
19969
  });
19970
+ await this.store.upsertChunks(docs, scope);
19971
+ documentsUpserted = docs.length;
19972
+ this.logger.event("upserted", { count: docs.length });
19973
+ }
19974
+ if (!options.dryRun && deletes.length > 0) {
19975
+ await this.store.deleteByIds(deletes, scope);
19976
+ this.logger.event("deleted", { count: deletes.length });
19977
+ }
19978
+ stageEnd("upsert", upsertStart);
19979
+ if (changedChunks.length > 0) {
19980
+ this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
19981
+ } else {
19982
+ this.logger.info("No chunks to upsert \u2014 all up to date");
20456
19983
  }
20457
- stageEnd("finalize", finalizeStart);
20458
19984
  this.logger.info("Done.");
20459
19985
  return {
20460
- pagesProcessed: mirrorPages.length,
19986
+ pagesProcessed: pages.length,
20461
19987
  chunksTotal: chunks.length,
20462
19988
  chunksChanged: changedChunks.length,
20463
- newEmbeddings,
19989
+ documentsUpserted,
20464
19990
  deletes: deletes.length,
20465
- estimatedTokens,
20466
- estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
20467
19991
  routeExact,
20468
19992
  routeBestEffort,
20469
19993
  stageTimingsMs
@@ -20476,35 +20000,25 @@ var requestSchema = zod.z.object({
20476
20000
  scope: zod.z.string().optional(),
20477
20001
  pathPrefix: zod.z.string().optional(),
20478
20002
  tags: zod.z.array(zod.z.string()).optional(),
20479
- rerank: zod.z.boolean().optional(),
20480
- groupBy: zod.z.enum(["page", "chunk"]).optional(),
20481
- stream: zod.z.boolean().optional()
20003
+ groupBy: zod.z.enum(["page", "chunk"]).optional()
20482
20004
  });
20483
20005
  var SearchEngine = class _SearchEngine {
20484
20006
  cwd;
20485
20007
  config;
20486
- embeddings;
20487
- vectorStore;
20488
- reranker;
20008
+ store;
20489
20009
  constructor(options) {
20490
20010
  this.cwd = options.cwd;
20491
20011
  this.config = options.config;
20492
- this.embeddings = options.embeddings;
20493
- this.vectorStore = options.vectorStore;
20494
- this.reranker = options.reranker;
20012
+ this.store = options.store;
20495
20013
  }
20496
20014
  static async create(options = {}) {
20497
20015
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
20498
20016
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20499
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20500
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
20501
- const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
20017
+ const store = options.store ?? await createUpstashStore(config);
20502
20018
  return new _SearchEngine({
20503
20019
  cwd,
20504
20020
  config,
20505
- embeddings,
20506
- vectorStore,
20507
- reranker
20021
+ store
20508
20022
  });
20509
20023
  }
20510
20024
  getConfig() {
@@ -20518,142 +20032,90 @@ var SearchEngine = class _SearchEngine {
20518
20032
  const input = parsed.data;
20519
20033
  const totalStart = process.hrtime.bigint();
20520
20034
  const resolvedScope = resolveScope(this.config, input.scope);
20521
- await this.assertModelCompatibility(resolvedScope);
20522
20035
  const topK = input.topK ?? 10;
20523
- const wantsRerank = Boolean(input.rerank);
20524
20036
  const groupByPage = (input.groupBy ?? "page") === "page";
20525
20037
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20526
- const embedStart = process.hrtime.bigint();
20527
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20528
- const queryVector = queryEmbeddings[0];
20529
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20530
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
20531
- }
20532
- const embedMs = hrTimeMs(embedStart);
20533
- const vectorStart = process.hrtime.bigint();
20534
- const hits = await this.vectorStore.query(
20535
- queryVector,
20536
- {
20537
- topK: candidateK,
20538
- pathPrefix: input.pathPrefix,
20539
- tags: input.tags
20540
- },
20541
- resolvedScope
20542
- );
20543
- const vectorMs = hrTimeMs(vectorStart);
20544
- const ranked = rankHits(hits, this.config);
20545
- let usedRerank = false;
20546
- let rerankMs = 0;
20547
- let ordered = ranked;
20548
- if (wantsRerank) {
20549
- const rerankStart = process.hrtime.bigint();
20550
- ordered = await this.rerankHits(input.q, ranked, topK);
20551
- rerankMs = hrTimeMs(rerankStart);
20552
- usedRerank = true;
20038
+ const filterParts = [];
20039
+ if (input.pathPrefix) {
20040
+ const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
20041
+ filterParts.push(`url GLOB '${prefix}*'`);
20042
+ }
20043
+ if (input.tags && input.tags.length > 0) {
20044
+ for (const tag of input.tags) {
20045
+ filterParts.push(`tags GLOB '*${tag}*'`);
20046
+ }
20047
+ }
20048
+ const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
20049
+ const useDualSearch = this.config.search.dualSearch && groupByPage;
20050
+ const searchStart = process.hrtime.bigint();
20051
+ let ranked;
20052
+ if (useDualSearch) {
20053
+ const chunkLimit = Math.max(topK * 10, 100);
20054
+ const pageLimit = 20;
20055
+ const [pageHits, chunkHits] = await Promise.all([
20056
+ this.store.searchPages(
20057
+ input.q,
20058
+ {
20059
+ limit: pageLimit,
20060
+ semanticWeight: this.config.search.semanticWeight,
20061
+ inputEnrichment: this.config.search.inputEnrichment,
20062
+ filter
20063
+ },
20064
+ resolvedScope
20065
+ ),
20066
+ this.store.search(
20067
+ input.q,
20068
+ {
20069
+ limit: chunkLimit,
20070
+ semanticWeight: this.config.search.semanticWeight,
20071
+ inputEnrichment: this.config.search.inputEnrichment,
20072
+ reranking: false,
20073
+ filter
20074
+ },
20075
+ resolvedScope
20076
+ )
20077
+ ]);
20078
+ const rankedChunks = rankHits(chunkHits, this.config, input.q);
20079
+ ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
20080
+ } else {
20081
+ const hits = await this.store.search(
20082
+ input.q,
20083
+ {
20084
+ limit: candidateK,
20085
+ semanticWeight: this.config.search.semanticWeight,
20086
+ inputEnrichment: this.config.search.inputEnrichment,
20087
+ reranking: this.config.search.reranking,
20088
+ filter
20089
+ },
20090
+ resolvedScope
20091
+ );
20092
+ ranked = rankHits(hits, this.config, input.q);
20553
20093
  }
20554
- const results = this.buildResults(ordered, topK, groupByPage);
20094
+ const searchMs = hrTimeMs(searchStart);
20095
+ const results = this.buildResults(ranked, topK, groupByPage, input.q);
20555
20096
  return {
20556
20097
  q: input.q,
20557
20098
  scope: resolvedScope.scopeName,
20558
20099
  results,
20559
20100
  meta: {
20560
20101
  timingsMs: {
20561
- embed: Math.round(embedMs),
20562
- vector: Math.round(vectorMs),
20563
- rerank: Math.round(rerankMs),
20102
+ search: Math.round(searchMs),
20564
20103
  total: Math.round(hrTimeMs(totalStart))
20565
- },
20566
- usedRerank,
20567
- modelId: this.config.embeddings.model
20568
- }
20569
- };
20570
- }
20571
- async *searchStreaming(request) {
20572
- const parsed = requestSchema.safeParse(request);
20573
- if (!parsed.success) {
20574
- throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
20575
- }
20576
- const input = parsed.data;
20577
- const wantsRerank = Boolean(input.rerank);
20578
- if (!wantsRerank) {
20579
- const response = await this.search(request);
20580
- yield { phase: "initial", data: response };
20581
- return;
20582
- }
20583
- const totalStart = process.hrtime.bigint();
20584
- const resolvedScope = resolveScope(this.config, input.scope);
20585
- await this.assertModelCompatibility(resolvedScope);
20586
- const topK = input.topK ?? 10;
20587
- const groupByPage = (input.groupBy ?? "page") === "page";
20588
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
20589
- const embedStart = process.hrtime.bigint();
20590
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
20591
- const queryVector = queryEmbeddings[0];
20592
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
20593
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
20594
- }
20595
- const embedMs = hrTimeMs(embedStart);
20596
- const vectorStart = process.hrtime.bigint();
20597
- const hits = await this.vectorStore.query(
20598
- queryVector,
20599
- {
20600
- topK: candidateK,
20601
- pathPrefix: input.pathPrefix,
20602
- tags: input.tags
20603
- },
20604
- resolvedScope
20605
- );
20606
- const vectorMs = hrTimeMs(vectorStart);
20607
- const ranked = rankHits(hits, this.config);
20608
- const initialResults = this.buildResults(ranked, topK, groupByPage);
20609
- yield {
20610
- phase: "initial",
20611
- data: {
20612
- q: input.q,
20613
- scope: resolvedScope.scopeName,
20614
- results: initialResults,
20615
- meta: {
20616
- timingsMs: {
20617
- embed: Math.round(embedMs),
20618
- vector: Math.round(vectorMs),
20619
- rerank: 0,
20620
- total: Math.round(hrTimeMs(totalStart))
20621
- },
20622
- usedRerank: false,
20623
- modelId: this.config.embeddings.model
20624
- }
20625
- }
20626
- };
20627
- const rerankStart = process.hrtime.bigint();
20628
- const reranked = await this.rerankHits(input.q, ranked, topK);
20629
- const rerankMs = hrTimeMs(rerankStart);
20630
- const rerankedResults = this.buildResults(reranked, topK, groupByPage);
20631
- yield {
20632
- phase: "reranked",
20633
- data: {
20634
- q: input.q,
20635
- scope: resolvedScope.scopeName,
20636
- results: rerankedResults,
20637
- meta: {
20638
- timingsMs: {
20639
- embed: Math.round(embedMs),
20640
- vector: Math.round(vectorMs),
20641
- rerank: Math.round(rerankMs),
20642
- total: Math.round(hrTimeMs(totalStart))
20643
- },
20644
- usedRerank: true,
20645
- modelId: this.config.embeddings.model
20646
20104
  }
20647
20105
  }
20648
20106
  };
20649
20107
  }
20650
- buildResults(ordered, topK, groupByPage) {
20651
- const minScore = this.config.ranking.minScore;
20108
+ ensureSnippet(hit) {
20109
+ const snippet = hit.hit.metadata.snippet;
20110
+ if (snippet && snippet.length >= 30) return snippet;
20111
+ const chunkText = hit.hit.metadata.chunkText;
20112
+ if (chunkText) return toSnippet(chunkText);
20113
+ return snippet || "";
20114
+ }
20115
+ buildResults(ordered, topK, groupByPage, _query) {
20652
20116
  if (groupByPage) {
20653
20117
  let pages = aggregateByPage(ordered, this.config);
20654
- if (minScore > 0) {
20655
- pages = pages.filter((p) => p.pageScore >= minScore);
20656
- }
20118
+ pages = trimByScoreGap(pages, this.config);
20657
20119
  const minRatio = this.config.ranking.minChunkScoreRatio;
20658
20120
  return pages.slice(0, topK).map((page) => {
20659
20121
  const bestScore = page.bestChunk.finalScore;
@@ -20663,12 +20125,12 @@ var SearchEngine = class _SearchEngine {
20663
20125
  url: page.url,
20664
20126
  title: page.title,
20665
20127
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
20666
- snippet: page.bestChunk.hit.metadata.snippet,
20128
+ snippet: this.ensureSnippet(page.bestChunk),
20667
20129
  score: Number(page.pageScore.toFixed(6)),
20668
20130
  routeFile: page.routeFile,
20669
20131
  chunks: meaningful.length > 1 ? meaningful.map((c) => ({
20670
20132
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
20671
- snippet: c.hit.metadata.snippet,
20133
+ snippet: this.ensureSnippet(c),
20672
20134
  headingPath: c.hit.metadata.headingPath,
20673
20135
  score: Number(c.finalScore.toFixed(6))
20674
20136
  })) : void 0
@@ -20676,6 +20138,7 @@ var SearchEngine = class _SearchEngine {
20676
20138
  });
20677
20139
  } else {
20678
20140
  let filtered = ordered;
20141
+ const minScore = this.config.ranking.minScore;
20679
20142
  if (minScore > 0) {
20680
20143
  filtered = ordered.filter((entry) => entry.finalScore >= minScore);
20681
20144
  }
@@ -20683,7 +20146,7 @@ var SearchEngine = class _SearchEngine {
20683
20146
  url: hit.metadata.url,
20684
20147
  title: hit.metadata.title,
20685
20148
  sectionTitle: hit.metadata.sectionTitle || void 0,
20686
- snippet: hit.metadata.snippet,
20149
+ snippet: this.ensureSnippet({ hit, finalScore }),
20687
20150
  score: Number(finalScore.toFixed(6)),
20688
20151
  routeFile: hit.metadata.routeFile
20689
20152
  }));
@@ -20692,7 +20155,7 @@ var SearchEngine = class _SearchEngine {
20692
20155
  async getPage(pathOrUrl, scope) {
20693
20156
  const resolvedScope = resolveScope(this.config, scope);
20694
20157
  const urlPath = this.resolveInputPath(pathOrUrl);
20695
- const page = await this.vectorStore.getPage(urlPath, resolvedScope);
20158
+ const page = await this.store.getPage(urlPath, resolvedScope);
20696
20159
  if (!page) {
20697
20160
  throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
20698
20161
  }
@@ -20713,7 +20176,7 @@ var SearchEngine = class _SearchEngine {
20713
20176
  };
20714
20177
  }
20715
20178
  async health() {
20716
- return this.vectorStore.health();
20179
+ return this.store.health();
20717
20180
  }
20718
20181
  resolveInputPath(pathOrUrl) {
20719
20182
  try {
@@ -20725,92 +20188,8 @@ var SearchEngine = class _SearchEngine {
20725
20188
  const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
20726
20189
  return normalizeUrlPath(withoutQueryOrHash);
20727
20190
  }
20728
- async assertModelCompatibility(scope) {
20729
- const modelId = await this.vectorStore.getScopeModelId(scope);
20730
- if (modelId && modelId !== this.config.embeddings.model) {
20731
- throw new SearchSocketError(
20732
- "EMBEDDING_MODEL_MISMATCH",
20733
- `Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
20734
- );
20735
- }
20736
- }
20737
- async rerankHits(query, ranked, topK) {
20738
- if (!this.config.rerank.enabled) {
20739
- throw new SearchSocketError(
20740
- "INVALID_REQUEST",
20741
- "rerank=true requested but rerank.enabled is not set to true.",
20742
- 400
20743
- );
20744
- }
20745
- if (!this.reranker) {
20746
- throw new SearchSocketError(
20747
- "CONFIG_MISSING",
20748
- `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
20749
- 400
20750
- );
20751
- }
20752
- const pageGroups = /* @__PURE__ */ new Map();
20753
- for (const entry of ranked) {
20754
- const url = entry.hit.metadata.url;
20755
- const group = pageGroups.get(url);
20756
- if (group) group.push(entry);
20757
- else pageGroups.set(url, [entry]);
20758
- }
20759
- const MAX_CHUNKS_PER_PAGE = 5;
20760
- const MIN_CHUNKS_PER_PAGE = 1;
20761
- const MIN_CHUNK_SCORE_RATIO = 0.5;
20762
- const MAX_DOC_CHARS = 2e3;
20763
- const pageCandidates = [];
20764
- for (const [url, chunks] of pageGroups) {
20765
- const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
20766
- const bestScore = byScore[0].finalScore;
20767
- const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
20768
- const selected = byScore.filter(
20769
- (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
20770
- ).slice(0, MAX_CHUNKS_PER_PAGE);
20771
- selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
20772
- const first = selected[0].hit.metadata;
20773
- const parts = [first.title];
20774
- if (first.description) {
20775
- parts.push(first.description);
20776
- }
20777
- if (first.keywords && first.keywords.length > 0) {
20778
- parts.push(first.keywords.join(", "));
20779
- }
20780
- const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
20781
- parts.push(body);
20782
- let text = parts.join("\n\n");
20783
- if (text.length > MAX_DOC_CHARS) {
20784
- text = text.slice(0, MAX_DOC_CHARS);
20785
- }
20786
- pageCandidates.push({ id: url, text });
20787
- }
20788
- const maxCandidates = Math.max(topK, this.config.rerank.topN);
20789
- const cappedCandidates = pageCandidates.slice(0, maxCandidates);
20790
- const reranked = await this.reranker.rerank(
20791
- query,
20792
- cappedCandidates,
20793
- maxCandidates
20794
- );
20795
- const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
20796
- return ranked.map((entry) => {
20797
- const pageScore = scoreByUrl.get(entry.hit.metadata.url);
20798
- const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
20799
- if (pageScore === void 0 || !Number.isFinite(pageScore)) {
20800
- return { ...entry, finalScore: base };
20801
- }
20802
- const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
20803
- return {
20804
- ...entry,
20805
- finalScore: Number.isFinite(combined) ? combined : base
20806
- };
20807
- }).sort((a, b) => {
20808
- const delta = b.finalScore - a.finalScore;
20809
- return Number.isNaN(delta) ? 0 : delta;
20810
- });
20811
- }
20812
20191
  };
20813
- function createServer(engine, config) {
20192
+ function createServer(engine) {
20814
20193
  const server = new mcp_js.McpServer({
20815
20194
  name: "searchsocket-mcp",
20816
20195
  version: "0.1.0"
@@ -20818,15 +20197,14 @@ function createServer(engine, config) {
20818
20197
  server.registerTool(
20819
20198
  "search",
20820
20199
  {
20821
- description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and rerank. Enable rerank for better relevance on natural-language queries.",
20200
+ description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
20822
20201
  inputSchema: {
20823
20202
  query: zod.z.string().min(1),
20824
20203
  scope: zod.z.string().optional(),
20825
20204
  topK: zod.z.number().int().positive().max(100).optional(),
20826
20205
  pathPrefix: zod.z.string().optional(),
20827
20206
  tags: zod.z.array(zod.z.string()).optional(),
20828
- groupBy: zod.z.enum(["page", "chunk"]).optional(),
20829
- rerank: zod.z.boolean().optional().describe("Enable reranking for better relevance (uses Jina Reranker). Defaults to true when rerank is enabled in config.")
20207
+ groupBy: zod.z.enum(["page", "chunk"]).optional()
20830
20208
  }
20831
20209
  },
20832
20210
  async (input) => {
@@ -20836,8 +20214,7 @@ function createServer(engine, config) {
20836
20214
  scope: input.scope,
20837
20215
  pathPrefix: input.pathPrefix,
20838
20216
  tags: input.tags,
20839
- groupBy: input.groupBy,
20840
- rerank: input.rerank ?? config.rerank.enabled
20217
+ groupBy: input.groupBy
20841
20218
  });
20842
20219
  return {
20843
20220
  content: [
@@ -20963,10 +20340,10 @@ async function runMcpServer(options = {}) {
20963
20340
  config
20964
20341
  });
20965
20342
  if (resolvedTransport === "http") {
20966
- await startHttpServer(() => createServer(engine, config), config, options);
20343
+ await startHttpServer(() => createServer(engine), config, options);
20967
20344
  return;
20968
20345
  }
20969
- const server = createServer(engine, config);
20346
+ const server = createServer(engine);
20970
20347
  const stdioTransport = new stdio_js.StdioServerTransport();
20971
20348
  await server.connect(stdioTransport);
20972
20349
  }
@@ -21123,42 +20500,6 @@ function searchsocketHandle(options = {}) {
21123
20500
  }
21124
20501
  const engine = await getEngine();
21125
20502
  const searchRequest = body;
21126
- if (searchRequest.stream && searchRequest.rerank) {
21127
- const encoder = new TextEncoder();
21128
- const stream = new ReadableStream({
21129
- async start(controller) {
21130
- try {
21131
- for await (const event2 of engine.searchStreaming(searchRequest)) {
21132
- const line = JSON.stringify(event2) + "\n";
21133
- controller.enqueue(encoder.encode(line));
21134
- }
21135
- } catch (streamError) {
21136
- const errorEvent = {
21137
- phase: "error",
21138
- data: {
21139
- error: {
21140
- code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
21141
- message: streamError instanceof Error ? streamError.message : "Unknown error"
21142
- }
21143
- }
21144
- };
21145
- controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
21146
- } finally {
21147
- controller.close();
21148
- }
21149
- }
21150
- });
21151
- return withCors(
21152
- new Response(stream, {
21153
- status: 200,
21154
- headers: {
21155
- "content-type": "application/x-ndjson"
21156
- }
21157
- }),
21158
- event.request,
21159
- config
21160
- );
21161
- }
21162
20503
  const result = await engine.search(searchRequest);
21163
20504
  return withCors(
21164
20505
  new Response(JSON.stringify(result), {
@@ -21244,13 +20585,6 @@ function searchsocketVitePlugin(options = {}) {
21244
20585
  let running = false;
21245
20586
  return {
21246
20587
  name: "searchsocket:auto-index",
21247
- config() {
21248
- return {
21249
- ssr: {
21250
- external: ["@libsql/client", "libsql"]
21251
- }
21252
- };
21253
- },
21254
20588
  async closeBundle() {
21255
20589
  if (executed || running) {
21256
20590
  return;
@@ -21278,9 +20612,8 @@ function searchsocketVitePlugin(options = {}) {
21278
20612
  verbose: options.verbose
21279
20613
  });
21280
20614
  logger3.info(
21281
- `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} embedded=${stats.newEmbeddings}`
20615
+ `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
21282
20616
  );
21283
- logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
21284
20617
  executed = true;
21285
20618
  } finally {
21286
20619
  running = false;
@@ -21289,60 +20622,6 @@ function searchsocketVitePlugin(options = {}) {
21289
20622
  };
21290
20623
  }
21291
20624
 
21292
- // src/merge.ts
21293
- function mergeSearchResults(initial, reranked, options) {
21294
- const maxDisplacement = options?.maxDisplacement ?? 3;
21295
- const initialUrls = initial.results.map((r) => r.url);
21296
- const rerankedUrls = reranked.results.map((r) => r.url);
21297
- const initialPos = /* @__PURE__ */ new Map();
21298
- for (let i = 0; i < initialUrls.length; i++) {
21299
- initialPos.set(initialUrls[i], i);
21300
- }
21301
- const rerankedPos = /* @__PURE__ */ new Map();
21302
- for (let i = 0; i < rerankedUrls.length; i++) {
21303
- rerankedPos.set(rerankedUrls[i], i);
21304
- }
21305
- const displacements = [];
21306
- for (const url of initialUrls) {
21307
- const iPos = initialPos.get(url);
21308
- const rPos = rerankedPos.get(url);
21309
- const displacement = rPos !== void 0 ? Math.abs(iPos - rPos) : 0;
21310
- displacements.push({ url, displacement });
21311
- }
21312
- const totalResults = displacements.length;
21313
- if (totalResults === 0) {
21314
- return {
21315
- response: reranked,
21316
- usedRerankedOrder: true,
21317
- displacements
21318
- };
21319
- }
21320
- const hasLargeDisplacement = displacements.some((d) => d.displacement > maxDisplacement);
21321
- if (hasLargeDisplacement) {
21322
- return {
21323
- response: reranked,
21324
- usedRerankedOrder: true,
21325
- displacements
21326
- };
21327
- }
21328
- const rerankedScoreMap = /* @__PURE__ */ new Map();
21329
- for (const result of reranked.results) {
21330
- rerankedScoreMap.set(result.url, result.score);
21331
- }
21332
- const mergedResults = initial.results.map((result) => ({
21333
- ...result,
21334
- score: rerankedScoreMap.get(result.url) ?? result.score
21335
- }));
21336
- return {
21337
- response: {
21338
- ...reranked,
21339
- results: mergedResults
21340
- },
21341
- usedRerankedOrder: false,
21342
- displacements
21343
- };
21344
- }
21345
-
21346
20625
  // src/client.ts
21347
20626
  function createSearchClient(options = {}) {
21348
20627
  const endpoint = options.endpoint ?? "/api/search";
@@ -21370,72 +20649,6 @@ function createSearchClient(options = {}) {
21370
20649
  throw new Error(message);
21371
20650
  }
21372
20651
  return payload;
21373
- },
21374
- async streamSearch(request, onPhase) {
21375
- const response = await fetchImpl(endpoint, {
21376
- method: "POST",
21377
- headers: {
21378
- "content-type": "application/json"
21379
- },
21380
- body: JSON.stringify(request)
21381
- });
21382
- if (!response.ok) {
21383
- let payload;
21384
- try {
21385
- payload = await response.json();
21386
- } catch {
21387
- throw new Error("Search failed");
21388
- }
21389
- const message = payload.error?.message ?? "Search failed";
21390
- throw new Error(message);
21391
- }
21392
- const contentType = response.headers.get("content-type") ?? "";
21393
- if (contentType.includes("application/json")) {
21394
- const data = await response.json();
21395
- onPhase({ phase: "initial", data });
21396
- return data;
21397
- }
21398
- if (!response.body) {
21399
- throw new Error("Response body is not readable");
21400
- }
21401
- const reader = response.body.getReader();
21402
- const decoder = new TextDecoder();
21403
- let buffer = "";
21404
- let lastResponse = null;
21405
- for (; ; ) {
21406
- const { done, value } = await reader.read();
21407
- if (done) break;
21408
- buffer += decoder.decode(value, { stream: true });
21409
- let newlineIdx;
21410
- while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
21411
- const line = buffer.slice(0, newlineIdx).trim();
21412
- buffer = buffer.slice(newlineIdx + 1);
21413
- if (line.length === 0) continue;
21414
- const event = JSON.parse(line);
21415
- if (event.phase === "error") {
21416
- const errData = event.data;
21417
- throw new Error(errData.error.message ?? "Streaming search error");
21418
- }
21419
- const searchEvent = event;
21420
- onPhase(searchEvent);
21421
- lastResponse = searchEvent.data;
21422
- }
21423
- }
21424
- const remaining = buffer.trim();
21425
- if (remaining.length > 0) {
21426
- const event = JSON.parse(remaining);
21427
- if (event.phase === "error") {
21428
- const errData = event.data;
21429
- throw new Error(errData.error.message ?? "Streaming search error");
21430
- }
21431
- const searchEvent = event;
21432
- onPhase(searchEvent);
21433
- lastResponse = searchEvent.data;
21434
- }
21435
- if (!lastResponse) {
21436
- throw new Error("No search results received");
21437
- }
21438
- return lastResponse;
21439
20652
  }
21440
20653
  };
21441
20654
  }
@@ -21452,17 +20665,14 @@ function createSearchClient(options = {}) {
21452
20665
  */
21453
20666
 
21454
20667
  exports.IndexPipeline = IndexPipeline;
21455
- exports.JinaReranker = JinaReranker;
21456
20668
  exports.SearchEngine = SearchEngine;
21457
- exports.createEmbeddingsProvider = createEmbeddingsProvider;
21458
- exports.createReranker = createReranker;
20669
+ exports.UpstashSearchStore = UpstashSearchStore;
21459
20670
  exports.createSearchClient = createSearchClient;
21460
- exports.createVectorStore = createVectorStore;
20671
+ exports.createUpstashStore = createUpstashStore;
21461
20672
  exports.isServerless = isServerless;
21462
20673
  exports.loadConfig = loadConfig;
21463
20674
  exports.mergeConfig = mergeConfig;
21464
20675
  exports.mergeConfigServerless = mergeConfigServerless;
21465
- exports.mergeSearchResults = mergeSearchResults;
21466
20676
  exports.resolveScope = resolveScope;
21467
20677
  exports.runMcpServer = runMcpServer;
21468
20678
  exports.searchsocketHandle = searchsocketHandle;