searchsocket 0.4.0 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -4,13 +4,13 @@ var fs = require('fs');
4
4
  var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
- var pLimit2 = require('p-limit');
8
7
  var child_process = require('child_process');
9
8
  var crypto = require('crypto');
10
9
  var cheerio = require('cheerio');
11
10
  var matter = require('gray-matter');
12
- var fs4 = require('fs/promises');
13
11
  var fg = require('fast-glob');
12
+ var pLimit = require('p-limit');
13
+ var fs3 = require('fs/promises');
14
14
  var net = require('net');
15
15
  var zlib = require('zlib');
16
16
 
@@ -18,10 +18,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
18
18
 
19
19
  var fs__default = /*#__PURE__*/_interopDefault(fs);
20
20
  var path__default = /*#__PURE__*/_interopDefault(path);
21
- var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
22
21
  var matter__default = /*#__PURE__*/_interopDefault(matter);
23
- var fs4__default = /*#__PURE__*/_interopDefault(fs4);
24
22
  var fg__default = /*#__PURE__*/_interopDefault(fg);
23
+ var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
24
+ var fs3__default = /*#__PURE__*/_interopDefault(fs3);
25
25
  var net__default = /*#__PURE__*/_interopDefault(net);
26
26
 
27
27
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -2767,12 +2767,12 @@ var require_ChildNode = __commonJS({
2767
2767
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
2768
2768
  var Node2 = require_Node();
2769
2769
  var LinkedList = require_LinkedList();
2770
- var createDocumentFragmentFromArguments = function(document, args) {
2771
- var docFrag = document.createDocumentFragment();
2770
+ var createDocumentFragmentFromArguments = function(document2, args) {
2771
+ var docFrag = document2.createDocumentFragment();
2772
2772
  for (var i = 0; i < args.length; i++) {
2773
2773
  var argItem = args[i];
2774
2774
  var isNode = argItem instanceof Node2;
2775
- docFrag.appendChild(isNode ? argItem : document.createTextNode(String(argItem)));
2775
+ docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
2776
2776
  }
2777
2777
  return docFrag;
2778
2778
  };
@@ -2930,7 +2930,7 @@ var require_NamedNodeMap = __commonJS({
2930
2930
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
2931
2931
  var require_Element = __commonJS({
2932
2932
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
2933
- module.exports = Element;
2933
+ module.exports = Element2;
2934
2934
  var xml = require_xmlnames();
2935
2935
  var utils = require_utils();
2936
2936
  var NAMESPACE = utils.NAMESPACE;
@@ -2947,7 +2947,7 @@ var require_Element = __commonJS({
2947
2947
  var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
2948
2948
  var NamedNodeMap = require_NamedNodeMap();
2949
2949
  var uppercaseCache = /* @__PURE__ */ Object.create(null);
2950
- function Element(doc, localName, namespaceURI, prefix) {
2950
+ function Element2(doc, localName, namespaceURI, prefix) {
2951
2951
  ContainerNode.call(this);
2952
2952
  this.nodeType = Node2.ELEMENT_NODE;
2953
2953
  this.ownerDocument = doc;
@@ -2967,7 +2967,7 @@ var require_Element = __commonJS({
2967
2967
  recursiveGetText(node.childNodes[i], a);
2968
2968
  }
2969
2969
  }
2970
- Element.prototype = Object.create(ContainerNode.prototype, {
2970
+ Element2.prototype = Object.create(ContainerNode.prototype, {
2971
2971
  isHTML: { get: function isHTML() {
2972
2972
  return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
2973
2973
  } },
@@ -3037,7 +3037,7 @@ var require_Element = __commonJS({
3037
3037
  return NodeUtils.serializeOne(this, { nodeType: 0 });
3038
3038
  },
3039
3039
  set: function(v) {
3040
- var document = this.ownerDocument;
3040
+ var document2 = this.ownerDocument;
3041
3041
  var parent = this.parentNode;
3042
3042
  if (parent === null) {
3043
3043
  return;
@@ -3048,8 +3048,8 @@ var require_Element = __commonJS({
3048
3048
  if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
3049
3049
  parent = parent.ownerDocument.createElement("body");
3050
3050
  }
3051
- var parser = document.implementation.mozHTMLParser(
3052
- document._address,
3051
+ var parser = document2.implementation.mozHTMLParser(
3052
+ document2._address,
3053
3053
  parent
3054
3054
  );
3055
3055
  parser.parse(v === null ? "" : String(v), true);
@@ -3108,7 +3108,7 @@ var require_Element = __commonJS({
3108
3108
  default:
3109
3109
  utils.SyntaxError();
3110
3110
  }
3111
- if (!(context instanceof Element) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3111
+ if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3112
3112
  context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
3113
3113
  }
3114
3114
  var parser = this.ownerDocument.implementation.mozHTMLParser(
@@ -3716,10 +3716,10 @@ var require_Element = __commonJS({
3716
3716
  return nodes.item ? nodes : new NodeList(nodes);
3717
3717
  } }
3718
3718
  });
3719
- Object.defineProperties(Element.prototype, ChildNode);
3720
- Object.defineProperties(Element.prototype, NonDocumentTypeChildNode);
3719
+ Object.defineProperties(Element2.prototype, ChildNode);
3720
+ Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
3721
3721
  attributes.registerChangeHandler(
3722
- Element,
3722
+ Element2,
3723
3723
  "id",
3724
3724
  function(element, lname, oldval, newval) {
3725
3725
  if (element.rooted) {
@@ -3733,7 +3733,7 @@ var require_Element = __commonJS({
3733
3733
  }
3734
3734
  );
3735
3735
  attributes.registerChangeHandler(
3736
- Element,
3736
+ Element2,
3737
3737
  "class",
3738
3738
  function(element, lname, oldval, newval) {
3739
3739
  if (element._classList) {
@@ -3832,7 +3832,7 @@ var require_Element = __commonJS({
3832
3832
  }
3833
3833
  }
3834
3834
  });
3835
- Element._Attr = Attr;
3835
+ Element2._Attr = Attr;
3836
3836
  function AttributesArray(elt) {
3837
3837
  NamedNodeMap.call(this, elt);
3838
3838
  for (var name in elt._attrsByQName) {
@@ -4234,7 +4234,7 @@ var require_DocumentFragment = __commonJS({
4234
4234
  var Node2 = require_Node();
4235
4235
  var NodeList = require_NodeList();
4236
4236
  var ContainerNode = require_ContainerNode();
4237
- var Element = require_Element();
4237
+ var Element2 = require_Element();
4238
4238
  var select = require_select();
4239
4239
  var utils = require_utils();
4240
4240
  function DocumentFragment(doc) {
@@ -4252,9 +4252,9 @@ var require_DocumentFragment = __commonJS({
4252
4252
  }
4253
4253
  },
4254
4254
  // Copy the text content getter/setter from Element
4255
- textContent: Object.getOwnPropertyDescriptor(Element.prototype, "textContent"),
4255
+ textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
4256
4256
  // Copy the text content getter/setter from Element
4257
- innerText: Object.getOwnPropertyDescriptor(Element.prototype, "innerText"),
4257
+ innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
4258
4258
  querySelector: { value: function(selector) {
4259
4259
  var nodes = this.querySelectorAll(selector);
4260
4260
  return nodes.length ? nodes[0] : null;
@@ -4262,8 +4262,8 @@ var require_DocumentFragment = __commonJS({
4262
4262
  querySelectorAll: { value: function(selector) {
4263
4263
  var context = Object.create(this);
4264
4264
  context.isHTML = true;
4265
- context.getElementsByTagName = Element.prototype.getElementsByTagName;
4266
- context.nextElement = Object.getOwnPropertyDescriptor(Element.prototype, "firstElementChild").get;
4265
+ context.getElementsByTagName = Element2.prototype.getElementsByTagName;
4266
+ context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
4267
4267
  var nodes = select(selector, context);
4268
4268
  return nodes.item ? nodes : new NodeList(nodes);
4269
4269
  } },
@@ -4345,7 +4345,7 @@ var require_ProcessingInstruction = __commonJS({
4345
4345
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
4346
4346
  var require_NodeFilter = __commonJS({
4347
4347
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
4348
- var NodeFilter = {
4348
+ var NodeFilter2 = {
4349
4349
  // Constants for acceptNode()
4350
4350
  FILTER_ACCEPT: 1,
4351
4351
  FILTER_REJECT: 2,
@@ -4370,7 +4370,7 @@ var require_NodeFilter = __commonJS({
4370
4370
  SHOW_NOTATION: 2048
4371
4371
  // historical
4372
4372
  };
4373
- module.exports = NodeFilter.constructor = NodeFilter.prototype = NodeFilter;
4373
+ module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
4374
4374
  }
4375
4375
  });
4376
4376
 
@@ -4445,7 +4445,7 @@ var require_TreeWalker = __commonJS({
4445
4445
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
4446
4446
  module.exports = TreeWalker;
4447
4447
  var Node2 = require_Node();
4448
- var NodeFilter = require_NodeFilter();
4448
+ var NodeFilter2 = require_NodeFilter();
4449
4449
  var NodeTraversal = require_NodeTraversal();
4450
4450
  var utils = require_utils();
4451
4451
  var mapChild = {
@@ -4465,11 +4465,11 @@ var require_TreeWalker = __commonJS({
4465
4465
  node = tw._currentNode[mapChild[type]];
4466
4466
  while (node !== null) {
4467
4467
  result = tw._internalFilter(node);
4468
- if (result === NodeFilter.FILTER_ACCEPT) {
4468
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4469
4469
  tw._currentNode = node;
4470
4470
  return node;
4471
4471
  }
4472
- if (result === NodeFilter.FILTER_SKIP) {
4472
+ if (result === NodeFilter2.FILTER_SKIP) {
4473
4473
  child = node[mapChild[type]];
4474
4474
  if (child !== null) {
4475
4475
  node = child;
@@ -4503,12 +4503,12 @@ var require_TreeWalker = __commonJS({
4503
4503
  while (sibling !== null) {
4504
4504
  node = sibling;
4505
4505
  result = tw._internalFilter(node);
4506
- if (result === NodeFilter.FILTER_ACCEPT) {
4506
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4507
4507
  tw._currentNode = node;
4508
4508
  return node;
4509
4509
  }
4510
4510
  sibling = node[mapChild[type]];
4511
- if (result === NodeFilter.FILTER_REJECT || sibling === null) {
4511
+ if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
4512
4512
  sibling = node[mapSibling[type]];
4513
4513
  }
4514
4514
  }
@@ -4516,7 +4516,7 @@ var require_TreeWalker = __commonJS({
4516
4516
  if (node === null || node === tw.root) {
4517
4517
  return null;
4518
4518
  }
4519
- if (tw._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4519
+ if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4520
4520
  return null;
4521
4521
  }
4522
4522
  }
@@ -4564,11 +4564,11 @@ var require_TreeWalker = __commonJS({
4564
4564
  utils.InvalidStateError();
4565
4565
  }
4566
4566
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4567
- return NodeFilter.FILTER_SKIP;
4567
+ return NodeFilter2.FILTER_SKIP;
4568
4568
  }
4569
4569
  filter = this._filter;
4570
4570
  if (filter === null) {
4571
- result = NodeFilter.FILTER_ACCEPT;
4571
+ result = NodeFilter2.FILTER_ACCEPT;
4572
4572
  } else {
4573
4573
  this._active = true;
4574
4574
  try {
@@ -4597,7 +4597,7 @@ var require_TreeWalker = __commonJS({
4597
4597
  if (node === null) {
4598
4598
  return null;
4599
4599
  }
4600
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4600
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4601
4601
  this._currentNode = node;
4602
4602
  return node;
4603
4603
  }
@@ -4650,17 +4650,17 @@ var require_TreeWalker = __commonJS({
4650
4650
  for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
4651
4651
  node = previousSibling;
4652
4652
  result = this._internalFilter(node);
4653
- if (result === NodeFilter.FILTER_REJECT) {
4653
+ if (result === NodeFilter2.FILTER_REJECT) {
4654
4654
  continue;
4655
4655
  }
4656
4656
  for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
4657
4657
  node = lastChild;
4658
4658
  result = this._internalFilter(node);
4659
- if (result === NodeFilter.FILTER_REJECT) {
4659
+ if (result === NodeFilter2.FILTER_REJECT) {
4660
4660
  break;
4661
4661
  }
4662
4662
  }
4663
- if (result === NodeFilter.FILTER_ACCEPT) {
4663
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4664
4664
  this._currentNode = node;
4665
4665
  return node;
4666
4666
  }
@@ -4669,7 +4669,7 @@ var require_TreeWalker = __commonJS({
4669
4669
  return null;
4670
4670
  }
4671
4671
  node = node.parentNode;
4672
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4672
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4673
4673
  this._currentNode = node;
4674
4674
  return node;
4675
4675
  }
@@ -4686,26 +4686,26 @@ var require_TreeWalker = __commonJS({
4686
4686
  nextNode: { value: function nextNode() {
4687
4687
  var node, result, firstChild, nextSibling;
4688
4688
  node = this._currentNode;
4689
- result = NodeFilter.FILTER_ACCEPT;
4689
+ result = NodeFilter2.FILTER_ACCEPT;
4690
4690
  CHILDREN:
4691
4691
  while (true) {
4692
4692
  for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
4693
4693
  node = firstChild;
4694
4694
  result = this._internalFilter(node);
4695
- if (result === NodeFilter.FILTER_ACCEPT) {
4695
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4696
4696
  this._currentNode = node;
4697
4697
  return node;
4698
- } else if (result === NodeFilter.FILTER_REJECT) {
4698
+ } else if (result === NodeFilter2.FILTER_REJECT) {
4699
4699
  break;
4700
4700
  }
4701
4701
  }
4702
4702
  for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
4703
4703
  node = nextSibling;
4704
4704
  result = this._internalFilter(node);
4705
- if (result === NodeFilter.FILTER_ACCEPT) {
4705
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4706
4706
  this._currentNode = node;
4707
4707
  return node;
4708
- } else if (result === NodeFilter.FILTER_SKIP) {
4708
+ } else if (result === NodeFilter2.FILTER_SKIP) {
4709
4709
  continue CHILDREN;
4710
4710
  }
4711
4711
  }
@@ -4724,7 +4724,7 @@ var require_TreeWalker = __commonJS({
4724
4724
  var require_NodeIterator = __commonJS({
4725
4725
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
4726
4726
  module.exports = NodeIterator;
4727
- var NodeFilter = require_NodeFilter();
4727
+ var NodeFilter2 = require_NodeFilter();
4728
4728
  var NodeTraversal = require_NodeTraversal();
4729
4729
  var utils = require_utils();
4730
4730
  function move(node, stayWithin, directionIsNext) {
@@ -4759,7 +4759,7 @@ var require_NodeIterator = __commonJS({
4759
4759
  }
4760
4760
  }
4761
4761
  var result = ni._internalFilter(node);
4762
- if (result === NodeFilter.FILTER_ACCEPT) {
4762
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4763
4763
  break;
4764
4764
  }
4765
4765
  }
@@ -4807,11 +4807,11 @@ var require_NodeIterator = __commonJS({
4807
4807
  utils.InvalidStateError();
4808
4808
  }
4809
4809
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4810
- return NodeFilter.FILTER_SKIP;
4810
+ return NodeFilter2.FILTER_SKIP;
4811
4811
  }
4812
4812
  filter = this._filter;
4813
4813
  if (filter === null) {
4814
- result = NodeFilter.FILTER_ACCEPT;
4814
+ result = NodeFilter2.FILTER_ACCEPT;
4815
4815
  } else {
4816
4816
  this._active = true;
4817
4817
  try {
@@ -5021,32 +5021,32 @@ var require_URL = __commonJS({
5021
5021
  else
5022
5022
  return basepath.substring(0, lastslash + 1) + refpath;
5023
5023
  }
5024
- function remove_dot_segments(path15) {
5025
- if (!path15) return path15;
5024
+ function remove_dot_segments(path13) {
5025
+ if (!path13) return path13;
5026
5026
  var output = "";
5027
- while (path15.length > 0) {
5028
- if (path15 === "." || path15 === "..") {
5029
- path15 = "";
5027
+ while (path13.length > 0) {
5028
+ if (path13 === "." || path13 === "..") {
5029
+ path13 = "";
5030
5030
  break;
5031
5031
  }
5032
- var twochars = path15.substring(0, 2);
5033
- var threechars = path15.substring(0, 3);
5034
- var fourchars = path15.substring(0, 4);
5032
+ var twochars = path13.substring(0, 2);
5033
+ var threechars = path13.substring(0, 3);
5034
+ var fourchars = path13.substring(0, 4);
5035
5035
  if (threechars === "../") {
5036
- path15 = path15.substring(3);
5036
+ path13 = path13.substring(3);
5037
5037
  } else if (twochars === "./") {
5038
- path15 = path15.substring(2);
5038
+ path13 = path13.substring(2);
5039
5039
  } else if (threechars === "/./") {
5040
- path15 = "/" + path15.substring(3);
5041
- } else if (twochars === "/." && path15.length === 2) {
5042
- path15 = "/";
5043
- } else if (fourchars === "/../" || threechars === "/.." && path15.length === 3) {
5044
- path15 = "/" + path15.substring(4);
5040
+ path13 = "/" + path13.substring(3);
5041
+ } else if (twochars === "/." && path13.length === 2) {
5042
+ path13 = "/";
5043
+ } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5044
+ path13 = "/" + path13.substring(4);
5045
5045
  output = output.replace(/\/?[^\/]*$/, "");
5046
5046
  } else {
5047
- var segment = path15.match(/(\/?([^\/]*))/)[0];
5047
+ var segment = path13.match(/(\/?([^\/]*))/)[0];
5048
5048
  output += segment;
5049
- path15 = path15.substring(segment.length);
5049
+ path13 = path13.substring(segment.length);
5050
5050
  }
5051
5051
  }
5052
5052
  return output;
@@ -5611,9 +5611,9 @@ var require_defineElement = __commonJS({
5611
5611
  });
5612
5612
  return c;
5613
5613
  };
5614
- function EventHandlerBuilder(body, document, form, element) {
5614
+ function EventHandlerBuilder(body, document2, form, element) {
5615
5615
  this.body = body;
5616
- this.document = document;
5616
+ this.document = document2;
5617
5617
  this.form = form;
5618
5618
  this.element = element;
5619
5619
  }
@@ -5647,7 +5647,7 @@ var require_defineElement = __commonJS({
5647
5647
  var require_htmlelts = __commonJS({
5648
5648
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
5649
5649
  var Node2 = require_Node();
5650
- var Element = require_Element();
5650
+ var Element2 = require_Element();
5651
5651
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
5652
5652
  var utils = require_utils();
5653
5653
  var URLUtils = require_URLUtils();
@@ -5715,10 +5715,10 @@ var require_htmlelts = __commonJS({
5715
5715
  this._form = null;
5716
5716
  };
5717
5717
  var HTMLElement = exports$1.HTMLElement = define({
5718
- superclass: Element,
5718
+ superclass: Element2,
5719
5719
  name: "HTMLElement",
5720
5720
  ctor: function HTMLElement2(doc, localName, prefix) {
5721
- Element.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5721
+ Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5722
5722
  },
5723
5723
  props: {
5724
5724
  dangerouslySetInnerHTML: {
@@ -7200,7 +7200,7 @@ var require_htmlelts = __commonJS({
7200
7200
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
7201
7201
  var require_svg = __commonJS({
7202
7202
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
7203
- var Element = require_Element();
7203
+ var Element2 = require_Element();
7204
7204
  var defineElement = require_defineElement();
7205
7205
  var utils = require_utils();
7206
7206
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
@@ -7214,10 +7214,10 @@ var require_svg = __commonJS({
7214
7214
  return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
7215
7215
  }
7216
7216
  var SVGElement = define({
7217
- superclass: Element,
7217
+ superclass: Element2,
7218
7218
  name: "SVGElement",
7219
7219
  ctor: function SVGElement2(doc, localName, prefix) {
7220
- Element.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7220
+ Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7221
7221
  },
7222
7222
  props: {
7223
7223
  style: { get: function() {
@@ -7352,7 +7352,7 @@ var require_Document = __commonJS({
7352
7352
  var Node2 = require_Node();
7353
7353
  var NodeList = require_NodeList();
7354
7354
  var ContainerNode = require_ContainerNode();
7355
- var Element = require_Element();
7355
+ var Element2 = require_Element();
7356
7356
  var Text = require_Text();
7357
7357
  var Comment = require_Comment();
7358
7358
  var Event = require_Event();
@@ -7361,7 +7361,7 @@ var require_Document = __commonJS({
7361
7361
  var DOMImplementation = require_DOMImplementation();
7362
7362
  var TreeWalker = require_TreeWalker();
7363
7363
  var NodeIterator = require_NodeIterator();
7364
- var NodeFilter = require_NodeFilter();
7364
+ var NodeFilter2 = require_NodeFilter();
7365
7365
  var URL2 = require_URL();
7366
7366
  var select = require_select();
7367
7367
  var events = require_events();
@@ -7500,13 +7500,13 @@ var require_Document = __commonJS({
7500
7500
  if (this.isHTML) {
7501
7501
  localName = utils.toASCIILowerCase(localName);
7502
7502
  }
7503
- return new Element._Attr(null, localName, null, null, "");
7503
+ return new Element2._Attr(null, localName, null, null, "");
7504
7504
  } },
7505
7505
  createAttributeNS: { value: function(namespace, qualifiedName) {
7506
7506
  namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
7507
7507
  qualifiedName = String(qualifiedName);
7508
7508
  var ve = validateAndExtract(namespace, qualifiedName);
7509
- return new Element._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7509
+ return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7510
7510
  } },
7511
7511
  createElement: { value: function(localName) {
7512
7512
  localName = String(localName);
@@ -7518,7 +7518,7 @@ var require_Document = __commonJS({
7518
7518
  } else if (this.contentType === "application/xhtml+xml") {
7519
7519
  return html.createElement(this, localName, null);
7520
7520
  } else {
7521
- return new Element(this, localName, null, null);
7521
+ return new Element2(this, localName, null, null);
7522
7522
  }
7523
7523
  }, writable: isApiWritable },
7524
7524
  createElementNS: { value: function(namespace, qualifiedName) {
@@ -7535,7 +7535,7 @@ var require_Document = __commonJS({
7535
7535
  } else if (namespace === NAMESPACE.SVG) {
7536
7536
  return svg.createElement(this, localName, prefix);
7537
7537
  }
7538
- return new Element(this, localName, namespace, prefix);
7538
+ return new Element2(this, localName, namespace, prefix);
7539
7539
  } },
7540
7540
  createEvent: { value: function createEvent(interfaceName) {
7541
7541
  interfaceName = interfaceName.toLowerCase();
@@ -7557,7 +7557,7 @@ var require_Document = __commonJS({
7557
7557
  if (!(root3 instanceof Node2)) {
7558
7558
  throw new TypeError("root not a node");
7559
7559
  }
7560
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7560
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7561
7561
  filter = filter === void 0 ? null : filter;
7562
7562
  return new TreeWalker(root3, whatToShow, filter);
7563
7563
  } },
@@ -7569,7 +7569,7 @@ var require_Document = __commonJS({
7569
7569
  if (!(root3 instanceof Node2)) {
7570
7570
  throw new TypeError("root not a node");
7571
7571
  }
7572
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7572
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7573
7573
  filter = filter === void 0 ? null : filter;
7574
7574
  return new NodeIterator(root3, whatToShow, filter);
7575
7575
  } },
@@ -7630,10 +7630,10 @@ var require_Document = __commonJS({
7630
7630
  return this.byId[id] instanceof MultiId;
7631
7631
  } },
7632
7632
  // Just copy this method from the Element prototype
7633
- getElementsByName: { value: Element.prototype.getElementsByName },
7634
- getElementsByTagName: { value: Element.prototype.getElementsByTagName },
7635
- getElementsByTagNameNS: { value: Element.prototype.getElementsByTagNameNS },
7636
- getElementsByClassName: { value: Element.prototype.getElementsByClassName },
7633
+ getElementsByName: { value: Element2.prototype.getElementsByName },
7634
+ getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
7635
+ getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
7636
+ getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
7637
7637
  adoptNode: { value: function adoptNode(node) {
7638
7638
  if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
7639
7639
  if (node.nodeType === Node2.ATTRIBUTE_NODE) {
@@ -16459,8 +16459,8 @@ var require_Window = __commonJS({
16459
16459
  var Location = require_Location();
16460
16460
  var utils = require_utils();
16461
16461
  module.exports = Window;
16462
- function Window(document) {
16463
- this.document = document || new DOMImplementation(null).createHTMLDocument("");
16462
+ function Window(document2) {
16463
+ this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
16464
16464
  this.document._scripting_enabled = true;
16465
16465
  this.document.defaultView = this;
16466
16466
  this.location = new Location(this, this.document._address || "about:blank");
@@ -16590,11 +16590,11 @@ var require_lib = __commonJS({
16590
16590
  };
16591
16591
  };
16592
16592
  exports$1.createWindow = function(html, address) {
16593
- var document = exports$1.createDocument(html);
16593
+ var document2 = exports$1.createDocument(html);
16594
16594
  if (address !== void 0) {
16595
- document._address = address;
16595
+ document2._address = address;
16596
16596
  }
16597
- return new impl.Window(document);
16597
+ return new impl.Window(document2);
16598
16598
  };
16599
16599
  exports$1.impl = impl;
16600
16600
  }
@@ -16659,29 +16659,18 @@ var searchSocketConfigSchema = zod.z.object({
16659
16659
  prependTitle: zod.z.boolean().optional(),
16660
16660
  pageSummaryChunk: zod.z.boolean().optional()
16661
16661
  }).optional(),
16662
- embeddings: zod.z.object({
16663
- provider: zod.z.literal("jina").optional(),
16664
- model: zod.z.string().min(1).optional(),
16665
- apiKey: zod.z.string().min(1).optional(),
16666
- apiKeyEnv: zod.z.string().min(1).optional(),
16667
- batchSize: zod.z.number().int().positive().optional(),
16668
- concurrency: zod.z.number().int().positive().optional(),
16669
- pricePer1kTokens: zod.z.number().positive().optional()
16662
+ upstash: zod.z.object({
16663
+ url: zod.z.string().url().optional(),
16664
+ token: zod.z.string().min(1).optional(),
16665
+ urlEnv: zod.z.string().min(1).optional(),
16666
+ tokenEnv: zod.z.string().min(1).optional()
16670
16667
  }).optional(),
16671
- vector: zod.z.object({
16672
- dimension: zod.z.number().int().positive().optional(),
16673
- turso: zod.z.object({
16674
- url: zod.z.string().url().optional(),
16675
- authToken: zod.z.string().min(1).optional(),
16676
- urlEnv: zod.z.string().optional(),
16677
- authTokenEnv: zod.z.string().optional(),
16678
- localPath: zod.z.string().optional()
16679
- }).optional()
16680
- }).optional(),
16681
- rerank: zod.z.object({
16682
- enabled: zod.z.boolean().optional(),
16683
- topN: zod.z.number().int().positive().optional(),
16684
- model: zod.z.string().optional()
16668
+ search: zod.z.object({
16669
+ semanticWeight: zod.z.number().min(0).max(1).optional(),
16670
+ inputEnrichment: zod.z.boolean().optional(),
16671
+ reranking: zod.z.boolean().optional(),
16672
+ dualSearch: zod.z.boolean().optional(),
16673
+ pageSearchWeight: zod.z.number().min(0).max(1).optional()
16685
16674
  }).optional(),
16686
16675
  ranking: zod.z.object({
16687
16676
  enableIncomingLinkBoost: zod.z.boolean().optional(),
@@ -16691,11 +16680,12 @@ var searchSocketConfigSchema = zod.z.object({
16691
16680
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16692
16681
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16693
16682
  minScore: zod.z.number().min(0).max(1).optional(),
16683
+ scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
16694
16684
  weights: zod.z.object({
16695
16685
  incomingLinks: zod.z.number().optional(),
16696
16686
  depth: zod.z.number().optional(),
16697
- rerank: zod.z.number().optional(),
16698
- aggregation: zod.z.number().optional()
16687
+ aggregation: zod.z.number().optional(),
16688
+ titleMatch: zod.z.number().optional()
16699
16689
  }).optional()
16700
16690
  }).optional(),
16701
16691
  api: zod.z.object({
@@ -16717,8 +16707,7 @@ var searchSocketConfigSchema = zod.z.object({
16717
16707
  }).optional()
16718
16708
  }).optional(),
16719
16709
  state: zod.z.object({
16720
- dir: zod.z.string().optional(),
16721
- writeMirror: zod.z.boolean().optional()
16710
+ dir: zod.z.string().optional()
16722
16711
  }).optional()
16723
16712
  });
16724
16713
 
@@ -16772,24 +16761,16 @@ function createDefaultConfig(projectId) {
16772
16761
  prependTitle: true,
16773
16762
  pageSummaryChunk: true
16774
16763
  },
16775
- embeddings: {
16776
- provider: "jina",
16777
- model: "jina-embeddings-v5-text-small",
16778
- apiKeyEnv: "JINA_API_KEY",
16779
- batchSize: 64,
16780
- concurrency: 4
16781
- },
16782
- vector: {
16783
- turso: {
16784
- urlEnv: "TURSO_DATABASE_URL",
16785
- authTokenEnv: "TURSO_AUTH_TOKEN",
16786
- localPath: ".searchsocket/vectors.db"
16787
- }
16764
+ upstash: {
16765
+ urlEnv: "UPSTASH_SEARCH_REST_URL",
16766
+ tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16788
16767
  },
16789
- rerank: {
16790
- enabled: true,
16791
- topN: 20,
16792
- model: "jina-reranker-v3"
16768
+ search: {
16769
+ semanticWeight: 0.75,
16770
+ inputEnrichment: true,
16771
+ reranking: true,
16772
+ dualSearch: true,
16773
+ pageSearchWeight: 0.3
16793
16774
  },
16794
16775
  ranking: {
16795
16776
  enableIncomingLinkBoost: true,
@@ -16798,12 +16779,13 @@ function createDefaultConfig(projectId) {
16798
16779
  aggregationCap: 5,
16799
16780
  aggregationDecay: 0.5,
16800
16781
  minChunkScoreRatio: 0.5,
16801
- minScore: 0,
16782
+ minScore: 0.3,
16783
+ scoreGapThreshold: 0.4,
16802
16784
  weights: {
16803
16785
  incomingLinks: 0.05,
16804
16786
  depth: 0.03,
16805
- rerank: 1,
16806
- aggregation: 0.1
16787
+ aggregation: 0.1,
16788
+ titleMatch: 0.15
16807
16789
  }
16808
16790
  },
16809
16791
  api: {
@@ -16821,8 +16803,7 @@ function createDefaultConfig(projectId) {
16821
16803
  }
16822
16804
  },
16823
16805
  state: {
16824
- dir: ".searchsocket",
16825
- writeMirror: false
16806
+ dir: ".searchsocket"
16826
16807
  }
16827
16808
  };
16828
16809
  }
@@ -16946,21 +16927,13 @@ ${issues}`
16946
16927
  ...defaults.chunking,
16947
16928
  ...parsed.chunking
16948
16929
  },
16949
- embeddings: {
16950
- ...defaults.embeddings,
16951
- ...parsed.embeddings
16930
+ upstash: {
16931
+ ...defaults.upstash,
16932
+ ...parsed.upstash
16952
16933
  },
16953
- vector: {
16954
- ...defaults.vector,
16955
- ...parsed.vector,
16956
- turso: {
16957
- ...defaults.vector.turso,
16958
- ...parsed.vector?.turso
16959
- }
16960
- },
16961
- rerank: {
16962
- ...defaults.rerank,
16963
- ...parsed.rerank
16934
+ search: {
16935
+ ...defaults.search,
16936
+ ...parsed.search
16964
16937
  },
16965
16938
  ranking: {
16966
16939
  ...defaults.ranking,
@@ -17051,128 +17024,6 @@ async function loadConfig(options = {}) {
17051
17024
  function isServerless() {
17052
17025
  return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
17053
17026
  }
17054
- function sleep(ms) {
17055
- return new Promise((resolve) => {
17056
- setTimeout(resolve, ms);
17057
- });
17058
- }
17059
- var JinaEmbeddingsProvider = class {
17060
- apiKey;
17061
- batchSize;
17062
- concurrency;
17063
- defaultTask;
17064
- constructor(options) {
17065
- if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17066
- throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
17067
- }
17068
- if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17069
- throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17070
- }
17071
- this.apiKey = options.apiKey;
17072
- this.batchSize = options.batchSize;
17073
- this.concurrency = options.concurrency;
17074
- this.defaultTask = options.task ?? "retrieval.passage";
17075
- }
17076
- estimateTokens(text) {
17077
- const normalized = text.trim();
17078
- if (!normalized) {
17079
- return 0;
17080
- }
17081
- const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
17082
- const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
17083
- const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
17084
- const charEstimate = Math.ceil(normalized.length / 4);
17085
- const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17086
- return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17087
- }
17088
- async embedTexts(texts, modelId, task) {
17089
- if (texts.length === 0) {
17090
- return [];
17091
- }
17092
- const batches = [];
17093
- for (let i = 0; i < texts.length; i += this.batchSize) {
17094
- batches.push({
17095
- index: i,
17096
- values: texts.slice(i, i + this.batchSize)
17097
- });
17098
- }
17099
- const outputs = new Array(batches.length);
17100
- const limit = pLimit2__default.default(this.concurrency);
17101
- await Promise.all(
17102
- batches.map(
17103
- (batch, position) => limit(async () => {
17104
- outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17105
- })
17106
- )
17107
- );
17108
- return outputs.flat();
17109
- }
17110
- async embedWithRetry(texts, modelId, task) {
17111
- const maxAttempts = 5;
17112
- let attempt = 0;
17113
- while (attempt < maxAttempts) {
17114
- attempt += 1;
17115
- let response;
17116
- try {
17117
- response = await fetch("https://api.jina.ai/v1/embeddings", {
17118
- method: "POST",
17119
- headers: {
17120
- "content-type": "application/json",
17121
- authorization: `Bearer ${this.apiKey}`
17122
- },
17123
- body: JSON.stringify({
17124
- model: modelId,
17125
- input: texts,
17126
- task
17127
- })
17128
- });
17129
- } catch (error) {
17130
- if (attempt >= maxAttempts) {
17131
- throw error;
17132
- }
17133
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17134
- continue;
17135
- }
17136
- if (!response.ok) {
17137
- const retryable = response.status === 429 || response.status >= 500;
17138
- if (!retryable || attempt >= maxAttempts) {
17139
- const errorBody = await response.text();
17140
- throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17141
- }
17142
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17143
- continue;
17144
- }
17145
- const payload = await response.json();
17146
- if (!payload.data || !Array.isArray(payload.data)) {
17147
- throw new Error("Invalid Jina embeddings response format");
17148
- }
17149
- return payload.data.map((entry) => entry.embedding);
17150
- }
17151
- throw new Error("Unreachable retry state");
17152
- }
17153
- };
17154
-
17155
- // src/embeddings/factory.ts
17156
- function createEmbeddingsProvider(config) {
17157
- if (config.embeddings.provider !== "jina") {
17158
- throw new SearchSocketError(
17159
- "CONFIG_MISSING",
17160
- `Unsupported embeddings provider ${config.embeddings.provider}`
17161
- );
17162
- }
17163
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17164
- if (!apiKey) {
17165
- throw new SearchSocketError(
17166
- "CONFIG_MISSING",
17167
- `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17168
- );
17169
- }
17170
- return new JinaEmbeddingsProvider({
17171
- apiKey,
17172
- batchSize: config.embeddings.batchSize,
17173
- concurrency: config.embeddings.concurrency
17174
- });
17175
- }
17176
17027
 
17177
17028
  // src/utils/text.ts
17178
17029
  function normalizeText(input) {
@@ -17247,103 +17098,6 @@ function resolveScope(config, override) {
17247
17098
  };
17248
17099
  }
17249
17100
 
17250
- // src/rerank/jina.ts
17251
- function sleep2(ms) {
17252
- return new Promise((resolve) => {
17253
- setTimeout(resolve, ms);
17254
- });
17255
- }
17256
- var JinaReranker = class {
17257
- apiKey;
17258
- model;
17259
- maxRetries;
17260
- constructor(options) {
17261
- this.apiKey = options.apiKey;
17262
- this.model = options.model;
17263
- this.maxRetries = options.maxRetries ?? 2;
17264
- }
17265
- async rerank(query, candidates, topN) {
17266
- if (candidates.length === 0) {
17267
- return [];
17268
- }
17269
- const body = {
17270
- model: this.model,
17271
- query,
17272
- documents: candidates.map((candidate) => candidate.text),
17273
- top_n: topN ?? candidates.length,
17274
- return_documents: false
17275
- };
17276
- let attempt = 0;
17277
- while (attempt <= this.maxRetries) {
17278
- attempt += 1;
17279
- let response;
17280
- try {
17281
- response = await fetch("https://api.jina.ai/v1/rerank", {
17282
- method: "POST",
17283
- headers: {
17284
- "content-type": "application/json",
17285
- authorization: `Bearer ${this.apiKey}`
17286
- },
17287
- body: JSON.stringify(body)
17288
- });
17289
- } catch (error) {
17290
- if (attempt <= this.maxRetries) {
17291
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17292
- continue;
17293
- }
17294
- throw error;
17295
- }
17296
- if (!response.ok) {
17297
- const retryable = response.status === 429 || response.status >= 500;
17298
- if (retryable && attempt <= this.maxRetries) {
17299
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17300
- continue;
17301
- }
17302
- const errorBody = await response.text();
17303
- throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
17304
- }
17305
- const payload = await response.json();
17306
- const rawResults = payload.results ?? payload.data ?? [];
17307
- if (!Array.isArray(rawResults)) {
17308
- throw new Error("Invalid Jina rerank response format");
17309
- }
17310
- return rawResults.flatMap((item) => {
17311
- const index = item.index;
17312
- if (typeof index !== "number" || index < 0 || index >= candidates.length) {
17313
- return [];
17314
- }
17315
- const candidate = candidates[index];
17316
- if (!candidate) {
17317
- return [];
17318
- }
17319
- const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
17320
- return [
17321
- {
17322
- id: candidate.id,
17323
- score
17324
- }
17325
- ];
17326
- }).sort((a, b) => b.score - a.score);
17327
- }
17328
- throw new Error("Jina rerank request failed after retries");
17329
- }
17330
- };
17331
-
17332
- // src/rerank/factory.ts
17333
- function createReranker(config) {
17334
- if (!config.rerank.enabled) {
17335
- return null;
17336
- }
17337
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17338
- if (!apiKey) {
17339
- return null;
17340
- }
17341
- return new JinaReranker({
17342
- apiKey,
17343
- model: config.rerank.model
17344
- });
17345
- }
17346
-
17347
17101
  // src/utils/time.ts
17348
17102
  function nowIso() {
17349
17103
  return (/* @__PURE__ */ new Date()).toISOString();
@@ -17362,13 +17116,6 @@ function normalizeUrlPath(rawPath) {
17362
17116
  }
17363
17117
  return out;
17364
17118
  }
17365
- function urlPathToMirrorRelative(urlPath) {
17366
- const normalized = normalizeUrlPath(urlPath);
17367
- if (normalized === "/") {
17368
- return "index.md";
17369
- }
17370
- return `${normalized.slice(1)}.md`;
17371
- }
17372
17119
  function staticHtmlFileToUrl(filePath, rootDir) {
17373
17120
  const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
17374
17121
  if (relative === "index.html") {
@@ -17402,434 +17149,239 @@ function joinUrl(baseUrl, route) {
17402
17149
  return `${base}${routePart}`;
17403
17150
  }
17404
17151
 
17405
- // src/vector/turso.ts
17406
- var TursoVectorStore = class {
17152
+ // src/vector/upstash.ts
17153
+ function chunkIndexName(scope) {
17154
+ return `${scope.projectId}--${scope.scopeName}`;
17155
+ }
17156
+ function pageIndexName(scope) {
17157
+ return `${scope.projectId}--${scope.scopeName}--pages`;
17158
+ }
17159
+ var UpstashSearchStore = class {
17407
17160
  client;
17408
- dimension;
17409
- chunksReady = false;
17410
- registryReady = false;
17411
- pagesReady = false;
17412
17161
  constructor(opts) {
17413
17162
  this.client = opts.client;
17414
- this.dimension = opts.dimension;
17415
- }
17416
- async ensureRegistry() {
17417
- if (this.registryReady) return;
17418
- await this.client.execute(`
17419
- CREATE TABLE IF NOT EXISTS registry (
17420
- scope_key TEXT PRIMARY KEY,
17421
- project_id TEXT NOT NULL,
17422
- scope_name TEXT NOT NULL,
17423
- model_id TEXT NOT NULL,
17424
- last_indexed_at TEXT NOT NULL,
17425
- vector_count INTEGER,
17426
- last_estimate_tokens INTEGER,
17427
- last_estimate_cost_usd REAL,
17428
- last_estimate_changed_chunks INTEGER
17429
- )
17430
- `);
17431
- const estimateCols = [
17432
- { name: "last_estimate_tokens", def: "INTEGER" },
17433
- { name: "last_estimate_cost_usd", def: "REAL" },
17434
- { name: "last_estimate_changed_chunks", def: "INTEGER" }
17435
- ];
17436
- for (const col of estimateCols) {
17437
- try {
17438
- await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
17439
- } catch (error) {
17440
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17441
- throw error;
17442
- }
17443
- }
17444
- }
17445
- this.registryReady = true;
17446
- }
17447
- async ensureChunks(dim) {
17448
- if (this.chunksReady) return;
17449
- const exists = await this.chunksTableExists();
17450
- if (exists) {
17451
- const currentDim = await this.getChunksDimension();
17452
- if (currentDim !== null && currentDim !== dim) {
17453
- await this.client.batch([
17454
- "DROP INDEX IF EXISTS idx",
17455
- "DROP TABLE IF EXISTS chunks"
17456
- ]);
17457
- }
17458
- }
17459
- await this.client.batch([
17460
- `CREATE TABLE IF NOT EXISTS chunks (
17461
- id TEXT PRIMARY KEY,
17462
- project_id TEXT NOT NULL,
17463
- scope_name TEXT NOT NULL,
17464
- url TEXT NOT NULL,
17465
- path TEXT NOT NULL,
17466
- title TEXT NOT NULL,
17467
- section_title TEXT NOT NULL DEFAULT '',
17468
- heading_path TEXT NOT NULL DEFAULT '[]',
17469
- snippet TEXT NOT NULL DEFAULT '',
17470
- chunk_text TEXT NOT NULL DEFAULT '',
17471
- ordinal INTEGER NOT NULL DEFAULT 0,
17472
- content_hash TEXT NOT NULL DEFAULT '',
17473
- model_id TEXT NOT NULL DEFAULT '',
17474
- depth INTEGER NOT NULL DEFAULT 0,
17475
- incoming_links INTEGER NOT NULL DEFAULT 0,
17476
- route_file TEXT NOT NULL DEFAULT '',
17477
- tags TEXT NOT NULL DEFAULT '[]',
17478
- description TEXT NOT NULL DEFAULT '',
17479
- keywords TEXT NOT NULL DEFAULT '[]',
17480
- embedding F32_BLOB(${dim})
17481
- )`,
17482
- `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17483
- ]);
17484
- this.chunksReady = true;
17485
- }
17486
- async ensurePages() {
17487
- if (this.pagesReady) return;
17488
- await this.client.execute(`
17489
- CREATE TABLE IF NOT EXISTS pages (
17490
- project_id TEXT NOT NULL,
17491
- scope_name TEXT NOT NULL,
17492
- url TEXT NOT NULL,
17493
- title TEXT NOT NULL,
17494
- markdown TEXT NOT NULL,
17495
- route_file TEXT NOT NULL DEFAULT '',
17496
- route_resolution TEXT NOT NULL DEFAULT 'exact',
17497
- incoming_links INTEGER NOT NULL DEFAULT 0,
17498
- outgoing_links INTEGER NOT NULL DEFAULT 0,
17499
- depth INTEGER NOT NULL DEFAULT 0,
17500
- tags TEXT NOT NULL DEFAULT '[]',
17501
- indexed_at TEXT NOT NULL,
17502
- PRIMARY KEY (project_id, scope_name, url)
17503
- )
17504
- `);
17505
- this.pagesReady = true;
17506
17163
  }
17507
- async chunksTableExists() {
17508
- try {
17509
- await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
17510
- return true;
17511
- } catch (error) {
17512
- if (error instanceof Error && error.message.includes("no such table")) {
17513
- return false;
17514
- }
17515
- throw error;
17516
- }
17164
+ chunkIndex(scope) {
17165
+ return this.client.index(chunkIndexName(scope));
17517
17166
  }
17518
- /**
17519
- * Read the current F32_BLOB dimension from the chunks table schema.
17520
- * Returns null if the table doesn't exist or the dimension can't be parsed.
17521
- */
17522
- async getChunksDimension() {
17523
- try {
17524
- const rs = await this.client.execute(
17525
- "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17526
- );
17527
- if (rs.rows.length === 0) return null;
17528
- const sql = rs.rows[0].sql;
17529
- const match = sql.match(/F32_BLOB\((\d+)\)/i);
17530
- return match ? parseInt(match[1], 10) : null;
17531
- } catch {
17532
- return null;
17533
- }
17167
+ pageIndex(scope) {
17168
+ return this.client.index(pageIndexName(scope));
17534
17169
  }
17535
- /**
17536
- * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17537
- * Used by `clean --remote` for a full reset.
17538
- */
17539
- async dropAllTables() {
17540
- await this.client.batch([
17541
- "DROP INDEX IF EXISTS idx",
17542
- "DROP TABLE IF EXISTS chunks",
17543
- "DROP TABLE IF EXISTS registry",
17544
- "DROP TABLE IF EXISTS pages"
17545
- ]);
17546
- this.chunksReady = false;
17547
- this.registryReady = false;
17548
- this.pagesReady = false;
17549
- }
17550
- async upsert(records, _scope) {
17551
- if (records.length === 0) return;
17552
- const dim = this.dimension ?? records[0].vector.length;
17553
- await this.ensureChunks(dim);
17170
+ async upsertChunks(chunks, scope) {
17171
+ if (chunks.length === 0) return;
17172
+ const index = this.chunkIndex(scope);
17554
17173
  const BATCH_SIZE = 100;
17555
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
17556
- const batch = records.slice(i, i + BATCH_SIZE);
17557
- const stmts = batch.map((r) => ({
17558
- sql: `INSERT OR REPLACE INTO chunks
17559
- (id, project_id, scope_name, url, path, title, section_title,
17560
- heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17561
- incoming_links, route_file, tags, description, keywords, embedding)
17562
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17563
- args: [
17564
- r.id,
17565
- r.metadata.projectId,
17566
- r.metadata.scopeName,
17567
- r.metadata.url,
17568
- r.metadata.path,
17569
- r.metadata.title,
17570
- r.metadata.sectionTitle,
17571
- JSON.stringify(r.metadata.headingPath),
17572
- r.metadata.snippet,
17573
- r.metadata.chunkText,
17574
- r.metadata.ordinal,
17575
- r.metadata.contentHash,
17576
- r.metadata.modelId,
17577
- r.metadata.depth,
17578
- r.metadata.incomingLinks,
17579
- r.metadata.routeFile,
17580
- JSON.stringify(r.metadata.tags),
17581
- r.metadata.description ?? "",
17582
- JSON.stringify(r.metadata.keywords ?? []),
17583
- JSON.stringify(r.vector)
17584
- ]
17585
- }));
17586
- await this.client.batch(stmts);
17174
+ for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17175
+ const batch = chunks.slice(i, i + BATCH_SIZE);
17176
+ await index.upsert(batch);
17587
17177
  }
17588
17178
  }
17589
- async query(queryVector, opts, scope) {
17590
- const dim = this.dimension ?? queryVector.length;
17591
- await this.ensureChunks(dim);
17592
- const queryJson = JSON.stringify(queryVector);
17593
- const rs = await this.client.execute({
17594
- sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17595
- c.section_title, c.heading_path, c.snippet, c.chunk_text,
17596
- c.ordinal, c.content_hash,
17597
- c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17598
- c.description, c.keywords,
17599
- vector_distance_cos(c.embedding, vector(?)) AS distance
17600
- FROM vector_top_k('idx', vector(?), ?) AS v
17601
- JOIN chunks AS c ON c.rowid = v.id`,
17602
- args: [queryJson, queryJson, opts.topK]
17179
+ async search(query, opts, scope) {
17180
+ const index = this.chunkIndex(scope);
17181
+ const results = await index.search({
17182
+ query,
17183
+ limit: opts.limit,
17184
+ semanticWeight: opts.semanticWeight,
17185
+ inputEnrichment: opts.inputEnrichment,
17186
+ reranking: opts.reranking,
17187
+ filter: opts.filter
17603
17188
  });
17604
- let hits = [];
17605
- for (const row of rs.rows) {
17606
- const projectId = row.project_id;
17607
- const scopeName = row.scope_name;
17608
- if (projectId !== scope.projectId || scopeName !== scope.scopeName) {
17609
- continue;
17610
- }
17611
- const rowPath = row.path;
17612
- if (opts.pathPrefix) {
17613
- const rawPrefix = opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}`;
17614
- const prefix = rawPrefix.endsWith("/") ? rawPrefix : `${rawPrefix}/`;
17615
- const normalizedPath = rowPath.replace(/\/$/, "");
17616
- const normalizedPrefix = rawPrefix.replace(/\/$/, "");
17617
- if (normalizedPath !== normalizedPrefix && !rowPath.startsWith(prefix)) {
17618
- continue;
17619
- }
17620
- }
17621
- const tags = JSON.parse(row.tags || "[]");
17622
- if (opts.tags && opts.tags.length > 0) {
17623
- if (!opts.tags.every((t) => tags.includes(t))) {
17624
- continue;
17625
- }
17189
+ return results.map((doc) => ({
17190
+ id: doc.id,
17191
+ score: doc.score,
17192
+ metadata: {
17193
+ projectId: doc.metadata?.projectId ?? "",
17194
+ scopeName: doc.metadata?.scopeName ?? "",
17195
+ url: doc.content.url,
17196
+ path: doc.metadata?.path ?? "",
17197
+ title: doc.content.title,
17198
+ sectionTitle: doc.content.sectionTitle,
17199
+ headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17200
+ snippet: doc.metadata?.snippet ?? "",
17201
+ chunkText: doc.content.text,
17202
+ ordinal: doc.metadata?.ordinal ?? 0,
17203
+ contentHash: doc.metadata?.contentHash ?? "",
17204
+ depth: doc.metadata?.depth ?? 0,
17205
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17206
+ routeFile: doc.metadata?.routeFile ?? "",
17207
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17208
+ description: doc.metadata?.description || void 0,
17209
+ keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17626
17210
  }
17627
- const distance = row.distance;
17628
- const score = 1 - distance;
17629
- const description = row.description || void 0;
17630
- const keywords = (() => {
17631
- const raw = row.keywords || "[]";
17632
- const parsed = JSON.parse(raw);
17633
- return parsed.length > 0 ? parsed : void 0;
17634
- })();
17635
- hits.push({
17636
- id: row.id,
17637
- score,
17638
- metadata: {
17639
- projectId,
17640
- scopeName,
17641
- url: row.url,
17642
- path: rowPath,
17643
- title: row.title,
17644
- sectionTitle: row.section_title,
17645
- headingPath: JSON.parse(row.heading_path || "[]"),
17646
- snippet: row.snippet,
17647
- chunkText: row.chunk_text || "",
17648
- ordinal: row.ordinal || 0,
17649
- contentHash: row.content_hash,
17650
- modelId: row.model_id,
17651
- depth: row.depth,
17652
- incomingLinks: row.incoming_links,
17653
- routeFile: row.route_file,
17654
- tags,
17655
- description,
17656
- keywords
17657
- }
17211
+ }));
17212
+ }
17213
+ async searchPages(query, opts, scope) {
17214
+ const index = this.pageIndex(scope);
17215
+ let results;
17216
+ try {
17217
+ results = await index.search({
17218
+ query,
17219
+ limit: opts.limit,
17220
+ semanticWeight: opts.semanticWeight,
17221
+ inputEnrichment: opts.inputEnrichment,
17222
+ reranking: true,
17223
+ filter: opts.filter
17658
17224
  });
17225
+ } catch {
17226
+ return [];
17659
17227
  }
17660
- hits.sort((a, b) => b.score - a.score);
17661
- return hits;
17228
+ return results.map((doc) => ({
17229
+ id: doc.id,
17230
+ score: doc.score,
17231
+ title: doc.content.title,
17232
+ url: doc.content.url,
17233
+ description: doc.content.description ?? "",
17234
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17235
+ depth: doc.metadata?.depth ?? 0,
17236
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17237
+ routeFile: doc.metadata?.routeFile ?? ""
17238
+ }));
17662
17239
  }
17663
17240
  async deleteByIds(ids, scope) {
17664
17241
  if (ids.length === 0) return;
17242
+ const index = this.chunkIndex(scope);
17665
17243
  const BATCH_SIZE = 500;
17666
17244
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17667
17245
  const batch = ids.slice(i, i + BATCH_SIZE);
17668
- const placeholders = batch.map(() => "?").join(", ");
17669
- await this.client.execute({
17670
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
17671
- args: [scope.projectId, scope.scopeName, ...batch]
17672
- });
17246
+ await index.delete(batch);
17673
17247
  }
17674
17248
  }
17675
17249
  async deleteScope(scope) {
17676
- await this.ensureRegistry();
17677
17250
  try {
17678
- await this.client.execute({
17679
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ?`,
17680
- args: [scope.projectId, scope.scopeName]
17681
- });
17682
- } catch (error) {
17683
- if (error instanceof Error && !error.message.includes("no such table")) {
17684
- throw error;
17685
- }
17251
+ const chunkIdx = this.chunkIndex(scope);
17252
+ await chunkIdx.deleteIndex();
17253
+ } catch {
17686
17254
  }
17687
17255
  try {
17688
- await this.client.execute({
17689
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17690
- args: [scope.projectId, scope.scopeName]
17691
- });
17692
- } catch (error) {
17693
- if (error instanceof Error && !error.message.includes("no such table")) {
17694
- throw error;
17695
- }
17256
+ const pageIdx = this.pageIndex(scope);
17257
+ await pageIdx.deleteIndex();
17258
+ } catch {
17696
17259
  }
17697
- await this.client.execute({
17698
- sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
17699
- args: [scope.projectId, scope.scopeName]
17700
- });
17701
- }
17702
- async listScopes(scopeProjectId) {
17703
- await this.ensureRegistry();
17704
- const rs = await this.client.execute({
17705
- sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
17706
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
17707
- FROM registry WHERE project_id = ?`,
17708
- args: [scopeProjectId]
17709
- });
17710
- return rs.rows.map((row) => ({
17711
- projectId: row.project_id,
17712
- scopeName: row.scope_name,
17713
- modelId: row.model_id,
17714
- lastIndexedAt: row.last_indexed_at,
17715
- vectorCount: row.vector_count,
17716
- lastEstimateTokens: row.last_estimate_tokens,
17717
- lastEstimateCostUSD: row.last_estimate_cost_usd,
17718
- lastEstimateChangedChunks: row.last_estimate_changed_chunks
17719
- }));
17720
17260
  }
17721
- async recordScope(info) {
17722
- await this.ensureRegistry();
17723
- const key = `${info.projectId}:${info.scopeName}`;
17724
- await this.client.execute({
17725
- sql: `INSERT OR REPLACE INTO registry
17726
- (scope_key, project_id, scope_name, model_id, last_indexed_at, vector_count,
17727
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks)
17728
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17729
- args: [
17730
- key,
17731
- info.projectId,
17732
- info.scopeName,
17733
- info.modelId,
17734
- info.lastIndexedAt,
17735
- info.vectorCount ?? null,
17736
- info.lastEstimateTokens ?? null,
17737
- info.lastEstimateCostUSD ?? null,
17738
- info.lastEstimateChangedChunks ?? null
17739
- ]
17740
- });
17261
+ async listScopes(projectId) {
17262
+ const allIndexes = await this.client.listIndexes();
17263
+ const prefix = `${projectId}--`;
17264
+ const scopeNames = /* @__PURE__ */ new Set();
17265
+ for (const name of allIndexes) {
17266
+ if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17267
+ const scopeName = name.slice(prefix.length);
17268
+ scopeNames.add(scopeName);
17269
+ }
17270
+ }
17271
+ const scopes = [];
17272
+ for (const scopeName of scopeNames) {
17273
+ const scope = {
17274
+ projectId,
17275
+ scopeName,
17276
+ scopeId: `${projectId}:${scopeName}`
17277
+ };
17278
+ try {
17279
+ const info = await this.chunkIndex(scope).info();
17280
+ scopes.push({
17281
+ projectId,
17282
+ scopeName,
17283
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17284
+ documentCount: info.documentCount
17285
+ });
17286
+ } catch {
17287
+ scopes.push({
17288
+ projectId,
17289
+ scopeName,
17290
+ lastIndexedAt: "unknown",
17291
+ documentCount: 0
17292
+ });
17293
+ }
17294
+ }
17295
+ return scopes;
17741
17296
  }
17742
17297
  async getContentHashes(scope) {
17743
- const exists = await this.chunksTableExists();
17744
- if (!exists) return /* @__PURE__ */ new Map();
17745
- const rs = await this.client.execute({
17746
- sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
17747
- args: [scope.projectId, scope.scopeName]
17748
- });
17749
17298
  const map = /* @__PURE__ */ new Map();
17750
- for (const row of rs.rows) {
17751
- map.set(row.id, row.content_hash);
17299
+ const index = this.chunkIndex(scope);
17300
+ let cursor = "0";
17301
+ try {
17302
+ for (; ; ) {
17303
+ const result = await index.range({ cursor, limit: 100 });
17304
+ for (const doc of result.documents) {
17305
+ if (doc.metadata?.contentHash) {
17306
+ map.set(doc.id, doc.metadata.contentHash);
17307
+ }
17308
+ }
17309
+ if (!result.nextCursor || result.nextCursor === "0") break;
17310
+ cursor = result.nextCursor;
17311
+ }
17312
+ } catch {
17752
17313
  }
17753
17314
  return map;
17754
17315
  }
17755
17316
  async upsertPages(pages, scope) {
17756
17317
  if (pages.length === 0) return;
17757
- await this.ensurePages();
17758
- for (const page of pages) {
17759
- if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
17760
- throw new Error(
17761
- `Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
17762
- );
17763
- }
17764
- }
17765
- const BATCH_SIZE = 100;
17318
+ const index = this.pageIndex(scope);
17319
+ const BATCH_SIZE = 50;
17766
17320
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17767
17321
  const batch = pages.slice(i, i + BATCH_SIZE);
17768
- const stmts = batch.map((p) => ({
17769
- sql: `INSERT OR REPLACE INTO pages
17770
- (project_id, scope_name, url, title, markdown, route_file,
17771
- route_resolution, incoming_links, outgoing_links, depth, tags, indexed_at)
17772
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17773
- args: [
17774
- p.projectId,
17775
- p.scopeName,
17776
- p.url,
17777
- p.title,
17778
- p.markdown,
17779
- p.routeFile,
17780
- p.routeResolution,
17781
- p.incomingLinks,
17782
- p.outgoingLinks,
17783
- p.depth,
17784
- JSON.stringify(p.tags),
17785
- p.indexedAt
17786
- ]
17322
+ const docs = batch.map((p) => ({
17323
+ id: p.url,
17324
+ content: {
17325
+ title: p.title,
17326
+ url: p.url,
17327
+ type: "page",
17328
+ description: p.description ?? "",
17329
+ keywords: (p.keywords ?? []).join(","),
17330
+ summary: p.summary ?? "",
17331
+ tags: p.tags.join(",")
17332
+ },
17333
+ metadata: {
17334
+ markdown: p.markdown,
17335
+ projectId: p.projectId,
17336
+ scopeName: p.scopeName,
17337
+ routeFile: p.routeFile,
17338
+ routeResolution: p.routeResolution,
17339
+ incomingLinks: p.incomingLinks,
17340
+ outgoingLinks: p.outgoingLinks,
17341
+ depth: p.depth,
17342
+ indexedAt: p.indexedAt
17343
+ }
17787
17344
  }));
17788
- await this.client.batch(stmts);
17345
+ await index.upsert(docs);
17789
17346
  }
17790
17347
  }
17791
17348
  async getPage(url, scope) {
17792
- await this.ensurePages();
17793
- const rs = await this.client.execute({
17794
- sql: `SELECT * FROM pages WHERE project_id = ? AND scope_name = ? AND url = ?`,
17795
- args: [scope.projectId, scope.scopeName, url]
17796
- });
17797
- if (rs.rows.length === 0) return null;
17798
- const row = rs.rows[0];
17799
- return {
17800
- url: row.url,
17801
- title: row.title,
17802
- markdown: row.markdown,
17803
- projectId: row.project_id,
17804
- scopeName: row.scope_name,
17805
- routeFile: row.route_file,
17806
- routeResolution: row.route_resolution,
17807
- incomingLinks: row.incoming_links,
17808
- outgoingLinks: row.outgoing_links,
17809
- depth: row.depth,
17810
- tags: JSON.parse(row.tags || "[]"),
17811
- indexedAt: row.indexed_at
17812
- };
17349
+ const index = this.pageIndex(scope);
17350
+ try {
17351
+ const results = await index.fetch([url]);
17352
+ const doc = results[0];
17353
+ if (!doc) return null;
17354
+ return {
17355
+ url: doc.content.url,
17356
+ title: doc.content.title,
17357
+ markdown: doc.metadata.markdown,
17358
+ projectId: doc.metadata.projectId,
17359
+ scopeName: doc.metadata.scopeName,
17360
+ routeFile: doc.metadata.routeFile,
17361
+ routeResolution: doc.metadata.routeResolution,
17362
+ incomingLinks: doc.metadata.incomingLinks,
17363
+ outgoingLinks: doc.metadata.outgoingLinks,
17364
+ depth: doc.metadata.depth,
17365
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17366
+ indexedAt: doc.metadata.indexedAt,
17367
+ summary: doc.content.summary || void 0,
17368
+ description: doc.content.description || void 0,
17369
+ keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17370
+ };
17371
+ } catch {
17372
+ return null;
17373
+ }
17813
17374
  }
17814
17375
  async deletePages(scope) {
17815
- await this.ensurePages();
17816
- await this.client.execute({
17817
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17818
- args: [scope.projectId, scope.scopeName]
17819
- });
17820
- }
17821
- async getScopeModelId(scope) {
17822
- await this.ensureRegistry();
17823
- const rs = await this.client.execute({
17824
- sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
17825
- args: [scope.projectId, scope.scopeName]
17826
- });
17827
- if (rs.rows.length === 0) return null;
17828
- return rs.rows[0].model_id;
17376
+ try {
17377
+ const index = this.pageIndex(scope);
17378
+ await index.reset();
17379
+ } catch {
17380
+ }
17829
17381
  }
17830
17382
  async health() {
17831
17383
  try {
17832
- await this.client.execute("SELECT 1");
17384
+ await this.client.info();
17833
17385
  return { ok: true };
17834
17386
  } catch (error) {
17835
17387
  return {
@@ -17838,40 +17390,34 @@ var TursoVectorStore = class {
17838
17390
  };
17839
17391
  }
17840
17392
  }
17393
+ async dropAllIndexes(projectId) {
17394
+ const allIndexes = await this.client.listIndexes();
17395
+ const prefix = `${projectId}--`;
17396
+ for (const name of allIndexes) {
17397
+ if (name.startsWith(prefix)) {
17398
+ try {
17399
+ const index = this.client.index(name);
17400
+ await index.deleteIndex();
17401
+ } catch {
17402
+ }
17403
+ }
17404
+ }
17405
+ }
17841
17406
  };
17842
17407
 
17843
17408
  // src/vector/factory.ts
17844
- async function createVectorStore(config, cwd) {
17845
- const turso = config.vector.turso;
17846
- const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17847
- if (remoteUrl) {
17848
- const { createClient: createClient2 } = await import('@libsql/client/http');
17849
- const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17850
- const client2 = createClient2({
17851
- url: remoteUrl,
17852
- authToken
17853
- });
17854
- return new TursoVectorStore({
17855
- client: client2,
17856
- dimension: config.vector.dimension
17857
- });
17858
- }
17859
- if (isServerless()) {
17409
+ async function createUpstashStore(config) {
17410
+ const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
17411
+ const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
17412
+ if (!url || !token) {
17860
17413
  throw new SearchSocketError(
17861
17414
  "VECTOR_BACKEND_UNAVAILABLE",
17862
- `No remote vector database URL found (checked vector.turso.url and env var "${turso.urlEnv}"). Local SQLite storage is not available in serverless environments. Set ${turso.urlEnv} or pass vector.turso.url directly.`
17415
+ `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
17863
17416
  );
17864
17417
  }
17865
- const { createClient } = await import('@libsql/client');
17866
- const localPath = path__default.default.resolve(cwd, turso.localPath);
17867
- fs__default.default.mkdirSync(path__default.default.dirname(localPath), { recursive: true });
17868
- const client = createClient({
17869
- url: `file:${localPath}`
17870
- });
17871
- return new TursoVectorStore({
17872
- client,
17873
- dimension: config.vector.dimension
17874
- });
17418
+ const { Search } = await import('@upstash/search');
17419
+ const client = new Search({ url, token });
17420
+ return new UpstashSearchStore({ client });
17875
17421
  }
17876
17422
 
17877
17423
  // src/utils/pattern.ts
@@ -17911,7 +17457,12 @@ function nonNegativeOrZero(value) {
17911
17457
  }
17912
17458
  return Math.max(0, value);
17913
17459
  }
17914
- function rankHits(hits, config) {
17460
+ function normalizeForTitleMatch(text) {
17461
+ return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
17462
+ }
17463
+ function rankHits(hits, config, query) {
17464
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
17465
+ const titleMatchWeight = config.ranking.weights.titleMatch;
17915
17466
  return hits.map((hit) => {
17916
17467
  let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
17917
17468
  if (config.ranking.enableIncomingLinkBoost) {
@@ -17922,6 +17473,12 @@ function rankHits(hits, config) {
17922
17473
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
17923
17474
  score += depthBoost * config.ranking.weights.depth;
17924
17475
  }
17476
+ if (normalizedQuery && titleMatchWeight > 0) {
17477
+ const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
17478
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
17479
+ score += titleMatchWeight;
17480
+ }
17481
+ }
17925
17482
  return {
17926
17483
  hit,
17927
17484
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
@@ -17931,6 +17488,30 @@ function rankHits(hits, config) {
17931
17488
  return Number.isNaN(delta) ? 0 : delta;
17932
17489
  });
17933
17490
  }
17491
+ function trimByScoreGap(results, config) {
17492
+ if (results.length === 0) return results;
17493
+ const threshold = config.ranking.scoreGapThreshold;
17494
+ const minScore = config.ranking.minScore;
17495
+ if (minScore > 0 && results.length > 0) {
17496
+ const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
17497
+ const mid = Math.floor(sortedScores.length / 2);
17498
+ const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
17499
+ if (median < minScore) return [];
17500
+ }
17501
+ if (threshold > 0 && results.length > 1) {
17502
+ for (let i = 1; i < results.length; i++) {
17503
+ const prev = results[i - 1].pageScore;
17504
+ const current = results[i].pageScore;
17505
+ if (prev > 0) {
17506
+ const gap = (prev - current) / prev;
17507
+ if (gap >= threshold) {
17508
+ return results.slice(0, i);
17509
+ }
17510
+ }
17511
+ }
17512
+ }
17513
+ return results;
17514
+ }
17934
17515
  function findPageWeight(url, pageWeights) {
17935
17516
  let bestPattern = "";
17936
17517
  let bestWeight = 1;
@@ -17985,6 +17566,61 @@ function aggregateByPage(ranked, config) {
17985
17566
  return Number.isNaN(delta) ? 0 : delta;
17986
17567
  });
17987
17568
  }
17569
+ function mergePageAndChunkResults(pageHits, rankedChunks, config) {
17570
+ if (pageHits.length === 0) return rankedChunks;
17571
+ const w = config.search.pageSearchWeight;
17572
+ const pageScoreMap = /* @__PURE__ */ new Map();
17573
+ for (const ph of pageHits) {
17574
+ pageScoreMap.set(ph.url, ph);
17575
+ }
17576
+ const pagesWithChunks = /* @__PURE__ */ new Set();
17577
+ const merged = rankedChunks.map((ranked) => {
17578
+ const url = ranked.hit.metadata.url;
17579
+ const pageHit = pageScoreMap.get(url);
17580
+ if (pageHit) {
17581
+ pagesWithChunks.add(url);
17582
+ const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
17583
+ return {
17584
+ hit: ranked.hit,
17585
+ finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
17586
+ };
17587
+ }
17588
+ return ranked;
17589
+ });
17590
+ for (const [url, pageHit] of pageScoreMap) {
17591
+ if (pagesWithChunks.has(url)) continue;
17592
+ const syntheticScore = pageHit.score * w;
17593
+ const syntheticHit = {
17594
+ id: `page:${url}`,
17595
+ score: pageHit.score,
17596
+ metadata: {
17597
+ projectId: "",
17598
+ scopeName: "",
17599
+ url: pageHit.url,
17600
+ path: pageHit.url,
17601
+ title: pageHit.title,
17602
+ sectionTitle: "",
17603
+ headingPath: [],
17604
+ snippet: pageHit.description || pageHit.title,
17605
+ chunkText: pageHit.description || pageHit.title,
17606
+ ordinal: 0,
17607
+ contentHash: "",
17608
+ depth: pageHit.depth,
17609
+ incomingLinks: pageHit.incomingLinks,
17610
+ routeFile: pageHit.routeFile,
17611
+ tags: pageHit.tags
17612
+ }
17613
+ };
17614
+ merged.push({
17615
+ hit: syntheticHit,
17616
+ finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
17617
+ });
17618
+ }
17619
+ return merged.sort((a, b) => {
17620
+ const delta = b.finalScore - a.finalScore;
17621
+ return Number.isNaN(delta) ? 0 : delta;
17622
+ });
17623
+ }
17988
17624
 
17989
17625
  // src/search/engine.ts
17990
17626
  var requestSchema = zod.z.object({
@@ -17993,35 +17629,25 @@ var requestSchema = zod.z.object({
17993
17629
  scope: zod.z.string().optional(),
17994
17630
  pathPrefix: zod.z.string().optional(),
17995
17631
  tags: zod.z.array(zod.z.string()).optional(),
17996
- rerank: zod.z.boolean().optional(),
17997
- groupBy: zod.z.enum(["page", "chunk"]).optional(),
17998
- stream: zod.z.boolean().optional()
17632
+ groupBy: zod.z.enum(["page", "chunk"]).optional()
17999
17633
  });
18000
17634
  var SearchEngine = class _SearchEngine {
18001
17635
  cwd;
18002
17636
  config;
18003
- embeddings;
18004
- vectorStore;
18005
- reranker;
17637
+ store;
18006
17638
  constructor(options) {
18007
17639
  this.cwd = options.cwd;
18008
17640
  this.config = options.config;
18009
- this.embeddings = options.embeddings;
18010
- this.vectorStore = options.vectorStore;
18011
- this.reranker = options.reranker;
17641
+ this.store = options.store;
18012
17642
  }
18013
17643
  static async create(options = {}) {
18014
17644
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
18015
17645
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
18016
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
18017
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
18018
- const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
17646
+ const store = options.store ?? await createUpstashStore(config);
18019
17647
  return new _SearchEngine({
18020
17648
  cwd,
18021
17649
  config,
18022
- embeddings,
18023
- vectorStore,
18024
- reranker
17650
+ store
18025
17651
  });
18026
17652
  }
18027
17653
  getConfig() {
@@ -18035,142 +17661,90 @@ var SearchEngine = class _SearchEngine {
18035
17661
  const input = parsed.data;
18036
17662
  const totalStart = process.hrtime.bigint();
18037
17663
  const resolvedScope = resolveScope(this.config, input.scope);
18038
- await this.assertModelCompatibility(resolvedScope);
18039
17664
  const topK = input.topK ?? 10;
18040
- const wantsRerank = Boolean(input.rerank);
18041
17665
  const groupByPage = (input.groupBy ?? "page") === "page";
18042
17666
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
18043
- const embedStart = process.hrtime.bigint();
18044
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
18045
- const queryVector = queryEmbeddings[0];
18046
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
18047
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
18048
- }
18049
- const embedMs = hrTimeMs(embedStart);
18050
- const vectorStart = process.hrtime.bigint();
18051
- const hits = await this.vectorStore.query(
18052
- queryVector,
18053
- {
18054
- topK: candidateK,
18055
- pathPrefix: input.pathPrefix,
18056
- tags: input.tags
18057
- },
18058
- resolvedScope
18059
- );
18060
- const vectorMs = hrTimeMs(vectorStart);
18061
- const ranked = rankHits(hits, this.config);
18062
- let usedRerank = false;
18063
- let rerankMs = 0;
18064
- let ordered = ranked;
18065
- if (wantsRerank) {
18066
- const rerankStart = process.hrtime.bigint();
18067
- ordered = await this.rerankHits(input.q, ranked, topK);
18068
- rerankMs = hrTimeMs(rerankStart);
18069
- usedRerank = true;
17667
+ const filterParts = [];
17668
+ if (input.pathPrefix) {
17669
+ const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
17670
+ filterParts.push(`url GLOB '${prefix}*'`);
17671
+ }
17672
+ if (input.tags && input.tags.length > 0) {
17673
+ for (const tag of input.tags) {
17674
+ filterParts.push(`tags GLOB '*${tag}*'`);
17675
+ }
17676
+ }
17677
+ const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
17678
+ const useDualSearch = this.config.search.dualSearch && groupByPage;
17679
+ const searchStart = process.hrtime.bigint();
17680
+ let ranked;
17681
+ if (useDualSearch) {
17682
+ const chunkLimit = Math.max(topK * 10, 100);
17683
+ const pageLimit = 20;
17684
+ const [pageHits, chunkHits] = await Promise.all([
17685
+ this.store.searchPages(
17686
+ input.q,
17687
+ {
17688
+ limit: pageLimit,
17689
+ semanticWeight: this.config.search.semanticWeight,
17690
+ inputEnrichment: this.config.search.inputEnrichment,
17691
+ filter
17692
+ },
17693
+ resolvedScope
17694
+ ),
17695
+ this.store.search(
17696
+ input.q,
17697
+ {
17698
+ limit: chunkLimit,
17699
+ semanticWeight: this.config.search.semanticWeight,
17700
+ inputEnrichment: this.config.search.inputEnrichment,
17701
+ reranking: false,
17702
+ filter
17703
+ },
17704
+ resolvedScope
17705
+ )
17706
+ ]);
17707
+ const rankedChunks = rankHits(chunkHits, this.config, input.q);
17708
+ ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
17709
+ } else {
17710
+ const hits = await this.store.search(
17711
+ input.q,
17712
+ {
17713
+ limit: candidateK,
17714
+ semanticWeight: this.config.search.semanticWeight,
17715
+ inputEnrichment: this.config.search.inputEnrichment,
17716
+ reranking: this.config.search.reranking,
17717
+ filter
17718
+ },
17719
+ resolvedScope
17720
+ );
17721
+ ranked = rankHits(hits, this.config, input.q);
18070
17722
  }
18071
- const results = this.buildResults(ordered, topK, groupByPage);
17723
+ const searchMs = hrTimeMs(searchStart);
17724
+ const results = this.buildResults(ranked, topK, groupByPage, input.q);
18072
17725
  return {
18073
17726
  q: input.q,
18074
17727
  scope: resolvedScope.scopeName,
18075
17728
  results,
18076
17729
  meta: {
18077
17730
  timingsMs: {
18078
- embed: Math.round(embedMs),
18079
- vector: Math.round(vectorMs),
18080
- rerank: Math.round(rerankMs),
17731
+ search: Math.round(searchMs),
18081
17732
  total: Math.round(hrTimeMs(totalStart))
18082
- },
18083
- usedRerank,
18084
- modelId: this.config.embeddings.model
18085
- }
18086
- };
18087
- }
18088
- async *searchStreaming(request) {
18089
- const parsed = requestSchema.safeParse(request);
18090
- if (!parsed.success) {
18091
- throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
18092
- }
18093
- const input = parsed.data;
18094
- const wantsRerank = Boolean(input.rerank);
18095
- if (!wantsRerank) {
18096
- const response = await this.search(request);
18097
- yield { phase: "initial", data: response };
18098
- return;
18099
- }
18100
- const totalStart = process.hrtime.bigint();
18101
- const resolvedScope = resolveScope(this.config, input.scope);
18102
- await this.assertModelCompatibility(resolvedScope);
18103
- const topK = input.topK ?? 10;
18104
- const groupByPage = (input.groupBy ?? "page") === "page";
18105
- const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
18106
- const embedStart = process.hrtime.bigint();
18107
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
18108
- const queryVector = queryEmbeddings[0];
18109
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
18110
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
18111
- }
18112
- const embedMs = hrTimeMs(embedStart);
18113
- const vectorStart = process.hrtime.bigint();
18114
- const hits = await this.vectorStore.query(
18115
- queryVector,
18116
- {
18117
- topK: candidateK,
18118
- pathPrefix: input.pathPrefix,
18119
- tags: input.tags
18120
- },
18121
- resolvedScope
18122
- );
18123
- const vectorMs = hrTimeMs(vectorStart);
18124
- const ranked = rankHits(hits, this.config);
18125
- const initialResults = this.buildResults(ranked, topK, groupByPage);
18126
- yield {
18127
- phase: "initial",
18128
- data: {
18129
- q: input.q,
18130
- scope: resolvedScope.scopeName,
18131
- results: initialResults,
18132
- meta: {
18133
- timingsMs: {
18134
- embed: Math.round(embedMs),
18135
- vector: Math.round(vectorMs),
18136
- rerank: 0,
18137
- total: Math.round(hrTimeMs(totalStart))
18138
- },
18139
- usedRerank: false,
18140
- modelId: this.config.embeddings.model
18141
- }
18142
- }
18143
- };
18144
- const rerankStart = process.hrtime.bigint();
18145
- const reranked = await this.rerankHits(input.q, ranked, topK);
18146
- const rerankMs = hrTimeMs(rerankStart);
18147
- const rerankedResults = this.buildResults(reranked, topK, groupByPage);
18148
- yield {
18149
- phase: "reranked",
18150
- data: {
18151
- q: input.q,
18152
- scope: resolvedScope.scopeName,
18153
- results: rerankedResults,
18154
- meta: {
18155
- timingsMs: {
18156
- embed: Math.round(embedMs),
18157
- vector: Math.round(vectorMs),
18158
- rerank: Math.round(rerankMs),
18159
- total: Math.round(hrTimeMs(totalStart))
18160
- },
18161
- usedRerank: true,
18162
- modelId: this.config.embeddings.model
18163
17733
  }
18164
17734
  }
18165
17735
  };
18166
17736
  }
18167
- buildResults(ordered, topK, groupByPage) {
18168
- const minScore = this.config.ranking.minScore;
17737
+ ensureSnippet(hit) {
17738
+ const snippet = hit.hit.metadata.snippet;
17739
+ if (snippet && snippet.length >= 30) return snippet;
17740
+ const chunkText = hit.hit.metadata.chunkText;
17741
+ if (chunkText) return toSnippet(chunkText);
17742
+ return snippet || "";
17743
+ }
17744
+ buildResults(ordered, topK, groupByPage, _query) {
18169
17745
  if (groupByPage) {
18170
17746
  let pages = aggregateByPage(ordered, this.config);
18171
- if (minScore > 0) {
18172
- pages = pages.filter((p) => p.pageScore >= minScore);
18173
- }
17747
+ pages = trimByScoreGap(pages, this.config);
18174
17748
  const minRatio = this.config.ranking.minChunkScoreRatio;
18175
17749
  return pages.slice(0, topK).map((page) => {
18176
17750
  const bestScore = page.bestChunk.finalScore;
@@ -18180,12 +17754,12 @@ var SearchEngine = class _SearchEngine {
18180
17754
  url: page.url,
18181
17755
  title: page.title,
18182
17756
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
18183
- snippet: page.bestChunk.hit.metadata.snippet,
17757
+ snippet: this.ensureSnippet(page.bestChunk),
18184
17758
  score: Number(page.pageScore.toFixed(6)),
18185
17759
  routeFile: page.routeFile,
18186
17760
  chunks: meaningful.length > 1 ? meaningful.map((c) => ({
18187
17761
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
18188
- snippet: c.hit.metadata.snippet,
17762
+ snippet: this.ensureSnippet(c),
18189
17763
  headingPath: c.hit.metadata.headingPath,
18190
17764
  score: Number(c.finalScore.toFixed(6))
18191
17765
  })) : void 0
@@ -18193,6 +17767,7 @@ var SearchEngine = class _SearchEngine {
18193
17767
  });
18194
17768
  } else {
18195
17769
  let filtered = ordered;
17770
+ const minScore = this.config.ranking.minScore;
18196
17771
  if (minScore > 0) {
18197
17772
  filtered = ordered.filter((entry) => entry.finalScore >= minScore);
18198
17773
  }
@@ -18200,7 +17775,7 @@ var SearchEngine = class _SearchEngine {
18200
17775
  url: hit.metadata.url,
18201
17776
  title: hit.metadata.title,
18202
17777
  sectionTitle: hit.metadata.sectionTitle || void 0,
18203
- snippet: hit.metadata.snippet,
17778
+ snippet: this.ensureSnippet({ hit, finalScore }),
18204
17779
  score: Number(finalScore.toFixed(6)),
18205
17780
  routeFile: hit.metadata.routeFile
18206
17781
  }));
@@ -18209,7 +17784,7 @@ var SearchEngine = class _SearchEngine {
18209
17784
  async getPage(pathOrUrl, scope) {
18210
17785
  const resolvedScope = resolveScope(this.config, scope);
18211
17786
  const urlPath = this.resolveInputPath(pathOrUrl);
18212
- const page = await this.vectorStore.getPage(urlPath, resolvedScope);
17787
+ const page = await this.store.getPage(urlPath, resolvedScope);
18213
17788
  if (!page) {
18214
17789
  throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
18215
17790
  }
@@ -18230,7 +17805,7 @@ var SearchEngine = class _SearchEngine {
18230
17805
  };
18231
17806
  }
18232
17807
  async health() {
18233
- return this.vectorStore.health();
17808
+ return this.store.health();
18234
17809
  }
18235
17810
  resolveInputPath(pathOrUrl) {
18236
17811
  try {
@@ -18242,90 +17817,6 @@ var SearchEngine = class _SearchEngine {
18242
17817
  const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
18243
17818
  return normalizeUrlPath(withoutQueryOrHash);
18244
17819
  }
18245
- async assertModelCompatibility(scope) {
18246
- const modelId = await this.vectorStore.getScopeModelId(scope);
18247
- if (modelId && modelId !== this.config.embeddings.model) {
18248
- throw new SearchSocketError(
18249
- "EMBEDDING_MODEL_MISMATCH",
18250
- `Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
18251
- );
18252
- }
18253
- }
18254
- async rerankHits(query, ranked, topK) {
18255
- if (!this.config.rerank.enabled) {
18256
- throw new SearchSocketError(
18257
- "INVALID_REQUEST",
18258
- "rerank=true requested but rerank.enabled is not set to true.",
18259
- 400
18260
- );
18261
- }
18262
- if (!this.reranker) {
18263
- throw new SearchSocketError(
18264
- "CONFIG_MISSING",
18265
- `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
18266
- 400
18267
- );
18268
- }
18269
- const pageGroups = /* @__PURE__ */ new Map();
18270
- for (const entry of ranked) {
18271
- const url = entry.hit.metadata.url;
18272
- const group = pageGroups.get(url);
18273
- if (group) group.push(entry);
18274
- else pageGroups.set(url, [entry]);
18275
- }
18276
- const MAX_CHUNKS_PER_PAGE = 5;
18277
- const MIN_CHUNKS_PER_PAGE = 1;
18278
- const MIN_CHUNK_SCORE_RATIO = 0.5;
18279
- const MAX_DOC_CHARS = 2e3;
18280
- const pageCandidates = [];
18281
- for (const [url, chunks] of pageGroups) {
18282
- const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18283
- const bestScore = byScore[0].finalScore;
18284
- const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18285
- const selected = byScore.filter(
18286
- (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18287
- ).slice(0, MAX_CHUNKS_PER_PAGE);
18288
- selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18289
- const first = selected[0].hit.metadata;
18290
- const parts = [first.title];
18291
- if (first.description) {
18292
- parts.push(first.description);
18293
- }
18294
- if (first.keywords && first.keywords.length > 0) {
18295
- parts.push(first.keywords.join(", "));
18296
- }
18297
- const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18298
- parts.push(body);
18299
- let text = parts.join("\n\n");
18300
- if (text.length > MAX_DOC_CHARS) {
18301
- text = text.slice(0, MAX_DOC_CHARS);
18302
- }
18303
- pageCandidates.push({ id: url, text });
18304
- }
18305
- const maxCandidates = Math.max(topK, this.config.rerank.topN);
18306
- const cappedCandidates = pageCandidates.slice(0, maxCandidates);
18307
- const reranked = await this.reranker.rerank(
18308
- query,
18309
- cappedCandidates,
18310
- maxCandidates
18311
- );
18312
- const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
18313
- return ranked.map((entry) => {
18314
- const pageScore = scoreByUrl.get(entry.hit.metadata.url);
18315
- const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
18316
- if (pageScore === void 0 || !Number.isFinite(pageScore)) {
18317
- return { ...entry, finalScore: base };
18318
- }
18319
- const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
18320
- return {
18321
- ...entry,
18322
- finalScore: Number.isFinite(combined) ? combined : base
18323
- };
18324
- }).sort((a, b) => {
18325
- const delta = b.finalScore - a.finalScore;
18326
- return Number.isNaN(delta) ? 0 : delta;
18327
- });
18328
- }
18329
17820
  };
18330
17821
 
18331
17822
  // src/sveltekit/handle.ts
@@ -18480,42 +17971,6 @@ function searchsocketHandle(options = {}) {
18480
17971
  }
18481
17972
  const engine = await getEngine();
18482
17973
  const searchRequest = body;
18483
- if (searchRequest.stream && searchRequest.rerank) {
18484
- const encoder = new TextEncoder();
18485
- const stream = new ReadableStream({
18486
- async start(controller) {
18487
- try {
18488
- for await (const event2 of engine.searchStreaming(searchRequest)) {
18489
- const line = JSON.stringify(event2) + "\n";
18490
- controller.enqueue(encoder.encode(line));
18491
- }
18492
- } catch (streamError) {
18493
- const errorEvent = {
18494
- phase: "error",
18495
- data: {
18496
- error: {
18497
- code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
18498
- message: streamError instanceof Error ? streamError.message : "Unknown error"
18499
- }
18500
- }
18501
- };
18502
- controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
18503
- } finally {
18504
- controller.close();
18505
- }
18506
- }
18507
- });
18508
- return withCors(
18509
- new Response(stream, {
18510
- status: 200,
18511
- headers: {
18512
- "content-type": "application/x-ndjson"
18513
- }
18514
- }),
18515
- event.request,
18516
- config
18517
- );
18518
- }
18519
17974
  const result = await engine.search(searchRequest);
18520
17975
  return withCors(
18521
17976
  new Response(JSON.stringify(result), {
@@ -18576,9 +18031,8 @@ function withCors(response, request, config) {
18576
18031
  }
18577
18032
  function ensureStateDirs(cwd, stateDir, scope) {
18578
18033
  const statePath = path__default.default.resolve(cwd, stateDir);
18579
- const pagesPath = path__default.default.join(statePath, "pages", scope.scopeName);
18580
- fs__default.default.mkdirSync(pagesPath, { recursive: true });
18581
- return { statePath, pagesPath };
18034
+ fs__default.default.mkdirSync(statePath, { recursive: true });
18035
+ return { statePath };
18582
18036
  }
18583
18037
  function sha1(input) {
18584
18038
  return crypto.createHash("sha1").update(input).digest("hex");
@@ -18828,7 +18282,7 @@ function buildEmbeddingText(chunk, prependTitle) {
18828
18282
 
18829
18283
  ${chunk.chunkText}`;
18830
18284
  }
18831
- function chunkMirrorPage(page, config, scope) {
18285
+ function chunkPage(page, config, scope) {
18832
18286
  const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
18833
18287
  const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
18834
18288
  const chunks = [];
@@ -19859,53 +19313,6 @@ function extractFromMarkdown(url, markdown, title) {
19859
19313
  weight: mdWeight
19860
19314
  };
19861
19315
  }
19862
- function yamlString(value) {
19863
- return JSON.stringify(value);
19864
- }
19865
- function yamlArray(values) {
19866
- return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
19867
- }
19868
- function buildMirrorMarkdown(page) {
19869
- const frontmatterLines = [
19870
- "---",
19871
- `url: ${yamlString(page.url)}`,
19872
- `title: ${yamlString(page.title)}`,
19873
- `scope: ${yamlString(page.scope)}`,
19874
- `routeFile: ${yamlString(page.routeFile)}`,
19875
- `routeResolution: ${yamlString(page.routeResolution)}`,
19876
- `generatedAt: ${yamlString(page.generatedAt)}`,
19877
- `incomingLinks: ${page.incomingLinks}`,
19878
- `outgoingLinks: ${page.outgoingLinks}`,
19879
- `depth: ${page.depth}`,
19880
- `tags: ${yamlArray(page.tags)}`,
19881
- "---",
19882
- ""
19883
- ];
19884
- return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
19885
- }
19886
- function stripGeneratedAt(content) {
19887
- return content.replace(/^generatedAt: .*$/m, "");
19888
- }
19889
- async function writeMirrorPage(statePath, scope, page) {
19890
- const relative = urlPathToMirrorRelative(page.url);
19891
- const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
19892
- await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
19893
- const newContent = buildMirrorMarkdown(page);
19894
- try {
19895
- const existing = await fs4__default.default.readFile(outputPath, "utf8");
19896
- if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
19897
- return outputPath;
19898
- }
19899
- } catch {
19900
- }
19901
- await fs4__default.default.writeFile(outputPath, newContent, "utf8");
19902
- return outputPath;
19903
- }
19904
- async function cleanMirrorForScope(statePath, scope) {
19905
- const target = path__default.default.join(statePath, "pages", scope.scopeName);
19906
- await fs4__default.default.rm(target, { recursive: true, force: true });
19907
- await fs4__default.default.mkdir(target, { recursive: true });
19908
- }
19909
19316
  function segmentToRegex(segment) {
19910
19317
  if (segment.startsWith("(") && segment.endsWith(")")) {
19911
19318
  return { regex: "", score: 0 };
@@ -20066,7 +19473,7 @@ async function parseManifest(cwd, outputDir) {
20066
19473
  const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
20067
19474
  let content;
20068
19475
  try {
20069
- content = await fs4__default.default.readFile(manifestPath, "utf8");
19476
+ content = await fs3__default.default.readFile(manifestPath, "utf8");
20070
19477
  } catch {
20071
19478
  throw new SearchSocketError(
20072
19479
  "BUILD_MANIFEST_NOT_FOUND",
@@ -20239,7 +19646,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
20239
19646
  const visited = /* @__PURE__ */ new Set();
20240
19647
  const pages = [];
20241
19648
  const queue = [];
20242
- const limit = pLimit2__default.default(8);
19649
+ const limit = pLimit__default.default(8);
20243
19650
  for (const seed of seedUrls) {
20244
19651
  const normalized = normalizeUrlPath(seed);
20245
19652
  if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
@@ -20321,7 +19728,7 @@ async function loadBuildPages(cwd, config, maxPages) {
20321
19728
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
20322
19729
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
20323
19730
  try {
20324
- const concurrencyLimit = pLimit2__default.default(8);
19731
+ const concurrencyLimit = pLimit__default.default(8);
20325
19732
  const results = await Promise.allSettled(
20326
19733
  selected.map(
20327
19734
  (route) => concurrencyLimit(async () => {
@@ -20395,7 +19802,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
20395
19802
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
20396
19803
  const pages = [];
20397
19804
  for (const filePath of selected) {
20398
- const raw = await fs4__default.default.readFile(filePath, "utf8");
19805
+ const raw = await fs3__default.default.readFile(filePath, "utf8");
20399
19806
  const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
20400
19807
  pages.push({
20401
19808
  url: filePathToUrl(filePath, baseDir),
@@ -20490,7 +19897,7 @@ async function loadCrawledPages(config, maxPages) {
20490
19897
  const routes = await resolveRoutes(config);
20491
19898
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
20492
19899
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
20493
- const concurrencyLimit = pLimit2__default.default(8);
19900
+ const concurrencyLimit = pLimit__default.default(8);
20494
19901
  const results = await Promise.allSettled(
20495
19902
  selected.map(
20496
19903
  (route) => concurrencyLimit(async () => {
@@ -20531,7 +19938,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
20531
19938
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
20532
19939
  const pages = [];
20533
19940
  for (const filePath of selected) {
20534
- const html = await fs4__default.default.readFile(filePath, "utf8");
19941
+ const html = await fs3__default.default.readFile(filePath, "utf8");
20535
19942
  pages.push({
20536
19943
  url: staticHtmlFileToUrl(filePath, outputDir),
20537
19944
  html,
@@ -20594,7 +20001,7 @@ function isBlockedByRobots(urlPath, rules3) {
20594
20001
  }
20595
20002
  async function loadRobotsTxtFromDir(dir) {
20596
20003
  try {
20597
- const content = await fs4__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
20004
+ const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
20598
20005
  return parseRobotsTxt(content);
20599
20006
  } catch {
20600
20007
  return null;
@@ -20613,34 +20020,41 @@ async function fetchRobotsTxt(baseUrl) {
20613
20020
  }
20614
20021
 
20615
20022
  // src/indexing/pipeline.ts
20616
- var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
20617
- "jina-embeddings-v3": 2e-5,
20618
- "jina-embeddings-v5-text-small": 5e-5
20619
- };
20620
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 5e-5;
20023
+ function buildPageSummary(page, maxChars = 3500) {
20024
+ const parts = [page.title];
20025
+ if (page.description) {
20026
+ parts.push(page.description);
20027
+ }
20028
+ if (page.keywords && page.keywords.length > 0) {
20029
+ parts.push(page.keywords.join(", "));
20030
+ }
20031
+ const plainBody = page.markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/[>*_|~\-]/g, " ").replace(/\s+/g, " ").trim();
20032
+ if (plainBody) {
20033
+ parts.push(plainBody);
20034
+ }
20035
+ const joined = parts.join("\n\n");
20036
+ if (joined.length <= maxChars) return joined;
20037
+ return joined.slice(0, maxChars).trim();
20038
+ }
20621
20039
  var IndexPipeline = class _IndexPipeline {
20622
20040
  cwd;
20623
20041
  config;
20624
- embeddings;
20625
- vectorStore;
20042
+ store;
20626
20043
  logger;
20627
20044
  constructor(options) {
20628
20045
  this.cwd = options.cwd;
20629
20046
  this.config = options.config;
20630
- this.embeddings = options.embeddings;
20631
- this.vectorStore = options.vectorStore;
20047
+ this.store = options.store;
20632
20048
  this.logger = options.logger;
20633
20049
  }
20634
20050
  static async create(options = {}) {
20635
20051
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
20636
20052
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20637
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20638
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
20053
+ const store = options.store ?? await createUpstashStore(config);
20639
20054
  return new _IndexPipeline({
20640
20055
  cwd,
20641
20056
  config,
20642
- embeddings,
20643
- vectorStore,
20057
+ store,
20644
20058
  logger: options.logger ?? new Logger()
20645
20059
  });
20646
20060
  }
@@ -20660,25 +20074,17 @@ var IndexPipeline = class _IndexPipeline {
20660
20074
  stageTimingsMs[name] = Math.round(hrTimeMs(start));
20661
20075
  };
20662
20076
  const scope = resolveScope(this.config, options.scopeOverride);
20663
- const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
20077
+ ensureStateDirs(this.cwd, this.config.state.dir);
20664
20078
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
20665
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
20079
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20666
20080
  if (options.force) {
20667
20081
  this.logger.info("Force mode enabled \u2014 full rebuild");
20668
- await cleanMirrorForScope(statePath, scope);
20669
20082
  }
20670
20083
  if (options.dryRun) {
20671
20084
  this.logger.info("Dry run \u2014 no writes will be performed");
20672
20085
  }
20673
20086
  const manifestStart = stageStart();
20674
- const existingHashes = await this.vectorStore.getContentHashes(scope);
20675
- const existingModelId = await this.vectorStore.getScopeModelId(scope);
20676
- if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
20677
- throw new SearchSocketError(
20678
- "EMBEDDING_MODEL_MISMATCH",
20679
- `Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
20680
- );
20681
- }
20087
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20682
20088
  stageEnd("manifest", manifestStart);
20683
20089
  this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20684
20090
  const sourceStart = stageStart();
@@ -20807,9 +20213,9 @@ var IndexPipeline = class _IndexPipeline {
20807
20213
  }
20808
20214
  stageEnd("links", linkStart);
20809
20215
  this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
20810
- const mirrorStart = stageStart();
20811
- this.logger.info("Writing mirror pages...");
20812
- const mirrorPages = [];
20216
+ const pagesStart = stageStart();
20217
+ this.logger.info("Building indexed pages...");
20218
+ const pages = [];
20813
20219
  let routeExact = 0;
20814
20220
  let routeBestEffort = 0;
20815
20221
  const precomputedRoutes = /* @__PURE__ */ new Map();
@@ -20838,7 +20244,7 @@ var IndexPipeline = class _IndexPipeline {
20838
20244
  } else {
20839
20245
  routeExact += 1;
20840
20246
  }
20841
- const mirror = {
20247
+ const indexedPage = {
20842
20248
  url: page.url,
20843
20249
  title: page.title,
20844
20250
  scope: scope.scopeName,
@@ -20853,35 +20259,38 @@ var IndexPipeline = class _IndexPipeline {
20853
20259
  description: page.description,
20854
20260
  keywords: page.keywords
20855
20261
  };
20856
- mirrorPages.push(mirror);
20857
- if (this.config.state.writeMirror) {
20858
- await writeMirrorPage(statePath, scope, mirror);
20859
- }
20860
- this.logger.event("markdown_written", { url: page.url });
20262
+ pages.push(indexedPage);
20263
+ this.logger.event("page_indexed", { url: page.url });
20861
20264
  }
20862
20265
  if (!options.dryRun) {
20863
- const pageRecords = mirrorPages.map((mp) => ({
20864
- url: mp.url,
20865
- title: mp.title,
20866
- markdown: mp.markdown,
20867
- projectId: scope.projectId,
20868
- scopeName: scope.scopeName,
20869
- routeFile: mp.routeFile,
20870
- routeResolution: mp.routeResolution,
20871
- incomingLinks: mp.incomingLinks,
20872
- outgoingLinks: mp.outgoingLinks,
20873
- depth: mp.depth,
20874
- tags: mp.tags,
20875
- indexedAt: mp.generatedAt
20876
- }));
20877
- await this.vectorStore.deletePages(scope);
20878
- await this.vectorStore.upsertPages(pageRecords, scope);
20266
+ const pageRecords = pages.map((p) => {
20267
+ const summary = buildPageSummary(p);
20268
+ return {
20269
+ url: p.url,
20270
+ title: p.title,
20271
+ markdown: p.markdown,
20272
+ projectId: scope.projectId,
20273
+ scopeName: scope.scopeName,
20274
+ routeFile: p.routeFile,
20275
+ routeResolution: p.routeResolution,
20276
+ incomingLinks: p.incomingLinks,
20277
+ outgoingLinks: p.outgoingLinks,
20278
+ depth: p.depth,
20279
+ tags: p.tags,
20280
+ indexedAt: p.generatedAt,
20281
+ summary,
20282
+ description: p.description,
20283
+ keywords: p.keywords
20284
+ };
20285
+ });
20286
+ await this.store.deletePages(scope);
20287
+ await this.store.upsertPages(pageRecords, scope);
20879
20288
  }
20880
- stageEnd("mirror", mirrorStart);
20881
- this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
20289
+ stageEnd("pages", pagesStart);
20290
+ this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
20882
20291
  const chunkStart = stageStart();
20883
20292
  this.logger.info("Chunking pages...");
20884
- let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
20293
+ let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
20885
20294
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
20886
20295
  if (typeof maxChunks === "number") {
20887
20296
  chunks = chunks.slice(0, maxChunks);
@@ -20913,125 +20322,59 @@ var IndexPipeline = class _IndexPipeline {
20913
20322
  });
20914
20323
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20915
20324
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20916
- const embedStart = stageStart();
20917
- const chunkTokenEstimates = /* @__PURE__ */ new Map();
20918
- for (const chunk of changedChunks) {
20919
- chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
20920
- }
20921
- const estimatedTokens = changedChunks.reduce(
20922
- (sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
20923
- 0
20924
- );
20925
- const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
20926
- const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
20927
- let newEmbeddings = 0;
20928
- const vectorsByChunk = /* @__PURE__ */ new Map();
20325
+ const upsertStart = stageStart();
20326
+ let documentsUpserted = 0;
20929
20327
  if (!options.dryRun && changedChunks.length > 0) {
20930
- this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
20931
- const embeddings = await this.embeddings.embedTexts(
20932
- changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
20933
- this.config.embeddings.model,
20934
- "retrieval.passage"
20935
- );
20936
- if (embeddings.length !== changedChunks.length) {
20937
- throw new SearchSocketError(
20938
- "VECTOR_BACKEND_UNAVAILABLE",
20939
- `Embedding provider returned ${embeddings.length} vectors for ${changedChunks.length} chunks.`
20940
- );
20941
- }
20942
- for (let i = 0; i < changedChunks.length; i += 1) {
20943
- const chunk = changedChunks[i];
20944
- const embedding = embeddings[i];
20945
- if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
20946
- throw new SearchSocketError(
20947
- "VECTOR_BACKEND_UNAVAILABLE",
20948
- `Embedding provider returned an invalid vector for chunk index ${i}.`
20949
- );
20950
- }
20951
- vectorsByChunk.set(chunk.chunkKey, embedding);
20952
- newEmbeddings += 1;
20953
- this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
20954
- }
20955
- }
20956
- stageEnd("embedding", embedStart);
20957
- if (changedChunks.length > 0) {
20958
- this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20959
- } else {
20960
- this.logger.info("No chunks to embed \u2014 all up to date");
20961
- }
20962
- const syncStart = stageStart();
20963
- if (!options.dryRun) {
20964
- this.logger.info("Syncing vectors...");
20965
- const upserts = [];
20966
- for (const chunk of changedChunks) {
20967
- const vector = vectorsByChunk.get(chunk.chunkKey);
20968
- if (!vector) {
20969
- continue;
20970
- }
20971
- upserts.push({
20328
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
20329
+ const UPSTASH_CONTENT_LIMIT = 4096;
20330
+ const docs = changedChunks.map((chunk) => {
20331
+ const title = chunk.title;
20332
+ const sectionTitle = chunk.sectionTitle ?? "";
20333
+ const url = chunk.url;
20334
+ const tags = chunk.tags.join(",");
20335
+ const headingPath = chunk.headingPath.join(" > ");
20336
+ const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
20337
+ const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
20338
+ const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
20339
+ return {
20972
20340
  id: chunk.chunkKey,
20973
- vector,
20341
+ content: { title, sectionTitle, text, url, tags, headingPath },
20974
20342
  metadata: {
20975
20343
  projectId: scope.projectId,
20976
20344
  scopeName: scope.scopeName,
20977
- url: chunk.url,
20978
20345
  path: chunk.path,
20979
- title: chunk.title,
20980
- sectionTitle: chunk.sectionTitle ?? "",
20981
- headingPath: chunk.headingPath,
20982
20346
  snippet: chunk.snippet,
20983
- chunkText: chunk.chunkText.slice(0, 4e3),
20984
20347
  ordinal: chunk.ordinal,
20985
20348
  contentHash: chunk.contentHash,
20986
- modelId: this.config.embeddings.model,
20987
20349
  depth: chunk.depth,
20988
20350
  incomingLinks: chunk.incomingLinks,
20989
20351
  routeFile: chunk.routeFile,
20990
- tags: chunk.tags,
20991
- description: chunk.description,
20992
- keywords: chunk.keywords
20352
+ description: chunk.description ?? "",
20353
+ keywords: (chunk.keywords ?? []).join(",")
20993
20354
  }
20994
- });
20995
- }
20996
- if (upserts.length > 0) {
20997
- await this.vectorStore.upsert(upserts, scope);
20998
- this.logger.event("upserted", { count: upserts.length });
20999
- }
21000
- if (deletes.length > 0) {
21001
- await this.vectorStore.deleteByIds(deletes, scope);
21002
- this.logger.event("deleted", { count: deletes.length });
21003
- }
21004
- }
21005
- stageEnd("sync", syncStart);
21006
- this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
21007
- const finalizeStart = stageStart();
21008
- if (!options.dryRun) {
21009
- const scopeInfo = {
21010
- projectId: scope.projectId,
21011
- scopeName: scope.scopeName,
21012
- modelId: this.config.embeddings.model,
21013
- lastIndexedAt: nowIso(),
21014
- vectorCount: chunks.length,
21015
- lastEstimateTokens: estimatedTokens,
21016
- lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
21017
- lastEstimateChangedChunks: changedChunks.length
21018
- };
21019
- await this.vectorStore.recordScope(scopeInfo);
21020
- this.logger.event("registry_updated", {
21021
- scope: scope.scopeName,
21022
- vectorCount: chunks.length
20355
+ };
21023
20356
  });
20357
+ await this.store.upsertChunks(docs, scope);
20358
+ documentsUpserted = docs.length;
20359
+ this.logger.event("upserted", { count: docs.length });
20360
+ }
20361
+ if (!options.dryRun && deletes.length > 0) {
20362
+ await this.store.deleteByIds(deletes, scope);
20363
+ this.logger.event("deleted", { count: deletes.length });
20364
+ }
20365
+ stageEnd("upsert", upsertStart);
20366
+ if (changedChunks.length > 0) {
20367
+ this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
20368
+ } else {
20369
+ this.logger.info("No chunks to upsert \u2014 all up to date");
21024
20370
  }
21025
- stageEnd("finalize", finalizeStart);
21026
20371
  this.logger.info("Done.");
21027
20372
  return {
21028
- pagesProcessed: mirrorPages.length,
20373
+ pagesProcessed: pages.length,
21029
20374
  chunksTotal: chunks.length,
21030
20375
  chunksChanged: changedChunks.length,
21031
- newEmbeddings,
20376
+ documentsUpserted,
21032
20377
  deletes: deletes.length,
21033
- estimatedTokens,
21034
- estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
21035
20378
  routeExact,
21036
20379
  routeBestEffort,
21037
20380
  stageTimingsMs
@@ -21062,30 +20405,11 @@ function shouldRunAutoIndex(options) {
21062
20405
  }
21063
20406
  return false;
21064
20407
  }
21065
- function searchsocketViteConfig() {
21066
- return {
21067
- name: "searchsocket:config",
21068
- config() {
21069
- return {
21070
- ssr: {
21071
- external: ["@libsql/client", "libsql"]
21072
- }
21073
- };
21074
- }
21075
- };
21076
- }
21077
20408
  function searchsocketVitePlugin(options = {}) {
21078
20409
  let executed = false;
21079
20410
  let running = false;
21080
20411
  return {
21081
20412
  name: "searchsocket:auto-index",
21082
- config() {
21083
- return {
21084
- ssr: {
21085
- external: ["@libsql/client", "libsql"]
21086
- }
21087
- };
21088
- },
21089
20413
  async closeBundle() {
21090
20414
  if (executed || running) {
21091
20415
  return;
@@ -21113,9 +20437,8 @@ function searchsocketVitePlugin(options = {}) {
21113
20437
  verbose: options.verbose
21114
20438
  });
21115
20439
  logger3.info(
21116
- `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} embedded=${stats.newEmbeddings}`
20440
+ `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
21117
20441
  );
21118
- logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
21119
20442
  executed = true;
21120
20443
  } finally {
21121
20444
  running = false;
@@ -21123,6 +20446,186 @@ function searchsocketVitePlugin(options = {}) {
21123
20446
  }
21124
20447
  };
21125
20448
  }
20449
+
20450
+ // src/sveltekit/scroll-to-text.ts
20451
+ var HIGHLIGHT_CLASS = "ssk-highlight";
20452
+ var HIGHLIGHT_DURATION = 2e3;
20453
+ var HIGHLIGHT_MARKER_ATTR = "data-ssk-highlight-marker";
20454
+ var HIGHLIGHT_NAME = "ssk-search-match";
20455
+ var styleInjected = false;
20456
+ function ensureHighlightStyle() {
20457
+ if (styleInjected || typeof document === "undefined") return;
20458
+ styleInjected = true;
20459
+ const style = document.createElement("style");
20460
+ style.textContent = `
20461
+ @keyframes ssk-highlight-fade {
20462
+ 0% { background-color: rgba(16, 185, 129, 0.18); }
20463
+ 100% { background-color: transparent; }
20464
+ }
20465
+ .${HIGHLIGHT_CLASS} {
20466
+ animation: ssk-highlight-fade ${HIGHLIGHT_DURATION}ms ease-out forwards;
20467
+ border-radius: 4px;
20468
+ }
20469
+ ::highlight(${HIGHLIGHT_NAME}) {
20470
+ background-color: rgba(16, 185, 129, 0.18);
20471
+ }
20472
+ `;
20473
+ document.head.appendChild(style);
20474
+ }
20475
+ var IGNORED_TAGS = /* @__PURE__ */ new Set(["SCRIPT", "STYLE", "NOSCRIPT", "TEMPLATE"]);
20476
+ function buildTextMap(root2) {
20477
+ const walker = document.createTreeWalker(root2, NodeFilter.SHOW_TEXT, {
20478
+ acceptNode(node) {
20479
+ const parent = node.parentElement;
20480
+ if (!parent || IGNORED_TAGS.has(parent.tagName)) return NodeFilter.FILTER_REJECT;
20481
+ return NodeFilter.FILTER_ACCEPT;
20482
+ }
20483
+ });
20484
+ const chunks = [];
20485
+ let text = "";
20486
+ let current;
20487
+ while (current = walker.nextNode()) {
20488
+ const value = current.nodeValue ?? "";
20489
+ if (!value) continue;
20490
+ chunks.push({ node: current, start: text.length, end: text.length + value.length });
20491
+ text += value;
20492
+ }
20493
+ return { text, chunks };
20494
+ }
20495
+ function normalize(text) {
20496
+ return text.toLowerCase().replace(/\s+/g, " ").trim();
20497
+ }
20498
+ function escapeRegExp(value) {
20499
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
20500
+ }
20501
+ function buildNeedleRegex(needle) {
20502
+ const tokenParts = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
20503
+ if (tokenParts.length > 1) {
20504
+ const pattern = tokenParts.map(escapeRegExp).join("[^\\p{L}\\p{N}]+");
20505
+ return new RegExp(pattern, "iu");
20506
+ }
20507
+ if (tokenParts.length === 1) {
20508
+ return new RegExp(escapeRegExp(tokenParts[0]), "iu");
20509
+ }
20510
+ if (!needle) return null;
20511
+ return new RegExp(escapeRegExp(needle).replace(/\s+/g, "\\s+"), "i");
20512
+ }
20513
+ function buildLenientRegex(needle) {
20514
+ const tokenParts = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
20515
+ if (tokenParts.length <= 1) return null;
20516
+ const pattern = tokenParts.map(escapeRegExp).join("[^\\p{L}\\p{N}]*");
20517
+ return new RegExp(pattern, "iu");
20518
+ }
20519
+ function findMatch(fullText, needle) {
20520
+ const regex = buildNeedleRegex(needle);
20521
+ if (regex) {
20522
+ const m = regex.exec(fullText);
20523
+ if (m && typeof m.index === "number") {
20524
+ return { start: m.index, end: m.index + m[0].length };
20525
+ }
20526
+ }
20527
+ const lenient = buildLenientRegex(needle);
20528
+ if (lenient) {
20529
+ const m = lenient.exec(fullText);
20530
+ if (m && typeof m.index === "number") {
20531
+ return { start: m.index, end: m.index + m[0].length };
20532
+ }
20533
+ }
20534
+ return null;
20535
+ }
20536
+ function resolveRange(map, offsets) {
20537
+ let startChunk;
20538
+ let endChunk;
20539
+ for (const chunk of map.chunks) {
20540
+ if (!startChunk && offsets.start >= chunk.start && offsets.start < chunk.end) {
20541
+ startChunk = chunk;
20542
+ }
20543
+ if (offsets.end > chunk.start && offsets.end <= chunk.end) {
20544
+ endChunk = chunk;
20545
+ }
20546
+ if (startChunk && endChunk) break;
20547
+ }
20548
+ if (!startChunk || !endChunk) return null;
20549
+ const range = document.createRange();
20550
+ range.setStart(startChunk.node, offsets.start - startChunk.start);
20551
+ range.setEnd(endChunk.node, offsets.end - endChunk.start);
20552
+ return range;
20553
+ }
20554
+ function hasCustomHighlightAPI() {
20555
+ return typeof CSS !== "undefined" && typeof CSS.highlights !== "undefined";
20556
+ }
20557
+ var highlightTimer = null;
20558
+ function highlightWithCSS(range) {
20559
+ ensureHighlightStyle();
20560
+ const hl = new globalThis.Highlight(range);
20561
+ CSS.highlights.set(HIGHLIGHT_NAME, hl);
20562
+ if (highlightTimer) clearTimeout(highlightTimer);
20563
+ highlightTimer = setTimeout(() => {
20564
+ CSS.highlights.delete(HIGHLIGHT_NAME);
20565
+ highlightTimer = null;
20566
+ }, HIGHLIGHT_DURATION);
20567
+ }
20568
+ function unwrapMarker(marker) {
20569
+ if (!marker.isConnected) return;
20570
+ const parent = marker.parentNode;
20571
+ if (!parent) return;
20572
+ while (marker.firstChild) parent.insertBefore(marker.firstChild, marker);
20573
+ parent.removeChild(marker);
20574
+ if (parent instanceof Element) parent.normalize();
20575
+ }
20576
+ function highlightWithDOM(range) {
20577
+ ensureHighlightStyle();
20578
+ try {
20579
+ const marker = document.createElement("span");
20580
+ marker.classList.add(HIGHLIGHT_CLASS);
20581
+ marker.setAttribute(HIGHLIGHT_MARKER_ATTR, "true");
20582
+ range.surroundContents(marker);
20583
+ setTimeout(() => unwrapMarker(marker), HIGHLIGHT_DURATION);
20584
+ return marker;
20585
+ } catch {
20586
+ const ancestor = range.commonAncestorContainer;
20587
+ const el = ancestor instanceof Element ? ancestor : ancestor.parentElement;
20588
+ if (el) {
20589
+ el.classList.add(HIGHLIGHT_CLASS);
20590
+ setTimeout(() => el.classList.remove(HIGHLIGHT_CLASS), HIGHLIGHT_DURATION);
20591
+ return el;
20592
+ }
20593
+ return document.body;
20594
+ }
20595
+ }
20596
+ function scrollToRange(range) {
20597
+ const rect = range.getBoundingClientRect();
20598
+ window.scrollTo({
20599
+ top: window.scrollY + rect.top - window.innerHeight / 3,
20600
+ behavior: "smooth"
20601
+ });
20602
+ }
20603
+ function scrollIntoViewIfPossible(el) {
20604
+ if (typeof el.scrollIntoView === "function") {
20605
+ el.scrollIntoView({ behavior: "smooth", block: "start" });
20606
+ }
20607
+ }
20608
+ function searchsocketScrollToText(navigation) {
20609
+ if (typeof document === "undefined") return;
20610
+ const params = navigation.to?.url.searchParams;
20611
+ const raw = params?.get("_sskt") ?? params?.get("_ssk");
20612
+ if (!raw) return;
20613
+ const needle = normalize(raw);
20614
+ if (!needle) return;
20615
+ const map = buildTextMap(document.body);
20616
+ const offsets = findMatch(map.text, needle);
20617
+ if (!offsets) return;
20618
+ const range = resolveRange(map, offsets);
20619
+ if (!range) return;
20620
+ if (hasCustomHighlightAPI()) {
20621
+ highlightWithCSS(range);
20622
+ scrollToRange(range);
20623
+ } else {
20624
+ const marker = highlightWithDOM(range);
20625
+ const target = typeof marker.scrollIntoView === "function" ? marker : marker.parentElement;
20626
+ if (target) scrollIntoViewIfPossible(target);
20627
+ }
20628
+ }
21126
20629
  /*! Bundled license information:
21127
20630
 
21128
20631
  @mixmark-io/domino/lib/style_parser.js:
@@ -21136,7 +20639,7 @@ function searchsocketVitePlugin(options = {}) {
21136
20639
  */
21137
20640
 
21138
20641
  exports.searchsocketHandle = searchsocketHandle;
21139
- exports.searchsocketViteConfig = searchsocketViteConfig;
20642
+ exports.searchsocketScrollToText = searchsocketScrollToText;
21140
20643
  exports.searchsocketVitePlugin = searchsocketVitePlugin;
21141
20644
  //# sourceMappingURL=sveltekit.cjs.map
21142
20645
  //# sourceMappingURL=sveltekit.cjs.map