searchsocket 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,13 +4,13 @@ var fs = require('fs');
4
4
  var path = require('path');
5
5
  var jiti = require('jiti');
6
6
  var zod = require('zod');
7
- var pLimit2 = require('p-limit');
8
7
  var child_process = require('child_process');
9
8
  var crypto = require('crypto');
10
9
  var cheerio = require('cheerio');
11
10
  var matter = require('gray-matter');
12
- var fs4 = require('fs/promises');
13
11
  var fg = require('fast-glob');
12
+ var pLimit = require('p-limit');
13
+ var fs3 = require('fs/promises');
14
14
  var net = require('net');
15
15
  var zlib = require('zlib');
16
16
 
@@ -18,10 +18,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
18
18
 
19
19
  var fs__default = /*#__PURE__*/_interopDefault(fs);
20
20
  var path__default = /*#__PURE__*/_interopDefault(path);
21
- var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
22
21
  var matter__default = /*#__PURE__*/_interopDefault(matter);
23
- var fs4__default = /*#__PURE__*/_interopDefault(fs4);
24
22
  var fg__default = /*#__PURE__*/_interopDefault(fg);
23
+ var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
24
+ var fs3__default = /*#__PURE__*/_interopDefault(fs3);
25
25
  var net__default = /*#__PURE__*/_interopDefault(net);
26
26
 
27
27
  var __getOwnPropNames = Object.getOwnPropertyNames;
@@ -2767,12 +2767,12 @@ var require_ChildNode = __commonJS({
2767
2767
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
2768
2768
  var Node2 = require_Node();
2769
2769
  var LinkedList = require_LinkedList();
2770
- var createDocumentFragmentFromArguments = function(document, args) {
2771
- var docFrag = document.createDocumentFragment();
2770
+ var createDocumentFragmentFromArguments = function(document2, args) {
2771
+ var docFrag = document2.createDocumentFragment();
2772
2772
  for (var i = 0; i < args.length; i++) {
2773
2773
  var argItem = args[i];
2774
2774
  var isNode = argItem instanceof Node2;
2775
- docFrag.appendChild(isNode ? argItem : document.createTextNode(String(argItem)));
2775
+ docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
2776
2776
  }
2777
2777
  return docFrag;
2778
2778
  };
@@ -2930,7 +2930,7 @@ var require_NamedNodeMap = __commonJS({
2930
2930
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
2931
2931
  var require_Element = __commonJS({
2932
2932
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
2933
- module.exports = Element;
2933
+ module.exports = Element2;
2934
2934
  var xml = require_xmlnames();
2935
2935
  var utils = require_utils();
2936
2936
  var NAMESPACE = utils.NAMESPACE;
@@ -2947,7 +2947,7 @@ var require_Element = __commonJS({
2947
2947
  var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
2948
2948
  var NamedNodeMap = require_NamedNodeMap();
2949
2949
  var uppercaseCache = /* @__PURE__ */ Object.create(null);
2950
- function Element(doc, localName, namespaceURI, prefix) {
2950
+ function Element2(doc, localName, namespaceURI, prefix) {
2951
2951
  ContainerNode.call(this);
2952
2952
  this.nodeType = Node2.ELEMENT_NODE;
2953
2953
  this.ownerDocument = doc;
@@ -2967,7 +2967,7 @@ var require_Element = __commonJS({
2967
2967
  recursiveGetText(node.childNodes[i], a);
2968
2968
  }
2969
2969
  }
2970
- Element.prototype = Object.create(ContainerNode.prototype, {
2970
+ Element2.prototype = Object.create(ContainerNode.prototype, {
2971
2971
  isHTML: { get: function isHTML() {
2972
2972
  return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
2973
2973
  } },
@@ -3037,7 +3037,7 @@ var require_Element = __commonJS({
3037
3037
  return NodeUtils.serializeOne(this, { nodeType: 0 });
3038
3038
  },
3039
3039
  set: function(v) {
3040
- var document = this.ownerDocument;
3040
+ var document2 = this.ownerDocument;
3041
3041
  var parent = this.parentNode;
3042
3042
  if (parent === null) {
3043
3043
  return;
@@ -3048,8 +3048,8 @@ var require_Element = __commonJS({
3048
3048
  if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
3049
3049
  parent = parent.ownerDocument.createElement("body");
3050
3050
  }
3051
- var parser = document.implementation.mozHTMLParser(
3052
- document._address,
3051
+ var parser = document2.implementation.mozHTMLParser(
3052
+ document2._address,
3053
3053
  parent
3054
3054
  );
3055
3055
  parser.parse(v === null ? "" : String(v), true);
@@ -3108,7 +3108,7 @@ var require_Element = __commonJS({
3108
3108
  default:
3109
3109
  utils.SyntaxError();
3110
3110
  }
3111
- if (!(context instanceof Element) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3111
+ if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
3112
3112
  context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
3113
3113
  }
3114
3114
  var parser = this.ownerDocument.implementation.mozHTMLParser(
@@ -3716,10 +3716,10 @@ var require_Element = __commonJS({
3716
3716
  return nodes.item ? nodes : new NodeList(nodes);
3717
3717
  } }
3718
3718
  });
3719
- Object.defineProperties(Element.prototype, ChildNode);
3720
- Object.defineProperties(Element.prototype, NonDocumentTypeChildNode);
3719
+ Object.defineProperties(Element2.prototype, ChildNode);
3720
+ Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
3721
3721
  attributes.registerChangeHandler(
3722
- Element,
3722
+ Element2,
3723
3723
  "id",
3724
3724
  function(element, lname, oldval, newval) {
3725
3725
  if (element.rooted) {
@@ -3733,7 +3733,7 @@ var require_Element = __commonJS({
3733
3733
  }
3734
3734
  );
3735
3735
  attributes.registerChangeHandler(
3736
- Element,
3736
+ Element2,
3737
3737
  "class",
3738
3738
  function(element, lname, oldval, newval) {
3739
3739
  if (element._classList) {
@@ -3832,7 +3832,7 @@ var require_Element = __commonJS({
3832
3832
  }
3833
3833
  }
3834
3834
  });
3835
- Element._Attr = Attr;
3835
+ Element2._Attr = Attr;
3836
3836
  function AttributesArray(elt) {
3837
3837
  NamedNodeMap.call(this, elt);
3838
3838
  for (var name in elt._attrsByQName) {
@@ -4234,7 +4234,7 @@ var require_DocumentFragment = __commonJS({
4234
4234
  var Node2 = require_Node();
4235
4235
  var NodeList = require_NodeList();
4236
4236
  var ContainerNode = require_ContainerNode();
4237
- var Element = require_Element();
4237
+ var Element2 = require_Element();
4238
4238
  var select = require_select();
4239
4239
  var utils = require_utils();
4240
4240
  function DocumentFragment(doc) {
@@ -4252,9 +4252,9 @@ var require_DocumentFragment = __commonJS({
4252
4252
  }
4253
4253
  },
4254
4254
  // Copy the text content getter/setter from Element
4255
- textContent: Object.getOwnPropertyDescriptor(Element.prototype, "textContent"),
4255
+ textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
4256
4256
  // Copy the text content getter/setter from Element
4257
- innerText: Object.getOwnPropertyDescriptor(Element.prototype, "innerText"),
4257
+ innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
4258
4258
  querySelector: { value: function(selector) {
4259
4259
  var nodes = this.querySelectorAll(selector);
4260
4260
  return nodes.length ? nodes[0] : null;
@@ -4262,8 +4262,8 @@ var require_DocumentFragment = __commonJS({
4262
4262
  querySelectorAll: { value: function(selector) {
4263
4263
  var context = Object.create(this);
4264
4264
  context.isHTML = true;
4265
- context.getElementsByTagName = Element.prototype.getElementsByTagName;
4266
- context.nextElement = Object.getOwnPropertyDescriptor(Element.prototype, "firstElementChild").get;
4265
+ context.getElementsByTagName = Element2.prototype.getElementsByTagName;
4266
+ context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
4267
4267
  var nodes = select(selector, context);
4268
4268
  return nodes.item ? nodes : new NodeList(nodes);
4269
4269
  } },
@@ -4345,7 +4345,7 @@ var require_ProcessingInstruction = __commonJS({
4345
4345
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
4346
4346
  var require_NodeFilter = __commonJS({
4347
4347
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
4348
- var NodeFilter = {
4348
+ var NodeFilter2 = {
4349
4349
  // Constants for acceptNode()
4350
4350
  FILTER_ACCEPT: 1,
4351
4351
  FILTER_REJECT: 2,
@@ -4370,7 +4370,7 @@ var require_NodeFilter = __commonJS({
4370
4370
  SHOW_NOTATION: 2048
4371
4371
  // historical
4372
4372
  };
4373
- module.exports = NodeFilter.constructor = NodeFilter.prototype = NodeFilter;
4373
+ module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
4374
4374
  }
4375
4375
  });
4376
4376
 
@@ -4445,7 +4445,7 @@ var require_TreeWalker = __commonJS({
4445
4445
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
4446
4446
  module.exports = TreeWalker;
4447
4447
  var Node2 = require_Node();
4448
- var NodeFilter = require_NodeFilter();
4448
+ var NodeFilter2 = require_NodeFilter();
4449
4449
  var NodeTraversal = require_NodeTraversal();
4450
4450
  var utils = require_utils();
4451
4451
  var mapChild = {
@@ -4465,11 +4465,11 @@ var require_TreeWalker = __commonJS({
4465
4465
  node = tw._currentNode[mapChild[type]];
4466
4466
  while (node !== null) {
4467
4467
  result = tw._internalFilter(node);
4468
- if (result === NodeFilter.FILTER_ACCEPT) {
4468
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4469
4469
  tw._currentNode = node;
4470
4470
  return node;
4471
4471
  }
4472
- if (result === NodeFilter.FILTER_SKIP) {
4472
+ if (result === NodeFilter2.FILTER_SKIP) {
4473
4473
  child = node[mapChild[type]];
4474
4474
  if (child !== null) {
4475
4475
  node = child;
@@ -4503,12 +4503,12 @@ var require_TreeWalker = __commonJS({
4503
4503
  while (sibling !== null) {
4504
4504
  node = sibling;
4505
4505
  result = tw._internalFilter(node);
4506
- if (result === NodeFilter.FILTER_ACCEPT) {
4506
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4507
4507
  tw._currentNode = node;
4508
4508
  return node;
4509
4509
  }
4510
4510
  sibling = node[mapChild[type]];
4511
- if (result === NodeFilter.FILTER_REJECT || sibling === null) {
4511
+ if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
4512
4512
  sibling = node[mapSibling[type]];
4513
4513
  }
4514
4514
  }
@@ -4516,7 +4516,7 @@ var require_TreeWalker = __commonJS({
4516
4516
  if (node === null || node === tw.root) {
4517
4517
  return null;
4518
4518
  }
4519
- if (tw._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4519
+ if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4520
4520
  return null;
4521
4521
  }
4522
4522
  }
@@ -4564,11 +4564,11 @@ var require_TreeWalker = __commonJS({
4564
4564
  utils.InvalidStateError();
4565
4565
  }
4566
4566
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4567
- return NodeFilter.FILTER_SKIP;
4567
+ return NodeFilter2.FILTER_SKIP;
4568
4568
  }
4569
4569
  filter = this._filter;
4570
4570
  if (filter === null) {
4571
- result = NodeFilter.FILTER_ACCEPT;
4571
+ result = NodeFilter2.FILTER_ACCEPT;
4572
4572
  } else {
4573
4573
  this._active = true;
4574
4574
  try {
@@ -4597,7 +4597,7 @@ var require_TreeWalker = __commonJS({
4597
4597
  if (node === null) {
4598
4598
  return null;
4599
4599
  }
4600
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4600
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4601
4601
  this._currentNode = node;
4602
4602
  return node;
4603
4603
  }
@@ -4650,17 +4650,17 @@ var require_TreeWalker = __commonJS({
4650
4650
  for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
4651
4651
  node = previousSibling;
4652
4652
  result = this._internalFilter(node);
4653
- if (result === NodeFilter.FILTER_REJECT) {
4653
+ if (result === NodeFilter2.FILTER_REJECT) {
4654
4654
  continue;
4655
4655
  }
4656
4656
  for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
4657
4657
  node = lastChild;
4658
4658
  result = this._internalFilter(node);
4659
- if (result === NodeFilter.FILTER_REJECT) {
4659
+ if (result === NodeFilter2.FILTER_REJECT) {
4660
4660
  break;
4661
4661
  }
4662
4662
  }
4663
- if (result === NodeFilter.FILTER_ACCEPT) {
4663
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4664
4664
  this._currentNode = node;
4665
4665
  return node;
4666
4666
  }
@@ -4669,7 +4669,7 @@ var require_TreeWalker = __commonJS({
4669
4669
  return null;
4670
4670
  }
4671
4671
  node = node.parentNode;
4672
- if (this._internalFilter(node) === NodeFilter.FILTER_ACCEPT) {
4672
+ if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
4673
4673
  this._currentNode = node;
4674
4674
  return node;
4675
4675
  }
@@ -4686,26 +4686,26 @@ var require_TreeWalker = __commonJS({
4686
4686
  nextNode: { value: function nextNode() {
4687
4687
  var node, result, firstChild, nextSibling;
4688
4688
  node = this._currentNode;
4689
- result = NodeFilter.FILTER_ACCEPT;
4689
+ result = NodeFilter2.FILTER_ACCEPT;
4690
4690
  CHILDREN:
4691
4691
  while (true) {
4692
4692
  for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
4693
4693
  node = firstChild;
4694
4694
  result = this._internalFilter(node);
4695
- if (result === NodeFilter.FILTER_ACCEPT) {
4695
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4696
4696
  this._currentNode = node;
4697
4697
  return node;
4698
- } else if (result === NodeFilter.FILTER_REJECT) {
4698
+ } else if (result === NodeFilter2.FILTER_REJECT) {
4699
4699
  break;
4700
4700
  }
4701
4701
  }
4702
4702
  for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
4703
4703
  node = nextSibling;
4704
4704
  result = this._internalFilter(node);
4705
- if (result === NodeFilter.FILTER_ACCEPT) {
4705
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4706
4706
  this._currentNode = node;
4707
4707
  return node;
4708
- } else if (result === NodeFilter.FILTER_SKIP) {
4708
+ } else if (result === NodeFilter2.FILTER_SKIP) {
4709
4709
  continue CHILDREN;
4710
4710
  }
4711
4711
  }
@@ -4724,7 +4724,7 @@ var require_TreeWalker = __commonJS({
4724
4724
  var require_NodeIterator = __commonJS({
4725
4725
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
4726
4726
  module.exports = NodeIterator;
4727
- var NodeFilter = require_NodeFilter();
4727
+ var NodeFilter2 = require_NodeFilter();
4728
4728
  var NodeTraversal = require_NodeTraversal();
4729
4729
  var utils = require_utils();
4730
4730
  function move(node, stayWithin, directionIsNext) {
@@ -4759,7 +4759,7 @@ var require_NodeIterator = __commonJS({
4759
4759
  }
4760
4760
  }
4761
4761
  var result = ni._internalFilter(node);
4762
- if (result === NodeFilter.FILTER_ACCEPT) {
4762
+ if (result === NodeFilter2.FILTER_ACCEPT) {
4763
4763
  break;
4764
4764
  }
4765
4765
  }
@@ -4807,11 +4807,11 @@ var require_NodeIterator = __commonJS({
4807
4807
  utils.InvalidStateError();
4808
4808
  }
4809
4809
  if (!(1 << node.nodeType - 1 & this._whatToShow)) {
4810
- return NodeFilter.FILTER_SKIP;
4810
+ return NodeFilter2.FILTER_SKIP;
4811
4811
  }
4812
4812
  filter = this._filter;
4813
4813
  if (filter === null) {
4814
- result = NodeFilter.FILTER_ACCEPT;
4814
+ result = NodeFilter2.FILTER_ACCEPT;
4815
4815
  } else {
4816
4816
  this._active = true;
4817
4817
  try {
@@ -5021,32 +5021,32 @@ var require_URL = __commonJS({
5021
5021
  else
5022
5022
  return basepath.substring(0, lastslash + 1) + refpath;
5023
5023
  }
5024
- function remove_dot_segments(path14) {
5025
- if (!path14) return path14;
5024
+ function remove_dot_segments(path13) {
5025
+ if (!path13) return path13;
5026
5026
  var output = "";
5027
- while (path14.length > 0) {
5028
- if (path14 === "." || path14 === "..") {
5029
- path14 = "";
5027
+ while (path13.length > 0) {
5028
+ if (path13 === "." || path13 === "..") {
5029
+ path13 = "";
5030
5030
  break;
5031
5031
  }
5032
- var twochars = path14.substring(0, 2);
5033
- var threechars = path14.substring(0, 3);
5034
- var fourchars = path14.substring(0, 4);
5032
+ var twochars = path13.substring(0, 2);
5033
+ var threechars = path13.substring(0, 3);
5034
+ var fourchars = path13.substring(0, 4);
5035
5035
  if (threechars === "../") {
5036
- path14 = path14.substring(3);
5036
+ path13 = path13.substring(3);
5037
5037
  } else if (twochars === "./") {
5038
- path14 = path14.substring(2);
5038
+ path13 = path13.substring(2);
5039
5039
  } else if (threechars === "/./") {
5040
- path14 = "/" + path14.substring(3);
5041
- } else if (twochars === "/." && path14.length === 2) {
5042
- path14 = "/";
5043
- } else if (fourchars === "/../" || threechars === "/.." && path14.length === 3) {
5044
- path14 = "/" + path14.substring(4);
5040
+ path13 = "/" + path13.substring(3);
5041
+ } else if (twochars === "/." && path13.length === 2) {
5042
+ path13 = "/";
5043
+ } else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
5044
+ path13 = "/" + path13.substring(4);
5045
5045
  output = output.replace(/\/?[^\/]*$/, "");
5046
5046
  } else {
5047
- var segment = path14.match(/(\/?([^\/]*))/)[0];
5047
+ var segment = path13.match(/(\/?([^\/]*))/)[0];
5048
5048
  output += segment;
5049
- path14 = path14.substring(segment.length);
5049
+ path13 = path13.substring(segment.length);
5050
5050
  }
5051
5051
  }
5052
5052
  return output;
@@ -5611,9 +5611,9 @@ var require_defineElement = __commonJS({
5611
5611
  });
5612
5612
  return c;
5613
5613
  };
5614
- function EventHandlerBuilder(body, document, form, element) {
5614
+ function EventHandlerBuilder(body, document2, form, element) {
5615
5615
  this.body = body;
5616
- this.document = document;
5616
+ this.document = document2;
5617
5617
  this.form = form;
5618
5618
  this.element = element;
5619
5619
  }
@@ -5647,7 +5647,7 @@ var require_defineElement = __commonJS({
5647
5647
  var require_htmlelts = __commonJS({
5648
5648
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
5649
5649
  var Node2 = require_Node();
5650
- var Element = require_Element();
5650
+ var Element2 = require_Element();
5651
5651
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
5652
5652
  var utils = require_utils();
5653
5653
  var URLUtils = require_URLUtils();
@@ -5715,10 +5715,10 @@ var require_htmlelts = __commonJS({
5715
5715
  this._form = null;
5716
5716
  };
5717
5717
  var HTMLElement = exports$1.HTMLElement = define({
5718
- superclass: Element,
5718
+ superclass: Element2,
5719
5719
  name: "HTMLElement",
5720
5720
  ctor: function HTMLElement2(doc, localName, prefix) {
5721
- Element.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5721
+ Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
5722
5722
  },
5723
5723
  props: {
5724
5724
  dangerouslySetInnerHTML: {
@@ -7200,7 +7200,7 @@ var require_htmlelts = __commonJS({
7200
7200
  // node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
7201
7201
  var require_svg = __commonJS({
7202
7202
  "node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
7203
- var Element = require_Element();
7203
+ var Element2 = require_Element();
7204
7204
  var defineElement = require_defineElement();
7205
7205
  var utils = require_utils();
7206
7206
  var CSSStyleDeclaration = require_CSSStyleDeclaration();
@@ -7214,10 +7214,10 @@ var require_svg = __commonJS({
7214
7214
  return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
7215
7215
  }
7216
7216
  var SVGElement = define({
7217
- superclass: Element,
7217
+ superclass: Element2,
7218
7218
  name: "SVGElement",
7219
7219
  ctor: function SVGElement2(doc, localName, prefix) {
7220
- Element.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7220
+ Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
7221
7221
  },
7222
7222
  props: {
7223
7223
  style: { get: function() {
@@ -7352,7 +7352,7 @@ var require_Document = __commonJS({
7352
7352
  var Node2 = require_Node();
7353
7353
  var NodeList = require_NodeList();
7354
7354
  var ContainerNode = require_ContainerNode();
7355
- var Element = require_Element();
7355
+ var Element2 = require_Element();
7356
7356
  var Text = require_Text();
7357
7357
  var Comment = require_Comment();
7358
7358
  var Event = require_Event();
@@ -7361,7 +7361,7 @@ var require_Document = __commonJS({
7361
7361
  var DOMImplementation = require_DOMImplementation();
7362
7362
  var TreeWalker = require_TreeWalker();
7363
7363
  var NodeIterator = require_NodeIterator();
7364
- var NodeFilter = require_NodeFilter();
7364
+ var NodeFilter2 = require_NodeFilter();
7365
7365
  var URL2 = require_URL();
7366
7366
  var select = require_select();
7367
7367
  var events = require_events();
@@ -7500,13 +7500,13 @@ var require_Document = __commonJS({
7500
7500
  if (this.isHTML) {
7501
7501
  localName = utils.toASCIILowerCase(localName);
7502
7502
  }
7503
- return new Element._Attr(null, localName, null, null, "");
7503
+ return new Element2._Attr(null, localName, null, null, "");
7504
7504
  } },
7505
7505
  createAttributeNS: { value: function(namespace, qualifiedName) {
7506
7506
  namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
7507
7507
  qualifiedName = String(qualifiedName);
7508
7508
  var ve = validateAndExtract(namespace, qualifiedName);
7509
- return new Element._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7509
+ return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
7510
7510
  } },
7511
7511
  createElement: { value: function(localName) {
7512
7512
  localName = String(localName);
@@ -7518,7 +7518,7 @@ var require_Document = __commonJS({
7518
7518
  } else if (this.contentType === "application/xhtml+xml") {
7519
7519
  return html.createElement(this, localName, null);
7520
7520
  } else {
7521
- return new Element(this, localName, null, null);
7521
+ return new Element2(this, localName, null, null);
7522
7522
  }
7523
7523
  }, writable: isApiWritable },
7524
7524
  createElementNS: { value: function(namespace, qualifiedName) {
@@ -7535,7 +7535,7 @@ var require_Document = __commonJS({
7535
7535
  } else if (namespace === NAMESPACE.SVG) {
7536
7536
  return svg.createElement(this, localName, prefix);
7537
7537
  }
7538
- return new Element(this, localName, namespace, prefix);
7538
+ return new Element2(this, localName, namespace, prefix);
7539
7539
  } },
7540
7540
  createEvent: { value: function createEvent(interfaceName) {
7541
7541
  interfaceName = interfaceName.toLowerCase();
@@ -7557,7 +7557,7 @@ var require_Document = __commonJS({
7557
7557
  if (!(root3 instanceof Node2)) {
7558
7558
  throw new TypeError("root not a node");
7559
7559
  }
7560
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7560
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7561
7561
  filter = filter === void 0 ? null : filter;
7562
7562
  return new TreeWalker(root3, whatToShow, filter);
7563
7563
  } },
@@ -7569,7 +7569,7 @@ var require_Document = __commonJS({
7569
7569
  if (!(root3 instanceof Node2)) {
7570
7570
  throw new TypeError("root not a node");
7571
7571
  }
7572
- whatToShow = whatToShow === void 0 ? NodeFilter.SHOW_ALL : +whatToShow;
7572
+ whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
7573
7573
  filter = filter === void 0 ? null : filter;
7574
7574
  return new NodeIterator(root3, whatToShow, filter);
7575
7575
  } },
@@ -7630,10 +7630,10 @@ var require_Document = __commonJS({
7630
7630
  return this.byId[id] instanceof MultiId;
7631
7631
  } },
7632
7632
  // Just copy this method from the Element prototype
7633
- getElementsByName: { value: Element.prototype.getElementsByName },
7634
- getElementsByTagName: { value: Element.prototype.getElementsByTagName },
7635
- getElementsByTagNameNS: { value: Element.prototype.getElementsByTagNameNS },
7636
- getElementsByClassName: { value: Element.prototype.getElementsByClassName },
7633
+ getElementsByName: { value: Element2.prototype.getElementsByName },
7634
+ getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
7635
+ getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
7636
+ getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
7637
7637
  adoptNode: { value: function adoptNode(node) {
7638
7638
  if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
7639
7639
  if (node.nodeType === Node2.ATTRIBUTE_NODE) {
@@ -16459,8 +16459,8 @@ var require_Window = __commonJS({
16459
16459
  var Location = require_Location();
16460
16460
  var utils = require_utils();
16461
16461
  module.exports = Window;
16462
- function Window(document) {
16463
- this.document = document || new DOMImplementation(null).createHTMLDocument("");
16462
+ function Window(document2) {
16463
+ this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
16464
16464
  this.document._scripting_enabled = true;
16465
16465
  this.document.defaultView = this;
16466
16466
  this.location = new Location(this, this.document._address || "about:blank");
@@ -16590,11 +16590,11 @@ var require_lib = __commonJS({
16590
16590
  };
16591
16591
  };
16592
16592
  exports$1.createWindow = function(html, address) {
16593
- var document = exports$1.createDocument(html);
16593
+ var document2 = exports$1.createDocument(html);
16594
16594
  if (address !== void 0) {
16595
- document._address = address;
16595
+ document2._address = address;
16596
16596
  }
16597
- return new impl.Window(document);
16597
+ return new impl.Window(document2);
16598
16598
  };
16599
16599
  exports$1.impl = impl;
16600
16600
  }
@@ -16610,6 +16610,8 @@ var searchSocketConfigSchema = zod.z.object({
16610
16610
  envVar: zod.z.string().min(1).optional(),
16611
16611
  sanitize: zod.z.boolean().optional()
16612
16612
  }).optional(),
16613
+ exclude: zod.z.array(zod.z.string()).optional(),
16614
+ respectRobotsTxt: zod.z.boolean().optional(),
16613
16615
  source: zod.z.object({
16614
16616
  mode: zod.z.enum(["static-output", "crawl", "content-files", "build"]).optional(),
16615
16617
  staticOutputDir: zod.z.string().min(1).optional(),
@@ -16657,29 +16659,18 @@ var searchSocketConfigSchema = zod.z.object({
16657
16659
  prependTitle: zod.z.boolean().optional(),
16658
16660
  pageSummaryChunk: zod.z.boolean().optional()
16659
16661
  }).optional(),
16660
- embeddings: zod.z.object({
16661
- provider: zod.z.literal("jina").optional(),
16662
- model: zod.z.string().min(1).optional(),
16663
- apiKey: zod.z.string().min(1).optional(),
16664
- apiKeyEnv: zod.z.string().min(1).optional(),
16665
- batchSize: zod.z.number().int().positive().optional(),
16666
- concurrency: zod.z.number().int().positive().optional(),
16667
- pricePer1kTokens: zod.z.number().positive().optional()
16662
+ upstash: zod.z.object({
16663
+ url: zod.z.string().url().optional(),
16664
+ token: zod.z.string().min(1).optional(),
16665
+ urlEnv: zod.z.string().min(1).optional(),
16666
+ tokenEnv: zod.z.string().min(1).optional()
16668
16667
  }).optional(),
16669
- vector: zod.z.object({
16670
- dimension: zod.z.number().int().positive().optional(),
16671
- turso: zod.z.object({
16672
- url: zod.z.string().url().optional(),
16673
- authToken: zod.z.string().min(1).optional(),
16674
- urlEnv: zod.z.string().optional(),
16675
- authTokenEnv: zod.z.string().optional(),
16676
- localPath: zod.z.string().optional()
16677
- }).optional()
16678
- }).optional(),
16679
- rerank: zod.z.object({
16680
- enabled: zod.z.boolean().optional(),
16681
- topN: zod.z.number().int().positive().optional(),
16682
- model: zod.z.string().optional()
16668
+ search: zod.z.object({
16669
+ semanticWeight: zod.z.number().min(0).max(1).optional(),
16670
+ inputEnrichment: zod.z.boolean().optional(),
16671
+ reranking: zod.z.boolean().optional(),
16672
+ dualSearch: zod.z.boolean().optional(),
16673
+ pageSearchWeight: zod.z.number().min(0).max(1).optional()
16683
16674
  }).optional(),
16684
16675
  ranking: zod.z.object({
16685
16676
  enableIncomingLinkBoost: zod.z.boolean().optional(),
@@ -16689,11 +16680,12 @@ var searchSocketConfigSchema = zod.z.object({
16689
16680
  aggregationDecay: zod.z.number().min(0).max(1).optional(),
16690
16681
  minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
16691
16682
  minScore: zod.z.number().min(0).max(1).optional(),
16683
+ scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
16692
16684
  weights: zod.z.object({
16693
16685
  incomingLinks: zod.z.number().optional(),
16694
16686
  depth: zod.z.number().optional(),
16695
- rerank: zod.z.number().optional(),
16696
- aggregation: zod.z.number().optional()
16687
+ aggregation: zod.z.number().optional(),
16688
+ titleMatch: zod.z.number().optional()
16697
16689
  }).optional()
16698
16690
  }).optional(),
16699
16691
  api: zod.z.object({
@@ -16715,8 +16707,7 @@ var searchSocketConfigSchema = zod.z.object({
16715
16707
  }).optional()
16716
16708
  }).optional(),
16717
16709
  state: zod.z.object({
16718
- dir: zod.z.string().optional(),
16719
- writeMirror: zod.z.boolean().optional()
16710
+ dir: zod.z.string().optional()
16720
16711
  }).optional()
16721
16712
  });
16722
16713
 
@@ -16740,6 +16731,8 @@ function createDefaultConfig(projectId) {
16740
16731
  envVar: "SEARCHSOCKET_SCOPE",
16741
16732
  sanitize: true
16742
16733
  },
16734
+ exclude: [],
16735
+ respectRobotsTxt: true,
16743
16736
  source: {
16744
16737
  mode: "static-output",
16745
16738
  staticOutputDir: "build",
@@ -16768,24 +16761,16 @@ function createDefaultConfig(projectId) {
16768
16761
  prependTitle: true,
16769
16762
  pageSummaryChunk: true
16770
16763
  },
16771
- embeddings: {
16772
- provider: "jina",
16773
- model: "jina-embeddings-v3",
16774
- apiKeyEnv: "JINA_API_KEY",
16775
- batchSize: 64,
16776
- concurrency: 4
16777
- },
16778
- vector: {
16779
- turso: {
16780
- urlEnv: "TURSO_DATABASE_URL",
16781
- authTokenEnv: "TURSO_AUTH_TOKEN",
16782
- localPath: ".searchsocket/vectors.db"
16783
- }
16764
+ upstash: {
16765
+ urlEnv: "UPSTASH_SEARCH_REST_URL",
16766
+ tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
16784
16767
  },
16785
- rerank: {
16786
- enabled: false,
16787
- topN: 20,
16788
- model: "jina-reranker-v2-base-multilingual"
16768
+ search: {
16769
+ semanticWeight: 0.75,
16770
+ inputEnrichment: true,
16771
+ reranking: true,
16772
+ dualSearch: true,
16773
+ pageSearchWeight: 0.3
16789
16774
  },
16790
16775
  ranking: {
16791
16776
  enableIncomingLinkBoost: true,
@@ -16794,12 +16779,13 @@ function createDefaultConfig(projectId) {
16794
16779
  aggregationCap: 5,
16795
16780
  aggregationDecay: 0.5,
16796
16781
  minChunkScoreRatio: 0.5,
16797
- minScore: 0,
16782
+ minScore: 0.3,
16783
+ scoreGapThreshold: 0.4,
16798
16784
  weights: {
16799
16785
  incomingLinks: 0.05,
16800
16786
  depth: 0.03,
16801
- rerank: 1,
16802
- aggregation: 0.1
16787
+ aggregation: 0.1,
16788
+ titleMatch: 0.15
16803
16789
  }
16804
16790
  },
16805
16791
  api: {
@@ -16817,8 +16803,7 @@ function createDefaultConfig(projectId) {
16817
16803
  }
16818
16804
  },
16819
16805
  state: {
16820
- dir: ".searchsocket",
16821
- writeMirror: false
16806
+ dir: ".searchsocket"
16822
16807
  }
16823
16808
  };
16824
16809
  }
@@ -16904,6 +16889,8 @@ ${issues}`
16904
16889
  ...defaults.scope,
16905
16890
  ...parsed.scope
16906
16891
  },
16892
+ exclude: parsed.exclude ?? defaults.exclude,
16893
+ respectRobotsTxt: parsed.respectRobotsTxt ?? defaults.respectRobotsTxt,
16907
16894
  source: {
16908
16895
  ...defaults.source,
16909
16896
  ...parsed.source,
@@ -16940,21 +16927,13 @@ ${issues}`
16940
16927
  ...defaults.chunking,
16941
16928
  ...parsed.chunking
16942
16929
  },
16943
- embeddings: {
16944
- ...defaults.embeddings,
16945
- ...parsed.embeddings
16930
+ upstash: {
16931
+ ...defaults.upstash,
16932
+ ...parsed.upstash
16946
16933
  },
16947
- vector: {
16948
- ...defaults.vector,
16949
- ...parsed.vector,
16950
- turso: {
16951
- ...defaults.vector.turso,
16952
- ...parsed.vector?.turso
16953
- }
16954
- },
16955
- rerank: {
16956
- ...defaults.rerank,
16957
- ...parsed.rerank
16934
+ search: {
16935
+ ...defaults.search,
16936
+ ...parsed.search
16958
16937
  },
16959
16938
  ranking: {
16960
16939
  ...defaults.ranking,
@@ -17045,128 +17024,6 @@ async function loadConfig(options = {}) {
17045
17024
  function isServerless() {
17046
17025
  return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
17047
17026
  }
17048
- function sleep(ms) {
17049
- return new Promise((resolve) => {
17050
- setTimeout(resolve, ms);
17051
- });
17052
- }
17053
- var JinaEmbeddingsProvider = class {
17054
- apiKey;
17055
- batchSize;
17056
- concurrency;
17057
- defaultTask;
17058
- constructor(options) {
17059
- if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
17060
- throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
17061
- }
17062
- if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
17063
- throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
17064
- }
17065
- this.apiKey = options.apiKey;
17066
- this.batchSize = options.batchSize;
17067
- this.concurrency = options.concurrency;
17068
- this.defaultTask = options.task ?? "retrieval.passage";
17069
- }
17070
- estimateTokens(text) {
17071
- const normalized = text.trim();
17072
- if (!normalized) {
17073
- return 0;
17074
- }
17075
- const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
17076
- const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
17077
- const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
17078
- const charEstimate = Math.ceil(normalized.length / 4);
17079
- const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
17080
- return Math.max(1, Math.max(charEstimate, lexicalEstimate));
17081
- }
17082
- async embedTexts(texts, modelId, task) {
17083
- if (texts.length === 0) {
17084
- return [];
17085
- }
17086
- const batches = [];
17087
- for (let i = 0; i < texts.length; i += this.batchSize) {
17088
- batches.push({
17089
- index: i,
17090
- values: texts.slice(i, i + this.batchSize)
17091
- });
17092
- }
17093
- const outputs = new Array(batches.length);
17094
- const limit = pLimit2__default.default(this.concurrency);
17095
- await Promise.all(
17096
- batches.map(
17097
- (batch, position) => limit(async () => {
17098
- outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
17099
- })
17100
- )
17101
- );
17102
- return outputs.flat();
17103
- }
17104
- async embedWithRetry(texts, modelId, task) {
17105
- const maxAttempts = 5;
17106
- let attempt = 0;
17107
- while (attempt < maxAttempts) {
17108
- attempt += 1;
17109
- let response;
17110
- try {
17111
- response = await fetch("https://api.jina.ai/v1/embeddings", {
17112
- method: "POST",
17113
- headers: {
17114
- "content-type": "application/json",
17115
- authorization: `Bearer ${this.apiKey}`
17116
- },
17117
- body: JSON.stringify({
17118
- model: modelId,
17119
- input: texts,
17120
- task
17121
- })
17122
- });
17123
- } catch (error) {
17124
- if (attempt >= maxAttempts) {
17125
- throw error;
17126
- }
17127
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17128
- continue;
17129
- }
17130
- if (!response.ok) {
17131
- const retryable = response.status === 429 || response.status >= 500;
17132
- if (!retryable || attempt >= maxAttempts) {
17133
- const errorBody = await response.text();
17134
- throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
17135
- }
17136
- await sleep(Math.min(2 ** attempt * 300, 5e3));
17137
- continue;
17138
- }
17139
- const payload = await response.json();
17140
- if (!payload.data || !Array.isArray(payload.data)) {
17141
- throw new Error("Invalid Jina embeddings response format");
17142
- }
17143
- return payload.data.map((entry) => entry.embedding);
17144
- }
17145
- throw new Error("Unreachable retry state");
17146
- }
17147
- };
17148
-
17149
- // src/embeddings/factory.ts
17150
- function createEmbeddingsProvider(config) {
17151
- if (config.embeddings.provider !== "jina") {
17152
- throw new SearchSocketError(
17153
- "CONFIG_MISSING",
17154
- `Unsupported embeddings provider ${config.embeddings.provider}`
17155
- );
17156
- }
17157
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17158
- if (!apiKey) {
17159
- throw new SearchSocketError(
17160
- "CONFIG_MISSING",
17161
- `Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
17162
- );
17163
- }
17164
- return new JinaEmbeddingsProvider({
17165
- apiKey,
17166
- batchSize: config.embeddings.batchSize,
17167
- concurrency: config.embeddings.concurrency
17168
- });
17169
- }
17170
17027
 
17171
17028
  // src/utils/text.ts
17172
17029
  function normalizeText(input) {
@@ -17241,103 +17098,6 @@ function resolveScope(config, override) {
17241
17098
  };
17242
17099
  }
17243
17100
 
17244
- // src/rerank/jina.ts
17245
- function sleep2(ms) {
17246
- return new Promise((resolve) => {
17247
- setTimeout(resolve, ms);
17248
- });
17249
- }
17250
- var JinaReranker = class {
17251
- apiKey;
17252
- model;
17253
- maxRetries;
17254
- constructor(options) {
17255
- this.apiKey = options.apiKey;
17256
- this.model = options.model;
17257
- this.maxRetries = options.maxRetries ?? 2;
17258
- }
17259
- async rerank(query, candidates, topN) {
17260
- if (candidates.length === 0) {
17261
- return [];
17262
- }
17263
- const body = {
17264
- model: this.model,
17265
- query,
17266
- documents: candidates.map((candidate) => candidate.text),
17267
- top_n: topN ?? candidates.length,
17268
- return_documents: false
17269
- };
17270
- let attempt = 0;
17271
- while (attempt <= this.maxRetries) {
17272
- attempt += 1;
17273
- let response;
17274
- try {
17275
- response = await fetch("https://api.jina.ai/v1/rerank", {
17276
- method: "POST",
17277
- headers: {
17278
- "content-type": "application/json",
17279
- authorization: `Bearer ${this.apiKey}`
17280
- },
17281
- body: JSON.stringify(body)
17282
- });
17283
- } catch (error) {
17284
- if (attempt <= this.maxRetries) {
17285
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17286
- continue;
17287
- }
17288
- throw error;
17289
- }
17290
- if (!response.ok) {
17291
- const retryable = response.status === 429 || response.status >= 500;
17292
- if (retryable && attempt <= this.maxRetries) {
17293
- await sleep2(Math.min(300 * 2 ** attempt, 4e3));
17294
- continue;
17295
- }
17296
- const errorBody = await response.text();
17297
- throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
17298
- }
17299
- const payload = await response.json();
17300
- const rawResults = payload.results ?? payload.data ?? [];
17301
- if (!Array.isArray(rawResults)) {
17302
- throw new Error("Invalid Jina rerank response format");
17303
- }
17304
- return rawResults.flatMap((item) => {
17305
- const index = item.index;
17306
- if (typeof index !== "number" || index < 0 || index >= candidates.length) {
17307
- return [];
17308
- }
17309
- const candidate = candidates[index];
17310
- if (!candidate) {
17311
- return [];
17312
- }
17313
- const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
17314
- return [
17315
- {
17316
- id: candidate.id,
17317
- score
17318
- }
17319
- ];
17320
- }).sort((a, b) => b.score - a.score);
17321
- }
17322
- throw new Error("Jina rerank request failed after retries");
17323
- }
17324
- };
17325
-
17326
- // src/rerank/factory.ts
17327
- function createReranker(config) {
17328
- if (!config.rerank.enabled) {
17329
- return null;
17330
- }
17331
- const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
17332
- if (!apiKey) {
17333
- return null;
17334
- }
17335
- return new JinaReranker({
17336
- apiKey,
17337
- model: config.rerank.model
17338
- });
17339
- }
17340
-
17341
17101
  // src/utils/time.ts
17342
17102
  function nowIso() {
17343
17103
  return (/* @__PURE__ */ new Date()).toISOString();
@@ -17356,13 +17116,6 @@ function normalizeUrlPath(rawPath) {
17356
17116
  }
17357
17117
  return out;
17358
17118
  }
17359
- function urlPathToMirrorRelative(urlPath) {
17360
- const normalized = normalizeUrlPath(urlPath);
17361
- if (normalized === "/") {
17362
- return "index.md";
17363
- }
17364
- return `${normalized.slice(1)}.md`;
17365
- }
17366
17119
  function staticHtmlFileToUrl(filePath, rootDir) {
17367
17120
  const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
17368
17121
  if (relative === "index.html") {
@@ -17396,434 +17149,239 @@ function joinUrl(baseUrl, route) {
17396
17149
  return `${base}${routePart}`;
17397
17150
  }
17398
17151
 
17399
- // src/vector/turso.ts
17400
- var TursoVectorStore = class {
17152
+ // src/vector/upstash.ts
17153
+ function chunkIndexName(scope) {
17154
+ return `${scope.projectId}--${scope.scopeName}`;
17155
+ }
17156
+ function pageIndexName(scope) {
17157
+ return `${scope.projectId}--${scope.scopeName}--pages`;
17158
+ }
17159
+ var UpstashSearchStore = class {
17401
17160
  client;
17402
- dimension;
17403
- chunksReady = false;
17404
- registryReady = false;
17405
- pagesReady = false;
17406
17161
  constructor(opts) {
17407
17162
  this.client = opts.client;
17408
- this.dimension = opts.dimension;
17409
- }
17410
- async ensureRegistry() {
17411
- if (this.registryReady) return;
17412
- await this.client.execute(`
17413
- CREATE TABLE IF NOT EXISTS registry (
17414
- scope_key TEXT PRIMARY KEY,
17415
- project_id TEXT NOT NULL,
17416
- scope_name TEXT NOT NULL,
17417
- model_id TEXT NOT NULL,
17418
- last_indexed_at TEXT NOT NULL,
17419
- vector_count INTEGER,
17420
- last_estimate_tokens INTEGER,
17421
- last_estimate_cost_usd REAL,
17422
- last_estimate_changed_chunks INTEGER
17423
- )
17424
- `);
17425
- const estimateCols = [
17426
- { name: "last_estimate_tokens", def: "INTEGER" },
17427
- { name: "last_estimate_cost_usd", def: "REAL" },
17428
- { name: "last_estimate_changed_chunks", def: "INTEGER" }
17429
- ];
17430
- for (const col of estimateCols) {
17431
- try {
17432
- await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
17433
- } catch (error) {
17434
- if (error instanceof Error && !error.message.includes("duplicate column")) {
17435
- throw error;
17436
- }
17437
- }
17438
- }
17439
- this.registryReady = true;
17440
- }
17441
- async ensureChunks(dim) {
17442
- if (this.chunksReady) return;
17443
- const exists = await this.chunksTableExists();
17444
- if (exists) {
17445
- const currentDim = await this.getChunksDimension();
17446
- if (currentDim !== null && currentDim !== dim) {
17447
- await this.client.batch([
17448
- "DROP INDEX IF EXISTS idx",
17449
- "DROP TABLE IF EXISTS chunks"
17450
- ]);
17451
- }
17452
- }
17453
- await this.client.batch([
17454
- `CREATE TABLE IF NOT EXISTS chunks (
17455
- id TEXT PRIMARY KEY,
17456
- project_id TEXT NOT NULL,
17457
- scope_name TEXT NOT NULL,
17458
- url TEXT NOT NULL,
17459
- path TEXT NOT NULL,
17460
- title TEXT NOT NULL,
17461
- section_title TEXT NOT NULL DEFAULT '',
17462
- heading_path TEXT NOT NULL DEFAULT '[]',
17463
- snippet TEXT NOT NULL DEFAULT '',
17464
- chunk_text TEXT NOT NULL DEFAULT '',
17465
- ordinal INTEGER NOT NULL DEFAULT 0,
17466
- content_hash TEXT NOT NULL DEFAULT '',
17467
- model_id TEXT NOT NULL DEFAULT '',
17468
- depth INTEGER NOT NULL DEFAULT 0,
17469
- incoming_links INTEGER NOT NULL DEFAULT 0,
17470
- route_file TEXT NOT NULL DEFAULT '',
17471
- tags TEXT NOT NULL DEFAULT '[]',
17472
- description TEXT NOT NULL DEFAULT '',
17473
- keywords TEXT NOT NULL DEFAULT '[]',
17474
- embedding F32_BLOB(${dim})
17475
- )`,
17476
- `CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
17477
- ]);
17478
- this.chunksReady = true;
17479
- }
17480
- async ensurePages() {
17481
- if (this.pagesReady) return;
17482
- await this.client.execute(`
17483
- CREATE TABLE IF NOT EXISTS pages (
17484
- project_id TEXT NOT NULL,
17485
- scope_name TEXT NOT NULL,
17486
- url TEXT NOT NULL,
17487
- title TEXT NOT NULL,
17488
- markdown TEXT NOT NULL,
17489
- route_file TEXT NOT NULL DEFAULT '',
17490
- route_resolution TEXT NOT NULL DEFAULT 'exact',
17491
- incoming_links INTEGER NOT NULL DEFAULT 0,
17492
- outgoing_links INTEGER NOT NULL DEFAULT 0,
17493
- depth INTEGER NOT NULL DEFAULT 0,
17494
- tags TEXT NOT NULL DEFAULT '[]',
17495
- indexed_at TEXT NOT NULL,
17496
- PRIMARY KEY (project_id, scope_name, url)
17497
- )
17498
- `);
17499
- this.pagesReady = true;
17500
17163
  }
17501
- async chunksTableExists() {
17502
- try {
17503
- await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
17504
- return true;
17505
- } catch (error) {
17506
- if (error instanceof Error && error.message.includes("no such table")) {
17507
- return false;
17508
- }
17509
- throw error;
17510
- }
17164
+ chunkIndex(scope) {
17165
+ return this.client.index(chunkIndexName(scope));
17511
17166
  }
17512
- /**
17513
- * Read the current F32_BLOB dimension from the chunks table schema.
17514
- * Returns null if the table doesn't exist or the dimension can't be parsed.
17515
- */
17516
- async getChunksDimension() {
17517
- try {
17518
- const rs = await this.client.execute(
17519
- "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
17520
- );
17521
- if (rs.rows.length === 0) return null;
17522
- const sql = rs.rows[0].sql;
17523
- const match = sql.match(/F32_BLOB\((\d+)\)/i);
17524
- return match ? parseInt(match[1], 10) : null;
17525
- } catch {
17526
- return null;
17527
- }
17167
+ pageIndex(scope) {
17168
+ return this.client.index(pageIndexName(scope));
17528
17169
  }
17529
- /**
17530
- * Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
17531
- * Used by `clean --remote` for a full reset.
17532
- */
17533
- async dropAllTables() {
17534
- await this.client.batch([
17535
- "DROP INDEX IF EXISTS idx",
17536
- "DROP TABLE IF EXISTS chunks",
17537
- "DROP TABLE IF EXISTS registry",
17538
- "DROP TABLE IF EXISTS pages"
17539
- ]);
17540
- this.chunksReady = false;
17541
- this.registryReady = false;
17542
- this.pagesReady = false;
17543
- }
17544
- async upsert(records, _scope) {
17545
- if (records.length === 0) return;
17546
- const dim = this.dimension ?? records[0].vector.length;
17547
- await this.ensureChunks(dim);
17170
+ async upsertChunks(chunks, scope) {
17171
+ if (chunks.length === 0) return;
17172
+ const index = this.chunkIndex(scope);
17548
17173
  const BATCH_SIZE = 100;
17549
- for (let i = 0; i < records.length; i += BATCH_SIZE) {
17550
- const batch = records.slice(i, i + BATCH_SIZE);
17551
- const stmts = batch.map((r) => ({
17552
- sql: `INSERT OR REPLACE INTO chunks
17553
- (id, project_id, scope_name, url, path, title, section_title,
17554
- heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
17555
- incoming_links, route_file, tags, description, keywords, embedding)
17556
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
17557
- args: [
17558
- r.id,
17559
- r.metadata.projectId,
17560
- r.metadata.scopeName,
17561
- r.metadata.url,
17562
- r.metadata.path,
17563
- r.metadata.title,
17564
- r.metadata.sectionTitle,
17565
- JSON.stringify(r.metadata.headingPath),
17566
- r.metadata.snippet,
17567
- r.metadata.chunkText,
17568
- r.metadata.ordinal,
17569
- r.metadata.contentHash,
17570
- r.metadata.modelId,
17571
- r.metadata.depth,
17572
- r.metadata.incomingLinks,
17573
- r.metadata.routeFile,
17574
- JSON.stringify(r.metadata.tags),
17575
- r.metadata.description ?? "",
17576
- JSON.stringify(r.metadata.keywords ?? []),
17577
- JSON.stringify(r.vector)
17578
- ]
17579
- }));
17580
- await this.client.batch(stmts);
17174
+ for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
17175
+ const batch = chunks.slice(i, i + BATCH_SIZE);
17176
+ await index.upsert(batch);
17581
17177
  }
17582
17178
  }
17583
- async query(queryVector, opts, scope) {
17584
- const dim = this.dimension ?? queryVector.length;
17585
- await this.ensureChunks(dim);
17586
- const queryJson = JSON.stringify(queryVector);
17587
- const rs = await this.client.execute({
17588
- sql: `SELECT c.id, c.project_id, c.scope_name, c.url, c.path, c.title,
17589
- c.section_title, c.heading_path, c.snippet, c.chunk_text,
17590
- c.ordinal, c.content_hash,
17591
- c.model_id, c.depth, c.incoming_links, c.route_file, c.tags,
17592
- c.description, c.keywords,
17593
- vector_distance_cos(c.embedding, vector(?)) AS distance
17594
- FROM vector_top_k('idx', vector(?), ?) AS v
17595
- JOIN chunks AS c ON c.rowid = v.id`,
17596
- args: [queryJson, queryJson, opts.topK]
17179
+ async search(query, opts, scope) {
17180
+ const index = this.chunkIndex(scope);
17181
+ const results = await index.search({
17182
+ query,
17183
+ limit: opts.limit,
17184
+ semanticWeight: opts.semanticWeight,
17185
+ inputEnrichment: opts.inputEnrichment,
17186
+ reranking: opts.reranking,
17187
+ filter: opts.filter
17597
17188
  });
17598
- let hits = [];
17599
- for (const row of rs.rows) {
17600
- const projectId = row.project_id;
17601
- const scopeName = row.scope_name;
17602
- if (projectId !== scope.projectId || scopeName !== scope.scopeName) {
17603
- continue;
17189
+ return results.map((doc) => ({
17190
+ id: doc.id,
17191
+ score: doc.score,
17192
+ metadata: {
17193
+ projectId: doc.metadata?.projectId ?? "",
17194
+ scopeName: doc.metadata?.scopeName ?? "",
17195
+ url: doc.content.url,
17196
+ path: doc.metadata?.path ?? "",
17197
+ title: doc.content.title,
17198
+ sectionTitle: doc.content.sectionTitle,
17199
+ headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
17200
+ snippet: doc.metadata?.snippet ?? "",
17201
+ chunkText: doc.content.text,
17202
+ ordinal: doc.metadata?.ordinal ?? 0,
17203
+ contentHash: doc.metadata?.contentHash ?? "",
17204
+ depth: doc.metadata?.depth ?? 0,
17205
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17206
+ routeFile: doc.metadata?.routeFile ?? "",
17207
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17208
+ description: doc.metadata?.description || void 0,
17209
+ keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
17604
17210
  }
17605
- const rowPath = row.path;
17606
- if (opts.pathPrefix) {
17607
- const rawPrefix = opts.pathPrefix.startsWith("/") ? opts.pathPrefix : `/${opts.pathPrefix}`;
17608
- const prefix = rawPrefix.endsWith("/") ? rawPrefix : `${rawPrefix}/`;
17609
- const normalizedPath = rowPath.replace(/\/$/, "");
17610
- const normalizedPrefix = rawPrefix.replace(/\/$/, "");
17611
- if (normalizedPath !== normalizedPrefix && !rowPath.startsWith(prefix)) {
17612
- continue;
17613
- }
17614
- }
17615
- const tags = JSON.parse(row.tags || "[]");
17616
- if (opts.tags && opts.tags.length > 0) {
17617
- if (!opts.tags.every((t) => tags.includes(t))) {
17618
- continue;
17619
- }
17620
- }
17621
- const distance = row.distance;
17622
- const score = 1 - distance;
17623
- const description = row.description || void 0;
17624
- const keywords = (() => {
17625
- const raw = row.keywords || "[]";
17626
- const parsed = JSON.parse(raw);
17627
- return parsed.length > 0 ? parsed : void 0;
17628
- })();
17629
- hits.push({
17630
- id: row.id,
17631
- score,
17632
- metadata: {
17633
- projectId,
17634
- scopeName,
17635
- url: row.url,
17636
- path: rowPath,
17637
- title: row.title,
17638
- sectionTitle: row.section_title,
17639
- headingPath: JSON.parse(row.heading_path || "[]"),
17640
- snippet: row.snippet,
17641
- chunkText: row.chunk_text || "",
17642
- ordinal: row.ordinal || 0,
17643
- contentHash: row.content_hash,
17644
- modelId: row.model_id,
17645
- depth: row.depth,
17646
- incomingLinks: row.incoming_links,
17647
- routeFile: row.route_file,
17648
- tags,
17649
- description,
17650
- keywords
17651
- }
17211
+ }));
17212
+ }
17213
+ async searchPages(query, opts, scope) {
17214
+ const index = this.pageIndex(scope);
17215
+ let results;
17216
+ try {
17217
+ results = await index.search({
17218
+ query,
17219
+ limit: opts.limit,
17220
+ semanticWeight: opts.semanticWeight,
17221
+ inputEnrichment: opts.inputEnrichment,
17222
+ reranking: true,
17223
+ filter: opts.filter
17652
17224
  });
17225
+ } catch {
17226
+ return [];
17653
17227
  }
17654
- hits.sort((a, b) => b.score - a.score);
17655
- return hits;
17228
+ return results.map((doc) => ({
17229
+ id: doc.id,
17230
+ score: doc.score,
17231
+ title: doc.content.title,
17232
+ url: doc.content.url,
17233
+ description: doc.content.description ?? "",
17234
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17235
+ depth: doc.metadata?.depth ?? 0,
17236
+ incomingLinks: doc.metadata?.incomingLinks ?? 0,
17237
+ routeFile: doc.metadata?.routeFile ?? ""
17238
+ }));
17656
17239
  }
17657
17240
  async deleteByIds(ids, scope) {
17658
17241
  if (ids.length === 0) return;
17242
+ const index = this.chunkIndex(scope);
17659
17243
  const BATCH_SIZE = 500;
17660
17244
  for (let i = 0; i < ids.length; i += BATCH_SIZE) {
17661
17245
  const batch = ids.slice(i, i + BATCH_SIZE);
17662
- const placeholders = batch.map(() => "?").join(", ");
17663
- await this.client.execute({
17664
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
17665
- args: [scope.projectId, scope.scopeName, ...batch]
17666
- });
17246
+ await index.delete(batch);
17667
17247
  }
17668
17248
  }
17669
17249
  async deleteScope(scope) {
17670
- await this.ensureRegistry();
17671
17250
  try {
17672
- await this.client.execute({
17673
- sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ?`,
17674
- args: [scope.projectId, scope.scopeName]
17675
- });
17676
- } catch (error) {
17677
- if (error instanceof Error && !error.message.includes("no such table")) {
17678
- throw error;
17679
- }
17251
+ const chunkIdx = this.chunkIndex(scope);
17252
+ await chunkIdx.deleteIndex();
17253
+ } catch {
17680
17254
  }
17681
17255
  try {
17682
- await this.client.execute({
17683
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17684
- args: [scope.projectId, scope.scopeName]
17685
- });
17686
- } catch (error) {
17687
- if (error instanceof Error && !error.message.includes("no such table")) {
17688
- throw error;
17689
- }
17256
+ const pageIdx = this.pageIndex(scope);
17257
+ await pageIdx.deleteIndex();
17258
+ } catch {
17690
17259
  }
17691
- await this.client.execute({
17692
- sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
17693
- args: [scope.projectId, scope.scopeName]
17694
- });
17695
17260
  }
17696
- async listScopes(scopeProjectId) {
17697
- await this.ensureRegistry();
17698
- const rs = await this.client.execute({
17699
- sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
17700
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
17701
- FROM registry WHERE project_id = ?`,
17702
- args: [scopeProjectId]
17703
- });
17704
- return rs.rows.map((row) => ({
17705
- projectId: row.project_id,
17706
- scopeName: row.scope_name,
17707
- modelId: row.model_id,
17708
- lastIndexedAt: row.last_indexed_at,
17709
- vectorCount: row.vector_count,
17710
- lastEstimateTokens: row.last_estimate_tokens,
17711
- lastEstimateCostUSD: row.last_estimate_cost_usd,
17712
- lastEstimateChangedChunks: row.last_estimate_changed_chunks
17713
- }));
17714
- }
17715
- async recordScope(info) {
17716
- await this.ensureRegistry();
17717
- const key = `${info.projectId}:${info.scopeName}`;
17718
- await this.client.execute({
17719
- sql: `INSERT OR REPLACE INTO registry
17720
- (scope_key, project_id, scope_name, model_id, last_indexed_at, vector_count,
17721
- last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks)
17722
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17723
- args: [
17724
- key,
17725
- info.projectId,
17726
- info.scopeName,
17727
- info.modelId,
17728
- info.lastIndexedAt,
17729
- info.vectorCount ?? null,
17730
- info.lastEstimateTokens ?? null,
17731
- info.lastEstimateCostUSD ?? null,
17732
- info.lastEstimateChangedChunks ?? null
17733
- ]
17734
- });
17261
+ async listScopes(projectId) {
17262
+ const allIndexes = await this.client.listIndexes();
17263
+ const prefix = `${projectId}--`;
17264
+ const scopeNames = /* @__PURE__ */ new Set();
17265
+ for (const name of allIndexes) {
17266
+ if (name.startsWith(prefix) && !name.endsWith("--pages")) {
17267
+ const scopeName = name.slice(prefix.length);
17268
+ scopeNames.add(scopeName);
17269
+ }
17270
+ }
17271
+ const scopes = [];
17272
+ for (const scopeName of scopeNames) {
17273
+ const scope = {
17274
+ projectId,
17275
+ scopeName,
17276
+ scopeId: `${projectId}:${scopeName}`
17277
+ };
17278
+ try {
17279
+ const info = await this.chunkIndex(scope).info();
17280
+ scopes.push({
17281
+ projectId,
17282
+ scopeName,
17283
+ lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
17284
+ documentCount: info.documentCount
17285
+ });
17286
+ } catch {
17287
+ scopes.push({
17288
+ projectId,
17289
+ scopeName,
17290
+ lastIndexedAt: "unknown",
17291
+ documentCount: 0
17292
+ });
17293
+ }
17294
+ }
17295
+ return scopes;
17735
17296
  }
17736
17297
  async getContentHashes(scope) {
17737
- const exists = await this.chunksTableExists();
17738
- if (!exists) return /* @__PURE__ */ new Map();
17739
- const rs = await this.client.execute({
17740
- sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
17741
- args: [scope.projectId, scope.scopeName]
17742
- });
17743
17298
  const map = /* @__PURE__ */ new Map();
17744
- for (const row of rs.rows) {
17745
- map.set(row.id, row.content_hash);
17299
+ const index = this.chunkIndex(scope);
17300
+ let cursor = "0";
17301
+ try {
17302
+ for (; ; ) {
17303
+ const result = await index.range({ cursor, limit: 100 });
17304
+ for (const doc of result.documents) {
17305
+ if (doc.metadata?.contentHash) {
17306
+ map.set(doc.id, doc.metadata.contentHash);
17307
+ }
17308
+ }
17309
+ if (!result.nextCursor || result.nextCursor === "0") break;
17310
+ cursor = result.nextCursor;
17311
+ }
17312
+ } catch {
17746
17313
  }
17747
17314
  return map;
17748
17315
  }
17749
17316
  async upsertPages(pages, scope) {
17750
17317
  if (pages.length === 0) return;
17751
- await this.ensurePages();
17752
- for (const page of pages) {
17753
- if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
17754
- throw new Error(
17755
- `Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
17756
- );
17757
- }
17758
- }
17759
- const BATCH_SIZE = 100;
17318
+ const index = this.pageIndex(scope);
17319
+ const BATCH_SIZE = 50;
17760
17320
  for (let i = 0; i < pages.length; i += BATCH_SIZE) {
17761
17321
  const batch = pages.slice(i, i + BATCH_SIZE);
17762
- const stmts = batch.map((p) => ({
17763
- sql: `INSERT OR REPLACE INTO pages
17764
- (project_id, scope_name, url, title, markdown, route_file,
17765
- route_resolution, incoming_links, outgoing_links, depth, tags, indexed_at)
17766
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
17767
- args: [
17768
- p.projectId,
17769
- p.scopeName,
17770
- p.url,
17771
- p.title,
17772
- p.markdown,
17773
- p.routeFile,
17774
- p.routeResolution,
17775
- p.incomingLinks,
17776
- p.outgoingLinks,
17777
- p.depth,
17778
- JSON.stringify(p.tags),
17779
- p.indexedAt
17780
- ]
17322
+ const docs = batch.map((p) => ({
17323
+ id: p.url,
17324
+ content: {
17325
+ title: p.title,
17326
+ url: p.url,
17327
+ type: "page",
17328
+ description: p.description ?? "",
17329
+ keywords: (p.keywords ?? []).join(","),
17330
+ summary: p.summary ?? "",
17331
+ tags: p.tags.join(",")
17332
+ },
17333
+ metadata: {
17334
+ markdown: p.markdown,
17335
+ projectId: p.projectId,
17336
+ scopeName: p.scopeName,
17337
+ routeFile: p.routeFile,
17338
+ routeResolution: p.routeResolution,
17339
+ incomingLinks: p.incomingLinks,
17340
+ outgoingLinks: p.outgoingLinks,
17341
+ depth: p.depth,
17342
+ indexedAt: p.indexedAt
17343
+ }
17781
17344
  }));
17782
- await this.client.batch(stmts);
17345
+ await index.upsert(docs);
17783
17346
  }
17784
17347
  }
17785
17348
  async getPage(url, scope) {
17786
- await this.ensurePages();
17787
- const rs = await this.client.execute({
17788
- sql: `SELECT * FROM pages WHERE project_id = ? AND scope_name = ? AND url = ?`,
17789
- args: [scope.projectId, scope.scopeName, url]
17790
- });
17791
- if (rs.rows.length === 0) return null;
17792
- const row = rs.rows[0];
17793
- return {
17794
- url: row.url,
17795
- title: row.title,
17796
- markdown: row.markdown,
17797
- projectId: row.project_id,
17798
- scopeName: row.scope_name,
17799
- routeFile: row.route_file,
17800
- routeResolution: row.route_resolution,
17801
- incomingLinks: row.incoming_links,
17802
- outgoingLinks: row.outgoing_links,
17803
- depth: row.depth,
17804
- tags: JSON.parse(row.tags || "[]"),
17805
- indexedAt: row.indexed_at
17806
- };
17349
+ const index = this.pageIndex(scope);
17350
+ try {
17351
+ const results = await index.fetch([url]);
17352
+ const doc = results[0];
17353
+ if (!doc) return null;
17354
+ return {
17355
+ url: doc.content.url,
17356
+ title: doc.content.title,
17357
+ markdown: doc.metadata.markdown,
17358
+ projectId: doc.metadata.projectId,
17359
+ scopeName: doc.metadata.scopeName,
17360
+ routeFile: doc.metadata.routeFile,
17361
+ routeResolution: doc.metadata.routeResolution,
17362
+ incomingLinks: doc.metadata.incomingLinks,
17363
+ outgoingLinks: doc.metadata.outgoingLinks,
17364
+ depth: doc.metadata.depth,
17365
+ tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
17366
+ indexedAt: doc.metadata.indexedAt,
17367
+ summary: doc.content.summary || void 0,
17368
+ description: doc.content.description || void 0,
17369
+ keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
17370
+ };
17371
+ } catch {
17372
+ return null;
17373
+ }
17807
17374
  }
17808
17375
  async deletePages(scope) {
17809
- await this.ensurePages();
17810
- await this.client.execute({
17811
- sql: `DELETE FROM pages WHERE project_id = ? AND scope_name = ?`,
17812
- args: [scope.projectId, scope.scopeName]
17813
- });
17814
- }
17815
- async getScopeModelId(scope) {
17816
- await this.ensureRegistry();
17817
- const rs = await this.client.execute({
17818
- sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
17819
- args: [scope.projectId, scope.scopeName]
17820
- });
17821
- if (rs.rows.length === 0) return null;
17822
- return rs.rows[0].model_id;
17376
+ try {
17377
+ const index = this.pageIndex(scope);
17378
+ await index.reset();
17379
+ } catch {
17380
+ }
17823
17381
  }
17824
17382
  async health() {
17825
17383
  try {
17826
- await this.client.execute("SELECT 1");
17384
+ await this.client.info();
17827
17385
  return { ok: true };
17828
17386
  } catch (error) {
17829
17387
  return {
@@ -17832,40 +17390,64 @@ var TursoVectorStore = class {
17832
17390
  };
17833
17391
  }
17834
17392
  }
17393
+ async dropAllIndexes(projectId) {
17394
+ const allIndexes = await this.client.listIndexes();
17395
+ const prefix = `${projectId}--`;
17396
+ for (const name of allIndexes) {
17397
+ if (name.startsWith(prefix)) {
17398
+ try {
17399
+ const index = this.client.index(name);
17400
+ await index.deleteIndex();
17401
+ } catch {
17402
+ }
17403
+ }
17404
+ }
17405
+ }
17835
17406
  };
17836
17407
 
17837
17408
  // src/vector/factory.ts
17838
- async function createVectorStore(config, cwd) {
17839
- const turso = config.vector.turso;
17840
- const remoteUrl = turso.url ?? process.env[turso.urlEnv];
17841
- if (remoteUrl) {
17842
- const { createClient: createClient2 } = await import('@libsql/client/http');
17843
- const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
17844
- const client2 = createClient2({
17845
- url: remoteUrl,
17846
- authToken
17847
- });
17848
- return new TursoVectorStore({
17849
- client: client2,
17850
- dimension: config.vector.dimension
17851
- });
17852
- }
17853
- if (isServerless()) {
17409
/**
 * Builds the Upstash-backed search store from the `upstash` section of the config.
 *
 * The URL and token are taken from `config.upstash.url` / `config.upstash.token`
 * when present, otherwise from the environment variables named by
 * `config.upstash.urlEnv` / `config.upstash.tokenEnv`.
 *
 * @param {object} config - Configuration object; only `config.upstash` is read here.
 * @returns {Promise<UpstashSearchStore>} Store wrapping a freshly created `Search` client.
 * @throws {SearchSocketError} Code `VECTOR_BACKEND_UNAVAILABLE` when the URL or token is missing.
 */
async function createUpstashStore(config) {
  const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
  const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
  const hasCredentials = Boolean(url) && Boolean(token);
  if (!hasCredentials) {
    throw new SearchSocketError(
      "VECTOR_BACKEND_UNAVAILABLE",
      `Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
    );
  }
  // Dynamic import: the SDK module is only loaded once credentials are known to exist.
  const upstashModule = await import('@upstash/search');
  const client = new upstashModule.Search({ url, token });
  return new UpstashSearchStore({ client });
}
17422
+
17423
+ // src/utils/pattern.ts
17424
/**
 * Tests a URL path against a single pattern.
 *
 * Both arguments have one trailing "/" removed first (a bare "/" is kept).
 * Supported pattern forms:
 *   - "<prefix>/**"  matches `<prefix>` itself and anything below it; "/**" matches everything.
 *   - "<prefix>/*"   matches exactly one extra path segment below `<prefix>`.
 *   - anything else  must equal the URL exactly.
 *
 * @param {string} url - URL path to test.
 * @param {string} pattern - Pattern to test against.
 * @returns {boolean} Whether the URL matches the pattern.
 */
function matchUrlPattern(url, pattern) {
  const stripTrailingSlash = (value) => {
    if (value === "/" || !value.endsWith("/")) return value;
    return value.slice(0, -1);
  };
  const target = stripTrailingSlash(url);
  const glob = stripTrailingSlash(pattern);

  // Recursive wildcard: the prefix itself or any descendant.
  if (glob.endsWith("/**")) {
    const base = glob.slice(0, -3);
    return base === "" || target === base || target.startsWith(base + "/");
  }

  // Single-level wildcard: exactly one non-empty segment under the prefix.
  if (glob.endsWith("/*")) {
    const base = glob.slice(0, -2);
    if (base === "") {
      return !(target === "/" || target.slice(1).includes("/"));
    }
    const childPrefix = base + "/";
    if (!target.startsWith(childPrefix)) return false;
    const segment = target.slice(childPrefix.length);
    return segment !== "" && !segment.includes("/");
  }

  return target === glob;
}
17446
/**
 * Reports whether a URL matches at least one pattern in a list.
 *
 * Patterns are tried in iteration order with `matchUrlPattern`, stopping at
 * the first hit. An empty list never matches.
 *
 * @param {string} url - URL path to test.
 * @param {Iterable<string>} patterns - Patterns to try.
 * @returns {boolean} `true` when any pattern matches.
 */
function matchUrlPatterns(url, patterns) {
  let matched = false;
  for (const candidate of patterns) {
    if (matchUrlPattern(url, candidate)) {
      matched = true;
      break;
    }
  }
  return matched;
}
17870
17452
 
17871
17453
  // src/search/ranking.ts
@@ -17875,7 +17457,12 @@ function nonNegativeOrZero(value) {
17875
17457
  }
17876
17458
  return Math.max(0, value);
17877
17459
  }
17878
- function rankHits(hits, config) {
17460
/**
 * Canonicalizes text so a query and a page title can be compared by substring.
 *
 * Steps: lowercase, Unicode-decompose (NFKD) and drop combining marks so
 * accented letters fold to their base letter, remove everything that is not
 * a letter, digit or whitespace, collapse whitespace runs to one space, trim.
 *
 * Letters and digits of any script are kept. An ASCII-only character class
 * here would delete them, reducing non-Latin titles to "" and truncating
 * accented words. Output for pure-ASCII input is the same as with an
 * ASCII-only `[^a-z0-9\s]` filter.
 *
 * @param {string} text - Raw query or title.
 * @returns {string} Normalized text; may be empty.
 */
function normalizeForTitleMatch(text) {
  return text
    .toLowerCase()
    // Lowercase first: some lowercase mappings emit combining marks that must be stripped too.
    .normalize("NFKD")
    .replace(/\p{M}+/gu, "")
    .replace(/[^\p{L}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
17463
+ function rankHits(hits, config, query) {
17464
+ const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
17465
+ const titleMatchWeight = config.ranking.weights.titleMatch;
17879
17466
  return hits.map((hit) => {
17880
17467
  let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
17881
17468
  if (config.ranking.enableIncomingLinkBoost) {
@@ -17886,6 +17473,12 @@ function rankHits(hits, config) {
17886
17473
  const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
17887
17474
  score += depthBoost * config.ranking.weights.depth;
17888
17475
  }
17476
+ if (normalizedQuery && titleMatchWeight > 0) {
17477
+ const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
17478
+ if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
17479
+ score += titleMatchWeight;
17480
+ }
17481
+ }
17889
17482
  return {
17890
17483
  hit,
17891
17484
  finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
@@ -17895,22 +17488,36 @@ function rankHits(hits, config) {
17895
17488
  return Number.isNaN(delta) ? 0 : delta;
17896
17489
  });
17897
17490
  }
17898
- function findPageWeight(url, pageWeights) {
17899
- const norm = (p) => p !== "/" && p.endsWith("/") ? p.slice(0, -1) : p;
17900
- const normalizedUrl = norm(url);
17901
- for (const [pattern, weight] of Object.entries(pageWeights)) {
17902
- if (norm(pattern) === normalizedUrl) {
17903
- return weight;
17491
/**
 * Prunes an ordered list of page results using two ranking settings.
 *
 * 1. `config.ranking.minScore` (when > 0): if the median `pageScore` of the
 *    list is below it, the whole list is discarded.
 * 2. `config.ranking.scoreGapThreshold` (when > 0): walking the list in
 *    order, the list is cut before the first entry whose relative drop from
 *    its predecessor, `(prev - current) / prev`, reaches the threshold.
 *    Pairs whose predecessor score is not positive are skipped.
 *
 * @param {Array<{pageScore: number}>} results - Page results in ranked order.
 * @param {object} config - Configuration; reads `config.ranking`.
 * @returns {Array} The same array when nothing is trimmed, otherwise a new (possibly empty) array.
 */
function trimByScoreGap(results, config) {
  const total = results.length;
  if (total === 0) return results;

  const gapThreshold = config.ranking.scoreGapThreshold;
  const floor = config.ranking.minScore;

  if (floor > 0) {
    const ascending = results.map((entry) => entry.pageScore).sort((a, b) => a - b);
    const half = Math.floor(total / 2);
    const median = total % 2 === 0 ? (ascending[half - 1] + ascending[half]) / 2 : ascending[half];
    if (median < floor) return [];
  }

  if (!(gapThreshold > 0)) return results;

  const cutAt = results.findIndex((entry, position) => {
    if (position === 0) return false;
    const previous = results[position - 1].pageScore;
    return previous > 0 && (previous - entry.pageScore) / previous >= gapThreshold;
  });
  return cutAt === -1 ? results : results.slice(0, cutAt);
}
17515
+ function findPageWeight(url, pageWeights) {
17516
+ let bestPattern = "";
17907
17517
  let bestWeight = 1;
17908
17518
  for (const [pattern, weight] of Object.entries(pageWeights)) {
17909
- const normalizedPattern = norm(pattern);
17910
- if (normalizedPattern === "/") continue;
17911
- const prefix = `${normalizedPattern}/`;
17912
- if (normalizedUrl.startsWith(prefix) && prefix.length > bestPrefix.length) {
17913
- bestPrefix = prefix;
17519
+ if (matchUrlPattern(url, pattern) && pattern.length > bestPattern.length) {
17520
+ bestPattern = pattern;
17914
17521
  bestWeight = weight;
17915
17522
  }
17916
17523
  }
@@ -17959,6 +17566,61 @@ function aggregateByPage(ranked, config) {
17959
17566
  return Number.isNaN(delta) ? 0 : delta;
17960
17567
  });
17961
17568
  }
17569
/**
 * Combines page-level hits with ranked chunk hits into one ranked list.
 *
 * - With no page hits, `rankedChunks` is returned untouched.
 * - A chunk whose URL also has a page hit gets a blended score:
 *   `(1 - w) * chunk.finalScore + w * page.score`, where
 *   `w = config.search.pageSearchWeight`. A non-finite blend falls back to
 *   the chunk's own score.
 * - A page hit with no chunk at all is added as a synthetic entry with id
 *   `page:<url>`, scored `page.score * w` (0 when that is not finite).
 * - The result is sorted by `finalScore`, highest first.
 *
 * @param {Array<object>} pageHits - Page-level hits (`url`, `title`, `score`, ...).
 * @param {Array<{hit: object, finalScore: number}>} rankedChunks - Ranked chunk hits.
 * @param {object} config - Configuration; reads `config.search.pageSearchWeight`.
 * @returns {Array<{hit: object, finalScore: number}>} Merged, sorted entries.
 */
function mergePageAndChunkResults(pageHits, rankedChunks, config) {
  if (pageHits.length === 0) return rankedChunks;

  const pageWeight = config.search.pageSearchWeight;
  // On duplicate URLs the later page hit wins.
  const pageByUrl = new Map(Array.from(pageHits, (page) => [page.url, page]));
  const coveredUrls = new Set();
  const combined = [];

  for (const chunk of rankedChunks) {
    const chunkUrl = chunk.hit.metadata.url;
    const page = pageByUrl.get(chunkUrl);
    if (!page) {
      combined.push(chunk);
      continue;
    }
    coveredUrls.add(chunkUrl);
    const blended = (1 - pageWeight) * chunk.finalScore + pageWeight * page.score;
    combined.push({
      hit: chunk.hit,
      finalScore: Number.isFinite(blended) ? blended : chunk.finalScore
    });
  }

  // Pages that matched at page level only still need an entry of their own.
  for (const [url, page] of pageByUrl) {
    if (coveredUrls.has(url)) continue;
    const fallbackText = page.description || page.title;
    const weighted = page.score * pageWeight;
    combined.push({
      hit: {
        id: `page:${url}`,
        score: page.score,
        metadata: {
          projectId: "",
          scopeName: "",
          url: page.url,
          path: page.url,
          title: page.title,
          sectionTitle: "",
          headingPath: [],
          snippet: fallbackText,
          chunkText: fallbackText,
          ordinal: 0,
          contentHash: "",
          depth: page.depth,
          incomingLinks: page.incomingLinks,
          routeFile: page.routeFile,
          tags: page.tags
        }
      },
      finalScore: Number.isFinite(weighted) ? weighted : 0
    });
  }

  return combined.sort((left, right) => {
    const difference = right.finalScore - left.finalScore;
    return Number.isNaN(difference) ? 0 : difference;
  });
}
17962
17624
 
17963
17625
  // src/search/engine.ts
17964
17626
  var requestSchema = zod.z.object({
@@ -17967,34 +17629,25 @@ var requestSchema = zod.z.object({
17967
17629
  scope: zod.z.string().optional(),
17968
17630
  pathPrefix: zod.z.string().optional(),
17969
17631
  tags: zod.z.array(zod.z.string()).optional(),
17970
- rerank: zod.z.boolean().optional(),
17971
17632
  groupBy: zod.z.enum(["page", "chunk"]).optional()
17972
17633
  });
17973
17634
  var SearchEngine = class _SearchEngine {
17974
17635
  cwd;
17975
17636
  config;
17976
- embeddings;
17977
- vectorStore;
17978
- reranker;
17637
+ store;
17979
17638
  constructor(options) {
17980
17639
  this.cwd = options.cwd;
17981
17640
  this.config = options.config;
17982
- this.embeddings = options.embeddings;
17983
- this.vectorStore = options.vectorStore;
17984
- this.reranker = options.reranker;
17641
+ this.store = options.store;
17985
17642
  }
17986
17643
  static async create(options = {}) {
17987
17644
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
17988
17645
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
17989
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
17990
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
17991
- const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
17646
+ const store = options.store ?? await createUpstashStore(config);
17992
17647
  return new _SearchEngine({
17993
17648
  cwd,
17994
17649
  config,
17995
- embeddings,
17996
- vectorStore,
17997
- reranker
17650
+ store
17998
17651
  });
17999
17652
  }
18000
17653
  getConfig() {
@@ -18008,99 +17661,130 @@ var SearchEngine = class _SearchEngine {
18008
17661
  const input = parsed.data;
18009
17662
  const totalStart = process.hrtime.bigint();
18010
17663
  const resolvedScope = resolveScope(this.config, input.scope);
18011
- await this.assertModelCompatibility(resolvedScope);
18012
17664
  const topK = input.topK ?? 10;
18013
- const wantsRerank = Boolean(input.rerank);
18014
17665
  const groupByPage = (input.groupBy ?? "page") === "page";
18015
17666
  const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
18016
- const embedStart = process.hrtime.bigint();
18017
- const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
18018
- const queryVector = queryEmbeddings[0];
18019
- if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
18020
- throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
17667
+ const filterParts = [];
17668
+ if (input.pathPrefix) {
17669
+ const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
17670
+ filterParts.push(`url GLOB '${prefix}*'`);
18021
17671
  }
18022
- const embedMs = hrTimeMs(embedStart);
18023
- const vectorStart = process.hrtime.bigint();
18024
- const hits = await this.vectorStore.query(
18025
- queryVector,
18026
- {
18027
- topK: candidateK,
18028
- pathPrefix: input.pathPrefix,
18029
- tags: input.tags
18030
- },
18031
- resolvedScope
18032
- );
18033
- const vectorMs = hrTimeMs(vectorStart);
18034
- const ranked = rankHits(hits, this.config);
18035
- let usedRerank = false;
18036
- let rerankMs = 0;
18037
- let ordered = ranked;
18038
- if (wantsRerank) {
18039
- const rerankStart = process.hrtime.bigint();
18040
- ordered = await this.rerankHits(input.q, ranked, topK);
18041
- rerankMs = hrTimeMs(rerankStart);
18042
- usedRerank = true;
17672
+ if (input.tags && input.tags.length > 0) {
17673
+ for (const tag of input.tags) {
17674
+ filterParts.push(`tags GLOB '*${tag}*'`);
17675
+ }
18043
17676
  }
18044
- let results;
18045
- const minScore = this.config.ranking.minScore;
17677
+ const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
17678
+ const useDualSearch = this.config.search.dualSearch && groupByPage;
17679
+ const searchStart = process.hrtime.bigint();
17680
+ let ranked;
17681
+ if (useDualSearch) {
17682
+ const chunkLimit = Math.max(topK * 10, 100);
17683
+ const pageLimit = 20;
17684
+ const [pageHits, chunkHits] = await Promise.all([
17685
+ this.store.searchPages(
17686
+ input.q,
17687
+ {
17688
+ limit: pageLimit,
17689
+ semanticWeight: this.config.search.semanticWeight,
17690
+ inputEnrichment: this.config.search.inputEnrichment,
17691
+ filter
17692
+ },
17693
+ resolvedScope
17694
+ ),
17695
+ this.store.search(
17696
+ input.q,
17697
+ {
17698
+ limit: chunkLimit,
17699
+ semanticWeight: this.config.search.semanticWeight,
17700
+ inputEnrichment: this.config.search.inputEnrichment,
17701
+ reranking: false,
17702
+ filter
17703
+ },
17704
+ resolvedScope
17705
+ )
17706
+ ]);
17707
+ const rankedChunks = rankHits(chunkHits, this.config, input.q);
17708
+ ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
17709
+ } else {
17710
+ const hits = await this.store.search(
17711
+ input.q,
17712
+ {
17713
+ limit: candidateK,
17714
+ semanticWeight: this.config.search.semanticWeight,
17715
+ inputEnrichment: this.config.search.inputEnrichment,
17716
+ reranking: this.config.search.reranking,
17717
+ filter
17718
+ },
17719
+ resolvedScope
17720
+ );
17721
+ ranked = rankHits(hits, this.config, input.q);
17722
+ }
17723
+ const searchMs = hrTimeMs(searchStart);
17724
+ const results = this.buildResults(ranked, topK, groupByPage, input.q);
17725
+ return {
17726
+ q: input.q,
17727
+ scope: resolvedScope.scopeName,
17728
+ results,
17729
+ meta: {
17730
+ timingsMs: {
17731
+ search: Math.round(searchMs),
17732
+ total: Math.round(hrTimeMs(totalStart))
17733
+ }
17734
+ }
17735
+ };
17736
+ }
17737
+ ensureSnippet(hit) {
17738
+ const snippet = hit.hit.metadata.snippet;
17739
+ if (snippet && snippet.length >= 30) return snippet;
17740
+ const chunkText = hit.hit.metadata.chunkText;
17741
+ if (chunkText) return toSnippet(chunkText);
17742
+ return snippet || "";
17743
+ }
17744
+ buildResults(ordered, topK, groupByPage, _query) {
18046
17745
  if (groupByPage) {
18047
17746
  let pages = aggregateByPage(ordered, this.config);
18048
- if (minScore > 0) {
18049
- pages = pages.filter((p) => p.pageScore >= minScore);
18050
- }
17747
+ pages = trimByScoreGap(pages, this.config);
18051
17748
  const minRatio = this.config.ranking.minChunkScoreRatio;
18052
- results = pages.slice(0, topK).map((page) => {
17749
+ return pages.slice(0, topK).map((page) => {
18053
17750
  const bestScore = page.bestChunk.finalScore;
18054
- const minScore2 = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
18055
- const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minScore2).slice(0, 5);
17751
+ const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
17752
+ const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
18056
17753
  return {
18057
17754
  url: page.url,
18058
17755
  title: page.title,
18059
17756
  sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
18060
- snippet: page.bestChunk.hit.metadata.snippet,
17757
+ snippet: this.ensureSnippet(page.bestChunk),
18061
17758
  score: Number(page.pageScore.toFixed(6)),
18062
17759
  routeFile: page.routeFile,
18063
17760
  chunks: meaningful.length > 1 ? meaningful.map((c) => ({
18064
17761
  sectionTitle: c.hit.metadata.sectionTitle || void 0,
18065
- snippet: c.hit.metadata.snippet,
17762
+ snippet: this.ensureSnippet(c),
18066
17763
  headingPath: c.hit.metadata.headingPath,
18067
17764
  score: Number(c.finalScore.toFixed(6))
18068
17765
  })) : void 0
18069
17766
  };
18070
17767
  });
18071
17768
  } else {
17769
+ let filtered = ordered;
17770
+ const minScore = this.config.ranking.minScore;
18072
17771
  if (minScore > 0) {
18073
- ordered = ordered.filter((entry) => entry.finalScore >= minScore);
17772
+ filtered = ordered.filter((entry) => entry.finalScore >= minScore);
18074
17773
  }
18075
- results = ordered.slice(0, topK).map(({ hit, finalScore }) => ({
17774
+ return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
18076
17775
  url: hit.metadata.url,
18077
17776
  title: hit.metadata.title,
18078
17777
  sectionTitle: hit.metadata.sectionTitle || void 0,
18079
- snippet: hit.metadata.snippet,
17778
+ snippet: this.ensureSnippet({ hit, finalScore }),
18080
17779
  score: Number(finalScore.toFixed(6)),
18081
17780
  routeFile: hit.metadata.routeFile
18082
17781
  }));
18083
17782
  }
18084
- return {
18085
- q: input.q,
18086
- scope: resolvedScope.scopeName,
18087
- results,
18088
- meta: {
18089
- timingsMs: {
18090
- embed: Math.round(embedMs),
18091
- vector: Math.round(vectorMs),
18092
- rerank: Math.round(rerankMs),
18093
- total: Math.round(hrTimeMs(totalStart))
18094
- },
18095
- usedRerank,
18096
- modelId: this.config.embeddings.model
18097
- }
18098
- };
18099
17783
  }
18100
17784
  async getPage(pathOrUrl, scope) {
18101
17785
  const resolvedScope = resolveScope(this.config, scope);
18102
17786
  const urlPath = this.resolveInputPath(pathOrUrl);
18103
- const page = await this.vectorStore.getPage(urlPath, resolvedScope);
17787
+ const page = await this.store.getPage(urlPath, resolvedScope);
18104
17788
  if (!page) {
18105
17789
  throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
18106
17790
  }
@@ -18121,7 +17805,7 @@ var SearchEngine = class _SearchEngine {
18121
17805
  };
18122
17806
  }
18123
17807
  async health() {
18124
- return this.vectorStore.health();
17808
+ return this.store.health();
18125
17809
  }
18126
17810
  resolveInputPath(pathOrUrl) {
18127
17811
  try {
@@ -18133,90 +17817,6 @@ var SearchEngine = class _SearchEngine {
18133
17817
  const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
18134
17818
  return normalizeUrlPath(withoutQueryOrHash);
18135
17819
  }
18136
- async assertModelCompatibility(scope) {
18137
- const modelId = await this.vectorStore.getScopeModelId(scope);
18138
- if (modelId && modelId !== this.config.embeddings.model) {
18139
- throw new SearchSocketError(
18140
- "EMBEDDING_MODEL_MISMATCH",
18141
- `Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
18142
- );
18143
- }
18144
- }
18145
- async rerankHits(query, ranked, topK) {
18146
- if (!this.config.rerank.enabled) {
18147
- throw new SearchSocketError(
18148
- "INVALID_REQUEST",
18149
- "rerank=true requested but rerank.enabled is not set to true.",
18150
- 400
18151
- );
18152
- }
18153
- if (!this.reranker) {
18154
- throw new SearchSocketError(
18155
- "CONFIG_MISSING",
18156
- `rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
18157
- 400
18158
- );
18159
- }
18160
- const pageGroups = /* @__PURE__ */ new Map();
18161
- for (const entry of ranked) {
18162
- const url = entry.hit.metadata.url;
18163
- const group = pageGroups.get(url);
18164
- if (group) group.push(entry);
18165
- else pageGroups.set(url, [entry]);
18166
- }
18167
- const MAX_CHUNKS_PER_PAGE = 5;
18168
- const MIN_CHUNKS_PER_PAGE = 1;
18169
- const MIN_CHUNK_SCORE_RATIO = 0.5;
18170
- const MAX_DOC_CHARS = 2e3;
18171
- const pageCandidates = [];
18172
- for (const [url, chunks] of pageGroups) {
18173
- const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
18174
- const bestScore = byScore[0].finalScore;
18175
- const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
18176
- const selected = byScore.filter(
18177
- (c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
18178
- ).slice(0, MAX_CHUNKS_PER_PAGE);
18179
- selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
18180
- const first = selected[0].hit.metadata;
18181
- const parts = [first.title];
18182
- if (first.description) {
18183
- parts.push(first.description);
18184
- }
18185
- if (first.keywords && first.keywords.length > 0) {
18186
- parts.push(first.keywords.join(", "));
18187
- }
18188
- const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
18189
- parts.push(body);
18190
- let text = parts.join("\n\n");
18191
- if (text.length > MAX_DOC_CHARS) {
18192
- text = text.slice(0, MAX_DOC_CHARS);
18193
- }
18194
- pageCandidates.push({ id: url, text });
18195
- }
18196
- const maxCandidates = Math.max(topK, this.config.rerank.topN);
18197
- const cappedCandidates = pageCandidates.slice(0, maxCandidates);
18198
- const reranked = await this.reranker.rerank(
18199
- query,
18200
- cappedCandidates,
18201
- maxCandidates
18202
- );
18203
- const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
18204
- return ranked.map((entry) => {
18205
- const pageScore = scoreByUrl.get(entry.hit.metadata.url);
18206
- const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
18207
- if (pageScore === void 0 || !Number.isFinite(pageScore)) {
18208
- return { ...entry, finalScore: base };
18209
- }
18210
- const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
18211
- return {
18212
- ...entry,
18213
- finalScore: Number.isFinite(combined) ? combined : base
18214
- };
18215
- }).sort((a, b) => {
18216
- const delta = b.finalScore - a.finalScore;
18217
- return Number.isNaN(delta) ? 0 : delta;
18218
- });
18219
- }
18220
17820
  };
18221
17821
 
18222
17822
  // src/sveltekit/handle.ts
@@ -18370,7 +17970,8 @@ function searchsocketHandle(options = {}) {
18370
17970
  throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
18371
17971
  }
18372
17972
  const engine = await getEngine();
18373
- const result = await engine.search(body);
17973
+ const searchRequest = body;
17974
+ const result = await engine.search(searchRequest);
18374
17975
  return withCors(
18375
17976
  new Response(JSON.stringify(result), {
18376
17977
  status: 200,
@@ -18430,9 +18031,8 @@ function withCors(response, request, config) {
18430
18031
  }
18431
18032
  function ensureStateDirs(cwd, stateDir, scope) {
18432
18033
  const statePath = path__default.default.resolve(cwd, stateDir);
18433
- const pagesPath = path__default.default.join(statePath, "pages", scope.scopeName);
18434
- fs__default.default.mkdirSync(pagesPath, { recursive: true });
18435
- return { statePath, pagesPath };
18034
+ fs__default.default.mkdirSync(statePath, { recursive: true });
18035
+ return { statePath };
18436
18036
  }
18437
18037
  function sha1(input) {
18438
18038
  return crypto.createHash("sha1").update(input).digest("hex");
@@ -18682,7 +18282,7 @@ function buildEmbeddingText(chunk, prependTitle) {
18682
18282
 
18683
18283
  ${chunk.chunkText}`;
18684
18284
  }
18685
- function chunkMirrorPage(page, config, scope) {
18285
+ function chunkPage(page, config, scope) {
18686
18286
  const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
18687
18287
  const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
18688
18288
  const chunks = [];
@@ -19599,6 +19199,17 @@ function extractFromHtml(url, html, config) {
19599
19199
  if ($(`[${config.extract.noindexAttr}]`).length > 0) {
19600
19200
  return null;
19601
19201
  }
19202
+ const weightRaw = $("meta[name='searchsocket-weight']").attr("content")?.trim();
19203
+ let weight;
19204
+ if (weightRaw !== void 0) {
19205
+ const parsed = Number(weightRaw);
19206
+ if (Number.isFinite(parsed) && parsed >= 0) {
19207
+ weight = parsed;
19208
+ }
19209
+ }
19210
+ if (weight === 0) {
19211
+ return null;
19212
+ }
19602
19213
  const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
19603
19214
  const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
19604
19215
  const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
@@ -19654,7 +19265,8 @@ function extractFromHtml(url, html, config) {
19654
19265
  noindex: false,
19655
19266
  tags,
19656
19267
  description,
19657
- keywords
19268
+ keywords,
19269
+ weight
19658
19270
  };
19659
19271
  }
19660
19272
  function extractFromMarkdown(url, markdown, title) {
@@ -19667,6 +19279,14 @@ function extractFromMarkdown(url, markdown, title) {
19667
19279
  if (frontmatter.noindex === true || searchsocketMeta?.noindex === true) {
19668
19280
  return null;
19669
19281
  }
19282
+ let mdWeight;
19283
+ const rawWeight = searchsocketMeta?.weight ?? frontmatter.searchsocketWeight;
19284
+ if (typeof rawWeight === "number" && Number.isFinite(rawWeight) && rawWeight >= 0) {
19285
+ mdWeight = rawWeight;
19286
+ }
19287
+ if (mdWeight === 0) {
19288
+ return null;
19289
+ }
19670
19290
  const content = parsed.content;
19671
19291
  const normalized = normalizeMarkdown(content);
19672
19292
  if (!normalizeText(normalized)) {
@@ -19689,56 +19309,10 @@ function extractFromMarkdown(url, markdown, title) {
19689
19309
  noindex: false,
19690
19310
  tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
19691
19311
  description: fmDescription,
19692
- keywords: fmKeywords
19312
+ keywords: fmKeywords,
19313
+ weight: mdWeight
19693
19314
  };
19694
19315
  }
19695
- function yamlString(value) {
19696
- return JSON.stringify(value);
19697
- }
19698
- function yamlArray(values) {
19699
- return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
19700
- }
19701
- function buildMirrorMarkdown(page) {
19702
- const frontmatterLines = [
19703
- "---",
19704
- `url: ${yamlString(page.url)}`,
19705
- `title: ${yamlString(page.title)}`,
19706
- `scope: ${yamlString(page.scope)}`,
19707
- `routeFile: ${yamlString(page.routeFile)}`,
19708
- `routeResolution: ${yamlString(page.routeResolution)}`,
19709
- `generatedAt: ${yamlString(page.generatedAt)}`,
19710
- `incomingLinks: ${page.incomingLinks}`,
19711
- `outgoingLinks: ${page.outgoingLinks}`,
19712
- `depth: ${page.depth}`,
19713
- `tags: ${yamlArray(page.tags)}`,
19714
- "---",
19715
- ""
19716
- ];
19717
- return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
19718
- }
19719
- function stripGeneratedAt(content) {
19720
- return content.replace(/^generatedAt: .*$/m, "");
19721
- }
19722
- async function writeMirrorPage(statePath, scope, page) {
19723
- const relative = urlPathToMirrorRelative(page.url);
19724
- const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
19725
- await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
19726
- const newContent = buildMirrorMarkdown(page);
19727
- try {
19728
- const existing = await fs4__default.default.readFile(outputPath, "utf8");
19729
- if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
19730
- return outputPath;
19731
- }
19732
- } catch {
19733
- }
19734
- await fs4__default.default.writeFile(outputPath, newContent, "utf8");
19735
- return outputPath;
19736
- }
19737
- async function cleanMirrorForScope(statePath, scope) {
19738
- const target = path__default.default.join(statePath, "pages", scope.scopeName);
19739
- await fs4__default.default.rm(target, { recursive: true, force: true });
19740
- await fs4__default.default.mkdir(target, { recursive: true });
19741
- }
19742
19316
  function segmentToRegex(segment) {
19743
19317
  if (segment.startsWith("(") && segment.endsWith(")")) {
19744
19318
  return { regex: "", score: 0 };
@@ -19899,7 +19473,7 @@ async function parseManifest(cwd, outputDir) {
19899
19473
  const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
19900
19474
  let content;
19901
19475
  try {
19902
- content = await fs4__default.default.readFile(manifestPath, "utf8");
19476
+ content = await fs3__default.default.readFile(manifestPath, "utf8");
19903
19477
  } catch {
19904
19478
  throw new SearchSocketError(
19905
19479
  "BUILD_MANIFEST_NOT_FOUND",
@@ -19958,15 +19532,7 @@ function expandDynamicUrl(url, value) {
19958
19532
  return url.replace(/\[\[?\.\.\.[^\]]+\]?\]|\[\[[^\]]+\]\]|\[[^\]]+\]/g, value);
19959
19533
  }
19960
19534
  function isExcluded(url, patterns) {
19961
- for (const pattern of patterns) {
19962
- if (pattern.endsWith("/*")) {
19963
- const prefix = pattern.slice(0, -1);
19964
- if (url.startsWith(prefix) || url === prefix.slice(0, -1)) return true;
19965
- } else if (url === pattern) {
19966
- return true;
19967
- }
19968
- }
19969
- return false;
19535
+ return matchUrlPatterns(url, patterns);
19970
19536
  }
19971
19537
  function findFreePort() {
19972
19538
  return new Promise((resolve, reject) => {
@@ -20080,7 +19646,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
20080
19646
  const visited = /* @__PURE__ */ new Set();
20081
19647
  const pages = [];
20082
19648
  const queue = [];
20083
- const limit = pLimit2__default.default(8);
19649
+ const limit = pLimit__default.default(8);
20084
19650
  for (const seed of seedUrls) {
20085
19651
  const normalized = normalizeUrlPath(seed);
20086
19652
  if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
@@ -20162,7 +19728,7 @@ async function loadBuildPages(cwd, config, maxPages) {
20162
19728
  const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
20163
19729
  const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
20164
19730
  try {
20165
- const concurrencyLimit = pLimit2__default.default(8);
19731
+ const concurrencyLimit = pLimit__default.default(8);
20166
19732
  const results = await Promise.allSettled(
20167
19733
  selected.map(
20168
19734
  (route) => concurrencyLimit(async () => {
@@ -20236,7 +19802,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
20236
19802
  const selected = typeof limit === "number" ? files.slice(0, limit) : files;
20237
19803
  const pages = [];
20238
19804
  for (const filePath of selected) {
20239
- const raw = await fs4__default.default.readFile(filePath, "utf8");
19805
+ const raw = await fs3__default.default.readFile(filePath, "utf8");
20240
19806
  const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
20241
19807
  pages.push({
20242
19808
  url: filePathToUrl(filePath, baseDir),
@@ -20331,7 +19897,7 @@ async function loadCrawledPages(config, maxPages) {
20331
19897
  const routes = await resolveRoutes(config);
20332
19898
  const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
20333
19899
  const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
20334
- const concurrencyLimit = pLimit2__default.default(8);
19900
+ const concurrencyLimit = pLimit__default.default(8);
20335
19901
  const results = await Promise.allSettled(
20336
19902
  selected.map(
20337
19903
  (route) => concurrencyLimit(async () => {
@@ -20372,7 +19938,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
20372
19938
  const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
20373
19939
  const pages = [];
20374
19940
  for (const filePath of selected) {
20375
- const html = await fs4__default.default.readFile(filePath, "utf8");
19941
+ const html = await fs3__default.default.readFile(filePath, "utf8");
20376
19942
  pages.push({
20377
19943
  url: staticHtmlFileToUrl(filePath, outputDir),
20378
19944
  html,
@@ -20382,35 +19948,113 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
20382
19948
  }
20383
19949
  return pages;
20384
19950
  }
19951
/**
 * Parse a robots.txt document and return the rule group that applies to
 * `userAgent`.
 *
 * Grouping follows RFC 9309: one or more consecutive `User-agent` lines open a
 * group, the `Allow`/`Disallow` lines that follow belong to every agent named
 * in that group, and the next `User-agent` line seen after at least one rule
 * starts a new group. Directives other than `User-agent`, `Allow` and
 * `Disallow` (for example `Sitemap` or `Crawl-delay`) are ignored and do not
 * affect grouping.
 *
 * @param {string} content - Raw robots.txt text.
 * @param {string} [userAgent="Searchsocket"] - Agent name to select rules for
 *   (compared case-insensitively).
 * @returns {{ disallow: string[], allow: string[] }} Rules of the group named
 *   after `userAgent` if one exists (even when it is empty), otherwise the
 *   `*` group, otherwise an empty rule set.
 */
function parseRobotsTxt(content, userAgent = "Searchsocket") {
  const agentGroups = /* @__PURE__ */ new Map();
  let currentAgents = [];
  // True once the current group has seen an Allow/Disallow line; the next
  // User-agent line then opens a fresh group instead of extending this one.
  let groupHasRules = false;
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) continue;
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;
    const directive = line.slice(0, colonIdx).trim().toLowerCase();
    const value = line.slice(colonIdx + 1).trim();
    if (directive === "user-agent") {
      if (groupHasRules) {
        currentAgents = [];
        groupHasRules = false;
      }
      const agentName = value.toLowerCase();
      currentAgents.push(agentName);
      if (!agentGroups.has(agentName)) {
        agentGroups.set(agentName, { disallow: [], allow: [] });
      }
    } else if (directive === "disallow" || directive === "allow") {
      // An empty value (e.g. "Disallow:") adds no pattern but still counts as
      // a rule line that closes the run of User-agent lines.
      groupHasRules = true;
      if (value) {
        for (const agent of currentAgents) {
          agentGroups.get(agent)[directive].push(value);
        }
      }
    }
    // Any other directive is intentionally ignored.
  }
  // A group addressed to this agent wins even when it carries no rules: an
  // explicit "Disallow:" for the agent means "everything is allowed".
  const specific = agentGroups.get(userAgent.toLowerCase());
  if (specific) {
    return specific;
  }
  return agentGroups.get("*") ?? { disallow: [], allow: [] };
}
19986
/**
 * Test a URL path against a single robots.txt path pattern.
 *
 * Supports the two special characters used by robots.txt patterns: `*`
 * matches any run of characters and a trailing `$` anchors the pattern to the
 * end of the path. A pattern without either is a plain prefix match.
 * Matching is done with `startsWith`/`indexOf`/`endsWith` rather than a
 * generated RegExp, so untrusted patterns cannot trigger regex backtracking.
 */
function matchesRobotsPattern(urlPath, pattern) {
  const anchored = pattern.endsWith("$");
  const segments = (anchored ? pattern.slice(0, -1) : pattern).split("*");
  const head = segments[0];
  if (!urlPath.startsWith(head)) return false;
  let pos = head.length;
  if (segments.length === 1) {
    return anchored ? urlPath.length === pos : true;
  }
  for (let i = 1; i < segments.length; i += 1) {
    const segment = segments[i];
    if (anchored && i === segments.length - 1) {
      // The final literal must sit at the very end, after everything matched so far.
      return urlPath.length - segment.length >= pos && urlPath.endsWith(segment);
    }
    const found = urlPath.indexOf(segment, pos);
    if (found === -1) return false;
    pos = found + segment.length;
  }
  return true;
}

/**
 * Decide whether `urlPath` is blocked by a parsed robots.txt rule set.
 *
 * The longest matching pattern wins; when the longest matching `allow` and
 * `disallow` patterns have the same length, the path is allowed.
 *
 * @param {string} urlPath - URL path to test.
 * @param {{ disallow: string[], allow: string[] }} rules3 - Rule set.
 * @returns {boolean} `true` when the path must not be indexed.
 */
function isBlockedByRobots(urlPath, rules3) {
  const longestMatchLength = (patterns) => patterns.reduce(
    (best, pattern) => pattern.length > best && matchesRobotsPattern(urlPath, pattern) ? pattern.length : best,
    0
  );
  const disallowLength = longestMatchLength(rules3.disallow);
  if (disallowLength === 0) return false;
  return longestMatchLength(rules3.allow) < disallowLength;
}
20002
/**
 * Read `robots.txt` from `dir` and parse it.
 *
 * Best-effort: any failure while building the path, reading the file or
 * parsing it yields `null` instead of an error.
 *
 * @param {string} dir - Directory expected to contain `robots.txt`.
 * @returns {Promise<object|null>} Parsed rules, or `null` on any failure.
 */
async function loadRobotsTxtFromDir(dir) {
  let rules = null;
  try {
    const robotsPath = path__default.default.join(dir, "robots.txt");
    const raw = await fs3__default.default.readFile(robotsPath, "utf8");
    rules = parseRobotsTxt(raw);
  } catch {
    rules = null;
  }
  return rules;
}
20010
/**
 * Download `/robots.txt` from the origin of `baseUrl` and parse it.
 *
 * Best-effort: an invalid base URL, a network error, a non-2xx response or a
 * timeout all yield `null`.
 *
 * @param {string} baseUrl - Base URL of the site being crawled.
 * @param {number} [timeoutMs=10000] - Upper bound for the whole request
 *   (headers and body) so an unresponsive host cannot stall the caller.
 * @returns {Promise<object|null>} Parsed rules, or `null` when unavailable.
 */
async function fetchRobotsTxt(baseUrl, timeoutMs = 1e4) {
  try {
    const url = new URL("/robots.txt", baseUrl).href;
    const response = await fetch(url, { signal: AbortSignal.timeout(timeoutMs) });
    if (!response.ok) return null;
    const content = await response.text();
    return parseRobotsTxt(content);
  } catch {
    // Deliberately swallowed: robots.txt is optional for indexing.
    return null;
  }
}
20385
20021
 
20386
20022
  // src/indexing/pipeline.ts
20387
- var EMBEDDING_PRICE_PER_1K_TOKENS_USD = {
20388
- "jina-embeddings-v3": 2e-5
20389
- };
20390
- var DEFAULT_EMBEDDING_PRICE_PER_1K = 2e-5;
20023
/**
 * Build a plain-text summary of a page: title, optional description,
 * optional comma-joined keywords and the markdown body with its markup
 * stripped, separated by blank lines and capped at `maxChars` characters.
 *
 * @param {{ title: string, description?: string, keywords?: string[], markdown: string }} page
 * @param {number} [maxChars=3500] - Maximum length of the returned string.
 * @returns {string} The summary, truncated and trimmed when it exceeds `maxChars`.
 */
function buildPageSummary(page, maxChars = 3500) {
  const sections = [page.title];
  if (page.description) {
    sections.push(page.description);
  }
  if (page.keywords && page.keywords.length > 0) {
    sections.push(page.keywords.join(", "));
  }
  // Applied in order: fenced code, inline code, links/images, heading
  // markers, leftover markdown punctuation, then whitespace collapsing.
  const strippers = [
    [/```[\s\S]*?```/g, " "],
    [/`([^`]+)`/g, "$1"],
    [/!?\[([^\]]*)\]\([^)]*\)/g, "$1"],
    [/^#{1,6}\s+/gm, ""],
    [/[>*_|~\-]/g, " "],
    [/\s+/g, " "]
  ];
  const plainBody = strippers.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    page.markdown
  ).trim();
  if (plainBody) {
    sections.push(plainBody);
  }
  const summary = sections.join("\n\n");
  return summary.length > maxChars ? summary.slice(0, maxChars).trim() : summary;
}
20391
20039
  var IndexPipeline = class _IndexPipeline {
20392
20040
  cwd;
20393
20041
  config;
20394
- embeddings;
20395
- vectorStore;
20042
+ store;
20396
20043
  logger;
20397
20044
  constructor(options) {
20398
20045
  this.cwd = options.cwd;
20399
20046
  this.config = options.config;
20400
- this.embeddings = options.embeddings;
20401
- this.vectorStore = options.vectorStore;
20047
+ this.store = options.store;
20402
20048
  this.logger = options.logger;
20403
20049
  }
20404
20050
  static async create(options = {}) {
20405
20051
  const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
20406
20052
  const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
20407
- const embeddings = options.embeddingsProvider ?? createEmbeddingsProvider(config);
20408
- const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
20053
+ const store = options.store ?? await createUpstashStore(config);
20409
20054
  return new _IndexPipeline({
20410
20055
  cwd,
20411
20056
  config,
20412
- embeddings,
20413
- vectorStore,
20057
+ store,
20414
20058
  logger: options.logger ?? new Logger()
20415
20059
  });
20416
20060
  }
@@ -20430,25 +20074,17 @@ var IndexPipeline = class _IndexPipeline {
20430
20074
  stageTimingsMs[name] = Math.round(hrTimeMs(start));
20431
20075
  };
20432
20076
  const scope = resolveScope(this.config, options.scopeOverride);
20433
- const { statePath } = ensureStateDirs(this.cwd, this.config.state.dir, scope);
20077
+ ensureStateDirs(this.cwd, this.config.state.dir);
20434
20078
  const sourceMode = options.sourceOverride ?? this.config.source.mode;
20435
- this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, model: ${this.config.embeddings.model})`);
20079
+ this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
20436
20080
  if (options.force) {
20437
20081
  this.logger.info("Force mode enabled \u2014 full rebuild");
20438
- await cleanMirrorForScope(statePath, scope);
20439
20082
  }
20440
20083
  if (options.dryRun) {
20441
20084
  this.logger.info("Dry run \u2014 no writes will be performed");
20442
20085
  }
20443
20086
  const manifestStart = stageStart();
20444
- const existingHashes = await this.vectorStore.getContentHashes(scope);
20445
- const existingModelId = await this.vectorStore.getScopeModelId(scope);
20446
- if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
20447
- throw new SearchSocketError(
20448
- "EMBEDDING_MODEL_MISMATCH",
20449
- `Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
20450
- );
20451
- }
20087
+ const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
20452
20088
  stageEnd("manifest", manifestStart);
20453
20089
  this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
20454
20090
  const sourceStart = stageStart();
@@ -20465,6 +20101,53 @@ var IndexPipeline = class _IndexPipeline {
20465
20101
  }
20466
20102
  stageEnd("source", sourceStart);
20467
20103
  this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
20104
+ const filterStart = stageStart();
20105
+ let filteredSourcePages = sourcePages;
20106
+ if (this.config.exclude.length > 0) {
20107
+ const beforeExclude = filteredSourcePages.length;
20108
+ filteredSourcePages = filteredSourcePages.filter((p) => {
20109
+ const url = normalizeUrlPath(p.url);
20110
+ if (matchUrlPatterns(url, this.config.exclude)) {
20111
+ this.logger.debug(`Excluding ${url} (matched exclude pattern)`);
20112
+ return false;
20113
+ }
20114
+ return true;
20115
+ });
20116
+ const excludedCount = beforeExclude - filteredSourcePages.length;
20117
+ if (excludedCount > 0) {
20118
+ this.logger.info(`Excluded ${excludedCount} page${excludedCount === 1 ? "" : "s"} by config exclude patterns`);
20119
+ }
20120
+ }
20121
+ if (this.config.respectRobotsTxt) {
20122
+ let robotsRules = null;
20123
+ if (sourceMode === "static-output") {
20124
+ robotsRules = await loadRobotsTxtFromDir(
20125
+ path__default.default.resolve(this.cwd, this.config.source.staticOutputDir)
20126
+ );
20127
+ } else if (sourceMode === "build" && this.config.source.build) {
20128
+ robotsRules = await loadRobotsTxtFromDir(
20129
+ path__default.default.resolve(this.cwd, this.config.source.build.outputDir)
20130
+ );
20131
+ } else if (sourceMode === "crawl" && this.config.source.crawl) {
20132
+ robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
20133
+ }
20134
+ if (robotsRules) {
20135
+ const beforeRobots = filteredSourcePages.length;
20136
+ filteredSourcePages = filteredSourcePages.filter((p) => {
20137
+ const url = normalizeUrlPath(p.url);
20138
+ if (isBlockedByRobots(url, robotsRules)) {
20139
+ this.logger.debug(`Excluding ${url} (blocked by robots.txt)`);
20140
+ return false;
20141
+ }
20142
+ return true;
20143
+ });
20144
+ const robotsExcluded = beforeRobots - filteredSourcePages.length;
20145
+ if (robotsExcluded > 0) {
20146
+ this.logger.info(`Excluded ${robotsExcluded} page${robotsExcluded === 1 ? "" : "s"} by robots.txt`);
20147
+ }
20148
+ }
20149
+ }
20150
+ stageEnd("filter", filterStart);
20468
20151
  const routeStart = stageStart();
20469
20152
  const routePatterns = await buildRoutePatterns(this.cwd);
20470
20153
  stageEnd("route_map", routeStart);
@@ -20472,7 +20155,7 @@ var IndexPipeline = class _IndexPipeline {
20472
20155
  const extractStart = stageStart();
20473
20156
  this.logger.info("Extracting content...");
20474
20157
  const extractedPages = [];
20475
- for (const sourcePage of sourcePages) {
20158
+ for (const sourcePage of filteredSourcePages) {
20476
20159
  const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
20477
20160
  if (!extracted) {
20478
20161
  this.logger.warn(
@@ -20498,16 +20181,29 @@ var IndexPipeline = class _IndexPipeline {
20498
20181
  seenUrls.add(page.url);
20499
20182
  uniquePages.push(page);
20500
20183
  }
20184
+ const indexablePages = [];
20185
+ for (const page of uniquePages) {
20186
+ const effectiveWeight = page.weight ?? findPageWeight(page.url, this.config.ranking.pageWeights);
20187
+ if (effectiveWeight === 0) {
20188
+ this.logger.debug(`Excluding ${page.url} (zero weight)`);
20189
+ continue;
20190
+ }
20191
+ indexablePages.push(page);
20192
+ }
20193
+ const zeroWeightCount = uniquePages.length - indexablePages.length;
20194
+ if (zeroWeightCount > 0) {
20195
+ this.logger.info(`Excluded ${zeroWeightCount} page${zeroWeightCount === 1 ? "" : "s"} with zero weight`);
20196
+ }
20501
20197
  stageEnd("extract", extractStart);
20502
- const skippedPages = sourcePages.length - uniquePages.length;
20503
- this.logger.info(`Extracted ${uniquePages.length} page${uniquePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
20198
+ const skippedPages = filteredSourcePages.length - indexablePages.length;
20199
+ this.logger.info(`Extracted ${indexablePages.length} page${indexablePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
20504
20200
  const linkStart = stageStart();
20505
- const pageSet = new Set(uniquePages.map((page) => normalizeUrlPath(page.url)));
20201
+ const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
20506
20202
  const incomingLinkCount = /* @__PURE__ */ new Map();
20507
- for (const page of uniquePages) {
20203
+ for (const page of indexablePages) {
20508
20204
  incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
20509
20205
  }
20510
- for (const page of uniquePages) {
20206
+ for (const page of indexablePages) {
20511
20207
  for (const outgoing of page.outgoingLinks) {
20512
20208
  if (!pageSet.has(outgoing)) {
20513
20209
  continue;
@@ -20517,9 +20213,9 @@ var IndexPipeline = class _IndexPipeline {
20517
20213
  }
20518
20214
  stageEnd("links", linkStart);
20519
20215
  this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
20520
- const mirrorStart = stageStart();
20521
- this.logger.info("Writing mirror pages...");
20522
- const mirrorPages = [];
20216
+ const pagesStart = stageStart();
20217
+ this.logger.info("Building indexed pages...");
20218
+ const pages = [];
20523
20219
  let routeExact = 0;
20524
20220
  let routeBestEffort = 0;
20525
20221
  const precomputedRoutes = /* @__PURE__ */ new Map();
@@ -20531,7 +20227,7 @@ var IndexPipeline = class _IndexPipeline {
20531
20227
  });
20532
20228
  }
20533
20229
  }
20534
- for (const page of uniquePages) {
20230
+ for (const page of indexablePages) {
20535
20231
  const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
20536
20232
  if (routeMatch.routeResolution === "best-effort") {
20537
20233
  if (this.config.source.strictRouteMapping) {
@@ -20548,7 +20244,7 @@ var IndexPipeline = class _IndexPipeline {
20548
20244
  } else {
20549
20245
  routeExact += 1;
20550
20246
  }
20551
- const mirror = {
20247
+ const indexedPage = {
20552
20248
  url: page.url,
20553
20249
  title: page.title,
20554
20250
  scope: scope.scopeName,
@@ -20563,35 +20259,38 @@ var IndexPipeline = class _IndexPipeline {
20563
20259
  description: page.description,
20564
20260
  keywords: page.keywords
20565
20261
  };
20566
- mirrorPages.push(mirror);
20567
- if (this.config.state.writeMirror) {
20568
- await writeMirrorPage(statePath, scope, mirror);
20569
- }
20570
- this.logger.event("markdown_written", { url: page.url });
20262
+ pages.push(indexedPage);
20263
+ this.logger.event("page_indexed", { url: page.url });
20571
20264
  }
20572
20265
  if (!options.dryRun) {
20573
- const pageRecords = mirrorPages.map((mp) => ({
20574
- url: mp.url,
20575
- title: mp.title,
20576
- markdown: mp.markdown,
20577
- projectId: scope.projectId,
20578
- scopeName: scope.scopeName,
20579
- routeFile: mp.routeFile,
20580
- routeResolution: mp.routeResolution,
20581
- incomingLinks: mp.incomingLinks,
20582
- outgoingLinks: mp.outgoingLinks,
20583
- depth: mp.depth,
20584
- tags: mp.tags,
20585
- indexedAt: mp.generatedAt
20586
- }));
20587
- await this.vectorStore.deletePages(scope);
20588
- await this.vectorStore.upsertPages(pageRecords, scope);
20266
+ const pageRecords = pages.map((p) => {
20267
+ const summary = buildPageSummary(p);
20268
+ return {
20269
+ url: p.url,
20270
+ title: p.title,
20271
+ markdown: p.markdown,
20272
+ projectId: scope.projectId,
20273
+ scopeName: scope.scopeName,
20274
+ routeFile: p.routeFile,
20275
+ routeResolution: p.routeResolution,
20276
+ incomingLinks: p.incomingLinks,
20277
+ outgoingLinks: p.outgoingLinks,
20278
+ depth: p.depth,
20279
+ tags: p.tags,
20280
+ indexedAt: p.generatedAt,
20281
+ summary,
20282
+ description: p.description,
20283
+ keywords: p.keywords
20284
+ };
20285
+ });
20286
+ await this.store.deletePages(scope);
20287
+ await this.store.upsertPages(pageRecords, scope);
20589
20288
  }
20590
- stageEnd("mirror", mirrorStart);
20591
- this.logger.info(`Mirrored ${mirrorPages.length} page${mirrorPages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["mirror"]}ms)`);
20289
+ stageEnd("pages", pagesStart);
20290
+ this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
20592
20291
  const chunkStart = stageStart();
20593
20292
  this.logger.info("Chunking pages...");
20594
- let chunks = mirrorPages.flatMap((page) => chunkMirrorPage(page, this.config, scope));
20293
+ let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
20595
20294
  const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
20596
20295
  if (typeof maxChunks === "number") {
20597
20296
  chunks = chunks.slice(0, maxChunks);
@@ -20623,125 +20322,59 @@ var IndexPipeline = class _IndexPipeline {
20623
20322
  });
20624
20323
  const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
20625
20324
  this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
20626
- const embedStart = stageStart();
20627
- const chunkTokenEstimates = /* @__PURE__ */ new Map();
20628
- for (const chunk of changedChunks) {
20629
- chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
20630
- }
20631
- const estimatedTokens = changedChunks.reduce(
20632
- (sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
20633
- 0
20634
- );
20635
- const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
20636
- const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
20637
- let newEmbeddings = 0;
20638
- const vectorsByChunk = /* @__PURE__ */ new Map();
20325
+ const upsertStart = stageStart();
20326
+ let documentsUpserted = 0;
20639
20327
  if (!options.dryRun && changedChunks.length > 0) {
20640
- this.logger.info(`Embedding ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} (~${estimatedTokens.toLocaleString()} tokens, ~$${estimatedCostUSD.toFixed(6)})...`);
20641
- const embeddings = await this.embeddings.embedTexts(
20642
- changedChunks.map((chunk) => buildEmbeddingText(chunk, this.config.chunking.prependTitle)),
20643
- this.config.embeddings.model,
20644
- "retrieval.passage"
20645
- );
20646
- if (embeddings.length !== changedChunks.length) {
20647
- throw new SearchSocketError(
20648
- "VECTOR_BACKEND_UNAVAILABLE",
20649
- `Embedding provider returned ${embeddings.length} vectors for ${changedChunks.length} chunks.`
20650
- );
20651
- }
20652
- for (let i = 0; i < changedChunks.length; i += 1) {
20653
- const chunk = changedChunks[i];
20654
- const embedding = embeddings[i];
20655
- if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
20656
- throw new SearchSocketError(
20657
- "VECTOR_BACKEND_UNAVAILABLE",
20658
- `Embedding provider returned an invalid vector for chunk index ${i}.`
20659
- );
20660
- }
20661
- vectorsByChunk.set(chunk.chunkKey, embedding);
20662
- newEmbeddings += 1;
20663
- this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
20664
- }
20665
- }
20666
- stageEnd("embedding", embedStart);
20667
- if (changedChunks.length > 0) {
20668
- this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
20669
- } else {
20670
- this.logger.info("No chunks to embed \u2014 all up to date");
20671
- }
20672
- const syncStart = stageStart();
20673
- if (!options.dryRun) {
20674
- this.logger.info("Syncing vectors...");
20675
- const upserts = [];
20676
- for (const chunk of changedChunks) {
20677
- const vector = vectorsByChunk.get(chunk.chunkKey);
20678
- if (!vector) {
20679
- continue;
20680
- }
20681
- upserts.push({
20328
+ this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
20329
+ const UPSTASH_CONTENT_LIMIT = 4096;
20330
+ const docs = changedChunks.map((chunk) => {
20331
+ const title = chunk.title;
20332
+ const sectionTitle = chunk.sectionTitle ?? "";
20333
+ const url = chunk.url;
20334
+ const tags = chunk.tags.join(",");
20335
+ const headingPath = chunk.headingPath.join(" > ");
20336
+ const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
20337
+ const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
20338
+ const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
20339
+ return {
20682
20340
  id: chunk.chunkKey,
20683
- vector,
20341
+ content: { title, sectionTitle, text, url, tags, headingPath },
20684
20342
  metadata: {
20685
20343
  projectId: scope.projectId,
20686
20344
  scopeName: scope.scopeName,
20687
- url: chunk.url,
20688
20345
  path: chunk.path,
20689
- title: chunk.title,
20690
- sectionTitle: chunk.sectionTitle ?? "",
20691
- headingPath: chunk.headingPath,
20692
20346
  snippet: chunk.snippet,
20693
- chunkText: chunk.chunkText.slice(0, 4e3),
20694
20347
  ordinal: chunk.ordinal,
20695
20348
  contentHash: chunk.contentHash,
20696
- modelId: this.config.embeddings.model,
20697
20349
  depth: chunk.depth,
20698
20350
  incomingLinks: chunk.incomingLinks,
20699
20351
  routeFile: chunk.routeFile,
20700
- tags: chunk.tags,
20701
- description: chunk.description,
20702
- keywords: chunk.keywords
20352
+ description: chunk.description ?? "",
20353
+ keywords: (chunk.keywords ?? []).join(",")
20703
20354
  }
20704
- });
20705
- }
20706
- if (upserts.length > 0) {
20707
- await this.vectorStore.upsert(upserts, scope);
20708
- this.logger.event("upserted", { count: upserts.length });
20709
- }
20710
- if (deletes.length > 0) {
20711
- await this.vectorStore.deleteByIds(deletes, scope);
20712
- this.logger.event("deleted", { count: deletes.length });
20713
- }
20714
- }
20715
- stageEnd("sync", syncStart);
20716
- this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
20717
- const finalizeStart = stageStart();
20718
- if (!options.dryRun) {
20719
- const scopeInfo = {
20720
- projectId: scope.projectId,
20721
- scopeName: scope.scopeName,
20722
- modelId: this.config.embeddings.model,
20723
- lastIndexedAt: nowIso(),
20724
- vectorCount: chunks.length,
20725
- lastEstimateTokens: estimatedTokens,
20726
- lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
20727
- lastEstimateChangedChunks: changedChunks.length
20728
- };
20729
- await this.vectorStore.recordScope(scopeInfo);
20730
- this.logger.event("registry_updated", {
20731
- scope: scope.scopeName,
20732
- vectorCount: chunks.length
20355
+ };
20733
20356
  });
20357
+ await this.store.upsertChunks(docs, scope);
20358
+ documentsUpserted = docs.length;
20359
+ this.logger.event("upserted", { count: docs.length });
20360
+ }
20361
+ if (!options.dryRun && deletes.length > 0) {
20362
+ await this.store.deleteByIds(deletes, scope);
20363
+ this.logger.event("deleted", { count: deletes.length });
20364
+ }
20365
+ stageEnd("upsert", upsertStart);
20366
+ if (changedChunks.length > 0) {
20367
+ this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
20368
+ } else {
20369
+ this.logger.info("No chunks to upsert \u2014 all up to date");
20734
20370
  }
20735
- stageEnd("finalize", finalizeStart);
20736
20371
  this.logger.info("Done.");
20737
20372
  return {
20738
- pagesProcessed: mirrorPages.length,
20373
+ pagesProcessed: pages.length,
20739
20374
  chunksTotal: chunks.length,
20740
20375
  chunksChanged: changedChunks.length,
20741
- newEmbeddings,
20376
+ documentsUpserted,
20742
20377
  deletes: deletes.length,
20743
- estimatedTokens,
20744
- estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
20745
20378
  routeExact,
20746
20379
  routeBestEffort,
20747
20380
  stageTimingsMs
@@ -20772,30 +20405,11 @@ function shouldRunAutoIndex(options) {
20772
20405
  }
20773
20406
  return false;
20774
20407
  }
20775
- function searchsocketViteConfig() {
20776
- return {
20777
- name: "searchsocket:config",
20778
- config() {
20779
- return {
20780
- ssr: {
20781
- external: ["@libsql/client", "libsql"]
20782
- }
20783
- };
20784
- }
20785
- };
20786
- }
20787
20408
  function searchsocketVitePlugin(options = {}) {
20788
20409
  let executed = false;
20789
20410
  let running = false;
20790
20411
  return {
20791
20412
  name: "searchsocket:auto-index",
20792
- config() {
20793
- return {
20794
- ssr: {
20795
- external: ["@libsql/client", "libsql"]
20796
- }
20797
- };
20798
- },
20799
20413
  async closeBundle() {
20800
20414
  if (executed || running) {
20801
20415
  return;
@@ -20817,15 +20431,14 @@ function searchsocketVitePlugin(options = {}) {
20817
20431
  });
20818
20432
  const stats = await pipeline.run({
20819
20433
  changedOnly: options.changedOnly ?? true,
20820
- force: options.force ?? false,
20434
+ force: (options.force ?? false) || /^(1|true|yes)$/i.test(process.env.SEARCHSOCKET_FORCE_REINDEX ?? ""),
20821
20435
  dryRun: options.dryRun ?? false,
20822
20436
  scopeOverride: options.scope,
20823
20437
  verbose: options.verbose
20824
20438
  });
20825
20439
  logger3.info(
20826
- `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} embedded=${stats.newEmbeddings}`
20440
+ `[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
20827
20441
  );
20828
- logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
20829
20442
  executed = true;
20830
20443
  } finally {
20831
20444
  running = false;
@@ -20833,6 +20446,186 @@ function searchsocketVitePlugin(options = {}) {
20833
20446
  }
20834
20447
  };
20835
20448
  }
20449
+
20450
+ // src/sveltekit/scroll-to-text.ts
20451
+ var HIGHLIGHT_CLASS = "ssk-highlight";
20452
+ var HIGHLIGHT_DURATION = 2e3;
20453
+ var HIGHLIGHT_MARKER_ATTR = "data-ssk-highlight-marker";
20454
+ var HIGHLIGHT_NAME = "ssk-search-match";
20455
+ var styleInjected = false;
20456
+ function ensureHighlightStyle() {
20457
+ if (styleInjected || typeof document === "undefined") return;
20458
+ styleInjected = true;
20459
+ const style = document.createElement("style");
20460
+ style.textContent = `
20461
+ @keyframes ssk-highlight-fade {
20462
+ 0% { background-color: rgba(16, 185, 129, 0.18); }
20463
+ 100% { background-color: transparent; }
20464
+ }
20465
+ .${HIGHLIGHT_CLASS} {
20466
+ animation: ssk-highlight-fade ${HIGHLIGHT_DURATION}ms ease-out forwards;
20467
+ border-radius: 4px;
20468
+ }
20469
+ ::highlight(${HIGHLIGHT_NAME}) {
20470
+ background-color: rgba(16, 185, 129, 0.18);
20471
+ }
20472
+ `;
20473
+ document.head.appendChild(style);
20474
+ }
20475
+ var IGNORED_TAGS = /* @__PURE__ */ new Set(["SCRIPT", "STYLE", "NOSCRIPT", "TEMPLATE"]);
20476
+ function buildTextMap(root2) {
20477
+ const walker = document.createTreeWalker(root2, NodeFilter.SHOW_TEXT, {
20478
+ acceptNode(node) {
20479
+ const parent = node.parentElement;
20480
+ if (!parent || IGNORED_TAGS.has(parent.tagName)) return NodeFilter.FILTER_REJECT;
20481
+ return NodeFilter.FILTER_ACCEPT;
20482
+ }
20483
+ });
20484
+ const chunks = [];
20485
+ let text = "";
20486
+ let current;
20487
+ while (current = walker.nextNode()) {
20488
+ const value = current.nodeValue ?? "";
20489
+ if (!value) continue;
20490
+ chunks.push({ node: current, start: text.length, end: text.length + value.length });
20491
+ text += value;
20492
+ }
20493
+ return { text, chunks };
20494
+ }
20495
+ function normalize(text) {
20496
+ return text.toLowerCase().replace(/\s+/g, " ").trim();
20497
+ }
20498
+ function escapeRegExp(value) {
20499
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
20500
+ }
20501
+ function buildNeedleRegex(needle) {
20502
+ const tokenParts = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
20503
+ if (tokenParts.length > 1) {
20504
+ const pattern = tokenParts.map(escapeRegExp).join("[^\\p{L}\\p{N}]+");
20505
+ return new RegExp(pattern, "iu");
20506
+ }
20507
+ if (tokenParts.length === 1) {
20508
+ return new RegExp(escapeRegExp(tokenParts[0]), "iu");
20509
+ }
20510
+ if (!needle) return null;
20511
+ return new RegExp(escapeRegExp(needle).replace(/\s+/g, "\\s+"), "i");
20512
+ }
20513
+ function buildLenientRegex(needle) {
20514
+ const tokenParts = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
20515
+ if (tokenParts.length <= 1) return null;
20516
+ const pattern = tokenParts.map(escapeRegExp).join("[^\\p{L}\\p{N}]*");
20517
+ return new RegExp(pattern, "iu");
20518
+ }
20519
+ function findMatch(fullText, needle) {
20520
+ const regex = buildNeedleRegex(needle);
20521
+ if (regex) {
20522
+ const m = regex.exec(fullText);
20523
+ if (m && typeof m.index === "number") {
20524
+ return { start: m.index, end: m.index + m[0].length };
20525
+ }
20526
+ }
20527
+ const lenient = buildLenientRegex(needle);
20528
+ if (lenient) {
20529
+ const m = lenient.exec(fullText);
20530
+ if (m && typeof m.index === "number") {
20531
+ return { start: m.index, end: m.index + m[0].length };
20532
+ }
20533
+ }
20534
+ return null;
20535
+ }
20536
+ function resolveRange(map, offsets) {
20537
+ let startChunk;
20538
+ let endChunk;
20539
+ for (const chunk of map.chunks) {
20540
+ if (!startChunk && offsets.start >= chunk.start && offsets.start < chunk.end) {
20541
+ startChunk = chunk;
20542
+ }
20543
+ if (offsets.end > chunk.start && offsets.end <= chunk.end) {
20544
+ endChunk = chunk;
20545
+ }
20546
+ if (startChunk && endChunk) break;
20547
+ }
20548
+ if (!startChunk || !endChunk) return null;
20549
+ const range = document.createRange();
20550
+ range.setStart(startChunk.node, offsets.start - startChunk.start);
20551
+ range.setEnd(endChunk.node, offsets.end - endChunk.start);
20552
+ return range;
20553
+ }
20554
+ function hasCustomHighlightAPI() {
20555
+ return typeof CSS !== "undefined" && typeof CSS.highlights !== "undefined";
20556
+ }
20557
+ var highlightTimer = null;
20558
+ function highlightWithCSS(range) {
20559
+ ensureHighlightStyle();
20560
+ const hl = new globalThis.Highlight(range);
20561
+ CSS.highlights.set(HIGHLIGHT_NAME, hl);
20562
+ if (highlightTimer) clearTimeout(highlightTimer);
20563
+ highlightTimer = setTimeout(() => {
20564
+ CSS.highlights.delete(HIGHLIGHT_NAME);
20565
+ highlightTimer = null;
20566
+ }, HIGHLIGHT_DURATION);
20567
+ }
20568
+ function unwrapMarker(marker) {
20569
+ if (!marker.isConnected) return;
20570
+ const parent = marker.parentNode;
20571
+ if (!parent) return;
20572
+ while (marker.firstChild) parent.insertBefore(marker.firstChild, marker);
20573
+ parent.removeChild(marker);
20574
+ if (parent instanceof Element) parent.normalize();
20575
+ }
20576
+ function highlightWithDOM(range) {
20577
+ ensureHighlightStyle();
20578
+ try {
20579
+ const marker = document.createElement("span");
20580
+ marker.classList.add(HIGHLIGHT_CLASS);
20581
+ marker.setAttribute(HIGHLIGHT_MARKER_ATTR, "true");
20582
+ range.surroundContents(marker);
20583
+ setTimeout(() => unwrapMarker(marker), HIGHLIGHT_DURATION);
20584
+ return marker;
20585
+ } catch {
20586
+ const ancestor = range.commonAncestorContainer;
20587
+ const el = ancestor instanceof Element ? ancestor : ancestor.parentElement;
20588
+ if (el) {
20589
+ el.classList.add(HIGHLIGHT_CLASS);
20590
+ setTimeout(() => el.classList.remove(HIGHLIGHT_CLASS), HIGHLIGHT_DURATION);
20591
+ return el;
20592
+ }
20593
+ return document.body;
20594
+ }
20595
+ }
20596
+ function scrollToRange(range) {
20597
+ const rect = range.getBoundingClientRect();
20598
+ window.scrollTo({
20599
+ top: window.scrollY + rect.top - window.innerHeight / 3,
20600
+ behavior: "smooth"
20601
+ });
20602
+ }
20603
+ function scrollIntoViewIfPossible(el) {
20604
+ if (typeof el.scrollIntoView === "function") {
20605
+ el.scrollIntoView({ behavior: "smooth", block: "start" });
20606
+ }
20607
+ }
20608
+ function searchsocketScrollToText(navigation) {
20609
+ if (typeof document === "undefined") return;
20610
+ const params = navigation.to?.url.searchParams;
20611
+ const raw = params?.get("_sskt") ?? params?.get("_ssk");
20612
+ if (!raw) return;
20613
+ const needle = normalize(raw);
20614
+ if (!needle) return;
20615
+ const map = buildTextMap(document.body);
20616
+ const offsets = findMatch(map.text, needle);
20617
+ if (!offsets) return;
20618
+ const range = resolveRange(map, offsets);
20619
+ if (!range) return;
20620
+ if (hasCustomHighlightAPI()) {
20621
+ highlightWithCSS(range);
20622
+ scrollToRange(range);
20623
+ } else {
20624
+ const marker = highlightWithDOM(range);
20625
+ const target = typeof marker.scrollIntoView === "function" ? marker : marker.parentElement;
20626
+ if (target) scrollIntoViewIfPossible(target);
20627
+ }
20628
+ }
20836
20629
  /*! Bundled license information:
20837
20630
 
20838
20631
  @mixmark-io/domino/lib/style_parser.js:
@@ -20846,7 +20639,7 @@ function searchsocketVitePlugin(options = {}) {
20846
20639
  */
20847
20640
 
20848
20641
  exports.searchsocketHandle = searchsocketHandle;
20849
- exports.searchsocketViteConfig = searchsocketViteConfig;
20642
+ exports.searchsocketScrollToText = searchsocketScrollToText;
20850
20643
  exports.searchsocketVitePlugin = searchsocketVitePlugin;
20851
20644
  //# sourceMappingURL=sveltekit.cjs.map
20852
20645
  //# sourceMappingURL=sveltekit.cjs.map