searchsocket 0.3.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -39
- package/dist/cli.js +947 -1378
- package/dist/client.cjs +45 -0
- package/dist/client.d.cts +3 -2
- package/dist/client.d.ts +3 -2
- package/dist/client.js +45 -1
- package/dist/index.cjs +909 -1286
- package/dist/index.d.cts +73 -33
- package/dist/index.d.ts +73 -33
- package/dist/index.js +906 -1281
- package/dist/plugin-B_npJSux.d.cts +36 -0
- package/dist/plugin-M-aW0ev6.d.ts +36 -0
- package/dist/scroll.cjs +185 -0
- package/dist/scroll.d.cts +42 -0
- package/dist/scroll.d.ts +42 -0
- package/dist/scroll.js +183 -0
- package/dist/sveltekit.cjs +997 -1204
- package/dist/sveltekit.d.cts +3 -43
- package/dist/sveltekit.d.ts +3 -43
- package/dist/sveltekit.js +995 -1202
- package/dist/{types-BrG6XTUU.d.cts → types-Dk43uz25.d.cts} +50 -109
- package/dist/{types-BrG6XTUU.d.ts → types-Dk43uz25.d.ts} +50 -109
- package/package.json +10 -3
package/dist/sveltekit.cjs
CHANGED
|
@@ -4,13 +4,13 @@ var fs = require('fs');
|
|
|
4
4
|
var path = require('path');
|
|
5
5
|
var jiti = require('jiti');
|
|
6
6
|
var zod = require('zod');
|
|
7
|
-
var pLimit2 = require('p-limit');
|
|
8
7
|
var child_process = require('child_process');
|
|
9
8
|
var crypto = require('crypto');
|
|
10
9
|
var cheerio = require('cheerio');
|
|
11
10
|
var matter = require('gray-matter');
|
|
12
|
-
var fs4 = require('fs/promises');
|
|
13
11
|
var fg = require('fast-glob');
|
|
12
|
+
var pLimit = require('p-limit');
|
|
13
|
+
var fs3 = require('fs/promises');
|
|
14
14
|
var net = require('net');
|
|
15
15
|
var zlib = require('zlib');
|
|
16
16
|
|
|
@@ -18,10 +18,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
|
18
18
|
|
|
19
19
|
var fs__default = /*#__PURE__*/_interopDefault(fs);
|
|
20
20
|
var path__default = /*#__PURE__*/_interopDefault(path);
|
|
21
|
-
var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
|
|
22
21
|
var matter__default = /*#__PURE__*/_interopDefault(matter);
|
|
23
|
-
var fs4__default = /*#__PURE__*/_interopDefault(fs4);
|
|
24
22
|
var fg__default = /*#__PURE__*/_interopDefault(fg);
|
|
23
|
+
var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
|
|
24
|
+
var fs3__default = /*#__PURE__*/_interopDefault(fs3);
|
|
25
25
|
var net__default = /*#__PURE__*/_interopDefault(net);
|
|
26
26
|
|
|
27
27
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
@@ -2767,12 +2767,12 @@ var require_ChildNode = __commonJS({
|
|
|
2767
2767
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
|
|
2768
2768
|
var Node2 = require_Node();
|
|
2769
2769
|
var LinkedList = require_LinkedList();
|
|
2770
|
-
var createDocumentFragmentFromArguments = function(
|
|
2771
|
-
var docFrag =
|
|
2770
|
+
var createDocumentFragmentFromArguments = function(document2, args) {
|
|
2771
|
+
var docFrag = document2.createDocumentFragment();
|
|
2772
2772
|
for (var i = 0; i < args.length; i++) {
|
|
2773
2773
|
var argItem = args[i];
|
|
2774
2774
|
var isNode = argItem instanceof Node2;
|
|
2775
|
-
docFrag.appendChild(isNode ? argItem :
|
|
2775
|
+
docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
|
|
2776
2776
|
}
|
|
2777
2777
|
return docFrag;
|
|
2778
2778
|
};
|
|
@@ -2930,7 +2930,7 @@ var require_NamedNodeMap = __commonJS({
|
|
|
2930
2930
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
|
|
2931
2931
|
var require_Element = __commonJS({
|
|
2932
2932
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
|
|
2933
|
-
module.exports =
|
|
2933
|
+
module.exports = Element2;
|
|
2934
2934
|
var xml = require_xmlnames();
|
|
2935
2935
|
var utils = require_utils();
|
|
2936
2936
|
var NAMESPACE = utils.NAMESPACE;
|
|
@@ -2947,7 +2947,7 @@ var require_Element = __commonJS({
|
|
|
2947
2947
|
var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
|
|
2948
2948
|
var NamedNodeMap = require_NamedNodeMap();
|
|
2949
2949
|
var uppercaseCache = /* @__PURE__ */ Object.create(null);
|
|
2950
|
-
function
|
|
2950
|
+
function Element2(doc, localName, namespaceURI, prefix) {
|
|
2951
2951
|
ContainerNode.call(this);
|
|
2952
2952
|
this.nodeType = Node2.ELEMENT_NODE;
|
|
2953
2953
|
this.ownerDocument = doc;
|
|
@@ -2967,7 +2967,7 @@ var require_Element = __commonJS({
|
|
|
2967
2967
|
recursiveGetText(node.childNodes[i], a);
|
|
2968
2968
|
}
|
|
2969
2969
|
}
|
|
2970
|
-
|
|
2970
|
+
Element2.prototype = Object.create(ContainerNode.prototype, {
|
|
2971
2971
|
isHTML: { get: function isHTML() {
|
|
2972
2972
|
return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
|
|
2973
2973
|
} },
|
|
@@ -3037,7 +3037,7 @@ var require_Element = __commonJS({
|
|
|
3037
3037
|
return NodeUtils.serializeOne(this, { nodeType: 0 });
|
|
3038
3038
|
},
|
|
3039
3039
|
set: function(v) {
|
|
3040
|
-
var
|
|
3040
|
+
var document2 = this.ownerDocument;
|
|
3041
3041
|
var parent = this.parentNode;
|
|
3042
3042
|
if (parent === null) {
|
|
3043
3043
|
return;
|
|
@@ -3048,8 +3048,8 @@ var require_Element = __commonJS({
|
|
|
3048
3048
|
if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
|
|
3049
3049
|
parent = parent.ownerDocument.createElement("body");
|
|
3050
3050
|
}
|
|
3051
|
-
var parser =
|
|
3052
|
-
|
|
3051
|
+
var parser = document2.implementation.mozHTMLParser(
|
|
3052
|
+
document2._address,
|
|
3053
3053
|
parent
|
|
3054
3054
|
);
|
|
3055
3055
|
parser.parse(v === null ? "" : String(v), true);
|
|
@@ -3108,7 +3108,7 @@ var require_Element = __commonJS({
|
|
|
3108
3108
|
default:
|
|
3109
3109
|
utils.SyntaxError();
|
|
3110
3110
|
}
|
|
3111
|
-
if (!(context instanceof
|
|
3111
|
+
if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
|
|
3112
3112
|
context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
|
|
3113
3113
|
}
|
|
3114
3114
|
var parser = this.ownerDocument.implementation.mozHTMLParser(
|
|
@@ -3716,10 +3716,10 @@ var require_Element = __commonJS({
|
|
|
3716
3716
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
3717
3717
|
} }
|
|
3718
3718
|
});
|
|
3719
|
-
Object.defineProperties(
|
|
3720
|
-
Object.defineProperties(
|
|
3719
|
+
Object.defineProperties(Element2.prototype, ChildNode);
|
|
3720
|
+
Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
|
|
3721
3721
|
attributes.registerChangeHandler(
|
|
3722
|
-
|
|
3722
|
+
Element2,
|
|
3723
3723
|
"id",
|
|
3724
3724
|
function(element, lname, oldval, newval) {
|
|
3725
3725
|
if (element.rooted) {
|
|
@@ -3733,7 +3733,7 @@ var require_Element = __commonJS({
|
|
|
3733
3733
|
}
|
|
3734
3734
|
);
|
|
3735
3735
|
attributes.registerChangeHandler(
|
|
3736
|
-
|
|
3736
|
+
Element2,
|
|
3737
3737
|
"class",
|
|
3738
3738
|
function(element, lname, oldval, newval) {
|
|
3739
3739
|
if (element._classList) {
|
|
@@ -3832,7 +3832,7 @@ var require_Element = __commonJS({
|
|
|
3832
3832
|
}
|
|
3833
3833
|
}
|
|
3834
3834
|
});
|
|
3835
|
-
|
|
3835
|
+
Element2._Attr = Attr;
|
|
3836
3836
|
function AttributesArray(elt) {
|
|
3837
3837
|
NamedNodeMap.call(this, elt);
|
|
3838
3838
|
for (var name in elt._attrsByQName) {
|
|
@@ -4234,7 +4234,7 @@ var require_DocumentFragment = __commonJS({
|
|
|
4234
4234
|
var Node2 = require_Node();
|
|
4235
4235
|
var NodeList = require_NodeList();
|
|
4236
4236
|
var ContainerNode = require_ContainerNode();
|
|
4237
|
-
var
|
|
4237
|
+
var Element2 = require_Element();
|
|
4238
4238
|
var select = require_select();
|
|
4239
4239
|
var utils = require_utils();
|
|
4240
4240
|
function DocumentFragment(doc) {
|
|
@@ -4252,9 +4252,9 @@ var require_DocumentFragment = __commonJS({
|
|
|
4252
4252
|
}
|
|
4253
4253
|
},
|
|
4254
4254
|
// Copy the text content getter/setter from Element
|
|
4255
|
-
textContent: Object.getOwnPropertyDescriptor(
|
|
4255
|
+
textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
|
|
4256
4256
|
// Copy the text content getter/setter from Element
|
|
4257
|
-
innerText: Object.getOwnPropertyDescriptor(
|
|
4257
|
+
innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
|
|
4258
4258
|
querySelector: { value: function(selector) {
|
|
4259
4259
|
var nodes = this.querySelectorAll(selector);
|
|
4260
4260
|
return nodes.length ? nodes[0] : null;
|
|
@@ -4262,8 +4262,8 @@ var require_DocumentFragment = __commonJS({
|
|
|
4262
4262
|
querySelectorAll: { value: function(selector) {
|
|
4263
4263
|
var context = Object.create(this);
|
|
4264
4264
|
context.isHTML = true;
|
|
4265
|
-
context.getElementsByTagName =
|
|
4266
|
-
context.nextElement = Object.getOwnPropertyDescriptor(
|
|
4265
|
+
context.getElementsByTagName = Element2.prototype.getElementsByTagName;
|
|
4266
|
+
context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
|
|
4267
4267
|
var nodes = select(selector, context);
|
|
4268
4268
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
4269
4269
|
} },
|
|
@@ -4345,7 +4345,7 @@ var require_ProcessingInstruction = __commonJS({
|
|
|
4345
4345
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
|
|
4346
4346
|
var require_NodeFilter = __commonJS({
|
|
4347
4347
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
|
|
4348
|
-
var
|
|
4348
|
+
var NodeFilter2 = {
|
|
4349
4349
|
// Constants for acceptNode()
|
|
4350
4350
|
FILTER_ACCEPT: 1,
|
|
4351
4351
|
FILTER_REJECT: 2,
|
|
@@ -4370,7 +4370,7 @@ var require_NodeFilter = __commonJS({
|
|
|
4370
4370
|
SHOW_NOTATION: 2048
|
|
4371
4371
|
// historical
|
|
4372
4372
|
};
|
|
4373
|
-
module.exports =
|
|
4373
|
+
module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
|
|
4374
4374
|
}
|
|
4375
4375
|
});
|
|
4376
4376
|
|
|
@@ -4445,7 +4445,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4445
4445
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
|
|
4446
4446
|
module.exports = TreeWalker;
|
|
4447
4447
|
var Node2 = require_Node();
|
|
4448
|
-
var
|
|
4448
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4449
4449
|
var NodeTraversal = require_NodeTraversal();
|
|
4450
4450
|
var utils = require_utils();
|
|
4451
4451
|
var mapChild = {
|
|
@@ -4465,11 +4465,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4465
4465
|
node = tw._currentNode[mapChild[type]];
|
|
4466
4466
|
while (node !== null) {
|
|
4467
4467
|
result = tw._internalFilter(node);
|
|
4468
|
-
if (result ===
|
|
4468
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4469
4469
|
tw._currentNode = node;
|
|
4470
4470
|
return node;
|
|
4471
4471
|
}
|
|
4472
|
-
if (result ===
|
|
4472
|
+
if (result === NodeFilter2.FILTER_SKIP) {
|
|
4473
4473
|
child = node[mapChild[type]];
|
|
4474
4474
|
if (child !== null) {
|
|
4475
4475
|
node = child;
|
|
@@ -4503,12 +4503,12 @@ var require_TreeWalker = __commonJS({
|
|
|
4503
4503
|
while (sibling !== null) {
|
|
4504
4504
|
node = sibling;
|
|
4505
4505
|
result = tw._internalFilter(node);
|
|
4506
|
-
if (result ===
|
|
4506
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4507
4507
|
tw._currentNode = node;
|
|
4508
4508
|
return node;
|
|
4509
4509
|
}
|
|
4510
4510
|
sibling = node[mapChild[type]];
|
|
4511
|
-
if (result ===
|
|
4511
|
+
if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
|
|
4512
4512
|
sibling = node[mapSibling[type]];
|
|
4513
4513
|
}
|
|
4514
4514
|
}
|
|
@@ -4516,7 +4516,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4516
4516
|
if (node === null || node === tw.root) {
|
|
4517
4517
|
return null;
|
|
4518
4518
|
}
|
|
4519
|
-
if (tw._internalFilter(node) ===
|
|
4519
|
+
if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4520
4520
|
return null;
|
|
4521
4521
|
}
|
|
4522
4522
|
}
|
|
@@ -4564,11 +4564,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4564
4564
|
utils.InvalidStateError();
|
|
4565
4565
|
}
|
|
4566
4566
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4567
|
-
return
|
|
4567
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4568
4568
|
}
|
|
4569
4569
|
filter = this._filter;
|
|
4570
4570
|
if (filter === null) {
|
|
4571
|
-
result =
|
|
4571
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4572
4572
|
} else {
|
|
4573
4573
|
this._active = true;
|
|
4574
4574
|
try {
|
|
@@ -4597,7 +4597,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4597
4597
|
if (node === null) {
|
|
4598
4598
|
return null;
|
|
4599
4599
|
}
|
|
4600
|
-
if (this._internalFilter(node) ===
|
|
4600
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4601
4601
|
this._currentNode = node;
|
|
4602
4602
|
return node;
|
|
4603
4603
|
}
|
|
@@ -4650,17 +4650,17 @@ var require_TreeWalker = __commonJS({
|
|
|
4650
4650
|
for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
|
|
4651
4651
|
node = previousSibling;
|
|
4652
4652
|
result = this._internalFilter(node);
|
|
4653
|
-
if (result ===
|
|
4653
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4654
4654
|
continue;
|
|
4655
4655
|
}
|
|
4656
4656
|
for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
|
|
4657
4657
|
node = lastChild;
|
|
4658
4658
|
result = this._internalFilter(node);
|
|
4659
|
-
if (result ===
|
|
4659
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4660
4660
|
break;
|
|
4661
4661
|
}
|
|
4662
4662
|
}
|
|
4663
|
-
if (result ===
|
|
4663
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4664
4664
|
this._currentNode = node;
|
|
4665
4665
|
return node;
|
|
4666
4666
|
}
|
|
@@ -4669,7 +4669,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4669
4669
|
return null;
|
|
4670
4670
|
}
|
|
4671
4671
|
node = node.parentNode;
|
|
4672
|
-
if (this._internalFilter(node) ===
|
|
4672
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4673
4673
|
this._currentNode = node;
|
|
4674
4674
|
return node;
|
|
4675
4675
|
}
|
|
@@ -4686,26 +4686,26 @@ var require_TreeWalker = __commonJS({
|
|
|
4686
4686
|
nextNode: { value: function nextNode() {
|
|
4687
4687
|
var node, result, firstChild, nextSibling;
|
|
4688
4688
|
node = this._currentNode;
|
|
4689
|
-
result =
|
|
4689
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4690
4690
|
CHILDREN:
|
|
4691
4691
|
while (true) {
|
|
4692
4692
|
for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
|
|
4693
4693
|
node = firstChild;
|
|
4694
4694
|
result = this._internalFilter(node);
|
|
4695
|
-
if (result ===
|
|
4695
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4696
4696
|
this._currentNode = node;
|
|
4697
4697
|
return node;
|
|
4698
|
-
} else if (result ===
|
|
4698
|
+
} else if (result === NodeFilter2.FILTER_REJECT) {
|
|
4699
4699
|
break;
|
|
4700
4700
|
}
|
|
4701
4701
|
}
|
|
4702
4702
|
for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
|
|
4703
4703
|
node = nextSibling;
|
|
4704
4704
|
result = this._internalFilter(node);
|
|
4705
|
-
if (result ===
|
|
4705
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4706
4706
|
this._currentNode = node;
|
|
4707
4707
|
return node;
|
|
4708
|
-
} else if (result ===
|
|
4708
|
+
} else if (result === NodeFilter2.FILTER_SKIP) {
|
|
4709
4709
|
continue CHILDREN;
|
|
4710
4710
|
}
|
|
4711
4711
|
}
|
|
@@ -4724,7 +4724,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4724
4724
|
var require_NodeIterator = __commonJS({
|
|
4725
4725
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
|
|
4726
4726
|
module.exports = NodeIterator;
|
|
4727
|
-
var
|
|
4727
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4728
4728
|
var NodeTraversal = require_NodeTraversal();
|
|
4729
4729
|
var utils = require_utils();
|
|
4730
4730
|
function move(node, stayWithin, directionIsNext) {
|
|
@@ -4759,7 +4759,7 @@ var require_NodeIterator = __commonJS({
|
|
|
4759
4759
|
}
|
|
4760
4760
|
}
|
|
4761
4761
|
var result = ni._internalFilter(node);
|
|
4762
|
-
if (result ===
|
|
4762
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4763
4763
|
break;
|
|
4764
4764
|
}
|
|
4765
4765
|
}
|
|
@@ -4807,11 +4807,11 @@ var require_NodeIterator = __commonJS({
|
|
|
4807
4807
|
utils.InvalidStateError();
|
|
4808
4808
|
}
|
|
4809
4809
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4810
|
-
return
|
|
4810
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4811
4811
|
}
|
|
4812
4812
|
filter = this._filter;
|
|
4813
4813
|
if (filter === null) {
|
|
4814
|
-
result =
|
|
4814
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4815
4815
|
} else {
|
|
4816
4816
|
this._active = true;
|
|
4817
4817
|
try {
|
|
@@ -5021,32 +5021,32 @@ var require_URL = __commonJS({
|
|
|
5021
5021
|
else
|
|
5022
5022
|
return basepath.substring(0, lastslash + 1) + refpath;
|
|
5023
5023
|
}
|
|
5024
|
-
function remove_dot_segments(
|
|
5025
|
-
if (!
|
|
5024
|
+
function remove_dot_segments(path13) {
|
|
5025
|
+
if (!path13) return path13;
|
|
5026
5026
|
var output = "";
|
|
5027
|
-
while (
|
|
5028
|
-
if (
|
|
5029
|
-
|
|
5027
|
+
while (path13.length > 0) {
|
|
5028
|
+
if (path13 === "." || path13 === "..") {
|
|
5029
|
+
path13 = "";
|
|
5030
5030
|
break;
|
|
5031
5031
|
}
|
|
5032
|
-
var twochars =
|
|
5033
|
-
var threechars =
|
|
5034
|
-
var fourchars =
|
|
5032
|
+
var twochars = path13.substring(0, 2);
|
|
5033
|
+
var threechars = path13.substring(0, 3);
|
|
5034
|
+
var fourchars = path13.substring(0, 4);
|
|
5035
5035
|
if (threechars === "../") {
|
|
5036
|
-
|
|
5036
|
+
path13 = path13.substring(3);
|
|
5037
5037
|
} else if (twochars === "./") {
|
|
5038
|
-
|
|
5038
|
+
path13 = path13.substring(2);
|
|
5039
5039
|
} else if (threechars === "/./") {
|
|
5040
|
-
|
|
5041
|
-
} else if (twochars === "/." &&
|
|
5042
|
-
|
|
5043
|
-
} else if (fourchars === "/../" || threechars === "/.." &&
|
|
5044
|
-
|
|
5040
|
+
path13 = "/" + path13.substring(3);
|
|
5041
|
+
} else if (twochars === "/." && path13.length === 2) {
|
|
5042
|
+
path13 = "/";
|
|
5043
|
+
} else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
|
|
5044
|
+
path13 = "/" + path13.substring(4);
|
|
5045
5045
|
output = output.replace(/\/?[^\/]*$/, "");
|
|
5046
5046
|
} else {
|
|
5047
|
-
var segment =
|
|
5047
|
+
var segment = path13.match(/(\/?([^\/]*))/)[0];
|
|
5048
5048
|
output += segment;
|
|
5049
|
-
|
|
5049
|
+
path13 = path13.substring(segment.length);
|
|
5050
5050
|
}
|
|
5051
5051
|
}
|
|
5052
5052
|
return output;
|
|
@@ -5611,9 +5611,9 @@ var require_defineElement = __commonJS({
|
|
|
5611
5611
|
});
|
|
5612
5612
|
return c;
|
|
5613
5613
|
};
|
|
5614
|
-
function EventHandlerBuilder(body,
|
|
5614
|
+
function EventHandlerBuilder(body, document2, form, element) {
|
|
5615
5615
|
this.body = body;
|
|
5616
|
-
this.document =
|
|
5616
|
+
this.document = document2;
|
|
5617
5617
|
this.form = form;
|
|
5618
5618
|
this.element = element;
|
|
5619
5619
|
}
|
|
@@ -5647,7 +5647,7 @@ var require_defineElement = __commonJS({
|
|
|
5647
5647
|
var require_htmlelts = __commonJS({
|
|
5648
5648
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
|
|
5649
5649
|
var Node2 = require_Node();
|
|
5650
|
-
var
|
|
5650
|
+
var Element2 = require_Element();
|
|
5651
5651
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
5652
5652
|
var utils = require_utils();
|
|
5653
5653
|
var URLUtils = require_URLUtils();
|
|
@@ -5715,10 +5715,10 @@ var require_htmlelts = __commonJS({
|
|
|
5715
5715
|
this._form = null;
|
|
5716
5716
|
};
|
|
5717
5717
|
var HTMLElement = exports$1.HTMLElement = define({
|
|
5718
|
-
superclass:
|
|
5718
|
+
superclass: Element2,
|
|
5719
5719
|
name: "HTMLElement",
|
|
5720
5720
|
ctor: function HTMLElement2(doc, localName, prefix) {
|
|
5721
|
-
|
|
5721
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
|
|
5722
5722
|
},
|
|
5723
5723
|
props: {
|
|
5724
5724
|
dangerouslySetInnerHTML: {
|
|
@@ -7200,7 +7200,7 @@ var require_htmlelts = __commonJS({
|
|
|
7200
7200
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
|
|
7201
7201
|
var require_svg = __commonJS({
|
|
7202
7202
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
|
|
7203
|
-
var
|
|
7203
|
+
var Element2 = require_Element();
|
|
7204
7204
|
var defineElement = require_defineElement();
|
|
7205
7205
|
var utils = require_utils();
|
|
7206
7206
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
@@ -7214,10 +7214,10 @@ var require_svg = __commonJS({
|
|
|
7214
7214
|
return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
|
|
7215
7215
|
}
|
|
7216
7216
|
var SVGElement = define({
|
|
7217
|
-
superclass:
|
|
7217
|
+
superclass: Element2,
|
|
7218
7218
|
name: "SVGElement",
|
|
7219
7219
|
ctor: function SVGElement2(doc, localName, prefix) {
|
|
7220
|
-
|
|
7220
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
|
|
7221
7221
|
},
|
|
7222
7222
|
props: {
|
|
7223
7223
|
style: { get: function() {
|
|
@@ -7352,7 +7352,7 @@ var require_Document = __commonJS({
|
|
|
7352
7352
|
var Node2 = require_Node();
|
|
7353
7353
|
var NodeList = require_NodeList();
|
|
7354
7354
|
var ContainerNode = require_ContainerNode();
|
|
7355
|
-
var
|
|
7355
|
+
var Element2 = require_Element();
|
|
7356
7356
|
var Text = require_Text();
|
|
7357
7357
|
var Comment = require_Comment();
|
|
7358
7358
|
var Event = require_Event();
|
|
@@ -7361,7 +7361,7 @@ var require_Document = __commonJS({
|
|
|
7361
7361
|
var DOMImplementation = require_DOMImplementation();
|
|
7362
7362
|
var TreeWalker = require_TreeWalker();
|
|
7363
7363
|
var NodeIterator = require_NodeIterator();
|
|
7364
|
-
var
|
|
7364
|
+
var NodeFilter2 = require_NodeFilter();
|
|
7365
7365
|
var URL2 = require_URL();
|
|
7366
7366
|
var select = require_select();
|
|
7367
7367
|
var events = require_events();
|
|
@@ -7500,13 +7500,13 @@ var require_Document = __commonJS({
|
|
|
7500
7500
|
if (this.isHTML) {
|
|
7501
7501
|
localName = utils.toASCIILowerCase(localName);
|
|
7502
7502
|
}
|
|
7503
|
-
return new
|
|
7503
|
+
return new Element2._Attr(null, localName, null, null, "");
|
|
7504
7504
|
} },
|
|
7505
7505
|
createAttributeNS: { value: function(namespace, qualifiedName) {
|
|
7506
7506
|
namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
|
|
7507
7507
|
qualifiedName = String(qualifiedName);
|
|
7508
7508
|
var ve = validateAndExtract(namespace, qualifiedName);
|
|
7509
|
-
return new
|
|
7509
|
+
return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
|
|
7510
7510
|
} },
|
|
7511
7511
|
createElement: { value: function(localName) {
|
|
7512
7512
|
localName = String(localName);
|
|
@@ -7518,7 +7518,7 @@ var require_Document = __commonJS({
|
|
|
7518
7518
|
} else if (this.contentType === "application/xhtml+xml") {
|
|
7519
7519
|
return html.createElement(this, localName, null);
|
|
7520
7520
|
} else {
|
|
7521
|
-
return new
|
|
7521
|
+
return new Element2(this, localName, null, null);
|
|
7522
7522
|
}
|
|
7523
7523
|
}, writable: isApiWritable },
|
|
7524
7524
|
createElementNS: { value: function(namespace, qualifiedName) {
|
|
@@ -7535,7 +7535,7 @@ var require_Document = __commonJS({
|
|
|
7535
7535
|
} else if (namespace === NAMESPACE.SVG) {
|
|
7536
7536
|
return svg.createElement(this, localName, prefix);
|
|
7537
7537
|
}
|
|
7538
|
-
return new
|
|
7538
|
+
return new Element2(this, localName, namespace, prefix);
|
|
7539
7539
|
} },
|
|
7540
7540
|
createEvent: { value: function createEvent(interfaceName) {
|
|
7541
7541
|
interfaceName = interfaceName.toLowerCase();
|
|
@@ -7557,7 +7557,7 @@ var require_Document = __commonJS({
|
|
|
7557
7557
|
if (!(root3 instanceof Node2)) {
|
|
7558
7558
|
throw new TypeError("root not a node");
|
|
7559
7559
|
}
|
|
7560
|
-
whatToShow = whatToShow === void 0 ?
|
|
7560
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7561
7561
|
filter = filter === void 0 ? null : filter;
|
|
7562
7562
|
return new TreeWalker(root3, whatToShow, filter);
|
|
7563
7563
|
} },
|
|
@@ -7569,7 +7569,7 @@ var require_Document = __commonJS({
|
|
|
7569
7569
|
if (!(root3 instanceof Node2)) {
|
|
7570
7570
|
throw new TypeError("root not a node");
|
|
7571
7571
|
}
|
|
7572
|
-
whatToShow = whatToShow === void 0 ?
|
|
7572
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7573
7573
|
filter = filter === void 0 ? null : filter;
|
|
7574
7574
|
return new NodeIterator(root3, whatToShow, filter);
|
|
7575
7575
|
} },
|
|
@@ -7630,10 +7630,10 @@ var require_Document = __commonJS({
|
|
|
7630
7630
|
return this.byId[id] instanceof MultiId;
|
|
7631
7631
|
} },
|
|
7632
7632
|
// Just copy this method from the Element prototype
|
|
7633
|
-
getElementsByName: { value:
|
|
7634
|
-
getElementsByTagName: { value:
|
|
7635
|
-
getElementsByTagNameNS: { value:
|
|
7636
|
-
getElementsByClassName: { value:
|
|
7633
|
+
getElementsByName: { value: Element2.prototype.getElementsByName },
|
|
7634
|
+
getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
|
|
7635
|
+
getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
|
|
7636
|
+
getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
|
|
7637
7637
|
adoptNode: { value: function adoptNode(node) {
|
|
7638
7638
|
if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
|
|
7639
7639
|
if (node.nodeType === Node2.ATTRIBUTE_NODE) {
|
|
@@ -16459,8 +16459,8 @@ var require_Window = __commonJS({
|
|
|
16459
16459
|
var Location = require_Location();
|
|
16460
16460
|
var utils = require_utils();
|
|
16461
16461
|
module.exports = Window;
|
|
16462
|
-
function Window(
|
|
16463
|
-
this.document =
|
|
16462
|
+
function Window(document2) {
|
|
16463
|
+
this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
|
|
16464
16464
|
this.document._scripting_enabled = true;
|
|
16465
16465
|
this.document.defaultView = this;
|
|
16466
16466
|
this.location = new Location(this, this.document._address || "about:blank");
|
|
@@ -16590,11 +16590,11 @@ var require_lib = __commonJS({
|
|
|
16590
16590
|
};
|
|
16591
16591
|
};
|
|
16592
16592
|
exports$1.createWindow = function(html, address) {
|
|
16593
|
-
var
|
|
16593
|
+
var document2 = exports$1.createDocument(html);
|
|
16594
16594
|
if (address !== void 0) {
|
|
16595
|
-
|
|
16595
|
+
document2._address = address;
|
|
16596
16596
|
}
|
|
16597
|
-
return new impl.Window(
|
|
16597
|
+
return new impl.Window(document2);
|
|
16598
16598
|
};
|
|
16599
16599
|
exports$1.impl = impl;
|
|
16600
16600
|
}
|
|
@@ -16610,6 +16610,8 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16610
16610
|
envVar: zod.z.string().min(1).optional(),
|
|
16611
16611
|
sanitize: zod.z.boolean().optional()
|
|
16612
16612
|
}).optional(),
|
|
16613
|
+
exclude: zod.z.array(zod.z.string()).optional(),
|
|
16614
|
+
respectRobotsTxt: zod.z.boolean().optional(),
|
|
16613
16615
|
source: zod.z.object({
|
|
16614
16616
|
mode: zod.z.enum(["static-output", "crawl", "content-files", "build"]).optional(),
|
|
16615
16617
|
staticOutputDir: zod.z.string().min(1).optional(),
|
|
@@ -16657,29 +16659,18 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16657
16659
|
prependTitle: zod.z.boolean().optional(),
|
|
16658
16660
|
pageSummaryChunk: zod.z.boolean().optional()
|
|
16659
16661
|
}).optional(),
|
|
16660
|
-
|
|
16661
|
-
|
|
16662
|
-
|
|
16663
|
-
|
|
16664
|
-
|
|
16665
|
-
batchSize: zod.z.number().int().positive().optional(),
|
|
16666
|
-
concurrency: zod.z.number().int().positive().optional(),
|
|
16667
|
-
pricePer1kTokens: zod.z.number().positive().optional()
|
|
16662
|
+
upstash: zod.z.object({
|
|
16663
|
+
url: zod.z.string().url().optional(),
|
|
16664
|
+
token: zod.z.string().min(1).optional(),
|
|
16665
|
+
urlEnv: zod.z.string().min(1).optional(),
|
|
16666
|
+
tokenEnv: zod.z.string().min(1).optional()
|
|
16668
16667
|
}).optional(),
|
|
16669
|
-
|
|
16670
|
-
|
|
16671
|
-
|
|
16672
|
-
|
|
16673
|
-
|
|
16674
|
-
|
|
16675
|
-
authTokenEnv: zod.z.string().optional(),
|
|
16676
|
-
localPath: zod.z.string().optional()
|
|
16677
|
-
}).optional()
|
|
16678
|
-
}).optional(),
|
|
16679
|
-
rerank: zod.z.object({
|
|
16680
|
-
enabled: zod.z.boolean().optional(),
|
|
16681
|
-
topN: zod.z.number().int().positive().optional(),
|
|
16682
|
-
model: zod.z.string().optional()
|
|
16668
|
+
search: zod.z.object({
|
|
16669
|
+
semanticWeight: zod.z.number().min(0).max(1).optional(),
|
|
16670
|
+
inputEnrichment: zod.z.boolean().optional(),
|
|
16671
|
+
reranking: zod.z.boolean().optional(),
|
|
16672
|
+
dualSearch: zod.z.boolean().optional(),
|
|
16673
|
+
pageSearchWeight: zod.z.number().min(0).max(1).optional()
|
|
16683
16674
|
}).optional(),
|
|
16684
16675
|
ranking: zod.z.object({
|
|
16685
16676
|
enableIncomingLinkBoost: zod.z.boolean().optional(),
|
|
@@ -16689,11 +16680,12 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16689
16680
|
aggregationDecay: zod.z.number().min(0).max(1).optional(),
|
|
16690
16681
|
minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
|
|
16691
16682
|
minScore: zod.z.number().min(0).max(1).optional(),
|
|
16683
|
+
scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
|
|
16692
16684
|
weights: zod.z.object({
|
|
16693
16685
|
incomingLinks: zod.z.number().optional(),
|
|
16694
16686
|
depth: zod.z.number().optional(),
|
|
16695
|
-
|
|
16696
|
-
|
|
16687
|
+
aggregation: zod.z.number().optional(),
|
|
16688
|
+
titleMatch: zod.z.number().optional()
|
|
16697
16689
|
}).optional()
|
|
16698
16690
|
}).optional(),
|
|
16699
16691
|
api: zod.z.object({
|
|
@@ -16715,8 +16707,7 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16715
16707
|
}).optional()
|
|
16716
16708
|
}).optional(),
|
|
16717
16709
|
state: zod.z.object({
|
|
16718
|
-
dir: zod.z.string().optional()
|
|
16719
|
-
writeMirror: zod.z.boolean().optional()
|
|
16710
|
+
dir: zod.z.string().optional()
|
|
16720
16711
|
}).optional()
|
|
16721
16712
|
});
|
|
16722
16713
|
|
|
@@ -16740,6 +16731,8 @@ function createDefaultConfig(projectId) {
|
|
|
16740
16731
|
envVar: "SEARCHSOCKET_SCOPE",
|
|
16741
16732
|
sanitize: true
|
|
16742
16733
|
},
|
|
16734
|
+
exclude: [],
|
|
16735
|
+
respectRobotsTxt: true,
|
|
16743
16736
|
source: {
|
|
16744
16737
|
mode: "static-output",
|
|
16745
16738
|
staticOutputDir: "build",
|
|
@@ -16768,24 +16761,16 @@ function createDefaultConfig(projectId) {
|
|
|
16768
16761
|
prependTitle: true,
|
|
16769
16762
|
pageSummaryChunk: true
|
|
16770
16763
|
},
|
|
16771
|
-
|
|
16772
|
-
|
|
16773
|
-
|
|
16774
|
-
apiKeyEnv: "JINA_API_KEY",
|
|
16775
|
-
batchSize: 64,
|
|
16776
|
-
concurrency: 4
|
|
16777
|
-
},
|
|
16778
|
-
vector: {
|
|
16779
|
-
turso: {
|
|
16780
|
-
urlEnv: "TURSO_DATABASE_URL",
|
|
16781
|
-
authTokenEnv: "TURSO_AUTH_TOKEN",
|
|
16782
|
-
localPath: ".searchsocket/vectors.db"
|
|
16783
|
-
}
|
|
16764
|
+
upstash: {
|
|
16765
|
+
urlEnv: "UPSTASH_SEARCH_REST_URL",
|
|
16766
|
+
tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
|
|
16784
16767
|
},
|
|
16785
|
-
|
|
16786
|
-
|
|
16787
|
-
|
|
16788
|
-
|
|
16768
|
+
search: {
|
|
16769
|
+
semanticWeight: 0.75,
|
|
16770
|
+
inputEnrichment: true,
|
|
16771
|
+
reranking: true,
|
|
16772
|
+
dualSearch: true,
|
|
16773
|
+
pageSearchWeight: 0.3
|
|
16789
16774
|
},
|
|
16790
16775
|
ranking: {
|
|
16791
16776
|
enableIncomingLinkBoost: true,
|
|
@@ -16794,12 +16779,13 @@ function createDefaultConfig(projectId) {
|
|
|
16794
16779
|
aggregationCap: 5,
|
|
16795
16780
|
aggregationDecay: 0.5,
|
|
16796
16781
|
minChunkScoreRatio: 0.5,
|
|
16797
|
-
minScore: 0,
|
|
16782
|
+
minScore: 0.3,
|
|
16783
|
+
scoreGapThreshold: 0.4,
|
|
16798
16784
|
weights: {
|
|
16799
16785
|
incomingLinks: 0.05,
|
|
16800
16786
|
depth: 0.03,
|
|
16801
|
-
|
|
16802
|
-
|
|
16787
|
+
aggregation: 0.1,
|
|
16788
|
+
titleMatch: 0.15
|
|
16803
16789
|
}
|
|
16804
16790
|
},
|
|
16805
16791
|
api: {
|
|
@@ -16817,8 +16803,7 @@ function createDefaultConfig(projectId) {
|
|
|
16817
16803
|
}
|
|
16818
16804
|
},
|
|
16819
16805
|
state: {
|
|
16820
|
-
dir: ".searchsocket"
|
|
16821
|
-
writeMirror: false
|
|
16806
|
+
dir: ".searchsocket"
|
|
16822
16807
|
}
|
|
16823
16808
|
};
|
|
16824
16809
|
}
|
|
@@ -16904,6 +16889,8 @@ ${issues}`
|
|
|
16904
16889
|
...defaults.scope,
|
|
16905
16890
|
...parsed.scope
|
|
16906
16891
|
},
|
|
16892
|
+
exclude: parsed.exclude ?? defaults.exclude,
|
|
16893
|
+
respectRobotsTxt: parsed.respectRobotsTxt ?? defaults.respectRobotsTxt,
|
|
16907
16894
|
source: {
|
|
16908
16895
|
...defaults.source,
|
|
16909
16896
|
...parsed.source,
|
|
@@ -16940,21 +16927,13 @@ ${issues}`
|
|
|
16940
16927
|
...defaults.chunking,
|
|
16941
16928
|
...parsed.chunking
|
|
16942
16929
|
},
|
|
16943
|
-
|
|
16944
|
-
...defaults.
|
|
16945
|
-
...parsed.
|
|
16930
|
+
upstash: {
|
|
16931
|
+
...defaults.upstash,
|
|
16932
|
+
...parsed.upstash
|
|
16946
16933
|
},
|
|
16947
|
-
|
|
16948
|
-
...defaults.
|
|
16949
|
-
...parsed.
|
|
16950
|
-
turso: {
|
|
16951
|
-
...defaults.vector.turso,
|
|
16952
|
-
...parsed.vector?.turso
|
|
16953
|
-
}
|
|
16954
|
-
},
|
|
16955
|
-
rerank: {
|
|
16956
|
-
...defaults.rerank,
|
|
16957
|
-
...parsed.rerank
|
|
16934
|
+
search: {
|
|
16935
|
+
...defaults.search,
|
|
16936
|
+
...parsed.search
|
|
16958
16937
|
},
|
|
16959
16938
|
ranking: {
|
|
16960
16939
|
...defaults.ranking,
|
|
@@ -17045,128 +17024,6 @@ async function loadConfig(options = {}) {
|
|
|
17045
17024
|
function isServerless() {
|
|
17046
17025
|
return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
|
|
17047
17026
|
}
|
|
17048
|
-
function sleep(ms) {
|
|
17049
|
-
return new Promise((resolve) => {
|
|
17050
|
-
setTimeout(resolve, ms);
|
|
17051
|
-
});
|
|
17052
|
-
}
|
|
17053
|
-
var JinaEmbeddingsProvider = class {
|
|
17054
|
-
apiKey;
|
|
17055
|
-
batchSize;
|
|
17056
|
-
concurrency;
|
|
17057
|
-
defaultTask;
|
|
17058
|
-
constructor(options) {
|
|
17059
|
-
if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
|
|
17060
|
-
throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
|
|
17061
|
-
}
|
|
17062
|
-
if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
|
|
17063
|
-
throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
|
|
17064
|
-
}
|
|
17065
|
-
this.apiKey = options.apiKey;
|
|
17066
|
-
this.batchSize = options.batchSize;
|
|
17067
|
-
this.concurrency = options.concurrency;
|
|
17068
|
-
this.defaultTask = options.task ?? "retrieval.passage";
|
|
17069
|
-
}
|
|
17070
|
-
estimateTokens(text) {
|
|
17071
|
-
const normalized = text.trim();
|
|
17072
|
-
if (!normalized) {
|
|
17073
|
-
return 0;
|
|
17074
|
-
}
|
|
17075
|
-
const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
|
|
17076
|
-
const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
|
|
17077
|
-
const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
|
|
17078
|
-
const charEstimate = Math.ceil(normalized.length / 4);
|
|
17079
|
-
const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
|
|
17080
|
-
return Math.max(1, Math.max(charEstimate, lexicalEstimate));
|
|
17081
|
-
}
|
|
17082
|
-
async embedTexts(texts, modelId, task) {
|
|
17083
|
-
if (texts.length === 0) {
|
|
17084
|
-
return [];
|
|
17085
|
-
}
|
|
17086
|
-
const batches = [];
|
|
17087
|
-
for (let i = 0; i < texts.length; i += this.batchSize) {
|
|
17088
|
-
batches.push({
|
|
17089
|
-
index: i,
|
|
17090
|
-
values: texts.slice(i, i + this.batchSize)
|
|
17091
|
-
});
|
|
17092
|
-
}
|
|
17093
|
-
const outputs = new Array(batches.length);
|
|
17094
|
-
const limit = pLimit2__default.default(this.concurrency);
|
|
17095
|
-
await Promise.all(
|
|
17096
|
-
batches.map(
|
|
17097
|
-
(batch, position) => limit(async () => {
|
|
17098
|
-
outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
|
|
17099
|
-
})
|
|
17100
|
-
)
|
|
17101
|
-
);
|
|
17102
|
-
return outputs.flat();
|
|
17103
|
-
}
|
|
17104
|
-
async embedWithRetry(texts, modelId, task) {
|
|
17105
|
-
const maxAttempts = 5;
|
|
17106
|
-
let attempt = 0;
|
|
17107
|
-
while (attempt < maxAttempts) {
|
|
17108
|
-
attempt += 1;
|
|
17109
|
-
let response;
|
|
17110
|
-
try {
|
|
17111
|
-
response = await fetch("https://api.jina.ai/v1/embeddings", {
|
|
17112
|
-
method: "POST",
|
|
17113
|
-
headers: {
|
|
17114
|
-
"content-type": "application/json",
|
|
17115
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17116
|
-
},
|
|
17117
|
-
body: JSON.stringify({
|
|
17118
|
-
model: modelId,
|
|
17119
|
-
input: texts,
|
|
17120
|
-
task
|
|
17121
|
-
})
|
|
17122
|
-
});
|
|
17123
|
-
} catch (error) {
|
|
17124
|
-
if (attempt >= maxAttempts) {
|
|
17125
|
-
throw error;
|
|
17126
|
-
}
|
|
17127
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17128
|
-
continue;
|
|
17129
|
-
}
|
|
17130
|
-
if (!response.ok) {
|
|
17131
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17132
|
-
if (!retryable || attempt >= maxAttempts) {
|
|
17133
|
-
const errorBody = await response.text();
|
|
17134
|
-
throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
|
|
17135
|
-
}
|
|
17136
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17137
|
-
continue;
|
|
17138
|
-
}
|
|
17139
|
-
const payload = await response.json();
|
|
17140
|
-
if (!payload.data || !Array.isArray(payload.data)) {
|
|
17141
|
-
throw new Error("Invalid Jina embeddings response format");
|
|
17142
|
-
}
|
|
17143
|
-
return payload.data.map((entry) => entry.embedding);
|
|
17144
|
-
}
|
|
17145
|
-
throw new Error("Unreachable retry state");
|
|
17146
|
-
}
|
|
17147
|
-
};
|
|
17148
|
-
|
|
17149
|
-
// src/embeddings/factory.ts
|
|
17150
|
-
function createEmbeddingsProvider(config) {
|
|
17151
|
-
if (config.embeddings.provider !== "jina") {
|
|
17152
|
-
throw new SearchSocketError(
|
|
17153
|
-
"CONFIG_MISSING",
|
|
17154
|
-
`Unsupported embeddings provider ${config.embeddings.provider}`
|
|
17155
|
-
);
|
|
17156
|
-
}
|
|
17157
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17158
|
-
if (!apiKey) {
|
|
17159
|
-
throw new SearchSocketError(
|
|
17160
|
-
"CONFIG_MISSING",
|
|
17161
|
-
`Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
|
|
17162
|
-
);
|
|
17163
|
-
}
|
|
17164
|
-
return new JinaEmbeddingsProvider({
|
|
17165
|
-
apiKey,
|
|
17166
|
-
batchSize: config.embeddings.batchSize,
|
|
17167
|
-
concurrency: config.embeddings.concurrency
|
|
17168
|
-
});
|
|
17169
|
-
}
|
|
17170
17027
|
|
|
17171
17028
|
// src/utils/text.ts
|
|
17172
17029
|
function normalizeText(input) {
|
|
@@ -17241,103 +17098,6 @@ function resolveScope(config, override) {
|
|
|
17241
17098
|
};
|
|
17242
17099
|
}
|
|
17243
17100
|
|
|
17244
|
-
// src/rerank/jina.ts
|
|
17245
|
-
function sleep2(ms) {
|
|
17246
|
-
return new Promise((resolve) => {
|
|
17247
|
-
setTimeout(resolve, ms);
|
|
17248
|
-
});
|
|
17249
|
-
}
|
|
17250
|
-
var JinaReranker = class {
|
|
17251
|
-
apiKey;
|
|
17252
|
-
model;
|
|
17253
|
-
maxRetries;
|
|
17254
|
-
constructor(options) {
|
|
17255
|
-
this.apiKey = options.apiKey;
|
|
17256
|
-
this.model = options.model;
|
|
17257
|
-
this.maxRetries = options.maxRetries ?? 2;
|
|
17258
|
-
}
|
|
17259
|
-
async rerank(query, candidates, topN) {
|
|
17260
|
-
if (candidates.length === 0) {
|
|
17261
|
-
return [];
|
|
17262
|
-
}
|
|
17263
|
-
const body = {
|
|
17264
|
-
model: this.model,
|
|
17265
|
-
query,
|
|
17266
|
-
documents: candidates.map((candidate) => candidate.text),
|
|
17267
|
-
top_n: topN ?? candidates.length,
|
|
17268
|
-
return_documents: false
|
|
17269
|
-
};
|
|
17270
|
-
let attempt = 0;
|
|
17271
|
-
while (attempt <= this.maxRetries) {
|
|
17272
|
-
attempt += 1;
|
|
17273
|
-
let response;
|
|
17274
|
-
try {
|
|
17275
|
-
response = await fetch("https://api.jina.ai/v1/rerank", {
|
|
17276
|
-
method: "POST",
|
|
17277
|
-
headers: {
|
|
17278
|
-
"content-type": "application/json",
|
|
17279
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17280
|
-
},
|
|
17281
|
-
body: JSON.stringify(body)
|
|
17282
|
-
});
|
|
17283
|
-
} catch (error) {
|
|
17284
|
-
if (attempt <= this.maxRetries) {
|
|
17285
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17286
|
-
continue;
|
|
17287
|
-
}
|
|
17288
|
-
throw error;
|
|
17289
|
-
}
|
|
17290
|
-
if (!response.ok) {
|
|
17291
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17292
|
-
if (retryable && attempt <= this.maxRetries) {
|
|
17293
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17294
|
-
continue;
|
|
17295
|
-
}
|
|
17296
|
-
const errorBody = await response.text();
|
|
17297
|
-
throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
|
|
17298
|
-
}
|
|
17299
|
-
const payload = await response.json();
|
|
17300
|
-
const rawResults = payload.results ?? payload.data ?? [];
|
|
17301
|
-
if (!Array.isArray(rawResults)) {
|
|
17302
|
-
throw new Error("Invalid Jina rerank response format");
|
|
17303
|
-
}
|
|
17304
|
-
return rawResults.flatMap((item) => {
|
|
17305
|
-
const index = item.index;
|
|
17306
|
-
if (typeof index !== "number" || index < 0 || index >= candidates.length) {
|
|
17307
|
-
return [];
|
|
17308
|
-
}
|
|
17309
|
-
const candidate = candidates[index];
|
|
17310
|
-
if (!candidate) {
|
|
17311
|
-
return [];
|
|
17312
|
-
}
|
|
17313
|
-
const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
|
|
17314
|
-
return [
|
|
17315
|
-
{
|
|
17316
|
-
id: candidate.id,
|
|
17317
|
-
score
|
|
17318
|
-
}
|
|
17319
|
-
];
|
|
17320
|
-
}).sort((a, b) => b.score - a.score);
|
|
17321
|
-
}
|
|
17322
|
-
throw new Error("Jina rerank request failed after retries");
|
|
17323
|
-
}
|
|
17324
|
-
};
|
|
17325
|
-
|
|
17326
|
-
// src/rerank/factory.ts
|
|
17327
|
-
function createReranker(config) {
|
|
17328
|
-
if (!config.rerank.enabled) {
|
|
17329
|
-
return null;
|
|
17330
|
-
}
|
|
17331
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17332
|
-
if (!apiKey) {
|
|
17333
|
-
return null;
|
|
17334
|
-
}
|
|
17335
|
-
return new JinaReranker({
|
|
17336
|
-
apiKey,
|
|
17337
|
-
model: config.rerank.model
|
|
17338
|
-
});
|
|
17339
|
-
}
|
|
17340
|
-
|
|
17341
17101
|
// src/utils/time.ts
|
|
17342
17102
|
function nowIso() {
|
|
17343
17103
|
return (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -17356,13 +17116,6 @@ function normalizeUrlPath(rawPath) {
|
|
|
17356
17116
|
}
|
|
17357
17117
|
return out;
|
|
17358
17118
|
}
|
|
17359
|
-
function urlPathToMirrorRelative(urlPath) {
|
|
17360
|
-
const normalized = normalizeUrlPath(urlPath);
|
|
17361
|
-
if (normalized === "/") {
|
|
17362
|
-
return "index.md";
|
|
17363
|
-
}
|
|
17364
|
-
return `${normalized.slice(1)}.md`;
|
|
17365
|
-
}
|
|
17366
17119
|
function staticHtmlFileToUrl(filePath, rootDir) {
|
|
17367
17120
|
const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
|
|
17368
17121
|
if (relative === "index.html") {
|
|
@@ -17396,434 +17149,239 @@ function joinUrl(baseUrl, route) {
|
|
|
17396
17149
|
return `${base}${routePart}`;
|
|
17397
17150
|
}
|
|
17398
17151
|
|
|
17399
|
-
// src/vector/
|
|
17400
|
-
|
|
17152
|
+
// src/vector/upstash.ts
|
|
17153
|
+
function chunkIndexName(scope) {
|
|
17154
|
+
return `${scope.projectId}--${scope.scopeName}`;
|
|
17155
|
+
}
|
|
17156
|
+
function pageIndexName(scope) {
|
|
17157
|
+
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
17158
|
+
}
|
|
17159
|
+
var UpstashSearchStore = class {
|
|
17401
17160
|
client;
|
|
17402
|
-
dimension;
|
|
17403
|
-
chunksReady = false;
|
|
17404
|
-
registryReady = false;
|
|
17405
|
-
pagesReady = false;
|
|
17406
17161
|
constructor(opts) {
|
|
17407
17162
|
this.client = opts.client;
|
|
17408
|
-
this.dimension = opts.dimension;
|
|
17409
|
-
}
|
|
17410
|
-
async ensureRegistry() {
|
|
17411
|
-
if (this.registryReady) return;
|
|
17412
|
-
await this.client.execute(`
|
|
17413
|
-
CREATE TABLE IF NOT EXISTS registry (
|
|
17414
|
-
scope_key TEXT PRIMARY KEY,
|
|
17415
|
-
project_id TEXT NOT NULL,
|
|
17416
|
-
scope_name TEXT NOT NULL,
|
|
17417
|
-
model_id TEXT NOT NULL,
|
|
17418
|
-
last_indexed_at TEXT NOT NULL,
|
|
17419
|
-
vector_count INTEGER,
|
|
17420
|
-
last_estimate_tokens INTEGER,
|
|
17421
|
-
last_estimate_cost_usd REAL,
|
|
17422
|
-
last_estimate_changed_chunks INTEGER
|
|
17423
|
-
)
|
|
17424
|
-
`);
|
|
17425
|
-
const estimateCols = [
|
|
17426
|
-
{ name: "last_estimate_tokens", def: "INTEGER" },
|
|
17427
|
-
{ name: "last_estimate_cost_usd", def: "REAL" },
|
|
17428
|
-
{ name: "last_estimate_changed_chunks", def: "INTEGER" }
|
|
17429
|
-
];
|
|
17430
|
-
for (const col of estimateCols) {
|
|
17431
|
-
try {
|
|
17432
|
-
await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
|
|
17433
|
-
} catch (error) {
|
|
17434
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17435
|
-
throw error;
|
|
17436
|
-
}
|
|
17437
|
-
}
|
|
17438
|
-
}
|
|
17439
|
-
this.registryReady = true;
|
|
17440
|
-
}
|
|
17441
|
-
async ensureChunks(dim) {
|
|
17442
|
-
if (this.chunksReady) return;
|
|
17443
|
-
const exists = await this.chunksTableExists();
|
|
17444
|
-
if (exists) {
|
|
17445
|
-
const currentDim = await this.getChunksDimension();
|
|
17446
|
-
if (currentDim !== null && currentDim !== dim) {
|
|
17447
|
-
await this.client.batch([
|
|
17448
|
-
"DROP INDEX IF EXISTS idx",
|
|
17449
|
-
"DROP TABLE IF EXISTS chunks"
|
|
17450
|
-
]);
|
|
17451
|
-
}
|
|
17452
|
-
}
|
|
17453
|
-
await this.client.batch([
|
|
17454
|
-
`CREATE TABLE IF NOT EXISTS chunks (
|
|
17455
|
-
id TEXT PRIMARY KEY,
|
|
17456
|
-
project_id TEXT NOT NULL,
|
|
17457
|
-
scope_name TEXT NOT NULL,
|
|
17458
|
-
url TEXT NOT NULL,
|
|
17459
|
-
path TEXT NOT NULL,
|
|
17460
|
-
title TEXT NOT NULL,
|
|
17461
|
-
section_title TEXT NOT NULL DEFAULT '',
|
|
17462
|
-
heading_path TEXT NOT NULL DEFAULT '[]',
|
|
17463
|
-
snippet TEXT NOT NULL DEFAULT '',
|
|
17464
|
-
chunk_text TEXT NOT NULL DEFAULT '',
|
|
17465
|
-
ordinal INTEGER NOT NULL DEFAULT 0,
|
|
17466
|
-
content_hash TEXT NOT NULL DEFAULT '',
|
|
17467
|
-
model_id TEXT NOT NULL DEFAULT '',
|
|
17468
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17469
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17470
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17471
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17472
|
-
description TEXT NOT NULL DEFAULT '',
|
|
17473
|
-
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17474
|
-
embedding F32_BLOB(${dim})
|
|
17475
|
-
)`,
|
|
17476
|
-
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17477
|
-
]);
|
|
17478
|
-
this.chunksReady = true;
|
|
17479
|
-
}
|
|
17480
|
-
async ensurePages() {
|
|
17481
|
-
if (this.pagesReady) return;
|
|
17482
|
-
await this.client.execute(`
|
|
17483
|
-
CREATE TABLE IF NOT EXISTS pages (
|
|
17484
|
-
project_id TEXT NOT NULL,
|
|
17485
|
-
scope_name TEXT NOT NULL,
|
|
17486
|
-
url TEXT NOT NULL,
|
|
17487
|
-
title TEXT NOT NULL,
|
|
17488
|
-
markdown TEXT NOT NULL,
|
|
17489
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17490
|
-
route_resolution TEXT NOT NULL DEFAULT 'exact',
|
|
17491
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17492
|
-
outgoing_links INTEGER NOT NULL DEFAULT 0,
|
|
17493
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17494
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17495
|
-
indexed_at TEXT NOT NULL,
|
|
17496
|
-
PRIMARY KEY (project_id, scope_name, url)
|
|
17497
|
-
)
|
|
17498
|
-
`);
|
|
17499
|
-
this.pagesReady = true;
|
|
17500
17163
|
}
|
|
17501
|
-
|
|
17502
|
-
|
|
17503
|
-
await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
|
|
17504
|
-
return true;
|
|
17505
|
-
} catch (error) {
|
|
17506
|
-
if (error instanceof Error && error.message.includes("no such table")) {
|
|
17507
|
-
return false;
|
|
17508
|
-
}
|
|
17509
|
-
throw error;
|
|
17510
|
-
}
|
|
17164
|
+
chunkIndex(scope) {
|
|
17165
|
+
return this.client.index(chunkIndexName(scope));
|
|
17511
17166
|
}
|
|
17512
|
-
|
|
17513
|
-
|
|
17514
|
-
* Returns null if the table doesn't exist or the dimension can't be parsed.
|
|
17515
|
-
*/
|
|
17516
|
-
async getChunksDimension() {
|
|
17517
|
-
try {
|
|
17518
|
-
const rs = await this.client.execute(
|
|
17519
|
-
"SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
|
|
17520
|
-
);
|
|
17521
|
-
if (rs.rows.length === 0) return null;
|
|
17522
|
-
const sql = rs.rows[0].sql;
|
|
17523
|
-
const match = sql.match(/F32_BLOB\((\d+)\)/i);
|
|
17524
|
-
return match ? parseInt(match[1], 10) : null;
|
|
17525
|
-
} catch {
|
|
17526
|
-
return null;
|
|
17527
|
-
}
|
|
17167
|
+
pageIndex(scope) {
|
|
17168
|
+
return this.client.index(pageIndexName(scope));
|
|
17528
17169
|
}
|
|
17529
|
-
|
|
17530
|
-
|
|
17531
|
-
|
|
17532
|
-
*/
|
|
17533
|
-
async dropAllTables() {
|
|
17534
|
-
await this.client.batch([
|
|
17535
|
-
"DROP INDEX IF EXISTS idx",
|
|
17536
|
-
"DROP TABLE IF EXISTS chunks",
|
|
17537
|
-
"DROP TABLE IF EXISTS registry",
|
|
17538
|
-
"DROP TABLE IF EXISTS pages"
|
|
17539
|
-
]);
|
|
17540
|
-
this.chunksReady = false;
|
|
17541
|
-
this.registryReady = false;
|
|
17542
|
-
this.pagesReady = false;
|
|
17543
|
-
}
|
|
17544
|
-
async upsert(records, _scope) {
|
|
17545
|
-
if (records.length === 0) return;
|
|
17546
|
-
const dim = this.dimension ?? records[0].vector.length;
|
|
17547
|
-
await this.ensureChunks(dim);
|
|
17170
|
+
async upsertChunks(chunks, scope) {
|
|
17171
|
+
if (chunks.length === 0) return;
|
|
17172
|
+
const index = this.chunkIndex(scope);
|
|
17548
17173
|
const BATCH_SIZE = 100;
|
|
17549
|
-
for (let i = 0; i <
|
|
17550
|
-
const batch =
|
|
17551
|
-
|
|
17552
|
-
sql: `INSERT OR REPLACE INTO chunks
|
|
17553
|
-
(id, project_id, scope_name, url, path, title, section_title,
|
|
17554
|
-
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17555
|
-
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17556
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17557
|
-
args: [
|
|
17558
|
-
r.id,
|
|
17559
|
-
r.metadata.projectId,
|
|
17560
|
-
r.metadata.scopeName,
|
|
17561
|
-
r.metadata.url,
|
|
17562
|
-
r.metadata.path,
|
|
17563
|
-
r.metadata.title,
|
|
17564
|
-
r.metadata.sectionTitle,
|
|
17565
|
-
JSON.stringify(r.metadata.headingPath),
|
|
17566
|
-
r.metadata.snippet,
|
|
17567
|
-
r.metadata.chunkText,
|
|
17568
|
-
r.metadata.ordinal,
|
|
17569
|
-
r.metadata.contentHash,
|
|
17570
|
-
r.metadata.modelId,
|
|
17571
|
-
r.metadata.depth,
|
|
17572
|
-
r.metadata.incomingLinks,
|
|
17573
|
-
r.metadata.routeFile,
|
|
17574
|
-
JSON.stringify(r.metadata.tags),
|
|
17575
|
-
r.metadata.description ?? "",
|
|
17576
|
-
JSON.stringify(r.metadata.keywords ?? []),
|
|
17577
|
-
JSON.stringify(r.vector)
|
|
17578
|
-
]
|
|
17579
|
-
}));
|
|
17580
|
-
await this.client.batch(stmts);
|
|
17174
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
17175
|
+
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
17176
|
+
await index.upsert(batch);
|
|
17581
17177
|
}
|
|
17582
17178
|
}
|
|
17583
|
-
async query
|
|
17584
|
-
const
|
|
17585
|
-
await
|
|
17586
|
-
|
|
17587
|
-
|
|
17588
|
-
|
|
17589
|
-
|
|
17590
|
-
|
|
17591
|
-
|
|
17592
|
-
c.description, c.keywords,
|
|
17593
|
-
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17594
|
-
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17595
|
-
JOIN chunks AS c ON c.rowid = v.id`,
|
|
17596
|
-
args: [queryJson, queryJson, opts.topK]
|
|
17179
|
+
async search(query, opts, scope) {
|
|
17180
|
+
const index = this.chunkIndex(scope);
|
|
17181
|
+
const results = await index.search({
|
|
17182
|
+
query,
|
|
17183
|
+
limit: opts.limit,
|
|
17184
|
+
semanticWeight: opts.semanticWeight,
|
|
17185
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17186
|
+
reranking: opts.reranking,
|
|
17187
|
+
filter: opts.filter
|
|
17597
17188
|
});
|
|
17598
|
-
|
|
17599
|
-
|
|
17600
|
-
|
|
17601
|
-
|
|
17602
|
-
|
|
17603
|
-
|
|
17189
|
+
return results.map((doc) => ({
|
|
17190
|
+
id: doc.id,
|
|
17191
|
+
score: doc.score,
|
|
17192
|
+
metadata: {
|
|
17193
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
17194
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
17195
|
+
url: doc.content.url,
|
|
17196
|
+
path: doc.metadata?.path ?? "",
|
|
17197
|
+
title: doc.content.title,
|
|
17198
|
+
sectionTitle: doc.content.sectionTitle,
|
|
17199
|
+
headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
|
|
17200
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
17201
|
+
chunkText: doc.content.text,
|
|
17202
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
17203
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
17204
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17205
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17206
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
17207
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17208
|
+
description: doc.metadata?.description || void 0,
|
|
17209
|
+
keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
|
|
17604
17210
|
}
|
|
17605
|
-
|
|
17606
|
-
|
|
17607
|
-
|
|
17608
|
-
|
|
17609
|
-
|
|
17610
|
-
|
|
17611
|
-
|
|
17612
|
-
|
|
17613
|
-
|
|
17614
|
-
|
|
17615
|
-
|
|
17616
|
-
|
|
17617
|
-
|
|
17618
|
-
continue;
|
|
17619
|
-
}
|
|
17620
|
-
}
|
|
17621
|
-
const distance = row.distance;
|
|
17622
|
-
const score = 1 - distance;
|
|
17623
|
-
const description = row.description || void 0;
|
|
17624
|
-
const keywords = (() => {
|
|
17625
|
-
const raw = row.keywords || "[]";
|
|
17626
|
-
const parsed = JSON.parse(raw);
|
|
17627
|
-
return parsed.length > 0 ? parsed : void 0;
|
|
17628
|
-
})();
|
|
17629
|
-
hits.push({
|
|
17630
|
-
id: row.id,
|
|
17631
|
-
score,
|
|
17632
|
-
metadata: {
|
|
17633
|
-
projectId,
|
|
17634
|
-
scopeName,
|
|
17635
|
-
url: row.url,
|
|
17636
|
-
path: rowPath,
|
|
17637
|
-
title: row.title,
|
|
17638
|
-
sectionTitle: row.section_title,
|
|
17639
|
-
headingPath: JSON.parse(row.heading_path || "[]"),
|
|
17640
|
-
snippet: row.snippet,
|
|
17641
|
-
chunkText: row.chunk_text || "",
|
|
17642
|
-
ordinal: row.ordinal || 0,
|
|
17643
|
-
contentHash: row.content_hash,
|
|
17644
|
-
modelId: row.model_id,
|
|
17645
|
-
depth: row.depth,
|
|
17646
|
-
incomingLinks: row.incoming_links,
|
|
17647
|
-
routeFile: row.route_file,
|
|
17648
|
-
tags,
|
|
17649
|
-
description,
|
|
17650
|
-
keywords
|
|
17651
|
-
}
|
|
17211
|
+
}));
|
|
17212
|
+
}
|
|
17213
|
+
async searchPages(query, opts, scope) {
|
|
17214
|
+
const index = this.pageIndex(scope);
|
|
17215
|
+
let results;
|
|
17216
|
+
try {
|
|
17217
|
+
results = await index.search({
|
|
17218
|
+
query,
|
|
17219
|
+
limit: opts.limit,
|
|
17220
|
+
semanticWeight: opts.semanticWeight,
|
|
17221
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17222
|
+
reranking: true,
|
|
17223
|
+
filter: opts.filter
|
|
17652
17224
|
});
|
|
17225
|
+
} catch {
|
|
17226
|
+
return [];
|
|
17653
17227
|
}
|
|
17654
|
-
|
|
17655
|
-
|
|
17228
|
+
return results.map((doc) => ({
|
|
17229
|
+
id: doc.id,
|
|
17230
|
+
score: doc.score,
|
|
17231
|
+
title: doc.content.title,
|
|
17232
|
+
url: doc.content.url,
|
|
17233
|
+
description: doc.content.description ?? "",
|
|
17234
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17235
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17236
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17237
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
17238
|
+
}));
|
|
17656
17239
|
}
|
|
17657
17240
|
async deleteByIds(ids, scope) {
|
|
17658
17241
|
if (ids.length === 0) return;
|
|
17242
|
+
const index = this.chunkIndex(scope);
|
|
17659
17243
|
const BATCH_SIZE = 500;
|
|
17660
17244
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
17661
17245
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
17662
|
-
|
|
17663
|
-
await this.client.execute({
|
|
17664
|
-
sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
|
|
17665
|
-
args: [scope.projectId, scope.scopeName, ...batch]
|
|
17666
|
-
});
|
|
17246
|
+
await index.delete(batch);
|
|
17667
17247
|
}
|
|
17668
17248
|
}
|
|
17669
17249
|
async deleteScope(scope) {
|
|
17670
|
-
await this.ensureRegistry();
|
|
17671
17250
|
try {
|
|
17672
|
-
|
|
17673
|
-
|
|
17674
|
-
|
|
17675
|
-
});
|
|
17676
|
-
} catch (error) {
|
|
17677
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17678
|
-
throw error;
|
|
17679
|
-
}
|
|
17251
|
+
const chunkIdx = this.chunkIndex(scope);
|
|
17252
|
+
await chunkIdx.deleteIndex();
|
|
17253
|
+
} catch {
|
|
17680
17254
|
}
|
|
17681
17255
|
try {
|
|
17682
|
-
|
|
17683
|
-
|
|
17684
|
-
|
|
17685
|
-
});
|
|
17686
|
-
} catch (error) {
|
|
17687
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17688
|
-
throw error;
|
|
17689
|
-
}
|
|
17256
|
+
const pageIdx = this.pageIndex(scope);
|
|
17257
|
+
await pageIdx.deleteIndex();
|
|
17258
|
+
} catch {
|
|
17690
17259
|
}
|
|
17691
|
-
await this.client.execute({
|
|
17692
|
-
sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17693
|
-
args: [scope.projectId, scope.scopeName]
|
|
17694
|
-
});
|
|
17695
17260
|
}
|
|
17696
|
-
async listScopes(
|
|
17697
|
-
await this.
|
|
17698
|
-
const
|
|
17699
|
-
|
|
17700
|
-
|
|
17701
|
-
|
|
17702
|
-
|
|
17703
|
-
|
|
17704
|
-
|
|
17705
|
-
|
|
17706
|
-
|
|
17707
|
-
|
|
17708
|
-
|
|
17709
|
-
|
|
17710
|
-
|
|
17711
|
-
|
|
17712
|
-
|
|
17713
|
-
|
|
17714
|
-
|
|
17715
|
-
|
|
17716
|
-
|
|
17717
|
-
|
|
17718
|
-
|
|
17719
|
-
|
|
17720
|
-
|
|
17721
|
-
|
|
17722
|
-
|
|
17723
|
-
|
|
17724
|
-
|
|
17725
|
-
|
|
17726
|
-
|
|
17727
|
-
|
|
17728
|
-
|
|
17729
|
-
|
|
17730
|
-
|
|
17731
|
-
info.lastEstimateCostUSD ?? null,
|
|
17732
|
-
info.lastEstimateChangedChunks ?? null
|
|
17733
|
-
]
|
|
17734
|
-
});
|
|
17261
|
+
async listScopes(projectId) {
|
|
17262
|
+
const allIndexes = await this.client.listIndexes();
|
|
17263
|
+
const prefix = `${projectId}--`;
|
|
17264
|
+
const scopeNames = /* @__PURE__ */ new Set();
|
|
17265
|
+
for (const name of allIndexes) {
|
|
17266
|
+
if (name.startsWith(prefix) && !name.endsWith("--pages")) {
|
|
17267
|
+
const scopeName = name.slice(prefix.length);
|
|
17268
|
+
scopeNames.add(scopeName);
|
|
17269
|
+
}
|
|
17270
|
+
}
|
|
17271
|
+
const scopes = [];
|
|
17272
|
+
for (const scopeName of scopeNames) {
|
|
17273
|
+
const scope = {
|
|
17274
|
+
projectId,
|
|
17275
|
+
scopeName,
|
|
17276
|
+
scopeId: `${projectId}:${scopeName}`
|
|
17277
|
+
};
|
|
17278
|
+
try {
|
|
17279
|
+
const info = await this.chunkIndex(scope).info();
|
|
17280
|
+
scopes.push({
|
|
17281
|
+
projectId,
|
|
17282
|
+
scopeName,
|
|
17283
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
17284
|
+
documentCount: info.documentCount
|
|
17285
|
+
});
|
|
17286
|
+
} catch {
|
|
17287
|
+
scopes.push({
|
|
17288
|
+
projectId,
|
|
17289
|
+
scopeName,
|
|
17290
|
+
lastIndexedAt: "unknown",
|
|
17291
|
+
documentCount: 0
|
|
17292
|
+
});
|
|
17293
|
+
}
|
|
17294
|
+
}
|
|
17295
|
+
return scopes;
|
|
17735
17296
|
}
|
|
17736
17297
|
async getContentHashes(scope) {
|
|
17737
|
-
const exists = await this.chunksTableExists();
|
|
17738
|
-
if (!exists) return /* @__PURE__ */ new Map();
|
|
17739
|
-
const rs = await this.client.execute({
|
|
17740
|
-
sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
|
|
17741
|
-
args: [scope.projectId, scope.scopeName]
|
|
17742
|
-
});
|
|
17743
17298
|
const map = /* @__PURE__ */ new Map();
|
|
17744
|
-
|
|
17745
|
-
|
|
17299
|
+
const index = this.chunkIndex(scope);
|
|
17300
|
+
let cursor = "0";
|
|
17301
|
+
try {
|
|
17302
|
+
for (; ; ) {
|
|
17303
|
+
const result = await index.range({ cursor, limit: 100 });
|
|
17304
|
+
for (const doc of result.documents) {
|
|
17305
|
+
if (doc.metadata?.contentHash) {
|
|
17306
|
+
map.set(doc.id, doc.metadata.contentHash);
|
|
17307
|
+
}
|
|
17308
|
+
}
|
|
17309
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17310
|
+
cursor = result.nextCursor;
|
|
17311
|
+
}
|
|
17312
|
+
} catch {
|
|
17746
17313
|
}
|
|
17747
17314
|
return map;
|
|
17748
17315
|
}
|
|
17749
17316
|
async upsertPages(pages, scope) {
|
|
17750
17317
|
if (pages.length === 0) return;
|
|
17751
|
-
|
|
17752
|
-
|
|
17753
|
-
if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
|
|
17754
|
-
throw new Error(
|
|
17755
|
-
`Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
|
|
17756
|
-
);
|
|
17757
|
-
}
|
|
17758
|
-
}
|
|
17759
|
-
const BATCH_SIZE = 100;
|
|
17318
|
+
const index = this.pageIndex(scope);
|
|
17319
|
+
const BATCH_SIZE = 50;
|
|
17760
17320
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
17761
17321
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
17762
|
-
const
|
|
17763
|
-
|
|
17764
|
-
|
|
17765
|
-
|
|
17766
|
-
|
|
17767
|
-
|
|
17768
|
-
p.
|
|
17769
|
-
p.
|
|
17770
|
-
p.
|
|
17771
|
-
p.
|
|
17772
|
-
|
|
17773
|
-
|
|
17774
|
-
p.
|
|
17775
|
-
p.
|
|
17776
|
-
p.
|
|
17777
|
-
p.
|
|
17778
|
-
|
|
17779
|
-
p.
|
|
17780
|
-
|
|
17322
|
+
const docs = batch.map((p) => ({
|
|
17323
|
+
id: p.url,
|
|
17324
|
+
content: {
|
|
17325
|
+
title: p.title,
|
|
17326
|
+
url: p.url,
|
|
17327
|
+
type: "page",
|
|
17328
|
+
description: p.description ?? "",
|
|
17329
|
+
keywords: (p.keywords ?? []).join(","),
|
|
17330
|
+
summary: p.summary ?? "",
|
|
17331
|
+
tags: p.tags.join(",")
|
|
17332
|
+
},
|
|
17333
|
+
metadata: {
|
|
17334
|
+
markdown: p.markdown,
|
|
17335
|
+
projectId: p.projectId,
|
|
17336
|
+
scopeName: p.scopeName,
|
|
17337
|
+
routeFile: p.routeFile,
|
|
17338
|
+
routeResolution: p.routeResolution,
|
|
17339
|
+
incomingLinks: p.incomingLinks,
|
|
17340
|
+
outgoingLinks: p.outgoingLinks,
|
|
17341
|
+
depth: p.depth,
|
|
17342
|
+
indexedAt: p.indexedAt
|
|
17343
|
+
}
|
|
17781
17344
|
}));
|
|
17782
|
-
await
|
|
17345
|
+
await index.upsert(docs);
|
|
17783
17346
|
}
|
|
17784
17347
|
}
|
|
17785
17348
|
async getPage(url, scope) {
|
|
17786
|
-
|
|
17787
|
-
|
|
17788
|
-
|
|
17789
|
-
|
|
17790
|
-
|
|
17791
|
-
|
|
17792
|
-
|
|
17793
|
-
|
|
17794
|
-
|
|
17795
|
-
|
|
17796
|
-
|
|
17797
|
-
|
|
17798
|
-
|
|
17799
|
-
|
|
17800
|
-
|
|
17801
|
-
|
|
17802
|
-
|
|
17803
|
-
|
|
17804
|
-
|
|
17805
|
-
|
|
17806
|
-
|
|
17349
|
+
const index = this.pageIndex(scope);
|
|
17350
|
+
try {
|
|
17351
|
+
const results = await index.fetch([url]);
|
|
17352
|
+
const doc = results[0];
|
|
17353
|
+
if (!doc) return null;
|
|
17354
|
+
return {
|
|
17355
|
+
url: doc.content.url,
|
|
17356
|
+
title: doc.content.title,
|
|
17357
|
+
markdown: doc.metadata.markdown,
|
|
17358
|
+
projectId: doc.metadata.projectId,
|
|
17359
|
+
scopeName: doc.metadata.scopeName,
|
|
17360
|
+
routeFile: doc.metadata.routeFile,
|
|
17361
|
+
routeResolution: doc.metadata.routeResolution,
|
|
17362
|
+
incomingLinks: doc.metadata.incomingLinks,
|
|
17363
|
+
outgoingLinks: doc.metadata.outgoingLinks,
|
|
17364
|
+
depth: doc.metadata.depth,
|
|
17365
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17366
|
+
indexedAt: doc.metadata.indexedAt,
|
|
17367
|
+
summary: doc.content.summary || void 0,
|
|
17368
|
+
description: doc.content.description || void 0,
|
|
17369
|
+
keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
|
|
17370
|
+
};
|
|
17371
|
+
} catch {
|
|
17372
|
+
return null;
|
|
17373
|
+
}
|
|
17807
17374
|
}
|
|
17808
17375
|
async deletePages(scope) {
|
|
17809
|
-
|
|
17810
|
-
|
|
17811
|
-
|
|
17812
|
-
|
|
17813
|
-
}
|
|
17814
|
-
}
|
|
17815
|
-
async getScopeModelId(scope) {
|
|
17816
|
-
await this.ensureRegistry();
|
|
17817
|
-
const rs = await this.client.execute({
|
|
17818
|
-
sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17819
|
-
args: [scope.projectId, scope.scopeName]
|
|
17820
|
-
});
|
|
17821
|
-
if (rs.rows.length === 0) return null;
|
|
17822
|
-
return rs.rows[0].model_id;
|
|
17376
|
+
try {
|
|
17377
|
+
const index = this.pageIndex(scope);
|
|
17378
|
+
await index.reset();
|
|
17379
|
+
} catch {
|
|
17380
|
+
}
|
|
17823
17381
|
}
|
|
17824
17382
|
async health() {
|
|
17825
17383
|
try {
|
|
17826
|
-
await this.client.
|
|
17384
|
+
await this.client.info();
|
|
17827
17385
|
return { ok: true };
|
|
17828
17386
|
} catch (error) {
|
|
17829
17387
|
return {
|
|
@@ -17832,40 +17390,64 @@ var TursoVectorStore = class {
|
|
|
17832
17390
|
};
|
|
17833
17391
|
}
|
|
17834
17392
|
}
|
|
17393
|
+
async dropAllIndexes(projectId) {
|
|
17394
|
+
const allIndexes = await this.client.listIndexes();
|
|
17395
|
+
const prefix = `${projectId}--`;
|
|
17396
|
+
for (const name of allIndexes) {
|
|
17397
|
+
if (name.startsWith(prefix)) {
|
|
17398
|
+
try {
|
|
17399
|
+
const index = this.client.index(name);
|
|
17400
|
+
await index.deleteIndex();
|
|
17401
|
+
} catch {
|
|
17402
|
+
}
|
|
17403
|
+
}
|
|
17404
|
+
}
|
|
17405
|
+
}
|
|
17835
17406
|
};
|
|
17836
17407
|
|
|
17837
17408
|
// src/vector/factory.ts
|
|
17838
|
-
async function
|
|
17839
|
-
const
|
|
17840
|
-
const
|
|
17841
|
-
if (
|
|
17842
|
-
const { createClient: createClient2 } = await import('@libsql/client/http');
|
|
17843
|
-
const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
|
|
17844
|
-
const client2 = createClient2({
|
|
17845
|
-
url: remoteUrl,
|
|
17846
|
-
authToken
|
|
17847
|
-
});
|
|
17848
|
-
return new TursoVectorStore({
|
|
17849
|
-
client: client2,
|
|
17850
|
-
dimension: config.vector.dimension
|
|
17851
|
-
});
|
|
17852
|
-
}
|
|
17853
|
-
if (isServerless()) {
|
|
17409
|
+
async function createUpstashStore(config) {
|
|
17410
|
+
const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
|
|
17411
|
+
const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
|
|
17412
|
+
if (!url || !token) {
|
|
17854
17413
|
throw new SearchSocketError(
|
|
17855
17414
|
"VECTOR_BACKEND_UNAVAILABLE",
|
|
17856
|
-
`
|
|
17415
|
+
`Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
|
|
17857
17416
|
);
|
|
17858
17417
|
}
|
|
17859
|
-
const {
|
|
17860
|
-
const
|
|
17861
|
-
|
|
17862
|
-
|
|
17863
|
-
|
|
17864
|
-
|
|
17865
|
-
|
|
17866
|
-
|
|
17867
|
-
|
|
17868
|
-
|
|
17418
|
+
const { Search } = await import('@upstash/search');
|
|
17419
|
+
const client = new Search({ url, token });
|
|
17420
|
+
return new UpstashSearchStore({ client });
|
|
17421
|
+
}
|
|
17422
|
+
|
|
17423
|
+
// src/utils/pattern.ts
|
|
17424
|
+
function matchUrlPattern(url, pattern) {
|
|
17425
|
+
const norm = (p) => p !== "/" && p.endsWith("/") ? p.slice(0, -1) : p;
|
|
17426
|
+
const normalizedUrl = norm(url);
|
|
17427
|
+
const normalizedPattern = norm(pattern);
|
|
17428
|
+
if (normalizedPattern.endsWith("/**")) {
|
|
17429
|
+
const prefix = normalizedPattern.slice(0, -3);
|
|
17430
|
+
if (prefix === "") {
|
|
17431
|
+
return true;
|
|
17432
|
+
}
|
|
17433
|
+
return normalizedUrl === prefix || normalizedUrl.startsWith(prefix + "/");
|
|
17434
|
+
}
|
|
17435
|
+
if (normalizedPattern.endsWith("/*")) {
|
|
17436
|
+
const prefix = normalizedPattern.slice(0, -2);
|
|
17437
|
+
if (prefix === "") {
|
|
17438
|
+
return normalizedUrl !== "/" && !normalizedUrl.slice(1).includes("/");
|
|
17439
|
+
}
|
|
17440
|
+
if (!normalizedUrl.startsWith(prefix + "/")) return false;
|
|
17441
|
+
const rest = normalizedUrl.slice(prefix.length + 1);
|
|
17442
|
+
return rest.length > 0 && !rest.includes("/");
|
|
17443
|
+
}
|
|
17444
|
+
return normalizedUrl === normalizedPattern;
|
|
17445
|
+
}
|
|
17446
|
+
function matchUrlPatterns(url, patterns) {
|
|
17447
|
+
for (const pattern of patterns) {
|
|
17448
|
+
if (matchUrlPattern(url, pattern)) return true;
|
|
17449
|
+
}
|
|
17450
|
+
return false;
|
|
17869
17451
|
}
|
|
17870
17452
|
|
|
17871
17453
|
// src/search/ranking.ts
|
|
@@ -17875,7 +17457,12 @@ function nonNegativeOrZero(value) {
|
|
|
17875
17457
|
}
|
|
17876
17458
|
return Math.max(0, value);
|
|
17877
17459
|
}
|
|
17878
|
-
function
|
|
17460
|
+
function normalizeForTitleMatch(text) {
|
|
17461
|
+
return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
|
|
17462
|
+
}
|
|
17463
|
+
function rankHits(hits, config, query) {
|
|
17464
|
+
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
17465
|
+
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
17879
17466
|
return hits.map((hit) => {
|
|
17880
17467
|
let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
17881
17468
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
@@ -17886,6 +17473,12 @@ function rankHits(hits, config) {
|
|
|
17886
17473
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
17887
17474
|
score += depthBoost * config.ranking.weights.depth;
|
|
17888
17475
|
}
|
|
17476
|
+
if (normalizedQuery && titleMatchWeight > 0) {
|
|
17477
|
+
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
17478
|
+
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
17479
|
+
score += titleMatchWeight;
|
|
17480
|
+
}
|
|
17481
|
+
}
|
|
17889
17482
|
return {
|
|
17890
17483
|
hit,
|
|
17891
17484
|
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
@@ -17895,22 +17488,36 @@ function rankHits(hits, config) {
|
|
|
17895
17488
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17896
17489
|
});
|
|
17897
17490
|
}
|
|
17898
|
-
function
|
|
17899
|
-
|
|
17900
|
-
const
|
|
17901
|
-
|
|
17902
|
-
|
|
17903
|
-
|
|
17491
|
+
function trimByScoreGap(results, config) {
|
|
17492
|
+
if (results.length === 0) return results;
|
|
17493
|
+
const threshold = config.ranking.scoreGapThreshold;
|
|
17494
|
+
const minScore = config.ranking.minScore;
|
|
17495
|
+
if (minScore > 0 && results.length > 0) {
|
|
17496
|
+
const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
|
|
17497
|
+
const mid = Math.floor(sortedScores.length / 2);
|
|
17498
|
+
const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
|
|
17499
|
+
if (median < minScore) return [];
|
|
17500
|
+
}
|
|
17501
|
+
if (threshold > 0 && results.length > 1) {
|
|
17502
|
+
for (let i = 1; i < results.length; i++) {
|
|
17503
|
+
const prev = results[i - 1].pageScore;
|
|
17504
|
+
const current = results[i].pageScore;
|
|
17505
|
+
if (prev > 0) {
|
|
17506
|
+
const gap = (prev - current) / prev;
|
|
17507
|
+
if (gap >= threshold) {
|
|
17508
|
+
return results.slice(0, i);
|
|
17509
|
+
}
|
|
17510
|
+
}
|
|
17904
17511
|
}
|
|
17905
17512
|
}
|
|
17906
|
-
|
|
17513
|
+
return results;
|
|
17514
|
+
}
|
|
17515
|
+
function findPageWeight(url, pageWeights) {
|
|
17516
|
+
let bestPattern = "";
|
|
17907
17517
|
let bestWeight = 1;
|
|
17908
17518
|
for (const [pattern, weight] of Object.entries(pageWeights)) {
|
|
17909
|
-
|
|
17910
|
-
|
|
17911
|
-
const prefix = `${normalizedPattern}/`;
|
|
17912
|
-
if (normalizedUrl.startsWith(prefix) && prefix.length > bestPrefix.length) {
|
|
17913
|
-
bestPrefix = prefix;
|
|
17519
|
+
if (matchUrlPattern(url, pattern) && pattern.length > bestPattern.length) {
|
|
17520
|
+
bestPattern = pattern;
|
|
17914
17521
|
bestWeight = weight;
|
|
17915
17522
|
}
|
|
17916
17523
|
}
|
|
@@ -17959,6 +17566,61 @@ function aggregateByPage(ranked, config) {
|
|
|
17959
17566
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17960
17567
|
});
|
|
17961
17568
|
}
|
|
17569
|
+
function mergePageAndChunkResults(pageHits, rankedChunks, config) {
|
|
17570
|
+
if (pageHits.length === 0) return rankedChunks;
|
|
17571
|
+
const w = config.search.pageSearchWeight;
|
|
17572
|
+
const pageScoreMap = /* @__PURE__ */ new Map();
|
|
17573
|
+
for (const ph of pageHits) {
|
|
17574
|
+
pageScoreMap.set(ph.url, ph);
|
|
17575
|
+
}
|
|
17576
|
+
const pagesWithChunks = /* @__PURE__ */ new Set();
|
|
17577
|
+
const merged = rankedChunks.map((ranked) => {
|
|
17578
|
+
const url = ranked.hit.metadata.url;
|
|
17579
|
+
const pageHit = pageScoreMap.get(url);
|
|
17580
|
+
if (pageHit) {
|
|
17581
|
+
pagesWithChunks.add(url);
|
|
17582
|
+
const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
|
|
17583
|
+
return {
|
|
17584
|
+
hit: ranked.hit,
|
|
17585
|
+
finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
|
|
17586
|
+
};
|
|
17587
|
+
}
|
|
17588
|
+
return ranked;
|
|
17589
|
+
});
|
|
17590
|
+
for (const [url, pageHit] of pageScoreMap) {
|
|
17591
|
+
if (pagesWithChunks.has(url)) continue;
|
|
17592
|
+
const syntheticScore = pageHit.score * w;
|
|
17593
|
+
const syntheticHit = {
|
|
17594
|
+
id: `page:${url}`,
|
|
17595
|
+
score: pageHit.score,
|
|
17596
|
+
metadata: {
|
|
17597
|
+
projectId: "",
|
|
17598
|
+
scopeName: "",
|
|
17599
|
+
url: pageHit.url,
|
|
17600
|
+
path: pageHit.url,
|
|
17601
|
+
title: pageHit.title,
|
|
17602
|
+
sectionTitle: "",
|
|
17603
|
+
headingPath: [],
|
|
17604
|
+
snippet: pageHit.description || pageHit.title,
|
|
17605
|
+
chunkText: pageHit.description || pageHit.title,
|
|
17606
|
+
ordinal: 0,
|
|
17607
|
+
contentHash: "",
|
|
17608
|
+
depth: pageHit.depth,
|
|
17609
|
+
incomingLinks: pageHit.incomingLinks,
|
|
17610
|
+
routeFile: pageHit.routeFile,
|
|
17611
|
+
tags: pageHit.tags
|
|
17612
|
+
}
|
|
17613
|
+
};
|
|
17614
|
+
merged.push({
|
|
17615
|
+
hit: syntheticHit,
|
|
17616
|
+
finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
|
|
17617
|
+
});
|
|
17618
|
+
}
|
|
17619
|
+
return merged.sort((a, b) => {
|
|
17620
|
+
const delta = b.finalScore - a.finalScore;
|
|
17621
|
+
return Number.isNaN(delta) ? 0 : delta;
|
|
17622
|
+
});
|
|
17623
|
+
}
|
|
17962
17624
|
|
|
17963
17625
|
// src/search/engine.ts
|
|
17964
17626
|
var requestSchema = zod.z.object({
|
|
@@ -17967,34 +17629,25 @@ var requestSchema = zod.z.object({
|
|
|
17967
17629
|
scope: zod.z.string().optional(),
|
|
17968
17630
|
pathPrefix: zod.z.string().optional(),
|
|
17969
17631
|
tags: zod.z.array(zod.z.string()).optional(),
|
|
17970
|
-
rerank: zod.z.boolean().optional(),
|
|
17971
17632
|
groupBy: zod.z.enum(["page", "chunk"]).optional()
|
|
17972
17633
|
});
|
|
17973
17634
|
var SearchEngine = class _SearchEngine {
|
|
17974
17635
|
cwd;
|
|
17975
17636
|
config;
|
|
17976
|
-
|
|
17977
|
-
vectorStore;
|
|
17978
|
-
reranker;
|
|
17637
|
+
store;
|
|
17979
17638
|
constructor(options) {
|
|
17980
17639
|
this.cwd = options.cwd;
|
|
17981
17640
|
this.config = options.config;
|
|
17982
|
-
this.
|
|
17983
|
-
this.vectorStore = options.vectorStore;
|
|
17984
|
-
this.reranker = options.reranker;
|
|
17641
|
+
this.store = options.store;
|
|
17985
17642
|
}
|
|
17986
17643
|
static async create(options = {}) {
|
|
17987
17644
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
17988
17645
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
17989
|
-
const
|
|
17990
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
17991
|
-
const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
|
|
17646
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
17992
17647
|
return new _SearchEngine({
|
|
17993
17648
|
cwd,
|
|
17994
17649
|
config,
|
|
17995
|
-
|
|
17996
|
-
vectorStore,
|
|
17997
|
-
reranker
|
|
17650
|
+
store
|
|
17998
17651
|
});
|
|
17999
17652
|
}
|
|
18000
17653
|
getConfig() {
|
|
@@ -18008,99 +17661,130 @@ var SearchEngine = class _SearchEngine {
|
|
|
18008
17661
|
const input = parsed.data;
|
|
18009
17662
|
const totalStart = process.hrtime.bigint();
|
|
18010
17663
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
18011
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
18012
17664
|
const topK = input.topK ?? 10;
|
|
18013
|
-
const wantsRerank = Boolean(input.rerank);
|
|
18014
17665
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
18015
17666
|
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
18016
|
-
const
|
|
18017
|
-
|
|
18018
|
-
|
|
18019
|
-
|
|
18020
|
-
throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
|
|
17667
|
+
const filterParts = [];
|
|
17668
|
+
if (input.pathPrefix) {
|
|
17669
|
+
const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
|
|
17670
|
+
filterParts.push(`url GLOB '${prefix}*'`);
|
|
18021
17671
|
}
|
|
18022
|
-
|
|
18023
|
-
|
|
18024
|
-
|
|
18025
|
-
|
|
18026
|
-
{
|
|
18027
|
-
topK: candidateK,
|
|
18028
|
-
pathPrefix: input.pathPrefix,
|
|
18029
|
-
tags: input.tags
|
|
18030
|
-
},
|
|
18031
|
-
resolvedScope
|
|
18032
|
-
);
|
|
18033
|
-
const vectorMs = hrTimeMs(vectorStart);
|
|
18034
|
-
const ranked = rankHits(hits, this.config);
|
|
18035
|
-
let usedRerank = false;
|
|
18036
|
-
let rerankMs = 0;
|
|
18037
|
-
let ordered = ranked;
|
|
18038
|
-
if (wantsRerank) {
|
|
18039
|
-
const rerankStart = process.hrtime.bigint();
|
|
18040
|
-
ordered = await this.rerankHits(input.q, ranked, topK);
|
|
18041
|
-
rerankMs = hrTimeMs(rerankStart);
|
|
18042
|
-
usedRerank = true;
|
|
17672
|
+
if (input.tags && input.tags.length > 0) {
|
|
17673
|
+
for (const tag of input.tags) {
|
|
17674
|
+
filterParts.push(`tags GLOB '*${tag}*'`);
|
|
17675
|
+
}
|
|
18043
17676
|
}
|
|
18044
|
-
|
|
18045
|
-
const
|
|
17677
|
+
const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
|
|
17678
|
+
const useDualSearch = this.config.search.dualSearch && groupByPage;
|
|
17679
|
+
const searchStart = process.hrtime.bigint();
|
|
17680
|
+
let ranked;
|
|
17681
|
+
if (useDualSearch) {
|
|
17682
|
+
const chunkLimit = Math.max(topK * 10, 100);
|
|
17683
|
+
const pageLimit = 20;
|
|
17684
|
+
const [pageHits, chunkHits] = await Promise.all([
|
|
17685
|
+
this.store.searchPages(
|
|
17686
|
+
input.q,
|
|
17687
|
+
{
|
|
17688
|
+
limit: pageLimit,
|
|
17689
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17690
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17691
|
+
filter
|
|
17692
|
+
},
|
|
17693
|
+
resolvedScope
|
|
17694
|
+
),
|
|
17695
|
+
this.store.search(
|
|
17696
|
+
input.q,
|
|
17697
|
+
{
|
|
17698
|
+
limit: chunkLimit,
|
|
17699
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17700
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17701
|
+
reranking: false,
|
|
17702
|
+
filter
|
|
17703
|
+
},
|
|
17704
|
+
resolvedScope
|
|
17705
|
+
)
|
|
17706
|
+
]);
|
|
17707
|
+
const rankedChunks = rankHits(chunkHits, this.config, input.q);
|
|
17708
|
+
ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
|
|
17709
|
+
} else {
|
|
17710
|
+
const hits = await this.store.search(
|
|
17711
|
+
input.q,
|
|
17712
|
+
{
|
|
17713
|
+
limit: candidateK,
|
|
17714
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17715
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17716
|
+
reranking: this.config.search.reranking,
|
|
17717
|
+
filter
|
|
17718
|
+
},
|
|
17719
|
+
resolvedScope
|
|
17720
|
+
);
|
|
17721
|
+
ranked = rankHits(hits, this.config, input.q);
|
|
17722
|
+
}
|
|
17723
|
+
const searchMs = hrTimeMs(searchStart);
|
|
17724
|
+
const results = this.buildResults(ranked, topK, groupByPage, input.q);
|
|
17725
|
+
return {
|
|
17726
|
+
q: input.q,
|
|
17727
|
+
scope: resolvedScope.scopeName,
|
|
17728
|
+
results,
|
|
17729
|
+
meta: {
|
|
17730
|
+
timingsMs: {
|
|
17731
|
+
search: Math.round(searchMs),
|
|
17732
|
+
total: Math.round(hrTimeMs(totalStart))
|
|
17733
|
+
}
|
|
17734
|
+
}
|
|
17735
|
+
};
|
|
17736
|
+
}
|
|
17737
|
+
ensureSnippet(hit) {
|
|
17738
|
+
const snippet = hit.hit.metadata.snippet;
|
|
17739
|
+
if (snippet && snippet.length >= 30) return snippet;
|
|
17740
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
17741
|
+
if (chunkText) return toSnippet(chunkText);
|
|
17742
|
+
return snippet || "";
|
|
17743
|
+
}
|
|
17744
|
+
buildResults(ordered, topK, groupByPage, _query) {
|
|
18046
17745
|
if (groupByPage) {
|
|
18047
17746
|
let pages = aggregateByPage(ordered, this.config);
|
|
18048
|
-
|
|
18049
|
-
pages = pages.filter((p) => p.pageScore >= minScore);
|
|
18050
|
-
}
|
|
17747
|
+
pages = trimByScoreGap(pages, this.config);
|
|
18051
17748
|
const minRatio = this.config.ranking.minChunkScoreRatio;
|
|
18052
|
-
|
|
17749
|
+
return pages.slice(0, topK).map((page) => {
|
|
18053
17750
|
const bestScore = page.bestChunk.finalScore;
|
|
18054
|
-
const
|
|
18055
|
-
const meaningful = page.matchingChunks.filter((c) => c.finalScore >=
|
|
17751
|
+
const minChunkScore = Number.isFinite(bestScore) ? bestScore * minRatio : Number.NEGATIVE_INFINITY;
|
|
17752
|
+
const meaningful = page.matchingChunks.filter((c) => c.finalScore >= minChunkScore).slice(0, 5);
|
|
18056
17753
|
return {
|
|
18057
17754
|
url: page.url,
|
|
18058
17755
|
title: page.title,
|
|
18059
17756
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
18060
|
-
snippet: page.bestChunk
|
|
17757
|
+
snippet: this.ensureSnippet(page.bestChunk),
|
|
18061
17758
|
score: Number(page.pageScore.toFixed(6)),
|
|
18062
17759
|
routeFile: page.routeFile,
|
|
18063
17760
|
chunks: meaningful.length > 1 ? meaningful.map((c) => ({
|
|
18064
17761
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
18065
|
-
snippet: c
|
|
17762
|
+
snippet: this.ensureSnippet(c),
|
|
18066
17763
|
headingPath: c.hit.metadata.headingPath,
|
|
18067
17764
|
score: Number(c.finalScore.toFixed(6))
|
|
18068
17765
|
})) : void 0
|
|
18069
17766
|
};
|
|
18070
17767
|
});
|
|
18071
17768
|
} else {
|
|
17769
|
+
let filtered = ordered;
|
|
17770
|
+
const minScore = this.config.ranking.minScore;
|
|
18072
17771
|
if (minScore > 0) {
|
|
18073
|
-
|
|
17772
|
+
filtered = ordered.filter((entry) => entry.finalScore >= minScore);
|
|
18074
17773
|
}
|
|
18075
|
-
|
|
17774
|
+
return filtered.slice(0, topK).map(({ hit, finalScore }) => ({
|
|
18076
17775
|
url: hit.metadata.url,
|
|
18077
17776
|
title: hit.metadata.title,
|
|
18078
17777
|
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
18079
|
-
snippet: hit
|
|
17778
|
+
snippet: this.ensureSnippet({ hit, finalScore }),
|
|
18080
17779
|
score: Number(finalScore.toFixed(6)),
|
|
18081
17780
|
routeFile: hit.metadata.routeFile
|
|
18082
17781
|
}));
|
|
18083
17782
|
}
|
|
18084
|
-
return {
|
|
18085
|
-
q: input.q,
|
|
18086
|
-
scope: resolvedScope.scopeName,
|
|
18087
|
-
results,
|
|
18088
|
-
meta: {
|
|
18089
|
-
timingsMs: {
|
|
18090
|
-
embed: Math.round(embedMs),
|
|
18091
|
-
vector: Math.round(vectorMs),
|
|
18092
|
-
rerank: Math.round(rerankMs),
|
|
18093
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
18094
|
-
},
|
|
18095
|
-
usedRerank,
|
|
18096
|
-
modelId: this.config.embeddings.model
|
|
18097
|
-
}
|
|
18098
|
-
};
|
|
18099
17783
|
}
|
|
18100
17784
|
async getPage(pathOrUrl, scope) {
|
|
18101
17785
|
const resolvedScope = resolveScope(this.config, scope);
|
|
18102
17786
|
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
18103
|
-
const page = await this.
|
|
17787
|
+
const page = await this.store.getPage(urlPath, resolvedScope);
|
|
18104
17788
|
if (!page) {
|
|
18105
17789
|
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
18106
17790
|
}
|
|
@@ -18121,7 +17805,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18121
17805
|
};
|
|
18122
17806
|
}
|
|
18123
17807
|
async health() {
|
|
18124
|
-
return this.
|
|
17808
|
+
return this.store.health();
|
|
18125
17809
|
}
|
|
18126
17810
|
resolveInputPath(pathOrUrl) {
|
|
18127
17811
|
try {
|
|
@@ -18133,90 +17817,6 @@ var SearchEngine = class _SearchEngine {
|
|
|
18133
17817
|
const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
|
|
18134
17818
|
return normalizeUrlPath(withoutQueryOrHash);
|
|
18135
17819
|
}
|
|
18136
|
-
async assertModelCompatibility(scope) {
|
|
18137
|
-
const modelId = await this.vectorStore.getScopeModelId(scope);
|
|
18138
|
-
if (modelId && modelId !== this.config.embeddings.model) {
|
|
18139
|
-
throw new SearchSocketError(
|
|
18140
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
18141
|
-
`Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
|
|
18142
|
-
);
|
|
18143
|
-
}
|
|
18144
|
-
}
|
|
18145
|
-
async rerankHits(query, ranked, topK) {
|
|
18146
|
-
if (!this.config.rerank.enabled) {
|
|
18147
|
-
throw new SearchSocketError(
|
|
18148
|
-
"INVALID_REQUEST",
|
|
18149
|
-
"rerank=true requested but rerank.enabled is not set to true.",
|
|
18150
|
-
400
|
|
18151
|
-
);
|
|
18152
|
-
}
|
|
18153
|
-
if (!this.reranker) {
|
|
18154
|
-
throw new SearchSocketError(
|
|
18155
|
-
"CONFIG_MISSING",
|
|
18156
|
-
`rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
|
|
18157
|
-
400
|
|
18158
|
-
);
|
|
18159
|
-
}
|
|
18160
|
-
const pageGroups = /* @__PURE__ */ new Map();
|
|
18161
|
-
for (const entry of ranked) {
|
|
18162
|
-
const url = entry.hit.metadata.url;
|
|
18163
|
-
const group = pageGroups.get(url);
|
|
18164
|
-
if (group) group.push(entry);
|
|
18165
|
-
else pageGroups.set(url, [entry]);
|
|
18166
|
-
}
|
|
18167
|
-
const MAX_CHUNKS_PER_PAGE = 5;
|
|
18168
|
-
const MIN_CHUNKS_PER_PAGE = 1;
|
|
18169
|
-
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
18170
|
-
const MAX_DOC_CHARS = 2e3;
|
|
18171
|
-
const pageCandidates = [];
|
|
18172
|
-
for (const [url, chunks] of pageGroups) {
|
|
18173
|
-
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
18174
|
-
const bestScore = byScore[0].finalScore;
|
|
18175
|
-
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
18176
|
-
const selected = byScore.filter(
|
|
18177
|
-
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
18178
|
-
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
18179
|
-
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
18180
|
-
const first = selected[0].hit.metadata;
|
|
18181
|
-
const parts = [first.title];
|
|
18182
|
-
if (first.description) {
|
|
18183
|
-
parts.push(first.description);
|
|
18184
|
-
}
|
|
18185
|
-
if (first.keywords && first.keywords.length > 0) {
|
|
18186
|
-
parts.push(first.keywords.join(", "));
|
|
18187
|
-
}
|
|
18188
|
-
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
18189
|
-
parts.push(body);
|
|
18190
|
-
let text = parts.join("\n\n");
|
|
18191
|
-
if (text.length > MAX_DOC_CHARS) {
|
|
18192
|
-
text = text.slice(0, MAX_DOC_CHARS);
|
|
18193
|
-
}
|
|
18194
|
-
pageCandidates.push({ id: url, text });
|
|
18195
|
-
}
|
|
18196
|
-
const maxCandidates = Math.max(topK, this.config.rerank.topN);
|
|
18197
|
-
const cappedCandidates = pageCandidates.slice(0, maxCandidates);
|
|
18198
|
-
const reranked = await this.reranker.rerank(
|
|
18199
|
-
query,
|
|
18200
|
-
cappedCandidates,
|
|
18201
|
-
maxCandidates
|
|
18202
|
-
);
|
|
18203
|
-
const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
|
|
18204
|
-
return ranked.map((entry) => {
|
|
18205
|
-
const pageScore = scoreByUrl.get(entry.hit.metadata.url);
|
|
18206
|
-
const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
|
|
18207
|
-
if (pageScore === void 0 || !Number.isFinite(pageScore)) {
|
|
18208
|
-
return { ...entry, finalScore: base };
|
|
18209
|
-
}
|
|
18210
|
-
const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
|
|
18211
|
-
return {
|
|
18212
|
-
...entry,
|
|
18213
|
-
finalScore: Number.isFinite(combined) ? combined : base
|
|
18214
|
-
};
|
|
18215
|
-
}).sort((a, b) => {
|
|
18216
|
-
const delta = b.finalScore - a.finalScore;
|
|
18217
|
-
return Number.isNaN(delta) ? 0 : delta;
|
|
18218
|
-
});
|
|
18219
|
-
}
|
|
18220
17820
|
};
|
|
18221
17821
|
|
|
18222
17822
|
// src/sveltekit/handle.ts
|
|
@@ -18370,7 +17970,8 @@ function searchsocketHandle(options = {}) {
|
|
|
18370
17970
|
throw new SearchSocketError("INVALID_REQUEST", "Malformed JSON request body", 400);
|
|
18371
17971
|
}
|
|
18372
17972
|
const engine = await getEngine();
|
|
18373
|
-
const
|
|
17973
|
+
const searchRequest = body;
|
|
17974
|
+
const result = await engine.search(searchRequest);
|
|
18374
17975
|
return withCors(
|
|
18375
17976
|
new Response(JSON.stringify(result), {
|
|
18376
17977
|
status: 200,
|
|
@@ -18430,9 +18031,8 @@ function withCors(response, request, config) {
|
|
|
18430
18031
|
}
|
|
18431
18032
|
function ensureStateDirs(cwd, stateDir, scope) {
|
|
18432
18033
|
const statePath = path__default.default.resolve(cwd, stateDir);
|
|
18433
|
-
|
|
18434
|
-
|
|
18435
|
-
return { statePath, pagesPath };
|
|
18034
|
+
fs__default.default.mkdirSync(statePath, { recursive: true });
|
|
18035
|
+
return { statePath };
|
|
18436
18036
|
}
|
|
18437
18037
|
function sha1(input) {
|
|
18438
18038
|
return crypto.createHash("sha1").update(input).digest("hex");
|
|
@@ -18682,7 +18282,7 @@ function buildEmbeddingText(chunk, prependTitle) {
|
|
|
18682
18282
|
|
|
18683
18283
|
${chunk.chunkText}`;
|
|
18684
18284
|
}
|
|
18685
|
-
function
|
|
18285
|
+
function chunkPage(page, config, scope) {
|
|
18686
18286
|
const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
|
|
18687
18287
|
const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
|
|
18688
18288
|
const chunks = [];
|
|
@@ -19599,6 +19199,17 @@ function extractFromHtml(url, html, config) {
|
|
|
19599
19199
|
if ($(`[${config.extract.noindexAttr}]`).length > 0) {
|
|
19600
19200
|
return null;
|
|
19601
19201
|
}
|
|
19202
|
+
const weightRaw = $("meta[name='searchsocket-weight']").attr("content")?.trim();
|
|
19203
|
+
let weight;
|
|
19204
|
+
if (weightRaw !== void 0) {
|
|
19205
|
+
const parsed = Number(weightRaw);
|
|
19206
|
+
if (Number.isFinite(parsed) && parsed >= 0) {
|
|
19207
|
+
weight = parsed;
|
|
19208
|
+
}
|
|
19209
|
+
}
|
|
19210
|
+
if (weight === 0) {
|
|
19211
|
+
return null;
|
|
19212
|
+
}
|
|
19602
19213
|
const description = $("meta[name='description']").attr("content")?.trim() || $("meta[property='og:description']").attr("content")?.trim() || void 0;
|
|
19603
19214
|
const keywordsRaw = $("meta[name='keywords']").attr("content")?.trim();
|
|
19604
19215
|
const keywords = keywordsRaw ? keywordsRaw.split(",").map((k) => k.trim()).filter(Boolean) : void 0;
|
|
@@ -19654,7 +19265,8 @@ function extractFromHtml(url, html, config) {
|
|
|
19654
19265
|
noindex: false,
|
|
19655
19266
|
tags,
|
|
19656
19267
|
description,
|
|
19657
|
-
keywords
|
|
19268
|
+
keywords,
|
|
19269
|
+
weight
|
|
19658
19270
|
};
|
|
19659
19271
|
}
|
|
19660
19272
|
function extractFromMarkdown(url, markdown, title) {
|
|
@@ -19667,6 +19279,14 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
19667
19279
|
if (frontmatter.noindex === true || searchsocketMeta?.noindex === true) {
|
|
19668
19280
|
return null;
|
|
19669
19281
|
}
|
|
19282
|
+
let mdWeight;
|
|
19283
|
+
const rawWeight = searchsocketMeta?.weight ?? frontmatter.searchsocketWeight;
|
|
19284
|
+
if (typeof rawWeight === "number" && Number.isFinite(rawWeight) && rawWeight >= 0) {
|
|
19285
|
+
mdWeight = rawWeight;
|
|
19286
|
+
}
|
|
19287
|
+
if (mdWeight === 0) {
|
|
19288
|
+
return null;
|
|
19289
|
+
}
|
|
19670
19290
|
const content = parsed.content;
|
|
19671
19291
|
const normalized = normalizeMarkdown(content);
|
|
19672
19292
|
if (!normalizeText(normalized)) {
|
|
@@ -19689,56 +19309,10 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
19689
19309
|
noindex: false,
|
|
19690
19310
|
tags: normalizeUrlPath(url).split("/").filter(Boolean).slice(0, 1),
|
|
19691
19311
|
description: fmDescription,
|
|
19692
|
-
keywords: fmKeywords
|
|
19312
|
+
keywords: fmKeywords,
|
|
19313
|
+
weight: mdWeight
|
|
19693
19314
|
};
|
|
19694
19315
|
}
|
|
19695
|
-
function yamlString(value) {
|
|
19696
|
-
return JSON.stringify(value);
|
|
19697
|
-
}
|
|
19698
|
-
function yamlArray(values) {
|
|
19699
|
-
return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
|
|
19700
|
-
}
|
|
19701
|
-
function buildMirrorMarkdown(page) {
|
|
19702
|
-
const frontmatterLines = [
|
|
19703
|
-
"---",
|
|
19704
|
-
`url: ${yamlString(page.url)}`,
|
|
19705
|
-
`title: ${yamlString(page.title)}`,
|
|
19706
|
-
`scope: ${yamlString(page.scope)}`,
|
|
19707
|
-
`routeFile: ${yamlString(page.routeFile)}`,
|
|
19708
|
-
`routeResolution: ${yamlString(page.routeResolution)}`,
|
|
19709
|
-
`generatedAt: ${yamlString(page.generatedAt)}`,
|
|
19710
|
-
`incomingLinks: ${page.incomingLinks}`,
|
|
19711
|
-
`outgoingLinks: ${page.outgoingLinks}`,
|
|
19712
|
-
`depth: ${page.depth}`,
|
|
19713
|
-
`tags: ${yamlArray(page.tags)}`,
|
|
19714
|
-
"---",
|
|
19715
|
-
""
|
|
19716
|
-
];
|
|
19717
|
-
return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
|
|
19718
|
-
}
|
|
19719
|
-
function stripGeneratedAt(content) {
|
|
19720
|
-
return content.replace(/^generatedAt: .*$/m, "");
|
|
19721
|
-
}
|
|
19722
|
-
async function writeMirrorPage(statePath, scope, page) {
|
|
19723
|
-
const relative = urlPathToMirrorRelative(page.url);
|
|
19724
|
-
const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
|
|
19725
|
-
await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
|
|
19726
|
-
const newContent = buildMirrorMarkdown(page);
|
|
19727
|
-
try {
|
|
19728
|
-
const existing = await fs4__default.default.readFile(outputPath, "utf8");
|
|
19729
|
-
if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
|
|
19730
|
-
return outputPath;
|
|
19731
|
-
}
|
|
19732
|
-
} catch {
|
|
19733
|
-
}
|
|
19734
|
-
await fs4__default.default.writeFile(outputPath, newContent, "utf8");
|
|
19735
|
-
return outputPath;
|
|
19736
|
-
}
|
|
19737
|
-
/**
 * Empties the mirror directory of a scope.
 *
 * Removes `<statePath>/pages/<scope.scopeName>` recursively (a missing
 * directory is not an error because of `force: true`) and recreates it.
 *
 * @param {string} statePath - Root state directory.
 * @param {{scopeName: string}} scope - Scope whose mirror directory is reset.
 * @returns {Promise<void>}
 */
async function cleanMirrorForScope(statePath, scope) {
  const scopeDir = path__default.default.join(statePath, "pages", scope.scopeName);
  const fsPromises = fs4__default.default;
  await fsPromises.rm(scopeDir, { recursive: true, force: true });
  await fsPromises.mkdir(scopeDir, { recursive: true });
}
|
|
19742
19316
|
function segmentToRegex(segment) {
|
|
19743
19317
|
if (segment.startsWith("(") && segment.endsWith(")")) {
|
|
19744
19318
|
return { regex: "", score: 0 };
|
|
@@ -19899,7 +19473,7 @@ async function parseManifest(cwd, outputDir) {
|
|
|
19899
19473
|
const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
|
|
19900
19474
|
let content;
|
|
19901
19475
|
try {
|
|
19902
|
-
content = await
|
|
19476
|
+
content = await fs3__default.default.readFile(manifestPath, "utf8");
|
|
19903
19477
|
} catch {
|
|
19904
19478
|
throw new SearchSocketError(
|
|
19905
19479
|
"BUILD_MANIFEST_NOT_FOUND",
|
|
@@ -19958,15 +19532,7 @@ function expandDynamicUrl(url, value) {
|
|
|
19958
19532
|
return url.replace(/\[\[?\.\.\.[^\]]+\]?\]|\[\[[^\]]+\]\]|\[[^\]]+\]/g, value);
|
|
19959
19533
|
}
|
|
19960
19534
|
function isExcluded(url, patterns) {
|
|
19961
|
-
|
|
19962
|
-
if (pattern.endsWith("/*")) {
|
|
19963
|
-
const prefix = pattern.slice(0, -1);
|
|
19964
|
-
if (url.startsWith(prefix) || url === prefix.slice(0, -1)) return true;
|
|
19965
|
-
} else if (url === pattern) {
|
|
19966
|
-
return true;
|
|
19967
|
-
}
|
|
19968
|
-
}
|
|
19969
|
-
return false;
|
|
19535
|
+
return matchUrlPatterns(url, patterns);
|
|
19970
19536
|
}
|
|
19971
19537
|
function findFreePort() {
|
|
19972
19538
|
return new Promise((resolve, reject) => {
|
|
@@ -20080,7 +19646,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
|
|
|
20080
19646
|
const visited = /* @__PURE__ */ new Set();
|
|
20081
19647
|
const pages = [];
|
|
20082
19648
|
const queue = [];
|
|
20083
|
-
const limit =
|
|
19649
|
+
const limit = pLimit__default.default(8);
|
|
20084
19650
|
for (const seed of seedUrls) {
|
|
20085
19651
|
const normalized = normalizeUrlPath(seed);
|
|
20086
19652
|
if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
|
|
@@ -20162,7 +19728,7 @@ async function loadBuildPages(cwd, config, maxPages) {
|
|
|
20162
19728
|
const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
|
|
20163
19729
|
const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
|
|
20164
19730
|
try {
|
|
20165
|
-
const concurrencyLimit =
|
|
19731
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
20166
19732
|
const results = await Promise.allSettled(
|
|
20167
19733
|
selected.map(
|
|
20168
19734
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20236,7 +19802,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
20236
19802
|
const selected = typeof limit === "number" ? files.slice(0, limit) : files;
|
|
20237
19803
|
const pages = [];
|
|
20238
19804
|
for (const filePath of selected) {
|
|
20239
|
-
const raw = await
|
|
19805
|
+
const raw = await fs3__default.default.readFile(filePath, "utf8");
|
|
20240
19806
|
const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
|
|
20241
19807
|
pages.push({
|
|
20242
19808
|
url: filePathToUrl(filePath, baseDir),
|
|
@@ -20331,7 +19897,7 @@ async function loadCrawledPages(config, maxPages) {
|
|
|
20331
19897
|
const routes = await resolveRoutes(config);
|
|
20332
19898
|
const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
|
|
20333
19899
|
const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
|
|
20334
|
-
const concurrencyLimit =
|
|
19900
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
20335
19901
|
const results = await Promise.allSettled(
|
|
20336
19902
|
selected.map(
|
|
20337
19903
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20372,7 +19938,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
|
|
|
20372
19938
|
const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
|
|
20373
19939
|
const pages = [];
|
|
20374
19940
|
for (const filePath of selected) {
|
|
20375
|
-
const html = await
|
|
19941
|
+
const html = await fs3__default.default.readFile(filePath, "utf8");
|
|
20376
19942
|
pages.push({
|
|
20377
19943
|
url: staticHtmlFileToUrl(filePath, outputDir),
|
|
20378
19944
|
html,
|
|
@@ -20382,35 +19948,113 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
|
|
|
20382
19948
|
}
|
|
20383
19949
|
return pages;
|
|
20384
19950
|
}
|
|
19951
|
+
/**
 * Parses robots.txt text and returns the allow/disallow rules that apply to
 * one crawler.
 *
 * Parsing rules:
 * - `#` starts a comment; blank lines and lines without a colon are ignored.
 * - Consecutive `User-agent` lines form one group that shares the rules
 *   following them.
 * - A `User-agent` line that appears after the current group has received an
 *   `Allow`/`Disallow` line starts a NEW group, so rules of a later group are
 *   never attached to agents of an earlier one.
 * - Any other directive (anything except user-agent/allow/disallow) closes the
 *   current group.
 * - `Allow`/`Disallow` lines with an empty value contribute no pattern.
 *
 * @param {string} content - Raw robots.txt text.
 * @param {string} [userAgent="Searchsocket"] - Agent name to look up (case-insensitive, exact match).
 * @returns {{disallow: string[], allow: string[]}} Rules for `userAgent` when
 *   that agent has at least one pattern; otherwise the `*` group's rules, or
 *   empty lists when there is no `*` group.
 */
function parseRobotsTxt(content, userAgent = "Searchsocket") {
  const agentGroups = /* @__PURE__ */ new Map();
  let currentAgents = [];
  // Set once the open group has seen an allow/disallow line; the next
  // User-agent line then begins a fresh group instead of extending this one.
  let groupHasRules = false;
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.replace(/#.*$/, "").trim();
    if (!line) continue;
    const colonIdx = line.indexOf(":");
    if (colonIdx === -1) continue;
    const directive = line.slice(0, colonIdx).trim().toLowerCase();
    const value = line.slice(colonIdx + 1).trim();
    if (directive === "user-agent") {
      if (groupHasRules) {
        currentAgents = [];
        groupHasRules = false;
      }
      const agentName = value.toLowerCase();
      currentAgents.push(agentName);
      if (!agentGroups.has(agentName)) {
        agentGroups.set(agentName, { disallow: [], allow: [] });
      }
    } else if (directive === "disallow" || directive === "allow") {
      // Rules that precede any User-agent line have no group to attach to.
      if (currentAgents.length === 0) continue;
      groupHasRules = true;
      if (!value) continue;
      for (const agent of currentAgents) {
        agentGroups.get(agent)[directive].push(value);
      }
    } else {
      currentAgents = [];
      groupHasRules = false;
    }
  }
  const specific = agentGroups.get(userAgent.toLowerCase());
  if (specific && (specific.disallow.length > 0 || specific.allow.length > 0)) {
    return specific;
  }
  return agentGroups.get("*") ?? { disallow: [], allow: [] };
}
|
|
19986
|
+
/**
 * Decides whether a URL path is blocked by a parsed robots.txt rule set.
 *
 * Patterns are treated as plain prefixes. The longest matching disallow
 * prefix is compared with the longest matching allow prefix: the path is
 * blocked only when the disallow prefix is strictly longer, so a tie is
 * treated as allowed.
 *
 * @param {string} urlPath - Path to check.
 * @param {{disallow: string[], allow: string[]}} rules3 - Rule set to apply.
 * @returns {boolean} True when the path is blocked.
 */
function isBlockedByRobots(urlPath, rules3) {
  // Length of the longest pattern that prefixes urlPath (0 when none match).
  const longestPrefixLength = (patterns) => {
    let best = 0;
    for (const candidate of patterns) {
      if (candidate.length > best && urlPath.startsWith(candidate)) {
        best = candidate.length;
      }
    }
    return best;
  };
  const disallowLength = longestPrefixLength(rules3.disallow);
  if (disallowLength === 0) {
    return false;
  }
  return longestPrefixLength(rules3.allow) < disallowLength;
}
|
|
20002
|
+
/**
 * Reads `robots.txt` from a directory and parses it.
 *
 * @param {string} dir - Directory expected to contain `robots.txt`.
 * @returns {Promise<{disallow: string[], allow: string[]}|null>} Parsed rules,
 *   or null when reading or parsing throws.
 */
async function loadRobotsTxtFromDir(dir) {
  let rules = null;
  try {
    const robotsFile = path__default.default.join(dir, "robots.txt");
    const text = await fs3__default.default.readFile(robotsFile, "utf8");
    rules = parseRobotsTxt(text);
  } catch {
    // Any failure is reported to the caller as "no rules".
    rules = null;
  }
  return rules;
}
|
|
20010
|
+
/**
 * Downloads `/robots.txt` from the origin of `baseUrl` and parses it.
 *
 * @param {string} baseUrl - URL whose origin is queried.
 * @returns {Promise<{disallow: string[], allow: string[]}|null>} Parsed rules,
 *   or null when the response is not OK or anything throws (invalid URL,
 *   network failure, ...).
 */
async function fetchRobotsTxt(baseUrl) {
  try {
    const robotsUrl = new URL("/robots.txt", baseUrl);
    const response = await fetch(robotsUrl.href);
    return response.ok ? parseRobotsTxt(await response.text()) : null;
  } catch {
    return null;
  }
}
|
|
20385
20021
|
|
|
20386
20022
|
// src/indexing/pipeline.ts
|
|
20387
|
-
|
|
20388
|
-
|
|
20389
|
-
|
|
20390
|
-
|
|
20023
|
+
/**
 * Builds a plain-text summary of a page.
 *
 * The summary consists of the title, the description (when truthy), the
 * keywords joined with ", " (when non-empty) and the markdown body with most
 * markup removed, separated by blank lines. Results longer than `maxChars`
 * are cut to `maxChars` characters and trimmed.
 *
 * @param {{title: string, description?: string, keywords?: string[], markdown: string}} page - Page to summarize.
 * @param {number} [maxChars=3500] - Maximum length of the summary.
 * @returns {string} The summary text.
 */
function buildPageSummary(page, maxChars = 3500) {
  // Applied in order; each pair is [pattern, replacement].
  const markupRules = [
    [/```[\s\S]*?```/g, " "], // fenced code blocks
    [/`([^`]+)`/g, "$1"], // inline code keeps its content
    [/!?\[([^\]]*)\]\([^)]*\)/g, "$1"], // links/images keep their label
    [/^#{1,6}\s+/gm, ""], // heading markers
    [/[>*_|~\-]/g, " "], // remaining markdown punctuation
    [/\s+/g, " "] // collapse whitespace
  ];
  const plainBody = markupRules.reduce((textSoFar, [pattern, replacement]) => textSoFar.replace(pattern, replacement), page.markdown).trim();

  const sections = [page.title];
  if (page.description) {
    sections.push(page.description);
  }
  if (page.keywords && page.keywords.length > 0) {
    sections.push(page.keywords.join(", "));
  }
  if (plainBody) {
    sections.push(plainBody);
  }

  const summary = sections.join("\n\n");
  return summary.length <= maxChars ? summary : summary.slice(0, maxChars).trim();
}
|
|
20391
20039
|
var IndexPipeline = class _IndexPipeline {
|
|
20392
20040
|
cwd;
|
|
20393
20041
|
config;
|
|
20394
|
-
|
|
20395
|
-
vectorStore;
|
|
20042
|
+
store;
|
|
20396
20043
|
logger;
|
|
20397
20044
|
constructor(options) {
|
|
20398
20045
|
this.cwd = options.cwd;
|
|
20399
20046
|
this.config = options.config;
|
|
20400
|
-
this.
|
|
20401
|
-
this.vectorStore = options.vectorStore;
|
|
20047
|
+
this.store = options.store;
|
|
20402
20048
|
this.logger = options.logger;
|
|
20403
20049
|
}
|
|
20404
20050
|
static async create(options = {}) {
|
|
20405
20051
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
20406
20052
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
20407
|
-
const
|
|
20408
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
20053
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
20409
20054
|
return new _IndexPipeline({
|
|
20410
20055
|
cwd,
|
|
20411
20056
|
config,
|
|
20412
|
-
|
|
20413
|
-
vectorStore,
|
|
20057
|
+
store,
|
|
20414
20058
|
logger: options.logger ?? new Logger()
|
|
20415
20059
|
});
|
|
20416
20060
|
}
|
|
@@ -20430,25 +20074,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20430
20074
|
stageTimingsMs[name] = Math.round(hrTimeMs(start));
|
|
20431
20075
|
};
|
|
20432
20076
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
20433
|
-
|
|
20077
|
+
ensureStateDirs(this.cwd, this.config.state.dir);
|
|
20434
20078
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
20435
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode},
|
|
20079
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
|
|
20436
20080
|
if (options.force) {
|
|
20437
20081
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
20438
|
-
await cleanMirrorForScope(statePath, scope);
|
|
20439
20082
|
}
|
|
20440
20083
|
if (options.dryRun) {
|
|
20441
20084
|
this.logger.info("Dry run \u2014 no writes will be performed");
|
|
20442
20085
|
}
|
|
20443
20086
|
const manifestStart = stageStart();
|
|
20444
|
-
const existingHashes = await this.
|
|
20445
|
-
const existingModelId = await this.vectorStore.getScopeModelId(scope);
|
|
20446
|
-
if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
|
|
20447
|
-
throw new SearchSocketError(
|
|
20448
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
20449
|
-
`Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
|
|
20450
|
-
);
|
|
20451
|
-
}
|
|
20087
|
+
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
|
|
20452
20088
|
stageEnd("manifest", manifestStart);
|
|
20453
20089
|
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
|
|
20454
20090
|
const sourceStart = stageStart();
|
|
@@ -20465,6 +20101,53 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20465
20101
|
}
|
|
20466
20102
|
stageEnd("source", sourceStart);
|
|
20467
20103
|
this.logger.info(`Loaded ${sourcePages.length} page${sourcePages.length === 1 ? "" : "s"} (${stageTimingsMs["source"]}ms)`);
|
|
20104
|
+
const filterStart = stageStart();
|
|
20105
|
+
let filteredSourcePages = sourcePages;
|
|
20106
|
+
if (this.config.exclude.length > 0) {
|
|
20107
|
+
const beforeExclude = filteredSourcePages.length;
|
|
20108
|
+
filteredSourcePages = filteredSourcePages.filter((p) => {
|
|
20109
|
+
const url = normalizeUrlPath(p.url);
|
|
20110
|
+
if (matchUrlPatterns(url, this.config.exclude)) {
|
|
20111
|
+
this.logger.debug(`Excluding ${url} (matched exclude pattern)`);
|
|
20112
|
+
return false;
|
|
20113
|
+
}
|
|
20114
|
+
return true;
|
|
20115
|
+
});
|
|
20116
|
+
const excludedCount = beforeExclude - filteredSourcePages.length;
|
|
20117
|
+
if (excludedCount > 0) {
|
|
20118
|
+
this.logger.info(`Excluded ${excludedCount} page${excludedCount === 1 ? "" : "s"} by config exclude patterns`);
|
|
20119
|
+
}
|
|
20120
|
+
}
|
|
20121
|
+
if (this.config.respectRobotsTxt) {
|
|
20122
|
+
let robotsRules = null;
|
|
20123
|
+
if (sourceMode === "static-output") {
|
|
20124
|
+
robotsRules = await loadRobotsTxtFromDir(
|
|
20125
|
+
path__default.default.resolve(this.cwd, this.config.source.staticOutputDir)
|
|
20126
|
+
);
|
|
20127
|
+
} else if (sourceMode === "build" && this.config.source.build) {
|
|
20128
|
+
robotsRules = await loadRobotsTxtFromDir(
|
|
20129
|
+
path__default.default.resolve(this.cwd, this.config.source.build.outputDir)
|
|
20130
|
+
);
|
|
20131
|
+
} else if (sourceMode === "crawl" && this.config.source.crawl) {
|
|
20132
|
+
robotsRules = await fetchRobotsTxt(this.config.source.crawl.baseUrl);
|
|
20133
|
+
}
|
|
20134
|
+
if (robotsRules) {
|
|
20135
|
+
const beforeRobots = filteredSourcePages.length;
|
|
20136
|
+
filteredSourcePages = filteredSourcePages.filter((p) => {
|
|
20137
|
+
const url = normalizeUrlPath(p.url);
|
|
20138
|
+
if (isBlockedByRobots(url, robotsRules)) {
|
|
20139
|
+
this.logger.debug(`Excluding ${url} (blocked by robots.txt)`);
|
|
20140
|
+
return false;
|
|
20141
|
+
}
|
|
20142
|
+
return true;
|
|
20143
|
+
});
|
|
20144
|
+
const robotsExcluded = beforeRobots - filteredSourcePages.length;
|
|
20145
|
+
if (robotsExcluded > 0) {
|
|
20146
|
+
this.logger.info(`Excluded ${robotsExcluded} page${robotsExcluded === 1 ? "" : "s"} by robots.txt`);
|
|
20147
|
+
}
|
|
20148
|
+
}
|
|
20149
|
+
}
|
|
20150
|
+
stageEnd("filter", filterStart);
|
|
20468
20151
|
const routeStart = stageStart();
|
|
20469
20152
|
const routePatterns = await buildRoutePatterns(this.cwd);
|
|
20470
20153
|
stageEnd("route_map", routeStart);
|
|
@@ -20472,7 +20155,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20472
20155
|
const extractStart = stageStart();
|
|
20473
20156
|
this.logger.info("Extracting content...");
|
|
20474
20157
|
const extractedPages = [];
|
|
20475
|
-
for (const sourcePage of
|
|
20158
|
+
for (const sourcePage of filteredSourcePages) {
|
|
20476
20159
|
const extracted = sourcePage.html ? extractFromHtml(sourcePage.url, sourcePage.html, this.config) : extractFromMarkdown(sourcePage.url, sourcePage.markdown ?? "", sourcePage.title);
|
|
20477
20160
|
if (!extracted) {
|
|
20478
20161
|
this.logger.warn(
|
|
@@ -20498,16 +20181,29 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20498
20181
|
seenUrls.add(page.url);
|
|
20499
20182
|
uniquePages.push(page);
|
|
20500
20183
|
}
|
|
20184
|
+
const indexablePages = [];
|
|
20185
|
+
for (const page of uniquePages) {
|
|
20186
|
+
const effectiveWeight = page.weight ?? findPageWeight(page.url, this.config.ranking.pageWeights);
|
|
20187
|
+
if (effectiveWeight === 0) {
|
|
20188
|
+
this.logger.debug(`Excluding ${page.url} (zero weight)`);
|
|
20189
|
+
continue;
|
|
20190
|
+
}
|
|
20191
|
+
indexablePages.push(page);
|
|
20192
|
+
}
|
|
20193
|
+
const zeroWeightCount = uniquePages.length - indexablePages.length;
|
|
20194
|
+
if (zeroWeightCount > 0) {
|
|
20195
|
+
this.logger.info(`Excluded ${zeroWeightCount} page${zeroWeightCount === 1 ? "" : "s"} with zero weight`);
|
|
20196
|
+
}
|
|
20501
20197
|
stageEnd("extract", extractStart);
|
|
20502
|
-
const skippedPages =
|
|
20503
|
-
this.logger.info(`Extracted ${
|
|
20198
|
+
const skippedPages = filteredSourcePages.length - indexablePages.length;
|
|
20199
|
+
this.logger.info(`Extracted ${indexablePages.length} page${indexablePages.length === 1 ? "" : "s"}${skippedPages > 0 ? ` (${skippedPages} skipped)` : ""} (${stageTimingsMs["extract"]}ms)`);
|
|
20504
20200
|
const linkStart = stageStart();
|
|
20505
|
-
const pageSet = new Set(
|
|
20201
|
+
const pageSet = new Set(indexablePages.map((page) => normalizeUrlPath(page.url)));
|
|
20506
20202
|
const incomingLinkCount = /* @__PURE__ */ new Map();
|
|
20507
|
-
for (const page of
|
|
20203
|
+
for (const page of indexablePages) {
|
|
20508
20204
|
incomingLinkCount.set(page.url, incomingLinkCount.get(page.url) ?? 0);
|
|
20509
20205
|
}
|
|
20510
|
-
for (const page of
|
|
20206
|
+
for (const page of indexablePages) {
|
|
20511
20207
|
for (const outgoing of page.outgoingLinks) {
|
|
20512
20208
|
if (!pageSet.has(outgoing)) {
|
|
20513
20209
|
continue;
|
|
@@ -20517,9 +20213,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20517
20213
|
}
|
|
20518
20214
|
stageEnd("links", linkStart);
|
|
20519
20215
|
this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
|
|
20520
|
-
const
|
|
20521
|
-
this.logger.info("
|
|
20522
|
-
const
|
|
20216
|
+
const pagesStart = stageStart();
|
|
20217
|
+
this.logger.info("Building indexed pages...");
|
|
20218
|
+
const pages = [];
|
|
20523
20219
|
let routeExact = 0;
|
|
20524
20220
|
let routeBestEffort = 0;
|
|
20525
20221
|
const precomputedRoutes = /* @__PURE__ */ new Map();
|
|
@@ -20531,7 +20227,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20531
20227
|
});
|
|
20532
20228
|
}
|
|
20533
20229
|
}
|
|
20534
|
-
for (const page of
|
|
20230
|
+
for (const page of indexablePages) {
|
|
20535
20231
|
const routeMatch = precomputedRoutes.get(normalizeUrlPath(page.url)) ?? mapUrlToRoute(page.url, routePatterns);
|
|
20536
20232
|
if (routeMatch.routeResolution === "best-effort") {
|
|
20537
20233
|
if (this.config.source.strictRouteMapping) {
|
|
@@ -20548,7 +20244,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20548
20244
|
} else {
|
|
20549
20245
|
routeExact += 1;
|
|
20550
20246
|
}
|
|
20551
|
-
const
|
|
20247
|
+
const indexedPage = {
|
|
20552
20248
|
url: page.url,
|
|
20553
20249
|
title: page.title,
|
|
20554
20250
|
scope: scope.scopeName,
|
|
@@ -20563,35 +20259,38 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20563
20259
|
description: page.description,
|
|
20564
20260
|
keywords: page.keywords
|
|
20565
20261
|
};
|
|
20566
|
-
|
|
20567
|
-
|
|
20568
|
-
await writeMirrorPage(statePath, scope, mirror);
|
|
20569
|
-
}
|
|
20570
|
-
this.logger.event("markdown_written", { url: page.url });
|
|
20262
|
+
pages.push(indexedPage);
|
|
20263
|
+
this.logger.event("page_indexed", { url: page.url });
|
|
20571
20264
|
}
|
|
20572
20265
|
if (!options.dryRun) {
|
|
20573
|
-
const pageRecords =
|
|
20574
|
-
|
|
20575
|
-
|
|
20576
|
-
|
|
20577
|
-
|
|
20578
|
-
|
|
20579
|
-
|
|
20580
|
-
|
|
20581
|
-
|
|
20582
|
-
|
|
20583
|
-
|
|
20584
|
-
|
|
20585
|
-
|
|
20586
|
-
|
|
20587
|
-
|
|
20588
|
-
|
|
20266
|
+
const pageRecords = pages.map((p) => {
|
|
20267
|
+
const summary = buildPageSummary(p);
|
|
20268
|
+
return {
|
|
20269
|
+
url: p.url,
|
|
20270
|
+
title: p.title,
|
|
20271
|
+
markdown: p.markdown,
|
|
20272
|
+
projectId: scope.projectId,
|
|
20273
|
+
scopeName: scope.scopeName,
|
|
20274
|
+
routeFile: p.routeFile,
|
|
20275
|
+
routeResolution: p.routeResolution,
|
|
20276
|
+
incomingLinks: p.incomingLinks,
|
|
20277
|
+
outgoingLinks: p.outgoingLinks,
|
|
20278
|
+
depth: p.depth,
|
|
20279
|
+
tags: p.tags,
|
|
20280
|
+
indexedAt: p.generatedAt,
|
|
20281
|
+
summary,
|
|
20282
|
+
description: p.description,
|
|
20283
|
+
keywords: p.keywords
|
|
20284
|
+
};
|
|
20285
|
+
});
|
|
20286
|
+
await this.store.deletePages(scope);
|
|
20287
|
+
await this.store.upsertPages(pageRecords, scope);
|
|
20589
20288
|
}
|
|
20590
|
-
stageEnd("
|
|
20591
|
-
this.logger.info(`
|
|
20289
|
+
stageEnd("pages", pagesStart);
|
|
20290
|
+
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
20592
20291
|
const chunkStart = stageStart();
|
|
20593
20292
|
this.logger.info("Chunking pages...");
|
|
20594
|
-
let chunks =
|
|
20293
|
+
let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
|
|
20595
20294
|
const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
|
|
20596
20295
|
if (typeof maxChunks === "number") {
|
|
20597
20296
|
chunks = chunks.slice(0, maxChunks);
|
|
@@ -20623,125 +20322,59 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20623
20322
|
});
|
|
20624
20323
|
const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
20625
20324
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
20626
|
-
const
|
|
20627
|
-
|
|
20628
|
-
for (const chunk of changedChunks) {
|
|
20629
|
-
chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
|
|
20630
|
-
}
|
|
20631
|
-
const estimatedTokens = changedChunks.reduce(
|
|
20632
|
-
(sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
|
|
20633
|
-
0
|
|
20634
|
-
);
|
|
20635
|
-
const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
|
|
20636
|
-
const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
|
|
20637
|
-
let newEmbeddings = 0;
|
|
20638
|
-
const vectorsByChunk = /* @__PURE__ */ new Map();
|
|
20325
|
+
const upsertStart = stageStart();
|
|
20326
|
+
let documentsUpserted = 0;
|
|
20639
20327
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
20640
|
-
this.logger.info(`
|
|
20641
|
-
const
|
|
20642
|
-
|
|
20643
|
-
|
|
20644
|
-
|
|
20645
|
-
|
|
20646
|
-
|
|
20647
|
-
|
|
20648
|
-
|
|
20649
|
-
|
|
20650
|
-
);
|
|
20651
|
-
|
|
20652
|
-
for (let i = 0; i < changedChunks.length; i += 1) {
|
|
20653
|
-
const chunk = changedChunks[i];
|
|
20654
|
-
const embedding = embeddings[i];
|
|
20655
|
-
if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
|
|
20656
|
-
throw new SearchSocketError(
|
|
20657
|
-
"VECTOR_BACKEND_UNAVAILABLE",
|
|
20658
|
-
`Embedding provider returned an invalid vector for chunk index ${i}.`
|
|
20659
|
-
);
|
|
20660
|
-
}
|
|
20661
|
-
vectorsByChunk.set(chunk.chunkKey, embedding);
|
|
20662
|
-
newEmbeddings += 1;
|
|
20663
|
-
this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
|
|
20664
|
-
}
|
|
20665
|
-
}
|
|
20666
|
-
stageEnd("embedding", embedStart);
|
|
20667
|
-
if (changedChunks.length > 0) {
|
|
20668
|
-
this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
|
|
20669
|
-
} else {
|
|
20670
|
-
this.logger.info("No chunks to embed \u2014 all up to date");
|
|
20671
|
-
}
|
|
20672
|
-
const syncStart = stageStart();
|
|
20673
|
-
if (!options.dryRun) {
|
|
20674
|
-
this.logger.info("Syncing vectors...");
|
|
20675
|
-
const upserts = [];
|
|
20676
|
-
for (const chunk of changedChunks) {
|
|
20677
|
-
const vector = vectorsByChunk.get(chunk.chunkKey);
|
|
20678
|
-
if (!vector) {
|
|
20679
|
-
continue;
|
|
20680
|
-
}
|
|
20681
|
-
upserts.push({
|
|
20328
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
|
|
20329
|
+
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
20330
|
+
const docs = changedChunks.map((chunk) => {
|
|
20331
|
+
const title = chunk.title;
|
|
20332
|
+
const sectionTitle = chunk.sectionTitle ?? "";
|
|
20333
|
+
const url = chunk.url;
|
|
20334
|
+
const tags = chunk.tags.join(",");
|
|
20335
|
+
const headingPath = chunk.headingPath.join(" > ");
|
|
20336
|
+
const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
|
|
20337
|
+
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
20338
|
+
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
20339
|
+
return {
|
|
20682
20340
|
id: chunk.chunkKey,
|
|
20683
|
-
|
|
20341
|
+
content: { title, sectionTitle, text, url, tags, headingPath },
|
|
20684
20342
|
metadata: {
|
|
20685
20343
|
projectId: scope.projectId,
|
|
20686
20344
|
scopeName: scope.scopeName,
|
|
20687
|
-
url: chunk.url,
|
|
20688
20345
|
path: chunk.path,
|
|
20689
|
-
title: chunk.title,
|
|
20690
|
-
sectionTitle: chunk.sectionTitle ?? "",
|
|
20691
|
-
headingPath: chunk.headingPath,
|
|
20692
20346
|
snippet: chunk.snippet,
|
|
20693
|
-
chunkText: chunk.chunkText.slice(0, 4e3),
|
|
20694
20347
|
ordinal: chunk.ordinal,
|
|
20695
20348
|
contentHash: chunk.contentHash,
|
|
20696
|
-
modelId: this.config.embeddings.model,
|
|
20697
20349
|
depth: chunk.depth,
|
|
20698
20350
|
incomingLinks: chunk.incomingLinks,
|
|
20699
20351
|
routeFile: chunk.routeFile,
|
|
20700
|
-
|
|
20701
|
-
|
|
20702
|
-
keywords: chunk.keywords
|
|
20352
|
+
description: chunk.description ?? "",
|
|
20353
|
+
keywords: (chunk.keywords ?? []).join(",")
|
|
20703
20354
|
}
|
|
20704
|
-
}
|
|
20705
|
-
}
|
|
20706
|
-
if (upserts.length > 0) {
|
|
20707
|
-
await this.vectorStore.upsert(upserts, scope);
|
|
20708
|
-
this.logger.event("upserted", { count: upserts.length });
|
|
20709
|
-
}
|
|
20710
|
-
if (deletes.length > 0) {
|
|
20711
|
-
await this.vectorStore.deleteByIds(deletes, scope);
|
|
20712
|
-
this.logger.event("deleted", { count: deletes.length });
|
|
20713
|
-
}
|
|
20714
|
-
}
|
|
20715
|
-
stageEnd("sync", syncStart);
|
|
20716
|
-
this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
|
|
20717
|
-
const finalizeStart = stageStart();
|
|
20718
|
-
if (!options.dryRun) {
|
|
20719
|
-
const scopeInfo = {
|
|
20720
|
-
projectId: scope.projectId,
|
|
20721
|
-
scopeName: scope.scopeName,
|
|
20722
|
-
modelId: this.config.embeddings.model,
|
|
20723
|
-
lastIndexedAt: nowIso(),
|
|
20724
|
-
vectorCount: chunks.length,
|
|
20725
|
-
lastEstimateTokens: estimatedTokens,
|
|
20726
|
-
lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
20727
|
-
lastEstimateChangedChunks: changedChunks.length
|
|
20728
|
-
};
|
|
20729
|
-
await this.vectorStore.recordScope(scopeInfo);
|
|
20730
|
-
this.logger.event("registry_updated", {
|
|
20731
|
-
scope: scope.scopeName,
|
|
20732
|
-
vectorCount: chunks.length
|
|
20355
|
+
};
|
|
20733
20356
|
});
|
|
20357
|
+
await this.store.upsertChunks(docs, scope);
|
|
20358
|
+
documentsUpserted = docs.length;
|
|
20359
|
+
this.logger.event("upserted", { count: docs.length });
|
|
20360
|
+
}
|
|
20361
|
+
if (!options.dryRun && deletes.length > 0) {
|
|
20362
|
+
await this.store.deleteByIds(deletes, scope);
|
|
20363
|
+
this.logger.event("deleted", { count: deletes.length });
|
|
20364
|
+
}
|
|
20365
|
+
stageEnd("upsert", upsertStart);
|
|
20366
|
+
if (changedChunks.length > 0) {
|
|
20367
|
+
this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
|
|
20368
|
+
} else {
|
|
20369
|
+
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
20734
20370
|
}
|
|
20735
|
-
stageEnd("finalize", finalizeStart);
|
|
20736
20371
|
this.logger.info("Done.");
|
|
20737
20372
|
return {
|
|
20738
|
-
pagesProcessed:
|
|
20373
|
+
pagesProcessed: pages.length,
|
|
20739
20374
|
chunksTotal: chunks.length,
|
|
20740
20375
|
chunksChanged: changedChunks.length,
|
|
20741
|
-
|
|
20376
|
+
documentsUpserted,
|
|
20742
20377
|
deletes: deletes.length,
|
|
20743
|
-
estimatedTokens,
|
|
20744
|
-
estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
20745
20378
|
routeExact,
|
|
20746
20379
|
routeBestEffort,
|
|
20747
20380
|
stageTimingsMs
|
|
@@ -20772,30 +20405,11 @@ function shouldRunAutoIndex(options) {
|
|
|
20772
20405
|
}
|
|
20773
20406
|
return false;
|
|
20774
20407
|
}
|
|
20775
|
-
/**
 * Creates the "searchsocket:config" plugin object.
 *
 * Its `config()` hook returns a config fragment that lists "@libsql/client"
 * and "libsql" under `ssr.external`.
 *
 * @returns {{name: string, config: function(): {ssr: {external: string[]}}}} The plugin object.
 */
function searchsocketViteConfig() {
  const plugin = {
    name: "searchsocket:config",
    config() {
      const ssr = { external: ["@libsql/client", "libsql"] };
      return { ssr };
    }
  };
  return plugin;
}
|
|
20787
20408
|
function searchsocketVitePlugin(options = {}) {
|
|
20788
20409
|
let executed = false;
|
|
20789
20410
|
let running = false;
|
|
20790
20411
|
return {
|
|
20791
20412
|
name: "searchsocket:auto-index",
|
|
20792
|
-
config() {
|
|
20793
|
-
return {
|
|
20794
|
-
ssr: {
|
|
20795
|
-
external: ["@libsql/client", "libsql"]
|
|
20796
|
-
}
|
|
20797
|
-
};
|
|
20798
|
-
},
|
|
20799
20413
|
async closeBundle() {
|
|
20800
20414
|
if (executed || running) {
|
|
20801
20415
|
return;
|
|
@@ -20817,15 +20431,14 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
20817
20431
|
});
|
|
20818
20432
|
const stats = await pipeline.run({
|
|
20819
20433
|
changedOnly: options.changedOnly ?? true,
|
|
20820
|
-
force: options.force ?? false,
|
|
20434
|
+
force: (options.force ?? false) || /^(1|true|yes)$/i.test(process.env.SEARCHSOCKET_FORCE_REINDEX ?? ""),
|
|
20821
20435
|
dryRun: options.dryRun ?? false,
|
|
20822
20436
|
scopeOverride: options.scope,
|
|
20823
20437
|
verbose: options.verbose
|
|
20824
20438
|
});
|
|
20825
20439
|
logger3.info(
|
|
20826
|
-
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged}
|
|
20440
|
+
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
|
|
20827
20441
|
);
|
|
20828
|
-
logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
|
|
20829
20442
|
executed = true;
|
|
20830
20443
|
} finally {
|
|
20831
20444
|
running = false;
|
|
@@ -20833,6 +20446,186 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
20833
20446
|
}
|
|
20834
20447
|
};
|
|
20835
20448
|
}
|
|
20449
|
+
|
|
20450
|
+
// src/sveltekit/scroll-to-text.ts
|
|
20451
|
+
// CSS class whose rule runs the fade-out animation defined below.
var HIGHLIGHT_CLASS = "ssk-highlight";
// Duration of the fade animation in milliseconds.
var HIGHLIGHT_DURATION = 2e3;
var HIGHLIGHT_MARKER_ATTR = "data-ssk-highlight-marker";
// Name used in the ::highlight() rule below.
var HIGHLIGHT_NAME = "ssk-search-match";
// Flipped to true the first time the stylesheet is injected.
var styleInjected = false;
/**
 * Injects the highlight stylesheet into `document.head`, at most once.
 *
 * Does nothing when the style was already injected or when no `document`
 * global exists.
 *
 * @returns {void}
 */
function ensureHighlightStyle() {
  if (styleInjected) return;
  if (typeof document === "undefined") return;
  styleInjected = true;
  const styleEl = document.createElement("style");
  styleEl.textContent = `
@keyframes ssk-highlight-fade {
0% { background-color: rgba(16, 185, 129, 0.18); }
100% { background-color: transparent; }
}
.${HIGHLIGHT_CLASS} {
animation: ssk-highlight-fade ${HIGHLIGHT_DURATION}ms ease-out forwards;
border-radius: 4px;
}
::highlight(${HIGHLIGHT_NAME}) {
background-color: rgba(16, 185, 129, 0.18);
}
`;
  document.head.appendChild(styleEl);
}
|
|
20475
|
+
// Tag names whose text nodes are rejected by the tree-walker filter in
// buildTextMap, so their contents never become part of the searchable text.
var IGNORED_TAGS = /* @__PURE__ */ new Set(["SCRIPT", "STYLE", "NOSCRIPT", "TEMPLATE"]);
|
|
20476
|
+
/**
 * Flattens the text nodes under `root2` into one string and records where
 * each node's text sits inside it.
 *
 * Text nodes with no parent element, or whose parent tag is in
 * `IGNORED_TAGS`, are rejected. Nodes with an empty value are skipped.
 *
 * @param {Node} root2 - Root of the subtree to walk.
 * @returns {{ text: string, chunks: Array<{ node: Node, start: number, end: number }> }}
 *   The concatenated text and, per accepted node, its [start, end) offsets.
 */
function buildTextMap(root2) {
  const textFilter = {
    acceptNode(node) {
      const host = node.parentElement;
      const usable = host && !IGNORED_TAGS.has(host.tagName);
      return usable ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_REJECT;
    }
  };
  const walker = document.createTreeWalker(root2, NodeFilter.SHOW_TEXT, textFilter);
  const chunks = [];
  let text = "";
  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    const value = node.nodeValue ?? "";
    if (value) {
      const start = text.length;
      text += value;
      chunks.push({ node, start, end: text.length });
    }
  }
  return { text, chunks };
}
|
|
20495
|
+
/**
 * Lower-cases `text`, collapses every whitespace run to a single space and
 * trims both ends.
 *
 * @param {string} text - Raw text to normalize.
 * @returns {string} The normalized text (possibly empty).
 */
function normalize(text) {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
20498
|
+
/**
 * Backslash-escapes every regular-expression metacharacter in `value` so the
 * result matches the input literally when used as a pattern source.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => "\\" + ch);
}
|
|
20501
|
+
/**
 * Builds the strict, case-insensitive matcher for `needle`.
 *
 * The needle is split on runs of characters that are neither letters nor
 * numbers. Several tokens must appear in order, separated by at least one
 * such character. A single token is matched literally. A needle with no
 * letter/number tokens is matched literally, with whitespace runs relaxed to
 * `\s+`.
 *
 * @param {string} needle - Text to search for.
 * @returns {RegExp | null} The matcher, or null for an empty needle.
 */
function buildNeedleRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
  switch (words.length) {
    case 0:
      // No letters or digits at all: fall back to the raw needle.
      return needle
        ? new RegExp(escapeRegExp(needle).replace(/\s+/g, "\\s+"), "i")
        : null;
    case 1:
      return new RegExp(escapeRegExp(words[0]), "iu");
    default: {
      const source = words.map((word) => escapeRegExp(word)).join("[^\\p{L}\\p{N}]+");
      return new RegExp(source, "iu");
    }
  }
}
|
|
20513
|
+
/**
 * Builds the lenient, case-insensitive matcher for a multi-token `needle`.
 *
 * Tokens are the letter/number runs of the needle. Unlike the strict matcher,
 * the separator between tokens is optional, so adjacent tokens also match.
 *
 * @param {string} needle - Text to search for.
 * @returns {RegExp | null} The matcher, or null when the needle has fewer
 *   than two tokens.
 */
function buildLenientRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter(Boolean);
  return words.length > 1
    ? new RegExp(words.map((word) => escapeRegExp(word)).join("[^\\p{L}\\p{N}]*"), "iu")
    : null;
}
|
|
20519
|
+
/**
 * Finds the first occurrence of `needle` in `fullText`.
 *
 * The strict matcher is tried first. The lenient matcher is built and tried
 * only when the strict one yields no match.
 *
 * @param {string} fullText - Text to search in.
 * @param {string} needle - Text to search for.
 * @returns {{ start: number, end: number } | null} Offsets of the match
 *   (`end` exclusive), or null when neither matcher hits.
 */
function findMatch(fullText, needle) {
  // Runs one matcher and converts a hit into start/end offsets.
  const locate = (pattern) => {
    const hit = pattern ? pattern.exec(fullText) : null;
    if (hit && typeof hit.index === "number") {
      return { start: hit.index, end: hit.index + hit[0].length };
    }
    return null;
  };
  return locate(buildNeedleRegex(needle)) ?? locate(buildLenientRegex(needle));
}
|
|
20536
|
+
/**
 * Converts offsets into the flattened text back into a DOM Range.
 *
 * @param {{ chunks: Array<{ node: Node, start: number, end: number }> }} map
 *   Text map as produced by buildTextMap.
 * @param {{ start: number, end: number }} offsets - Match offsets, `end`
 *   exclusive.
 * @returns {Range | null} A range spanning the match, or null when either
 *   boundary does not fall inside any chunk.
 */
function resolveRange(map, offsets) {
  const { start, end } = offsets;
  let first;
  let last;
  for (let i = 0; i < map.chunks.length; i += 1) {
    const piece = map.chunks[i];
    // Start boundary: inclusive at the chunk's start, exclusive at its end.
    if (!first && piece.start <= start && start < piece.end) {
      first = piece;
    }
    // End boundary: exclusive at the chunk's start, inclusive at its end.
    if (piece.start < end && end <= piece.end) {
      last = piece;
    }
    if (first && last) break;
  }
  if (!first || !last) return null;
  const range = document.createRange();
  range.setStart(first.node, start - first.start);
  range.setEnd(last.node, end - last.start);
  return range;
}
|
|
20554
|
+
/**
 * Reports whether the CSS Custom Highlight API can be used.
 *
 * Both halves of the API are required: the `CSS.highlights` registry and the
 * global `Highlight` constructor. The CSS highlight path calls
 * `new globalThis.Highlight(...)`, so an environment that exposes only the
 * registry must take the DOM fallback instead of throwing.
 *
 * @returns {boolean} True when both the registry and the constructor exist.
 */
function hasCustomHighlightAPI() {
  return (
    typeof CSS !== "undefined" &&
    typeof CSS.highlights !== "undefined" &&
    typeof globalThis.Highlight === "function"
  );
}
|
|
20557
|
+
// Handle of the pending timeout that clears the CSS highlight, or null when
// none is scheduled; highlightWithCSS cancels and replaces it on each call.
var highlightTimer = null;
|
|
20558
|
+
/**
 * Highlights `range` through the CSS highlight registry and schedules its
 * removal after `HIGHLIGHT_DURATION` milliseconds.
 *
 * A removal that is still pending from an earlier call is cancelled first,
 * so only the latest highlight's timer is live.
 *
 * @param {Range} range - Range to highlight.
 */
function highlightWithCSS(range) {
  ensureHighlightStyle();
  const highlight = new globalThis.Highlight(range);
  CSS.highlights.set(HIGHLIGHT_NAME, highlight);
  if (highlightTimer) {
    clearTimeout(highlightTimer);
  }
  const clearHighlight = () => {
    CSS.highlights.delete(HIGHLIGHT_NAME);
    highlightTimer = null;
  };
  highlightTimer = setTimeout(clearHighlight, HIGHLIGHT_DURATION);
}
|
|
20568
|
+
/**
 * Removes a highlight marker element while keeping its children in place.
 *
 * Each child is moved in front of the marker, the marker is removed, and the
 * parent is normalized when it is an Element. Does nothing when the marker is
 * not connected or has no parent.
 *
 * @param {Element} marker - Marker element to unwrap.
 */
function unwrapMarker(marker) {
  const host = marker.isConnected ? marker.parentNode : null;
  if (!host) return;
  for (let child = marker.firstChild; child; child = marker.firstChild) {
    host.insertBefore(child, marker);
  }
  host.removeChild(marker);
  if (host instanceof Element) host.normalize();
}
|
|
20576
|
+
/**
 * Highlights `range` by editing the DOM.
 *
 * First tries to wrap the range in a temporary marker <span>, which is
 * unwrapped after `HIGHLIGHT_DURATION` milliseconds. If that throws, the
 * highlight class is instead toggled on the range's common ancestor element
 * for the same duration.
 *
 * @param {Range} range - Range to highlight.
 * @returns {Element} The marker span, the highlighted ancestor element, or
 *   `document.body` when no element could be determined.
 */
function highlightWithDOM(range) {
  ensureHighlightStyle();
  try {
    const wrapper = document.createElement("span");
    wrapper.classList.add(HIGHLIGHT_CLASS);
    wrapper.setAttribute(HIGHLIGHT_MARKER_ATTR, "true");
    range.surroundContents(wrapper);
    setTimeout(() => unwrapMarker(wrapper), HIGHLIGHT_DURATION);
    return wrapper;
  } catch {
    // Wrapping failed: highlight the closest enclosing element instead.
    const container = range.commonAncestorContainer;
    const host = container instanceof Element ? container : container.parentElement;
    if (!host) return document.body;
    host.classList.add(HIGHLIGHT_CLASS);
    setTimeout(() => host.classList.remove(HIGHLIGHT_CLASS), HIGHLIGHT_DURATION);
    return host;
  }
}
|
|
20596
|
+
/**
 * Smooth-scrolls the window so the top of `range` lands one third of the
 * viewport height below the top edge.
 *
 * @param {Range} range - Range to bring into view.
 */
function scrollToRange(range) {
  const { top } = range.getBoundingClientRect();
  const targetTop = window.scrollY + top - window.innerHeight / 3;
  window.scrollTo({ top: targetTop, behavior: "smooth" });
}
|
|
20603
|
+
/**
 * Smooth-scrolls `el` to the start of the viewport when it exposes a
 * `scrollIntoView` method; otherwise does nothing.
 *
 * @param {Element} el - Element to scroll to.
 */
function scrollIntoViewIfPossible(el) {
  if (typeof el.scrollIntoView !== "function") return;
  el.scrollIntoView({ behavior: "smooth", block: "start" });
}
|
|
20608
|
+
/**
 * Scrolls to and highlights the text named by the destination URL's `_sskt`
 * (or, failing that, `_ssk`) query parameter.
 *
 * Returns silently when there is no `document`, no parameter, an empty
 * normalized needle, or no match in the text under `document.body`. The CSS
 * highlight path is used when available, the DOM fallback otherwise.
 *
 * @param {{ to?: { url: URL } | null }} navigation - Navigation object whose
 *   `to.url.searchParams` is read.
 * @returns {void}
 */
function searchsocketScrollToText(navigation) {
  if (typeof document === "undefined") return;
  const query = navigation.to?.url.searchParams;
  const rawText = query?.get("_sskt") ?? query?.get("_ssk");
  if (!rawText) return;
  const needle = normalize(rawText);
  if (!needle) return;
  const textMap = buildTextMap(document.body);
  const hit = findMatch(textMap.text, needle);
  const range = hit ? resolveRange(textMap, hit) : null;
  if (!range) return;
  if (!hasCustomHighlightAPI()) {
    const marked = highlightWithDOM(range);
    // Prefer the returned node itself; fall back to its parent if it cannot scroll.
    const scrollTarget = typeof marked.scrollIntoView === "function" ? marked : marked.parentElement;
    if (scrollTarget) scrollIntoViewIfPossible(scrollTarget);
    return;
  }
  highlightWithCSS(range);
  scrollToRange(range);
}
|
|
20836
20629
|
/*! Bundled license information:
|
|
20837
20630
|
|
|
20838
20631
|
@mixmark-io/domino/lib/style_parser.js:
|
|
@@ -20846,7 +20639,7 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
20846
20639
|
*/
|
|
20847
20640
|
|
|
20848
20641
|
exports.searchsocketHandle = searchsocketHandle;
|
|
20849
|
-
exports.
|
|
20642
|
+
exports.searchsocketScrollToText = searchsocketScrollToText;
|
|
20850
20643
|
exports.searchsocketVitePlugin = searchsocketVitePlugin;
|
|
20851
20644
|
//# sourceMappingURL=sveltekit.cjs.map
|
|
20852
20645
|
//# sourceMappingURL=sveltekit.cjs.map
|