searchsocket 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -31
- package/dist/cli.js +634 -1326
- package/dist/client.cjs +41 -117
- package/dist/client.d.cts +3 -17
- package/dist/client.d.ts +3 -17
- package/dist/client.js +41 -117
- package/dist/index.cjs +608 -1398
- package/dist/index.d.cts +73 -35
- package/dist/index.d.ts +73 -35
- package/dist/index.js +605 -1392
- package/dist/plugin-B_npJSux.d.cts +36 -0
- package/dist/plugin-M-aW0ev6.d.ts +36 -0
- package/dist/scroll.cjs +185 -0
- package/dist/scroll.d.cts +42 -0
- package/dist/scroll.d.ts +42 -0
- package/dist/scroll.js +183 -0
- package/dist/sveltekit.cjs +781 -1278
- package/dist/sveltekit.d.cts +3 -43
- package/dist/sveltekit.d.ts +3 -43
- package/dist/sveltekit.js +779 -1276
- package/dist/{types-z2dw3H6E.d.cts → types-Dk43uz25.d.cts} +46 -141
- package/dist/{types-z2dw3H6E.d.ts → types-Dk43uz25.d.ts} +46 -141
- package/package.json +10 -3
package/dist/index.cjs
CHANGED
|
@@ -5,12 +5,12 @@ var path = require('path');
|
|
|
5
5
|
var jiti = require('jiti');
|
|
6
6
|
var zod = require('zod');
|
|
7
7
|
var child_process = require('child_process');
|
|
8
|
-
var pLimit2 = require('p-limit');
|
|
9
8
|
var crypto = require('crypto');
|
|
10
9
|
var cheerio = require('cheerio');
|
|
11
10
|
var matter = require('gray-matter');
|
|
12
|
-
var fs4 = require('fs/promises');
|
|
13
11
|
var fg = require('fast-glob');
|
|
12
|
+
var pLimit = require('p-limit');
|
|
13
|
+
var fs3 = require('fs/promises');
|
|
14
14
|
var net = require('net');
|
|
15
15
|
var zlib = require('zlib');
|
|
16
16
|
var mcp_js = require('@modelcontextprotocol/sdk/server/mcp.js');
|
|
@@ -22,10 +22,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
|
22
22
|
|
|
23
23
|
var fs__default = /*#__PURE__*/_interopDefault(fs);
|
|
24
24
|
var path__default = /*#__PURE__*/_interopDefault(path);
|
|
25
|
-
var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
|
|
26
25
|
var matter__default = /*#__PURE__*/_interopDefault(matter);
|
|
27
|
-
var fs4__default = /*#__PURE__*/_interopDefault(fs4);
|
|
28
26
|
var fg__default = /*#__PURE__*/_interopDefault(fg);
|
|
27
|
+
var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
|
|
28
|
+
var fs3__default = /*#__PURE__*/_interopDefault(fs3);
|
|
29
29
|
var net__default = /*#__PURE__*/_interopDefault(net);
|
|
30
30
|
|
|
31
31
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
@@ -2771,12 +2771,12 @@ var require_ChildNode = __commonJS({
|
|
|
2771
2771
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
|
|
2772
2772
|
var Node2 = require_Node();
|
|
2773
2773
|
var LinkedList = require_LinkedList();
|
|
2774
|
-
var createDocumentFragmentFromArguments = function(
|
|
2775
|
-
var docFrag =
|
|
2774
|
+
var createDocumentFragmentFromArguments = function(document2, args) {
|
|
2775
|
+
var docFrag = document2.createDocumentFragment();
|
|
2776
2776
|
for (var i = 0; i < args.length; i++) {
|
|
2777
2777
|
var argItem = args[i];
|
|
2778
2778
|
var isNode = argItem instanceof Node2;
|
|
2779
|
-
docFrag.appendChild(isNode ? argItem :
|
|
2779
|
+
docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
|
|
2780
2780
|
}
|
|
2781
2781
|
return docFrag;
|
|
2782
2782
|
};
|
|
@@ -2934,7 +2934,7 @@ var require_NamedNodeMap = __commonJS({
|
|
|
2934
2934
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
|
|
2935
2935
|
var require_Element = __commonJS({
|
|
2936
2936
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
|
|
2937
|
-
module.exports =
|
|
2937
|
+
module.exports = Element2;
|
|
2938
2938
|
var xml = require_xmlnames();
|
|
2939
2939
|
var utils = require_utils();
|
|
2940
2940
|
var NAMESPACE = utils.NAMESPACE;
|
|
@@ -2951,7 +2951,7 @@ var require_Element = __commonJS({
|
|
|
2951
2951
|
var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
|
|
2952
2952
|
var NamedNodeMap = require_NamedNodeMap();
|
|
2953
2953
|
var uppercaseCache = /* @__PURE__ */ Object.create(null);
|
|
2954
|
-
function
|
|
2954
|
+
function Element2(doc, localName, namespaceURI, prefix) {
|
|
2955
2955
|
ContainerNode.call(this);
|
|
2956
2956
|
this.nodeType = Node2.ELEMENT_NODE;
|
|
2957
2957
|
this.ownerDocument = doc;
|
|
@@ -2971,7 +2971,7 @@ var require_Element = __commonJS({
|
|
|
2971
2971
|
recursiveGetText(node.childNodes[i], a);
|
|
2972
2972
|
}
|
|
2973
2973
|
}
|
|
2974
|
-
|
|
2974
|
+
Element2.prototype = Object.create(ContainerNode.prototype, {
|
|
2975
2975
|
isHTML: { get: function isHTML() {
|
|
2976
2976
|
return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
|
|
2977
2977
|
} },
|
|
@@ -3041,7 +3041,7 @@ var require_Element = __commonJS({
|
|
|
3041
3041
|
return NodeUtils.serializeOne(this, { nodeType: 0 });
|
|
3042
3042
|
},
|
|
3043
3043
|
set: function(v) {
|
|
3044
|
-
var
|
|
3044
|
+
var document2 = this.ownerDocument;
|
|
3045
3045
|
var parent = this.parentNode;
|
|
3046
3046
|
if (parent === null) {
|
|
3047
3047
|
return;
|
|
@@ -3052,8 +3052,8 @@ var require_Element = __commonJS({
|
|
|
3052
3052
|
if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
|
|
3053
3053
|
parent = parent.ownerDocument.createElement("body");
|
|
3054
3054
|
}
|
|
3055
|
-
var parser =
|
|
3056
|
-
|
|
3055
|
+
var parser = document2.implementation.mozHTMLParser(
|
|
3056
|
+
document2._address,
|
|
3057
3057
|
parent
|
|
3058
3058
|
);
|
|
3059
3059
|
parser.parse(v === null ? "" : String(v), true);
|
|
@@ -3112,7 +3112,7 @@ var require_Element = __commonJS({
|
|
|
3112
3112
|
default:
|
|
3113
3113
|
utils.SyntaxError();
|
|
3114
3114
|
}
|
|
3115
|
-
if (!(context instanceof
|
|
3115
|
+
if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
|
|
3116
3116
|
context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
|
|
3117
3117
|
}
|
|
3118
3118
|
var parser = this.ownerDocument.implementation.mozHTMLParser(
|
|
@@ -3720,10 +3720,10 @@ var require_Element = __commonJS({
|
|
|
3720
3720
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
3721
3721
|
} }
|
|
3722
3722
|
});
|
|
3723
|
-
Object.defineProperties(
|
|
3724
|
-
Object.defineProperties(
|
|
3723
|
+
Object.defineProperties(Element2.prototype, ChildNode);
|
|
3724
|
+
Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
|
|
3725
3725
|
attributes.registerChangeHandler(
|
|
3726
|
-
|
|
3726
|
+
Element2,
|
|
3727
3727
|
"id",
|
|
3728
3728
|
function(element, lname, oldval, newval) {
|
|
3729
3729
|
if (element.rooted) {
|
|
@@ -3737,7 +3737,7 @@ var require_Element = __commonJS({
|
|
|
3737
3737
|
}
|
|
3738
3738
|
);
|
|
3739
3739
|
attributes.registerChangeHandler(
|
|
3740
|
-
|
|
3740
|
+
Element2,
|
|
3741
3741
|
"class",
|
|
3742
3742
|
function(element, lname, oldval, newval) {
|
|
3743
3743
|
if (element._classList) {
|
|
@@ -3836,7 +3836,7 @@ var require_Element = __commonJS({
|
|
|
3836
3836
|
}
|
|
3837
3837
|
}
|
|
3838
3838
|
});
|
|
3839
|
-
|
|
3839
|
+
Element2._Attr = Attr;
|
|
3840
3840
|
function AttributesArray(elt) {
|
|
3841
3841
|
NamedNodeMap.call(this, elt);
|
|
3842
3842
|
for (var name in elt._attrsByQName) {
|
|
@@ -4238,7 +4238,7 @@ var require_DocumentFragment = __commonJS({
|
|
|
4238
4238
|
var Node2 = require_Node();
|
|
4239
4239
|
var NodeList = require_NodeList();
|
|
4240
4240
|
var ContainerNode = require_ContainerNode();
|
|
4241
|
-
var
|
|
4241
|
+
var Element2 = require_Element();
|
|
4242
4242
|
var select = require_select();
|
|
4243
4243
|
var utils = require_utils();
|
|
4244
4244
|
function DocumentFragment(doc) {
|
|
@@ -4256,9 +4256,9 @@ var require_DocumentFragment = __commonJS({
|
|
|
4256
4256
|
}
|
|
4257
4257
|
},
|
|
4258
4258
|
// Copy the text content getter/setter from Element
|
|
4259
|
-
textContent: Object.getOwnPropertyDescriptor(
|
|
4259
|
+
textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
|
|
4260
4260
|
// Copy the text content getter/setter from Element
|
|
4261
|
-
innerText: Object.getOwnPropertyDescriptor(
|
|
4261
|
+
innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
|
|
4262
4262
|
querySelector: { value: function(selector) {
|
|
4263
4263
|
var nodes = this.querySelectorAll(selector);
|
|
4264
4264
|
return nodes.length ? nodes[0] : null;
|
|
@@ -4266,8 +4266,8 @@ var require_DocumentFragment = __commonJS({
|
|
|
4266
4266
|
querySelectorAll: { value: function(selector) {
|
|
4267
4267
|
var context = Object.create(this);
|
|
4268
4268
|
context.isHTML = true;
|
|
4269
|
-
context.getElementsByTagName =
|
|
4270
|
-
context.nextElement = Object.getOwnPropertyDescriptor(
|
|
4269
|
+
context.getElementsByTagName = Element2.prototype.getElementsByTagName;
|
|
4270
|
+
context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
|
|
4271
4271
|
var nodes = select(selector, context);
|
|
4272
4272
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
4273
4273
|
} },
|
|
@@ -4349,7 +4349,7 @@ var require_ProcessingInstruction = __commonJS({
|
|
|
4349
4349
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
|
|
4350
4350
|
var require_NodeFilter = __commonJS({
|
|
4351
4351
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
|
|
4352
|
-
var
|
|
4352
|
+
var NodeFilter2 = {
|
|
4353
4353
|
// Constants for acceptNode()
|
|
4354
4354
|
FILTER_ACCEPT: 1,
|
|
4355
4355
|
FILTER_REJECT: 2,
|
|
@@ -4374,7 +4374,7 @@ var require_NodeFilter = __commonJS({
|
|
|
4374
4374
|
SHOW_NOTATION: 2048
|
|
4375
4375
|
// historical
|
|
4376
4376
|
};
|
|
4377
|
-
module.exports =
|
|
4377
|
+
module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
|
|
4378
4378
|
}
|
|
4379
4379
|
});
|
|
4380
4380
|
|
|
@@ -4449,7 +4449,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4449
4449
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
|
|
4450
4450
|
module.exports = TreeWalker;
|
|
4451
4451
|
var Node2 = require_Node();
|
|
4452
|
-
var
|
|
4452
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4453
4453
|
var NodeTraversal = require_NodeTraversal();
|
|
4454
4454
|
var utils = require_utils();
|
|
4455
4455
|
var mapChild = {
|
|
@@ -4469,11 +4469,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4469
4469
|
node = tw._currentNode[mapChild[type]];
|
|
4470
4470
|
while (node !== null) {
|
|
4471
4471
|
result = tw._internalFilter(node);
|
|
4472
|
-
if (result ===
|
|
4472
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4473
4473
|
tw._currentNode = node;
|
|
4474
4474
|
return node;
|
|
4475
4475
|
}
|
|
4476
|
-
if (result ===
|
|
4476
|
+
if (result === NodeFilter2.FILTER_SKIP) {
|
|
4477
4477
|
child = node[mapChild[type]];
|
|
4478
4478
|
if (child !== null) {
|
|
4479
4479
|
node = child;
|
|
@@ -4507,12 +4507,12 @@ var require_TreeWalker = __commonJS({
|
|
|
4507
4507
|
while (sibling !== null) {
|
|
4508
4508
|
node = sibling;
|
|
4509
4509
|
result = tw._internalFilter(node);
|
|
4510
|
-
if (result ===
|
|
4510
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4511
4511
|
tw._currentNode = node;
|
|
4512
4512
|
return node;
|
|
4513
4513
|
}
|
|
4514
4514
|
sibling = node[mapChild[type]];
|
|
4515
|
-
if (result ===
|
|
4515
|
+
if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
|
|
4516
4516
|
sibling = node[mapSibling[type]];
|
|
4517
4517
|
}
|
|
4518
4518
|
}
|
|
@@ -4520,7 +4520,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4520
4520
|
if (node === null || node === tw.root) {
|
|
4521
4521
|
return null;
|
|
4522
4522
|
}
|
|
4523
|
-
if (tw._internalFilter(node) ===
|
|
4523
|
+
if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4524
4524
|
return null;
|
|
4525
4525
|
}
|
|
4526
4526
|
}
|
|
@@ -4568,11 +4568,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4568
4568
|
utils.InvalidStateError();
|
|
4569
4569
|
}
|
|
4570
4570
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4571
|
-
return
|
|
4571
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4572
4572
|
}
|
|
4573
4573
|
filter = this._filter;
|
|
4574
4574
|
if (filter === null) {
|
|
4575
|
-
result =
|
|
4575
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4576
4576
|
} else {
|
|
4577
4577
|
this._active = true;
|
|
4578
4578
|
try {
|
|
@@ -4601,7 +4601,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4601
4601
|
if (node === null) {
|
|
4602
4602
|
return null;
|
|
4603
4603
|
}
|
|
4604
|
-
if (this._internalFilter(node) ===
|
|
4604
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4605
4605
|
this._currentNode = node;
|
|
4606
4606
|
return node;
|
|
4607
4607
|
}
|
|
@@ -4654,17 +4654,17 @@ var require_TreeWalker = __commonJS({
|
|
|
4654
4654
|
for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
|
|
4655
4655
|
node = previousSibling;
|
|
4656
4656
|
result = this._internalFilter(node);
|
|
4657
|
-
if (result ===
|
|
4657
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4658
4658
|
continue;
|
|
4659
4659
|
}
|
|
4660
4660
|
for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
|
|
4661
4661
|
node = lastChild;
|
|
4662
4662
|
result = this._internalFilter(node);
|
|
4663
|
-
if (result ===
|
|
4663
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4664
4664
|
break;
|
|
4665
4665
|
}
|
|
4666
4666
|
}
|
|
4667
|
-
if (result ===
|
|
4667
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4668
4668
|
this._currentNode = node;
|
|
4669
4669
|
return node;
|
|
4670
4670
|
}
|
|
@@ -4673,7 +4673,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4673
4673
|
return null;
|
|
4674
4674
|
}
|
|
4675
4675
|
node = node.parentNode;
|
|
4676
|
-
if (this._internalFilter(node) ===
|
|
4676
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4677
4677
|
this._currentNode = node;
|
|
4678
4678
|
return node;
|
|
4679
4679
|
}
|
|
@@ -4690,26 +4690,26 @@ var require_TreeWalker = __commonJS({
|
|
|
4690
4690
|
nextNode: { value: function nextNode() {
|
|
4691
4691
|
var node, result, firstChild, nextSibling;
|
|
4692
4692
|
node = this._currentNode;
|
|
4693
|
-
result =
|
|
4693
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4694
4694
|
CHILDREN:
|
|
4695
4695
|
while (true) {
|
|
4696
4696
|
for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
|
|
4697
4697
|
node = firstChild;
|
|
4698
4698
|
result = this._internalFilter(node);
|
|
4699
|
-
if (result ===
|
|
4699
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4700
4700
|
this._currentNode = node;
|
|
4701
4701
|
return node;
|
|
4702
|
-
} else if (result ===
|
|
4702
|
+
} else if (result === NodeFilter2.FILTER_REJECT) {
|
|
4703
4703
|
break;
|
|
4704
4704
|
}
|
|
4705
4705
|
}
|
|
4706
4706
|
for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
|
|
4707
4707
|
node = nextSibling;
|
|
4708
4708
|
result = this._internalFilter(node);
|
|
4709
|
-
if (result ===
|
|
4709
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4710
4710
|
this._currentNode = node;
|
|
4711
4711
|
return node;
|
|
4712
|
-
} else if (result ===
|
|
4712
|
+
} else if (result === NodeFilter2.FILTER_SKIP) {
|
|
4713
4713
|
continue CHILDREN;
|
|
4714
4714
|
}
|
|
4715
4715
|
}
|
|
@@ -4728,7 +4728,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4728
4728
|
var require_NodeIterator = __commonJS({
|
|
4729
4729
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
|
|
4730
4730
|
module.exports = NodeIterator;
|
|
4731
|
-
var
|
|
4731
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4732
4732
|
var NodeTraversal = require_NodeTraversal();
|
|
4733
4733
|
var utils = require_utils();
|
|
4734
4734
|
function move(node, stayWithin, directionIsNext) {
|
|
@@ -4763,7 +4763,7 @@ var require_NodeIterator = __commonJS({
|
|
|
4763
4763
|
}
|
|
4764
4764
|
}
|
|
4765
4765
|
var result = ni._internalFilter(node);
|
|
4766
|
-
if (result ===
|
|
4766
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4767
4767
|
break;
|
|
4768
4768
|
}
|
|
4769
4769
|
}
|
|
@@ -4811,11 +4811,11 @@ var require_NodeIterator = __commonJS({
|
|
|
4811
4811
|
utils.InvalidStateError();
|
|
4812
4812
|
}
|
|
4813
4813
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4814
|
-
return
|
|
4814
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4815
4815
|
}
|
|
4816
4816
|
filter = this._filter;
|
|
4817
4817
|
if (filter === null) {
|
|
4818
|
-
result =
|
|
4818
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4819
4819
|
} else {
|
|
4820
4820
|
this._active = true;
|
|
4821
4821
|
try {
|
|
@@ -5025,32 +5025,32 @@ var require_URL = __commonJS({
|
|
|
5025
5025
|
else
|
|
5026
5026
|
return basepath.substring(0, lastslash + 1) + refpath;
|
|
5027
5027
|
}
|
|
5028
|
-
function remove_dot_segments(
|
|
5029
|
-
if (!
|
|
5028
|
+
function remove_dot_segments(path13) {
|
|
5029
|
+
if (!path13) return path13;
|
|
5030
5030
|
var output = "";
|
|
5031
|
-
while (
|
|
5032
|
-
if (
|
|
5033
|
-
|
|
5031
|
+
while (path13.length > 0) {
|
|
5032
|
+
if (path13 === "." || path13 === "..") {
|
|
5033
|
+
path13 = "";
|
|
5034
5034
|
break;
|
|
5035
5035
|
}
|
|
5036
|
-
var twochars =
|
|
5037
|
-
var threechars =
|
|
5038
|
-
var fourchars =
|
|
5036
|
+
var twochars = path13.substring(0, 2);
|
|
5037
|
+
var threechars = path13.substring(0, 3);
|
|
5038
|
+
var fourchars = path13.substring(0, 4);
|
|
5039
5039
|
if (threechars === "../") {
|
|
5040
|
-
|
|
5040
|
+
path13 = path13.substring(3);
|
|
5041
5041
|
} else if (twochars === "./") {
|
|
5042
|
-
|
|
5042
|
+
path13 = path13.substring(2);
|
|
5043
5043
|
} else if (threechars === "/./") {
|
|
5044
|
-
|
|
5045
|
-
} else if (twochars === "/." &&
|
|
5046
|
-
|
|
5047
|
-
} else if (fourchars === "/../" || threechars === "/.." &&
|
|
5048
|
-
|
|
5044
|
+
path13 = "/" + path13.substring(3);
|
|
5045
|
+
} else if (twochars === "/." && path13.length === 2) {
|
|
5046
|
+
path13 = "/";
|
|
5047
|
+
} else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
|
|
5048
|
+
path13 = "/" + path13.substring(4);
|
|
5049
5049
|
output = output.replace(/\/?[^\/]*$/, "");
|
|
5050
5050
|
} else {
|
|
5051
|
-
var segment =
|
|
5051
|
+
var segment = path13.match(/(\/?([^\/]*))/)[0];
|
|
5052
5052
|
output += segment;
|
|
5053
|
-
|
|
5053
|
+
path13 = path13.substring(segment.length);
|
|
5054
5054
|
}
|
|
5055
5055
|
}
|
|
5056
5056
|
return output;
|
|
@@ -5615,9 +5615,9 @@ var require_defineElement = __commonJS({
|
|
|
5615
5615
|
});
|
|
5616
5616
|
return c;
|
|
5617
5617
|
};
|
|
5618
|
-
function EventHandlerBuilder(body,
|
|
5618
|
+
function EventHandlerBuilder(body, document2, form, element) {
|
|
5619
5619
|
this.body = body;
|
|
5620
|
-
this.document =
|
|
5620
|
+
this.document = document2;
|
|
5621
5621
|
this.form = form;
|
|
5622
5622
|
this.element = element;
|
|
5623
5623
|
}
|
|
@@ -5651,7 +5651,7 @@ var require_defineElement = __commonJS({
|
|
|
5651
5651
|
var require_htmlelts = __commonJS({
|
|
5652
5652
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
|
|
5653
5653
|
var Node2 = require_Node();
|
|
5654
|
-
var
|
|
5654
|
+
var Element2 = require_Element();
|
|
5655
5655
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
5656
5656
|
var utils = require_utils();
|
|
5657
5657
|
var URLUtils = require_URLUtils();
|
|
@@ -5719,10 +5719,10 @@ var require_htmlelts = __commonJS({
|
|
|
5719
5719
|
this._form = null;
|
|
5720
5720
|
};
|
|
5721
5721
|
var HTMLElement = exports$1.HTMLElement = define({
|
|
5722
|
-
superclass:
|
|
5722
|
+
superclass: Element2,
|
|
5723
5723
|
name: "HTMLElement",
|
|
5724
5724
|
ctor: function HTMLElement2(doc, localName, prefix) {
|
|
5725
|
-
|
|
5725
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
|
|
5726
5726
|
},
|
|
5727
5727
|
props: {
|
|
5728
5728
|
dangerouslySetInnerHTML: {
|
|
@@ -7204,7 +7204,7 @@ var require_htmlelts = __commonJS({
|
|
|
7204
7204
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
|
|
7205
7205
|
var require_svg = __commonJS({
|
|
7206
7206
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
|
|
7207
|
-
var
|
|
7207
|
+
var Element2 = require_Element();
|
|
7208
7208
|
var defineElement = require_defineElement();
|
|
7209
7209
|
var utils = require_utils();
|
|
7210
7210
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
@@ -7218,10 +7218,10 @@ var require_svg = __commonJS({
|
|
|
7218
7218
|
return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
|
|
7219
7219
|
}
|
|
7220
7220
|
var SVGElement = define({
|
|
7221
|
-
superclass:
|
|
7221
|
+
superclass: Element2,
|
|
7222
7222
|
name: "SVGElement",
|
|
7223
7223
|
ctor: function SVGElement2(doc, localName, prefix) {
|
|
7224
|
-
|
|
7224
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
|
|
7225
7225
|
},
|
|
7226
7226
|
props: {
|
|
7227
7227
|
style: { get: function() {
|
|
@@ -7356,7 +7356,7 @@ var require_Document = __commonJS({
|
|
|
7356
7356
|
var Node2 = require_Node();
|
|
7357
7357
|
var NodeList = require_NodeList();
|
|
7358
7358
|
var ContainerNode = require_ContainerNode();
|
|
7359
|
-
var
|
|
7359
|
+
var Element2 = require_Element();
|
|
7360
7360
|
var Text = require_Text();
|
|
7361
7361
|
var Comment = require_Comment();
|
|
7362
7362
|
var Event = require_Event();
|
|
@@ -7365,7 +7365,7 @@ var require_Document = __commonJS({
|
|
|
7365
7365
|
var DOMImplementation = require_DOMImplementation();
|
|
7366
7366
|
var TreeWalker = require_TreeWalker();
|
|
7367
7367
|
var NodeIterator = require_NodeIterator();
|
|
7368
|
-
var
|
|
7368
|
+
var NodeFilter2 = require_NodeFilter();
|
|
7369
7369
|
var URL2 = require_URL();
|
|
7370
7370
|
var select = require_select();
|
|
7371
7371
|
var events = require_events();
|
|
@@ -7504,13 +7504,13 @@ var require_Document = __commonJS({
|
|
|
7504
7504
|
if (this.isHTML) {
|
|
7505
7505
|
localName = utils.toASCIILowerCase(localName);
|
|
7506
7506
|
}
|
|
7507
|
-
return new
|
|
7507
|
+
return new Element2._Attr(null, localName, null, null, "");
|
|
7508
7508
|
} },
|
|
7509
7509
|
createAttributeNS: { value: function(namespace, qualifiedName) {
|
|
7510
7510
|
namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
|
|
7511
7511
|
qualifiedName = String(qualifiedName);
|
|
7512
7512
|
var ve = validateAndExtract(namespace, qualifiedName);
|
|
7513
|
-
return new
|
|
7513
|
+
return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
|
|
7514
7514
|
} },
|
|
7515
7515
|
createElement: { value: function(localName) {
|
|
7516
7516
|
localName = String(localName);
|
|
@@ -7522,7 +7522,7 @@ var require_Document = __commonJS({
|
|
|
7522
7522
|
} else if (this.contentType === "application/xhtml+xml") {
|
|
7523
7523
|
return html.createElement(this, localName, null);
|
|
7524
7524
|
} else {
|
|
7525
|
-
return new
|
|
7525
|
+
return new Element2(this, localName, null, null);
|
|
7526
7526
|
}
|
|
7527
7527
|
}, writable: isApiWritable },
|
|
7528
7528
|
createElementNS: { value: function(namespace, qualifiedName) {
|
|
@@ -7539,7 +7539,7 @@ var require_Document = __commonJS({
|
|
|
7539
7539
|
} else if (namespace === NAMESPACE.SVG) {
|
|
7540
7540
|
return svg.createElement(this, localName, prefix);
|
|
7541
7541
|
}
|
|
7542
|
-
return new
|
|
7542
|
+
return new Element2(this, localName, namespace, prefix);
|
|
7543
7543
|
} },
|
|
7544
7544
|
createEvent: { value: function createEvent(interfaceName) {
|
|
7545
7545
|
interfaceName = interfaceName.toLowerCase();
|
|
@@ -7561,7 +7561,7 @@ var require_Document = __commonJS({
|
|
|
7561
7561
|
if (!(root3 instanceof Node2)) {
|
|
7562
7562
|
throw new TypeError("root not a node");
|
|
7563
7563
|
}
|
|
7564
|
-
whatToShow = whatToShow === void 0 ?
|
|
7564
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7565
7565
|
filter = filter === void 0 ? null : filter;
|
|
7566
7566
|
return new TreeWalker(root3, whatToShow, filter);
|
|
7567
7567
|
} },
|
|
@@ -7573,7 +7573,7 @@ var require_Document = __commonJS({
|
|
|
7573
7573
|
if (!(root3 instanceof Node2)) {
|
|
7574
7574
|
throw new TypeError("root not a node");
|
|
7575
7575
|
}
|
|
7576
|
-
whatToShow = whatToShow === void 0 ?
|
|
7576
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7577
7577
|
filter = filter === void 0 ? null : filter;
|
|
7578
7578
|
return new NodeIterator(root3, whatToShow, filter);
|
|
7579
7579
|
} },
|
|
@@ -7634,10 +7634,10 @@ var require_Document = __commonJS({
|
|
|
7634
7634
|
return this.byId[id] instanceof MultiId;
|
|
7635
7635
|
} },
|
|
7636
7636
|
// Just copy this method from the Element prototype
|
|
7637
|
-
getElementsByName: { value:
|
|
7638
|
-
getElementsByTagName: { value:
|
|
7639
|
-
getElementsByTagNameNS: { value:
|
|
7640
|
-
getElementsByClassName: { value:
|
|
7637
|
+
getElementsByName: { value: Element2.prototype.getElementsByName },
|
|
7638
|
+
getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
|
|
7639
|
+
getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
|
|
7640
|
+
getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
|
|
7641
7641
|
adoptNode: { value: function adoptNode(node) {
|
|
7642
7642
|
if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
|
|
7643
7643
|
if (node.nodeType === Node2.ATTRIBUTE_NODE) {
|
|
@@ -16463,8 +16463,8 @@ var require_Window = __commonJS({
|
|
|
16463
16463
|
var Location = require_Location();
|
|
16464
16464
|
var utils = require_utils();
|
|
16465
16465
|
module.exports = Window;
|
|
16466
|
-
function Window(
|
|
16467
|
-
this.document =
|
|
16466
|
+
function Window(document2) {
|
|
16467
|
+
this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
|
|
16468
16468
|
this.document._scripting_enabled = true;
|
|
16469
16469
|
this.document.defaultView = this;
|
|
16470
16470
|
this.location = new Location(this, this.document._address || "about:blank");
|
|
@@ -16594,11 +16594,11 @@ var require_lib = __commonJS({
|
|
|
16594
16594
|
};
|
|
16595
16595
|
};
|
|
16596
16596
|
exports$1.createWindow = function(html, address) {
|
|
16597
|
-
var
|
|
16597
|
+
var document2 = exports$1.createDocument(html);
|
|
16598
16598
|
if (address !== void 0) {
|
|
16599
|
-
|
|
16599
|
+
document2._address = address;
|
|
16600
16600
|
}
|
|
16601
|
-
return new impl.Window(
|
|
16601
|
+
return new impl.Window(document2);
|
|
16602
16602
|
};
|
|
16603
16603
|
exports$1.impl = impl;
|
|
16604
16604
|
}
|
|
@@ -16663,29 +16663,18 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16663
16663
|
prependTitle: zod.z.boolean().optional(),
|
|
16664
16664
|
pageSummaryChunk: zod.z.boolean().optional()
|
|
16665
16665
|
}).optional(),
|
|
16666
|
-
|
|
16667
|
-
|
|
16668
|
-
|
|
16669
|
-
|
|
16670
|
-
|
|
16671
|
-
batchSize: zod.z.number().int().positive().optional(),
|
|
16672
|
-
concurrency: zod.z.number().int().positive().optional(),
|
|
16673
|
-
pricePer1kTokens: zod.z.number().positive().optional()
|
|
16666
|
+
upstash: zod.z.object({
|
|
16667
|
+
url: zod.z.string().url().optional(),
|
|
16668
|
+
token: zod.z.string().min(1).optional(),
|
|
16669
|
+
urlEnv: zod.z.string().min(1).optional(),
|
|
16670
|
+
tokenEnv: zod.z.string().min(1).optional()
|
|
16674
16671
|
}).optional(),
|
|
16675
|
-
|
|
16676
|
-
|
|
16677
|
-
|
|
16678
|
-
|
|
16679
|
-
|
|
16680
|
-
|
|
16681
|
-
authTokenEnv: zod.z.string().optional(),
|
|
16682
|
-
localPath: zod.z.string().optional()
|
|
16683
|
-
}).optional()
|
|
16684
|
-
}).optional(),
|
|
16685
|
-
rerank: zod.z.object({
|
|
16686
|
-
enabled: zod.z.boolean().optional(),
|
|
16687
|
-
topN: zod.z.number().int().positive().optional(),
|
|
16688
|
-
model: zod.z.string().optional()
|
|
16672
|
+
search: zod.z.object({
|
|
16673
|
+
semanticWeight: zod.z.number().min(0).max(1).optional(),
|
|
16674
|
+
inputEnrichment: zod.z.boolean().optional(),
|
|
16675
|
+
reranking: zod.z.boolean().optional(),
|
|
16676
|
+
dualSearch: zod.z.boolean().optional(),
|
|
16677
|
+
pageSearchWeight: zod.z.number().min(0).max(1).optional()
|
|
16689
16678
|
}).optional(),
|
|
16690
16679
|
ranking: zod.z.object({
|
|
16691
16680
|
enableIncomingLinkBoost: zod.z.boolean().optional(),
|
|
@@ -16695,11 +16684,12 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16695
16684
|
aggregationDecay: zod.z.number().min(0).max(1).optional(),
|
|
16696
16685
|
minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
|
|
16697
16686
|
minScore: zod.z.number().min(0).max(1).optional(),
|
|
16687
|
+
scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
|
|
16698
16688
|
weights: zod.z.object({
|
|
16699
16689
|
incomingLinks: zod.z.number().optional(),
|
|
16700
16690
|
depth: zod.z.number().optional(),
|
|
16701
|
-
|
|
16702
|
-
|
|
16691
|
+
aggregation: zod.z.number().optional(),
|
|
16692
|
+
titleMatch: zod.z.number().optional()
|
|
16703
16693
|
}).optional()
|
|
16704
16694
|
}).optional(),
|
|
16705
16695
|
api: zod.z.object({
|
|
@@ -16721,8 +16711,7 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16721
16711
|
}).optional()
|
|
16722
16712
|
}).optional(),
|
|
16723
16713
|
state: zod.z.object({
|
|
16724
|
-
dir: zod.z.string().optional()
|
|
16725
|
-
writeMirror: zod.z.boolean().optional()
|
|
16714
|
+
dir: zod.z.string().optional()
|
|
16726
16715
|
}).optional()
|
|
16727
16716
|
});
|
|
16728
16717
|
|
|
@@ -16776,24 +16765,16 @@ function createDefaultConfig(projectId) {
|
|
|
16776
16765
|
prependTitle: true,
|
|
16777
16766
|
pageSummaryChunk: true
|
|
16778
16767
|
},
|
|
16779
|
-
|
|
16780
|
-
|
|
16781
|
-
|
|
16782
|
-
apiKeyEnv: "JINA_API_KEY",
|
|
16783
|
-
batchSize: 64,
|
|
16784
|
-
concurrency: 4
|
|
16785
|
-
},
|
|
16786
|
-
vector: {
|
|
16787
|
-
turso: {
|
|
16788
|
-
urlEnv: "TURSO_DATABASE_URL",
|
|
16789
|
-
authTokenEnv: "TURSO_AUTH_TOKEN",
|
|
16790
|
-
localPath: ".searchsocket/vectors.db"
|
|
16791
|
-
}
|
|
16768
|
+
upstash: {
|
|
16769
|
+
urlEnv: "UPSTASH_SEARCH_REST_URL",
|
|
16770
|
+
tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
|
|
16792
16771
|
},
|
|
16793
|
-
|
|
16794
|
-
|
|
16795
|
-
|
|
16796
|
-
|
|
16772
|
+
search: {
|
|
16773
|
+
semanticWeight: 0.75,
|
|
16774
|
+
inputEnrichment: true,
|
|
16775
|
+
reranking: true,
|
|
16776
|
+
dualSearch: true,
|
|
16777
|
+
pageSearchWeight: 0.3
|
|
16797
16778
|
},
|
|
16798
16779
|
ranking: {
|
|
16799
16780
|
enableIncomingLinkBoost: true,
|
|
@@ -16802,12 +16783,13 @@ function createDefaultConfig(projectId) {
|
|
|
16802
16783
|
aggregationCap: 5,
|
|
16803
16784
|
aggregationDecay: 0.5,
|
|
16804
16785
|
minChunkScoreRatio: 0.5,
|
|
16805
|
-
minScore: 0,
|
|
16786
|
+
minScore: 0.3,
|
|
16787
|
+
scoreGapThreshold: 0.4,
|
|
16806
16788
|
weights: {
|
|
16807
16789
|
incomingLinks: 0.05,
|
|
16808
16790
|
depth: 0.03,
|
|
16809
|
-
|
|
16810
|
-
|
|
16791
|
+
aggregation: 0.1,
|
|
16792
|
+
titleMatch: 0.15
|
|
16811
16793
|
}
|
|
16812
16794
|
},
|
|
16813
16795
|
api: {
|
|
@@ -16825,8 +16807,7 @@ function createDefaultConfig(projectId) {
|
|
|
16825
16807
|
}
|
|
16826
16808
|
},
|
|
16827
16809
|
state: {
|
|
16828
|
-
dir: ".searchsocket"
|
|
16829
|
-
writeMirror: false
|
|
16810
|
+
dir: ".searchsocket"
|
|
16830
16811
|
}
|
|
16831
16812
|
};
|
|
16832
16813
|
}
|
|
@@ -16950,21 +16931,13 @@ ${issues}`
|
|
|
16950
16931
|
...defaults.chunking,
|
|
16951
16932
|
...parsed.chunking
|
|
16952
16933
|
},
|
|
16953
|
-
|
|
16954
|
-
...defaults.
|
|
16955
|
-
...parsed.
|
|
16934
|
+
upstash: {
|
|
16935
|
+
...defaults.upstash,
|
|
16936
|
+
...parsed.upstash
|
|
16956
16937
|
},
|
|
16957
|
-
|
|
16958
|
-
...defaults.
|
|
16959
|
-
...parsed.
|
|
16960
|
-
turso: {
|
|
16961
|
-
...defaults.vector.turso,
|
|
16962
|
-
...parsed.vector?.turso
|
|
16963
|
-
}
|
|
16964
|
-
},
|
|
16965
|
-
rerank: {
|
|
16966
|
-
...defaults.rerank,
|
|
16967
|
-
...parsed.rerank
|
|
16938
|
+
search: {
|
|
16939
|
+
...defaults.search,
|
|
16940
|
+
...parsed.search
|
|
16968
16941
|
},
|
|
16969
16942
|
ranking: {
|
|
16970
16943
|
...defaults.ranking,
|
|
@@ -17143,660 +17116,245 @@ function resolveScope(config, override) {
|
|
|
17143
17116
|
scopeId: `${config.project.id}:${scopeName}`
|
|
17144
17117
|
};
|
|
17145
17118
|
}
|
|
17146
|
-
function sleep(ms) {
|
|
17147
|
-
return new Promise((resolve) => {
|
|
17148
|
-
setTimeout(resolve, ms);
|
|
17149
|
-
});
|
|
17150
|
-
}
|
|
17151
|
-
var JinaEmbeddingsProvider = class {
|
|
17152
|
-
apiKey;
|
|
17153
|
-
batchSize;
|
|
17154
|
-
concurrency;
|
|
17155
|
-
defaultTask;
|
|
17156
|
-
constructor(options) {
|
|
17157
|
-
if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
|
|
17158
|
-
throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
|
|
17159
|
-
}
|
|
17160
|
-
if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
|
|
17161
|
-
throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
|
|
17162
|
-
}
|
|
17163
|
-
this.apiKey = options.apiKey;
|
|
17164
|
-
this.batchSize = options.batchSize;
|
|
17165
|
-
this.concurrency = options.concurrency;
|
|
17166
|
-
this.defaultTask = options.task ?? "retrieval.passage";
|
|
17167
|
-
}
|
|
17168
|
-
estimateTokens(text) {
|
|
17169
|
-
const normalized = text.trim();
|
|
17170
|
-
if (!normalized) {
|
|
17171
|
-
return 0;
|
|
17172
|
-
}
|
|
17173
|
-
const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
|
|
17174
|
-
const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
|
|
17175
|
-
const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
|
|
17176
|
-
const charEstimate = Math.ceil(normalized.length / 4);
|
|
17177
|
-
const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
|
|
17178
|
-
return Math.max(1, Math.max(charEstimate, lexicalEstimate));
|
|
17179
|
-
}
|
|
17180
|
-
async embedTexts(texts, modelId, task) {
|
|
17181
|
-
if (texts.length === 0) {
|
|
17182
|
-
return [];
|
|
17183
|
-
}
|
|
17184
|
-
const batches = [];
|
|
17185
|
-
for (let i = 0; i < texts.length; i += this.batchSize) {
|
|
17186
|
-
batches.push({
|
|
17187
|
-
index: i,
|
|
17188
|
-
values: texts.slice(i, i + this.batchSize)
|
|
17189
|
-
});
|
|
17190
|
-
}
|
|
17191
|
-
const outputs = new Array(batches.length);
|
|
17192
|
-
const limit = pLimit2__default.default(this.concurrency);
|
|
17193
|
-
await Promise.all(
|
|
17194
|
-
batches.map(
|
|
17195
|
-
(batch, position) => limit(async () => {
|
|
17196
|
-
outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
|
|
17197
|
-
})
|
|
17198
|
-
)
|
|
17199
|
-
);
|
|
17200
|
-
return outputs.flat();
|
|
17201
|
-
}
|
|
17202
|
-
async embedWithRetry(texts, modelId, task) {
|
|
17203
|
-
const maxAttempts = 5;
|
|
17204
|
-
let attempt = 0;
|
|
17205
|
-
while (attempt < maxAttempts) {
|
|
17206
|
-
attempt += 1;
|
|
17207
|
-
let response;
|
|
17208
|
-
try {
|
|
17209
|
-
response = await fetch("https://api.jina.ai/v1/embeddings", {
|
|
17210
|
-
method: "POST",
|
|
17211
|
-
headers: {
|
|
17212
|
-
"content-type": "application/json",
|
|
17213
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17214
|
-
},
|
|
17215
|
-
body: JSON.stringify({
|
|
17216
|
-
model: modelId,
|
|
17217
|
-
input: texts,
|
|
17218
|
-
task
|
|
17219
|
-
})
|
|
17220
|
-
});
|
|
17221
|
-
} catch (error) {
|
|
17222
|
-
if (attempt >= maxAttempts) {
|
|
17223
|
-
throw error;
|
|
17224
|
-
}
|
|
17225
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17226
|
-
continue;
|
|
17227
|
-
}
|
|
17228
|
-
if (!response.ok) {
|
|
17229
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17230
|
-
if (!retryable || attempt >= maxAttempts) {
|
|
17231
|
-
const errorBody = await response.text();
|
|
17232
|
-
throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
|
|
17233
|
-
}
|
|
17234
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17235
|
-
continue;
|
|
17236
|
-
}
|
|
17237
|
-
const payload = await response.json();
|
|
17238
|
-
if (!payload.data || !Array.isArray(payload.data)) {
|
|
17239
|
-
throw new Error("Invalid Jina embeddings response format");
|
|
17240
|
-
}
|
|
17241
|
-
return payload.data.map((entry) => entry.embedding);
|
|
17242
|
-
}
|
|
17243
|
-
throw new Error("Unreachable retry state");
|
|
17244
|
-
}
|
|
17245
|
-
};
|
|
17246
|
-
|
|
17247
|
-
// src/embeddings/factory.ts
|
|
17248
|
-
function createEmbeddingsProvider(config) {
|
|
17249
|
-
if (config.embeddings.provider !== "jina") {
|
|
17250
|
-
throw new SearchSocketError(
|
|
17251
|
-
"CONFIG_MISSING",
|
|
17252
|
-
`Unsupported embeddings provider ${config.embeddings.provider}`
|
|
17253
|
-
);
|
|
17254
|
-
}
|
|
17255
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17256
|
-
if (!apiKey) {
|
|
17257
|
-
throw new SearchSocketError(
|
|
17258
|
-
"CONFIG_MISSING",
|
|
17259
|
-
`Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
|
|
17260
|
-
);
|
|
17261
|
-
}
|
|
17262
|
-
return new JinaEmbeddingsProvider({
|
|
17263
|
-
apiKey,
|
|
17264
|
-
batchSize: config.embeddings.batchSize,
|
|
17265
|
-
concurrency: config.embeddings.concurrency
|
|
17266
|
-
});
|
|
17267
|
-
}
|
|
17268
|
-
|
|
17269
|
-
// src/rerank/jina.ts
|
|
17270
|
-
function sleep2(ms) {
|
|
17271
|
-
return new Promise((resolve) => {
|
|
17272
|
-
setTimeout(resolve, ms);
|
|
17273
|
-
});
|
|
17274
|
-
}
|
|
17275
|
-
var JinaReranker = class {
|
|
17276
|
-
apiKey;
|
|
17277
|
-
model;
|
|
17278
|
-
maxRetries;
|
|
17279
|
-
constructor(options) {
|
|
17280
|
-
this.apiKey = options.apiKey;
|
|
17281
|
-
this.model = options.model;
|
|
17282
|
-
this.maxRetries = options.maxRetries ?? 2;
|
|
17283
|
-
}
|
|
17284
|
-
async rerank(query, candidates, topN) {
|
|
17285
|
-
if (candidates.length === 0) {
|
|
17286
|
-
return [];
|
|
17287
|
-
}
|
|
17288
|
-
const body = {
|
|
17289
|
-
model: this.model,
|
|
17290
|
-
query,
|
|
17291
|
-
documents: candidates.map((candidate) => candidate.text),
|
|
17292
|
-
top_n: topN ?? candidates.length,
|
|
17293
|
-
return_documents: false
|
|
17294
|
-
};
|
|
17295
|
-
let attempt = 0;
|
|
17296
|
-
while (attempt <= this.maxRetries) {
|
|
17297
|
-
attempt += 1;
|
|
17298
|
-
let response;
|
|
17299
|
-
try {
|
|
17300
|
-
response = await fetch("https://api.jina.ai/v1/rerank", {
|
|
17301
|
-
method: "POST",
|
|
17302
|
-
headers: {
|
|
17303
|
-
"content-type": "application/json",
|
|
17304
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17305
|
-
},
|
|
17306
|
-
body: JSON.stringify(body)
|
|
17307
|
-
});
|
|
17308
|
-
} catch (error) {
|
|
17309
|
-
if (attempt <= this.maxRetries) {
|
|
17310
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17311
|
-
continue;
|
|
17312
|
-
}
|
|
17313
|
-
throw error;
|
|
17314
|
-
}
|
|
17315
|
-
if (!response.ok) {
|
|
17316
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17317
|
-
if (retryable && attempt <= this.maxRetries) {
|
|
17318
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17319
|
-
continue;
|
|
17320
|
-
}
|
|
17321
|
-
const errorBody = await response.text();
|
|
17322
|
-
throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
|
|
17323
|
-
}
|
|
17324
|
-
const payload = await response.json();
|
|
17325
|
-
const rawResults = payload.results ?? payload.data ?? [];
|
|
17326
|
-
if (!Array.isArray(rawResults)) {
|
|
17327
|
-
throw new Error("Invalid Jina rerank response format");
|
|
17328
|
-
}
|
|
17329
|
-
return rawResults.flatMap((item) => {
|
|
17330
|
-
const index = item.index;
|
|
17331
|
-
if (typeof index !== "number" || index < 0 || index >= candidates.length) {
|
|
17332
|
-
return [];
|
|
17333
|
-
}
|
|
17334
|
-
const candidate = candidates[index];
|
|
17335
|
-
if (!candidate) {
|
|
17336
|
-
return [];
|
|
17337
|
-
}
|
|
17338
|
-
const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
|
|
17339
|
-
return [
|
|
17340
|
-
{
|
|
17341
|
-
id: candidate.id,
|
|
17342
|
-
score
|
|
17343
|
-
}
|
|
17344
|
-
];
|
|
17345
|
-
}).sort((a, b) => b.score - a.score);
|
|
17346
|
-
}
|
|
17347
|
-
throw new Error("Jina rerank request failed after retries");
|
|
17348
|
-
}
|
|
17349
|
-
};
|
|
17350
|
-
|
|
17351
|
-
// src/rerank/factory.ts
|
|
17352
|
-
function createReranker(config) {
|
|
17353
|
-
if (!config.rerank.enabled) {
|
|
17354
|
-
return null;
|
|
17355
|
-
}
|
|
17356
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17357
|
-
if (!apiKey) {
|
|
17358
|
-
return null;
|
|
17359
|
-
}
|
|
17360
|
-
return new JinaReranker({
|
|
17361
|
-
apiKey,
|
|
17362
|
-
model: config.rerank.model
|
|
17363
|
-
});
|
|
17364
|
-
}
|
|
17365
17119
|
function ensureStateDirs(cwd, stateDir, scope) {
|
|
17366
17120
|
const statePath = path__default.default.resolve(cwd, stateDir);
|
|
17367
|
-
|
|
17368
|
-
|
|
17369
|
-
return { statePath, pagesPath };
|
|
17121
|
+
fs__default.default.mkdirSync(statePath, { recursive: true });
|
|
17122
|
+
return { statePath };
|
|
17370
17123
|
}
|
|
17371
17124
|
|
|
17372
|
-
// src/vector/
|
|
17373
|
-
|
|
17125
|
+
// src/vector/upstash.ts
|
|
17126
|
+
function chunkIndexName(scope) {
|
|
17127
|
+
return `${scope.projectId}--${scope.scopeName}`;
|
|
17128
|
+
}
|
|
17129
|
+
function pageIndexName(scope) {
|
|
17130
|
+
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
17131
|
+
}
|
|
17132
|
+
var UpstashSearchStore = class {
|
|
17374
17133
|
client;
|
|
17375
|
-
dimension;
|
|
17376
|
-
chunksReady = false;
|
|
17377
|
-
registryReady = false;
|
|
17378
|
-
pagesReady = false;
|
|
17379
17134
|
constructor(opts) {
|
|
17380
17135
|
this.client = opts.client;
|
|
17381
|
-
this.dimension = opts.dimension;
|
|
17382
17136
|
}
|
|
17383
|
-
|
|
17384
|
-
|
|
17385
|
-
await this.client.execute(`
|
|
17386
|
-
CREATE TABLE IF NOT EXISTS registry (
|
|
17387
|
-
scope_key TEXT PRIMARY KEY,
|
|
17388
|
-
project_id TEXT NOT NULL,
|
|
17389
|
-
scope_name TEXT NOT NULL,
|
|
17390
|
-
model_id TEXT NOT NULL,
|
|
17391
|
-
last_indexed_at TEXT NOT NULL,
|
|
17392
|
-
vector_count INTEGER,
|
|
17393
|
-
last_estimate_tokens INTEGER,
|
|
17394
|
-
last_estimate_cost_usd REAL,
|
|
17395
|
-
last_estimate_changed_chunks INTEGER
|
|
17396
|
-
)
|
|
17397
|
-
`);
|
|
17398
|
-
const estimateCols = [
|
|
17399
|
-
{ name: "last_estimate_tokens", def: "INTEGER" },
|
|
17400
|
-
{ name: "last_estimate_cost_usd", def: "REAL" },
|
|
17401
|
-
{ name: "last_estimate_changed_chunks", def: "INTEGER" }
|
|
17402
|
-
];
|
|
17403
|
-
for (const col of estimateCols) {
|
|
17404
|
-
try {
|
|
17405
|
-
await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
|
|
17406
|
-
} catch (error) {
|
|
17407
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17408
|
-
throw error;
|
|
17409
|
-
}
|
|
17410
|
-
}
|
|
17411
|
-
}
|
|
17412
|
-
this.registryReady = true;
|
|
17413
|
-
}
|
|
17414
|
-
async ensureChunks(dim) {
|
|
17415
|
-
if (this.chunksReady) return;
|
|
17416
|
-
const exists = await this.chunksTableExists();
|
|
17417
|
-
if (exists) {
|
|
17418
|
-
const currentDim = await this.getChunksDimension();
|
|
17419
|
-
if (currentDim !== null && currentDim !== dim) {
|
|
17420
|
-
await this.client.batch([
|
|
17421
|
-
"DROP INDEX IF EXISTS idx",
|
|
17422
|
-
"DROP TABLE IF EXISTS chunks"
|
|
17423
|
-
]);
|
|
17424
|
-
}
|
|
17425
|
-
}
|
|
17426
|
-
await this.client.batch([
|
|
17427
|
-
`CREATE TABLE IF NOT EXISTS chunks (
|
|
17428
|
-
id TEXT PRIMARY KEY,
|
|
17429
|
-
project_id TEXT NOT NULL,
|
|
17430
|
-
scope_name TEXT NOT NULL,
|
|
17431
|
-
url TEXT NOT NULL,
|
|
17432
|
-
path TEXT NOT NULL,
|
|
17433
|
-
title TEXT NOT NULL,
|
|
17434
|
-
section_title TEXT NOT NULL DEFAULT '',
|
|
17435
|
-
heading_path TEXT NOT NULL DEFAULT '[]',
|
|
17436
|
-
snippet TEXT NOT NULL DEFAULT '',
|
|
17437
|
-
chunk_text TEXT NOT NULL DEFAULT '',
|
|
17438
|
-
ordinal INTEGER NOT NULL DEFAULT 0,
|
|
17439
|
-
content_hash TEXT NOT NULL DEFAULT '',
|
|
17440
|
-
model_id TEXT NOT NULL DEFAULT '',
|
|
17441
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17442
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17443
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17444
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17445
|
-
description TEXT NOT NULL DEFAULT '',
|
|
17446
|
-
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17447
|
-
embedding F32_BLOB(${dim})
|
|
17448
|
-
)`,
|
|
17449
|
-
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17450
|
-
]);
|
|
17451
|
-
this.chunksReady = true;
|
|
17137
|
+
chunkIndex(scope) {
|
|
17138
|
+
return this.client.index(chunkIndexName(scope));
|
|
17452
17139
|
}
|
|
17453
|
-
|
|
17454
|
-
|
|
17455
|
-
await this.client.execute(`
|
|
17456
|
-
CREATE TABLE IF NOT EXISTS pages (
|
|
17457
|
-
project_id TEXT NOT NULL,
|
|
17458
|
-
scope_name TEXT NOT NULL,
|
|
17459
|
-
url TEXT NOT NULL,
|
|
17460
|
-
title TEXT NOT NULL,
|
|
17461
|
-
markdown TEXT NOT NULL,
|
|
17462
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17463
|
-
route_resolution TEXT NOT NULL DEFAULT 'exact',
|
|
17464
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17465
|
-
outgoing_links INTEGER NOT NULL DEFAULT 0,
|
|
17466
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17467
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17468
|
-
indexed_at TEXT NOT NULL,
|
|
17469
|
-
PRIMARY KEY (project_id, scope_name, url)
|
|
17470
|
-
)
|
|
17471
|
-
`);
|
|
17472
|
-
this.pagesReady = true;
|
|
17140
|
+
pageIndex(scope) {
|
|
17141
|
+
return this.client.index(pageIndexName(scope));
|
|
17473
17142
|
}
|
|
17474
|
-
async
|
|
17475
|
-
|
|
17476
|
-
|
|
17477
|
-
return true;
|
|
17478
|
-
} catch (error) {
|
|
17479
|
-
if (error instanceof Error && error.message.includes("no such table")) {
|
|
17480
|
-
return false;
|
|
17481
|
-
}
|
|
17482
|
-
throw error;
|
|
17483
|
-
}
|
|
17484
|
-
}
|
|
17485
|
-
/**
|
|
17486
|
-
* Read the current F32_BLOB dimension from the chunks table schema.
|
|
17487
|
-
* Returns null if the table doesn't exist or the dimension can't be parsed.
|
|
17488
|
-
*/
|
|
17489
|
-
async getChunksDimension() {
|
|
17490
|
-
try {
|
|
17491
|
-
const rs = await this.client.execute(
|
|
17492
|
-
"SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
|
|
17493
|
-
);
|
|
17494
|
-
if (rs.rows.length === 0) return null;
|
|
17495
|
-
const sql = rs.rows[0].sql;
|
|
17496
|
-
const match = sql.match(/F32_BLOB\((\d+)\)/i);
|
|
17497
|
-
return match ? parseInt(match[1], 10) : null;
|
|
17498
|
-
} catch {
|
|
17499
|
-
return null;
|
|
17500
|
-
}
|
|
17501
|
-
}
|
|
17502
|
-
/**
|
|
17503
|
-
* Drop all SearchSocket tables (chunks, registry, pages) and their indexes.
|
|
17504
|
-
* Used by `clean --remote` for a full reset.
|
|
17505
|
-
*/
|
|
17506
|
-
async dropAllTables() {
|
|
17507
|
-
await this.client.batch([
|
|
17508
|
-
"DROP INDEX IF EXISTS idx",
|
|
17509
|
-
"DROP TABLE IF EXISTS chunks",
|
|
17510
|
-
"DROP TABLE IF EXISTS registry",
|
|
17511
|
-
"DROP TABLE IF EXISTS pages"
|
|
17512
|
-
]);
|
|
17513
|
-
this.chunksReady = false;
|
|
17514
|
-
this.registryReady = false;
|
|
17515
|
-
this.pagesReady = false;
|
|
17516
|
-
}
|
|
17517
|
-
async upsert(records, _scope) {
|
|
17518
|
-
if (records.length === 0) return;
|
|
17519
|
-
const dim = this.dimension ?? records[0].vector.length;
|
|
17520
|
-
await this.ensureChunks(dim);
|
|
17143
|
+
async upsertChunks(chunks, scope) {
|
|
17144
|
+
if (chunks.length === 0) return;
|
|
17145
|
+
const index = this.chunkIndex(scope);
|
|
17521
17146
|
const BATCH_SIZE = 100;
|
|
17522
|
-
for (let i = 0; i <
|
|
17523
|
-
const batch =
|
|
17524
|
-
|
|
17525
|
-
sql: `INSERT OR REPLACE INTO chunks
|
|
17526
|
-
(id, project_id, scope_name, url, path, title, section_title,
|
|
17527
|
-
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17528
|
-
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17529
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17530
|
-
args: [
|
|
17531
|
-
r.id,
|
|
17532
|
-
r.metadata.projectId,
|
|
17533
|
-
r.metadata.scopeName,
|
|
17534
|
-
r.metadata.url,
|
|
17535
|
-
r.metadata.path,
|
|
17536
|
-
r.metadata.title,
|
|
17537
|
-
r.metadata.sectionTitle,
|
|
17538
|
-
JSON.stringify(r.metadata.headingPath),
|
|
17539
|
-
r.metadata.snippet,
|
|
17540
|
-
r.metadata.chunkText,
|
|
17541
|
-
r.metadata.ordinal,
|
|
17542
|
-
r.metadata.contentHash,
|
|
17543
|
-
r.metadata.modelId,
|
|
17544
|
-
r.metadata.depth,
|
|
17545
|
-
r.metadata.incomingLinks,
|
|
17546
|
-
r.metadata.routeFile,
|
|
17547
|
-
JSON.stringify(r.metadata.tags),
|
|
17548
|
-
r.metadata.description ?? "",
|
|
17549
|
-
JSON.stringify(r.metadata.keywords ?? []),
|
|
17550
|
-
JSON.stringify(r.vector)
|
|
17551
|
-
]
|
|
17552
|
-
}));
|
|
17553
|
-
await this.client.batch(stmts);
|
|
17147
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
17148
|
+
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
17149
|
+
await index.upsert(batch);
|
|
17554
17150
|
}
|
|
17555
17151
|
}
|
|
17556
|
-
async query
|
|
17557
|
-
const
|
|
17558
|
-
await
|
|
17559
|
-
|
|
17560
|
-
|
|
17561
|
-
|
|
17562
|
-
|
|
17563
|
-
|
|
17564
|
-
|
|
17565
|
-
c.description, c.keywords,
|
|
17566
|
-
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17567
|
-
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17568
|
-
JOIN chunks AS c ON c.rowid = v.id`,
|
|
17569
|
-
args: [queryJson, queryJson, opts.topK]
|
|
17152
|
+
async search(query, opts, scope) {
|
|
17153
|
+
const index = this.chunkIndex(scope);
|
|
17154
|
+
const results = await index.search({
|
|
17155
|
+
query,
|
|
17156
|
+
limit: opts.limit,
|
|
17157
|
+
semanticWeight: opts.semanticWeight,
|
|
17158
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17159
|
+
reranking: opts.reranking,
|
|
17160
|
+
filter: opts.filter
|
|
17570
17161
|
});
|
|
17571
|
-
|
|
17572
|
-
|
|
17573
|
-
|
|
17574
|
-
|
|
17575
|
-
|
|
17576
|
-
|
|
17162
|
+
return results.map((doc) => ({
|
|
17163
|
+
id: doc.id,
|
|
17164
|
+
score: doc.score,
|
|
17165
|
+
metadata: {
|
|
17166
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
17167
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
17168
|
+
url: doc.content.url,
|
|
17169
|
+
path: doc.metadata?.path ?? "",
|
|
17170
|
+
title: doc.content.title,
|
|
17171
|
+
sectionTitle: doc.content.sectionTitle,
|
|
17172
|
+
headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
|
|
17173
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
17174
|
+
chunkText: doc.content.text,
|
|
17175
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
17176
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
17177
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17178
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17179
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
17180
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17181
|
+
description: doc.metadata?.description || void 0,
|
|
17182
|
+
keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
|
|
17577
17183
|
}
|
|
17578
|
-
|
|
17579
|
-
|
|
17580
|
-
|
|
17581
|
-
|
|
17582
|
-
|
|
17583
|
-
|
|
17584
|
-
|
|
17585
|
-
|
|
17586
|
-
|
|
17587
|
-
|
|
17588
|
-
|
|
17589
|
-
|
|
17590
|
-
|
|
17591
|
-
continue;
|
|
17592
|
-
}
|
|
17593
|
-
}
|
|
17594
|
-
const distance = row.distance;
|
|
17595
|
-
const score = 1 - distance;
|
|
17596
|
-
const description = row.description || void 0;
|
|
17597
|
-
const keywords = (() => {
|
|
17598
|
-
const raw = row.keywords || "[]";
|
|
17599
|
-
const parsed = JSON.parse(raw);
|
|
17600
|
-
return parsed.length > 0 ? parsed : void 0;
|
|
17601
|
-
})();
|
|
17602
|
-
hits.push({
|
|
17603
|
-
id: row.id,
|
|
17604
|
-
score,
|
|
17605
|
-
metadata: {
|
|
17606
|
-
projectId,
|
|
17607
|
-
scopeName,
|
|
17608
|
-
url: row.url,
|
|
17609
|
-
path: rowPath,
|
|
17610
|
-
title: row.title,
|
|
17611
|
-
sectionTitle: row.section_title,
|
|
17612
|
-
headingPath: JSON.parse(row.heading_path || "[]"),
|
|
17613
|
-
snippet: row.snippet,
|
|
17614
|
-
chunkText: row.chunk_text || "",
|
|
17615
|
-
ordinal: row.ordinal || 0,
|
|
17616
|
-
contentHash: row.content_hash,
|
|
17617
|
-
modelId: row.model_id,
|
|
17618
|
-
depth: row.depth,
|
|
17619
|
-
incomingLinks: row.incoming_links,
|
|
17620
|
-
routeFile: row.route_file,
|
|
17621
|
-
tags,
|
|
17622
|
-
description,
|
|
17623
|
-
keywords
|
|
17624
|
-
}
|
|
17184
|
+
}));
|
|
17185
|
+
}
|
|
17186
|
+
async searchPages(query, opts, scope) {
|
|
17187
|
+
const index = this.pageIndex(scope);
|
|
17188
|
+
let results;
|
|
17189
|
+
try {
|
|
17190
|
+
results = await index.search({
|
|
17191
|
+
query,
|
|
17192
|
+
limit: opts.limit,
|
|
17193
|
+
semanticWeight: opts.semanticWeight,
|
|
17194
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17195
|
+
reranking: true,
|
|
17196
|
+
filter: opts.filter
|
|
17625
17197
|
});
|
|
17198
|
+
} catch {
|
|
17199
|
+
return [];
|
|
17626
17200
|
}
|
|
17627
|
-
|
|
17628
|
-
|
|
17201
|
+
return results.map((doc) => ({
|
|
17202
|
+
id: doc.id,
|
|
17203
|
+
score: doc.score,
|
|
17204
|
+
title: doc.content.title,
|
|
17205
|
+
url: doc.content.url,
|
|
17206
|
+
description: doc.content.description ?? "",
|
|
17207
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17208
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17209
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17210
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
17211
|
+
}));
|
|
17629
17212
|
}
|
|
17630
17213
|
async deleteByIds(ids, scope) {
|
|
17631
17214
|
if (ids.length === 0) return;
|
|
17215
|
+
const index = this.chunkIndex(scope);
|
|
17632
17216
|
const BATCH_SIZE = 500;
|
|
17633
17217
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
17634
17218
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
17635
|
-
|
|
17636
|
-
await this.client.execute({
|
|
17637
|
-
sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
|
|
17638
|
-
args: [scope.projectId, scope.scopeName, ...batch]
|
|
17639
|
-
});
|
|
17219
|
+
await index.delete(batch);
|
|
17640
17220
|
}
|
|
17641
17221
|
}
|
|
17642
17222
|
async deleteScope(scope) {
|
|
17643
|
-
await this.ensureRegistry();
|
|
17644
17223
|
try {
|
|
17645
|
-
|
|
17646
|
-
|
|
17647
|
-
|
|
17648
|
-
});
|
|
17649
|
-
} catch (error) {
|
|
17650
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17651
|
-
throw error;
|
|
17652
|
-
}
|
|
17224
|
+
const chunkIdx = this.chunkIndex(scope);
|
|
17225
|
+
await chunkIdx.deleteIndex();
|
|
17226
|
+
} catch {
|
|
17653
17227
|
}
|
|
17654
17228
|
try {
|
|
17655
|
-
|
|
17656
|
-
|
|
17657
|
-
|
|
17658
|
-
});
|
|
17659
|
-
} catch (error) {
|
|
17660
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17661
|
-
throw error;
|
|
17662
|
-
}
|
|
17229
|
+
const pageIdx = this.pageIndex(scope);
|
|
17230
|
+
await pageIdx.deleteIndex();
|
|
17231
|
+
} catch {
|
|
17663
17232
|
}
|
|
17664
|
-
await this.client.execute({
|
|
17665
|
-
sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17666
|
-
args: [scope.projectId, scope.scopeName]
|
|
17667
|
-
});
|
|
17668
|
-
}
|
|
17669
|
-
async listScopes(scopeProjectId) {
|
|
17670
|
-
await this.ensureRegistry();
|
|
17671
|
-
const rs = await this.client.execute({
|
|
17672
|
-
sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
|
|
17673
|
-
last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
|
|
17674
|
-
FROM registry WHERE project_id = ?`,
|
|
17675
|
-
args: [scopeProjectId]
|
|
17676
|
-
});
|
|
17677
|
-
return rs.rows.map((row) => ({
|
|
17678
|
-
projectId: row.project_id,
|
|
17679
|
-
scopeName: row.scope_name,
|
|
17680
|
-
modelId: row.model_id,
|
|
17681
|
-
lastIndexedAt: row.last_indexed_at,
|
|
17682
|
-
vectorCount: row.vector_count,
|
|
17683
|
-
lastEstimateTokens: row.last_estimate_tokens,
|
|
17684
|
-
lastEstimateCostUSD: row.last_estimate_cost_usd,
|
|
17685
|
-
lastEstimateChangedChunks: row.last_estimate_changed_chunks
|
|
17686
|
-
}));
|
|
17687
17233
|
}
|
|
17688
|
-
async
|
|
17689
|
-
await this.
|
|
17690
|
-
const
|
|
17691
|
-
|
|
17692
|
-
|
|
17693
|
-
|
|
17694
|
-
|
|
17695
|
-
|
|
17696
|
-
|
|
17697
|
-
|
|
17698
|
-
|
|
17699
|
-
|
|
17700
|
-
|
|
17701
|
-
|
|
17702
|
-
|
|
17703
|
-
|
|
17704
|
-
|
|
17705
|
-
|
|
17706
|
-
|
|
17707
|
-
|
|
17234
|
+
async listScopes(projectId) {
|
|
17235
|
+
const allIndexes = await this.client.listIndexes();
|
|
17236
|
+
const prefix = `${projectId}--`;
|
|
17237
|
+
const scopeNames = /* @__PURE__ */ new Set();
|
|
17238
|
+
for (const name of allIndexes) {
|
|
17239
|
+
if (name.startsWith(prefix) && !name.endsWith("--pages")) {
|
|
17240
|
+
const scopeName = name.slice(prefix.length);
|
|
17241
|
+
scopeNames.add(scopeName);
|
|
17242
|
+
}
|
|
17243
|
+
}
|
|
17244
|
+
const scopes = [];
|
|
17245
|
+
for (const scopeName of scopeNames) {
|
|
17246
|
+
const scope = {
|
|
17247
|
+
projectId,
|
|
17248
|
+
scopeName,
|
|
17249
|
+
scopeId: `${projectId}:${scopeName}`
|
|
17250
|
+
};
|
|
17251
|
+
try {
|
|
17252
|
+
const info = await this.chunkIndex(scope).info();
|
|
17253
|
+
scopes.push({
|
|
17254
|
+
projectId,
|
|
17255
|
+
scopeName,
|
|
17256
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
17257
|
+
documentCount: info.documentCount
|
|
17258
|
+
});
|
|
17259
|
+
} catch {
|
|
17260
|
+
scopes.push({
|
|
17261
|
+
projectId,
|
|
17262
|
+
scopeName,
|
|
17263
|
+
lastIndexedAt: "unknown",
|
|
17264
|
+
documentCount: 0
|
|
17265
|
+
});
|
|
17266
|
+
}
|
|
17267
|
+
}
|
|
17268
|
+
return scopes;
|
|
17708
17269
|
}
|
|
17709
17270
|
async getContentHashes(scope) {
|
|
17710
|
-
const exists = await this.chunksTableExists();
|
|
17711
|
-
if (!exists) return /* @__PURE__ */ new Map();
|
|
17712
|
-
const rs = await this.client.execute({
|
|
17713
|
-
sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
|
|
17714
|
-
args: [scope.projectId, scope.scopeName]
|
|
17715
|
-
});
|
|
17716
17271
|
const map = /* @__PURE__ */ new Map();
|
|
17717
|
-
|
|
17718
|
-
|
|
17272
|
+
const index = this.chunkIndex(scope);
|
|
17273
|
+
let cursor = "0";
|
|
17274
|
+
try {
|
|
17275
|
+
for (; ; ) {
|
|
17276
|
+
const result = await index.range({ cursor, limit: 100 });
|
|
17277
|
+
for (const doc of result.documents) {
|
|
17278
|
+
if (doc.metadata?.contentHash) {
|
|
17279
|
+
map.set(doc.id, doc.metadata.contentHash);
|
|
17280
|
+
}
|
|
17281
|
+
}
|
|
17282
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17283
|
+
cursor = result.nextCursor;
|
|
17284
|
+
}
|
|
17285
|
+
} catch {
|
|
17719
17286
|
}
|
|
17720
17287
|
return map;
|
|
17721
17288
|
}
|
|
17722
17289
|
async upsertPages(pages, scope) {
|
|
17723
17290
|
if (pages.length === 0) return;
|
|
17724
|
-
|
|
17725
|
-
|
|
17726
|
-
if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
|
|
17727
|
-
throw new Error(
|
|
17728
|
-
`Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
|
|
17729
|
-
);
|
|
17730
|
-
}
|
|
17731
|
-
}
|
|
17732
|
-
const BATCH_SIZE = 100;
|
|
17291
|
+
const index = this.pageIndex(scope);
|
|
17292
|
+
const BATCH_SIZE = 50;
|
|
17733
17293
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
17734
17294
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
17735
|
-
const
|
|
17736
|
-
|
|
17737
|
-
|
|
17738
|
-
|
|
17739
|
-
|
|
17740
|
-
|
|
17741
|
-
p.
|
|
17742
|
-
p.
|
|
17743
|
-
p.
|
|
17744
|
-
p.
|
|
17745
|
-
|
|
17746
|
-
|
|
17747
|
-
p.
|
|
17748
|
-
p.
|
|
17749
|
-
p.
|
|
17750
|
-
p.
|
|
17751
|
-
|
|
17752
|
-
p.
|
|
17753
|
-
|
|
17295
|
+
const docs = batch.map((p) => ({
|
|
17296
|
+
id: p.url,
|
|
17297
|
+
content: {
|
|
17298
|
+
title: p.title,
|
|
17299
|
+
url: p.url,
|
|
17300
|
+
type: "page",
|
|
17301
|
+
description: p.description ?? "",
|
|
17302
|
+
keywords: (p.keywords ?? []).join(","),
|
|
17303
|
+
summary: p.summary ?? "",
|
|
17304
|
+
tags: p.tags.join(",")
|
|
17305
|
+
},
|
|
17306
|
+
metadata: {
|
|
17307
|
+
markdown: p.markdown,
|
|
17308
|
+
projectId: p.projectId,
|
|
17309
|
+
scopeName: p.scopeName,
|
|
17310
|
+
routeFile: p.routeFile,
|
|
17311
|
+
routeResolution: p.routeResolution,
|
|
17312
|
+
incomingLinks: p.incomingLinks,
|
|
17313
|
+
outgoingLinks: p.outgoingLinks,
|
|
17314
|
+
depth: p.depth,
|
|
17315
|
+
indexedAt: p.indexedAt
|
|
17316
|
+
}
|
|
17754
17317
|
}));
|
|
17755
|
-
await
|
|
17318
|
+
await index.upsert(docs);
|
|
17756
17319
|
}
|
|
17757
17320
|
}
|
|
17758
17321
|
async getPage(url, scope) {
|
|
17759
|
-
|
|
17760
|
-
|
|
17761
|
-
|
|
17762
|
-
|
|
17763
|
-
|
|
17764
|
-
|
|
17765
|
-
|
|
17766
|
-
|
|
17767
|
-
|
|
17768
|
-
|
|
17769
|
-
|
|
17770
|
-
|
|
17771
|
-
|
|
17772
|
-
|
|
17773
|
-
|
|
17774
|
-
|
|
17775
|
-
|
|
17776
|
-
|
|
17777
|
-
|
|
17778
|
-
|
|
17779
|
-
|
|
17322
|
+
const index = this.pageIndex(scope);
|
|
17323
|
+
try {
|
|
17324
|
+
const results = await index.fetch([url]);
|
|
17325
|
+
const doc = results[0];
|
|
17326
|
+
if (!doc) return null;
|
|
17327
|
+
return {
|
|
17328
|
+
url: doc.content.url,
|
|
17329
|
+
title: doc.content.title,
|
|
17330
|
+
markdown: doc.metadata.markdown,
|
|
17331
|
+
projectId: doc.metadata.projectId,
|
|
17332
|
+
scopeName: doc.metadata.scopeName,
|
|
17333
|
+
routeFile: doc.metadata.routeFile,
|
|
17334
|
+
routeResolution: doc.metadata.routeResolution,
|
|
17335
|
+
incomingLinks: doc.metadata.incomingLinks,
|
|
17336
|
+
outgoingLinks: doc.metadata.outgoingLinks,
|
|
17337
|
+
depth: doc.metadata.depth,
|
|
17338
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17339
|
+
indexedAt: doc.metadata.indexedAt,
|
|
17340
|
+
summary: doc.content.summary || void 0,
|
|
17341
|
+
description: doc.content.description || void 0,
|
|
17342
|
+
keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
|
|
17343
|
+
};
|
|
17344
|
+
} catch {
|
|
17345
|
+
return null;
|
|
17346
|
+
}
|
|
17780
17347
|
}
|
|
17781
17348
|
async deletePages(scope) {
|
|
17782
|
-
|
|
17783
|
-
|
|
17784
|
-
|
|
17785
|
-
|
|
17786
|
-
}
|
|
17787
|
-
}
|
|
17788
|
-
async getScopeModelId(scope) {
|
|
17789
|
-
await this.ensureRegistry();
|
|
17790
|
-
const rs = await this.client.execute({
|
|
17791
|
-
sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17792
|
-
args: [scope.projectId, scope.scopeName]
|
|
17793
|
-
});
|
|
17794
|
-
if (rs.rows.length === 0) return null;
|
|
17795
|
-
return rs.rows[0].model_id;
|
|
17349
|
+
try {
|
|
17350
|
+
const index = this.pageIndex(scope);
|
|
17351
|
+
await index.reset();
|
|
17352
|
+
} catch {
|
|
17353
|
+
}
|
|
17796
17354
|
}
|
|
17797
17355
|
async health() {
|
|
17798
17356
|
try {
|
|
17799
|
-
await this.client.
|
|
17357
|
+
await this.client.info();
|
|
17800
17358
|
return { ok: true };
|
|
17801
17359
|
} catch (error) {
|
|
17802
17360
|
return {
|
|
@@ -17805,40 +17363,34 @@ var TursoVectorStore = class {
|
|
|
17805
17363
|
};
|
|
17806
17364
|
}
|
|
17807
17365
|
}
|
|
17366
|
+
async dropAllIndexes(projectId) {
|
|
17367
|
+
const allIndexes = await this.client.listIndexes();
|
|
17368
|
+
const prefix = `${projectId}--`;
|
|
17369
|
+
for (const name of allIndexes) {
|
|
17370
|
+
if (name.startsWith(prefix)) {
|
|
17371
|
+
try {
|
|
17372
|
+
const index = this.client.index(name);
|
|
17373
|
+
await index.deleteIndex();
|
|
17374
|
+
} catch {
|
|
17375
|
+
}
|
|
17376
|
+
}
|
|
17377
|
+
}
|
|
17378
|
+
}
|
|
17808
17379
|
};
|
|
17809
17380
|
|
|
17810
17381
|
// src/vector/factory.ts
|
|
17811
|
-
async function
|
|
17812
|
-
const
|
|
17813
|
-
const
|
|
17814
|
-
if (
|
|
17815
|
-
const { createClient: createClient2 } = await import('@libsql/client/http');
|
|
17816
|
-
const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
|
|
17817
|
-
const client2 = createClient2({
|
|
17818
|
-
url: remoteUrl,
|
|
17819
|
-
authToken
|
|
17820
|
-
});
|
|
17821
|
-
return new TursoVectorStore({
|
|
17822
|
-
client: client2,
|
|
17823
|
-
dimension: config.vector.dimension
|
|
17824
|
-
});
|
|
17825
|
-
}
|
|
17826
|
-
if (isServerless()) {
|
|
17382
|
+
async function createUpstashStore(config) {
|
|
17383
|
+
const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
|
|
17384
|
+
const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
|
|
17385
|
+
if (!url || !token) {
|
|
17827
17386
|
throw new SearchSocketError(
|
|
17828
17387
|
"VECTOR_BACKEND_UNAVAILABLE",
|
|
17829
|
-
`
|
|
17388
|
+
`Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
|
|
17830
17389
|
);
|
|
17831
17390
|
}
|
|
17832
|
-
const {
|
|
17833
|
-
const
|
|
17834
|
-
|
|
17835
|
-
const client = createClient({
|
|
17836
|
-
url: `file:${localPath}`
|
|
17837
|
-
});
|
|
17838
|
-
return new TursoVectorStore({
|
|
17839
|
-
client,
|
|
17840
|
-
dimension: config.vector.dimension
|
|
17841
|
-
});
|
|
17391
|
+
const { Search } = await import('@upstash/search');
|
|
17392
|
+
const client = new Search({ url, token });
|
|
17393
|
+
return new UpstashSearchStore({ client });
|
|
17842
17394
|
}
|
|
17843
17395
|
function sha1(input) {
|
|
17844
17396
|
return crypto.createHash("sha1").update(input).digest("hex");
|
|
@@ -17857,13 +17409,6 @@ function normalizeUrlPath(rawPath) {
|
|
|
17857
17409
|
}
|
|
17858
17410
|
return out;
|
|
17859
17411
|
}
|
|
17860
|
-
function urlPathToMirrorRelative(urlPath) {
|
|
17861
|
-
const normalized = normalizeUrlPath(urlPath);
|
|
17862
|
-
if (normalized === "/") {
|
|
17863
|
-
return "index.md";
|
|
17864
|
-
}
|
|
17865
|
-
return `${normalized.slice(1)}.md`;
|
|
17866
|
-
}
|
|
17867
17412
|
function staticHtmlFileToUrl(filePath, rootDir) {
|
|
17868
17413
|
const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
|
|
17869
17414
|
if (relative === "index.html") {
|
|
@@ -18138,7 +17683,7 @@ function buildEmbeddingText(chunk, prependTitle) {
|
|
|
18138
17683
|
|
|
18139
17684
|
${chunk.chunkText}`;
|
|
18140
17685
|
}
|
|
18141
|
-
function
|
|
17686
|
+
function chunkPage(page, config, scope) {
|
|
18142
17687
|
const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
|
|
18143
17688
|
const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
|
|
18144
17689
|
const chunks = [];
|
|
@@ -19169,53 +18714,6 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
19169
18714
|
weight: mdWeight
|
|
19170
18715
|
};
|
|
19171
18716
|
}
|
|
19172
|
-
function yamlString(value) {
|
|
19173
|
-
return JSON.stringify(value);
|
|
19174
|
-
}
|
|
19175
|
-
function yamlArray(values) {
|
|
19176
|
-
return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
|
|
19177
|
-
}
|
|
19178
|
-
function buildMirrorMarkdown(page) {
|
|
19179
|
-
const frontmatterLines = [
|
|
19180
|
-
"---",
|
|
19181
|
-
`url: ${yamlString(page.url)}`,
|
|
19182
|
-
`title: ${yamlString(page.title)}`,
|
|
19183
|
-
`scope: ${yamlString(page.scope)}`,
|
|
19184
|
-
`routeFile: ${yamlString(page.routeFile)}`,
|
|
19185
|
-
`routeResolution: ${yamlString(page.routeResolution)}`,
|
|
19186
|
-
`generatedAt: ${yamlString(page.generatedAt)}`,
|
|
19187
|
-
`incomingLinks: ${page.incomingLinks}`,
|
|
19188
|
-
`outgoingLinks: ${page.outgoingLinks}`,
|
|
19189
|
-
`depth: ${page.depth}`,
|
|
19190
|
-
`tags: ${yamlArray(page.tags)}`,
|
|
19191
|
-
"---",
|
|
19192
|
-
""
|
|
19193
|
-
];
|
|
19194
|
-
return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
|
|
19195
|
-
}
|
|
19196
|
-
function stripGeneratedAt(content) {
|
|
19197
|
-
return content.replace(/^generatedAt: .*$/m, "");
|
|
19198
|
-
}
|
|
19199
|
-
async function writeMirrorPage(statePath, scope, page) {
|
|
19200
|
-
const relative = urlPathToMirrorRelative(page.url);
|
|
19201
|
-
const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
|
|
19202
|
-
await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
|
|
19203
|
-
const newContent = buildMirrorMarkdown(page);
|
|
19204
|
-
try {
|
|
19205
|
-
const existing = await fs4__default.default.readFile(outputPath, "utf8");
|
|
19206
|
-
if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
|
|
19207
|
-
return outputPath;
|
|
19208
|
-
}
|
|
19209
|
-
} catch {
|
|
19210
|
-
}
|
|
19211
|
-
await fs4__default.default.writeFile(outputPath, newContent, "utf8");
|
|
19212
|
-
return outputPath;
|
|
19213
|
-
}
|
|
19214
|
-
async function cleanMirrorForScope(statePath, scope) {
|
|
19215
|
-
const target = path__default.default.join(statePath, "pages", scope.scopeName);
|
|
19216
|
-
await fs4__default.default.rm(target, { recursive: true, force: true });
|
|
19217
|
-
await fs4__default.default.mkdir(target, { recursive: true });
|
|
19218
|
-
}
|
|
19219
18717
|
function segmentToRegex(segment) {
|
|
19220
18718
|
if (segment.startsWith("(") && segment.endsWith(")")) {
|
|
19221
18719
|
return { regex: "", score: 0 };
|
|
@@ -19408,7 +18906,7 @@ async function parseManifest(cwd, outputDir) {
|
|
|
19408
18906
|
const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
|
|
19409
18907
|
let content;
|
|
19410
18908
|
try {
|
|
19411
|
-
content = await
|
|
18909
|
+
content = await fs3__default.default.readFile(manifestPath, "utf8");
|
|
19412
18910
|
} catch {
|
|
19413
18911
|
throw new SearchSocketError(
|
|
19414
18912
|
"BUILD_MANIFEST_NOT_FOUND",
|
|
@@ -19581,7 +19079,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
|
|
|
19581
19079
|
const visited = /* @__PURE__ */ new Set();
|
|
19582
19080
|
const pages = [];
|
|
19583
19081
|
const queue = [];
|
|
19584
|
-
const limit =
|
|
19082
|
+
const limit = pLimit__default.default(8);
|
|
19585
19083
|
for (const seed of seedUrls) {
|
|
19586
19084
|
const normalized = normalizeUrlPath(seed);
|
|
19587
19085
|
if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
|
|
@@ -19663,7 +19161,7 @@ async function loadBuildPages(cwd, config, maxPages) {
|
|
|
19663
19161
|
const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
|
|
19664
19162
|
const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
|
|
19665
19163
|
try {
|
|
19666
|
-
const concurrencyLimit =
|
|
19164
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
19667
19165
|
const results = await Promise.allSettled(
|
|
19668
19166
|
selected.map(
|
|
19669
19167
|
(route) => concurrencyLimit(async () => {
|
|
@@ -19737,7 +19235,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
19737
19235
|
const selected = typeof limit === "number" ? files.slice(0, limit) : files;
|
|
19738
19236
|
const pages = [];
|
|
19739
19237
|
for (const filePath of selected) {
|
|
19740
|
-
const raw = await
|
|
19238
|
+
const raw = await fs3__default.default.readFile(filePath, "utf8");
|
|
19741
19239
|
const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
|
|
19742
19240
|
pages.push({
|
|
19743
19241
|
url: filePathToUrl(filePath, baseDir),
|
|
@@ -19832,7 +19330,7 @@ async function loadCrawledPages(config, maxPages) {
|
|
|
19832
19330
|
const routes = await resolveRoutes(config);
|
|
19833
19331
|
const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
|
|
19834
19332
|
const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
|
|
19835
|
-
const concurrencyLimit =
|
|
19333
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
19836
19334
|
const results = await Promise.allSettled(
|
|
19837
19335
|
selected.map(
|
|
19838
19336
|
(route) => concurrencyLimit(async () => {
|
|
@@ -19873,7 +19371,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
|
|
|
19873
19371
|
const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
|
|
19874
19372
|
const pages = [];
|
|
19875
19373
|
for (const filePath of selected) {
|
|
19876
|
-
const html = await
|
|
19374
|
+
const html = await fs3__default.default.readFile(filePath, "utf8");
|
|
19877
19375
|
pages.push({
|
|
19878
19376
|
url: staticHtmlFileToUrl(filePath, outputDir),
|
|
19879
19377
|
html,
|
|
@@ -19936,7 +19434,7 @@ function isBlockedByRobots(urlPath, rules3) {
|
|
|
19936
19434
|
}
|
|
19937
19435
|
async function loadRobotsTxtFromDir(dir) {
|
|
19938
19436
|
try {
|
|
19939
|
-
const content = await
|
|
19437
|
+
const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
|
|
19940
19438
|
return parseRobotsTxt(content);
|
|
19941
19439
|
} catch {
|
|
19942
19440
|
return null;
|
|
@@ -19961,7 +19459,12 @@ function nonNegativeOrZero(value) {
|
|
|
19961
19459
|
}
|
|
19962
19460
|
return Math.max(0, value);
|
|
19963
19461
|
}
|
|
19964
|
-
function
|
|
19462
|
+
function normalizeForTitleMatch(text) {
|
|
19463
|
+
return text.toLowerCase().replace(/[^a-z0-9\s]/g, "").replace(/\s+/g, " ").trim();
|
|
19464
|
+
}
|
|
19465
|
+
function rankHits(hits, config, query) {
|
|
19466
|
+
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
19467
|
+
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
19965
19468
|
return hits.map((hit) => {
|
|
19966
19469
|
let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
19967
19470
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
@@ -19972,6 +19475,12 @@ function rankHits(hits, config) {
|
|
|
19972
19475
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
19973
19476
|
score += depthBoost * config.ranking.weights.depth;
|
|
19974
19477
|
}
|
|
19478
|
+
if (normalizedQuery && titleMatchWeight > 0) {
|
|
19479
|
+
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
19480
|
+
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
19481
|
+
score += titleMatchWeight;
|
|
19482
|
+
}
|
|
19483
|
+
}
|
|
19975
19484
|
return {
|
|
19976
19485
|
hit,
|
|
19977
19486
|
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
@@ -19981,6 +19490,30 @@ function rankHits(hits, config) {
|
|
|
19981
19490
|
return Number.isNaN(delta) ? 0 : delta;
|
|
19982
19491
|
});
|
|
19983
19492
|
}
|
|
19493
|
+
function trimByScoreGap(results, config) {
|
|
19494
|
+
if (results.length === 0) return results;
|
|
19495
|
+
const threshold = config.ranking.scoreGapThreshold;
|
|
19496
|
+
const minScore = config.ranking.minScore;
|
|
19497
|
+
if (minScore > 0 && results.length > 0) {
|
|
19498
|
+
const sortedScores = results.map((r) => r.pageScore).sort((a, b) => a - b);
|
|
19499
|
+
const mid = Math.floor(sortedScores.length / 2);
|
|
19500
|
+
const median = sortedScores.length % 2 === 0 ? (sortedScores[mid - 1] + sortedScores[mid]) / 2 : sortedScores[mid];
|
|
19501
|
+
if (median < minScore) return [];
|
|
19502
|
+
}
|
|
19503
|
+
if (threshold > 0 && results.length > 1) {
|
|
19504
|
+
for (let i = 1; i < results.length; i++) {
|
|
19505
|
+
const prev = results[i - 1].pageScore;
|
|
19506
|
+
const current = results[i].pageScore;
|
|
19507
|
+
if (prev > 0) {
|
|
19508
|
+
const gap = (prev - current) / prev;
|
|
19509
|
+
if (gap >= threshold) {
|
|
19510
|
+
return results.slice(0, i);
|
|
19511
|
+
}
|
|
19512
|
+
}
|
|
19513
|
+
}
|
|
19514
|
+
}
|
|
19515
|
+
return results;
|
|
19516
|
+
}
|
|
19984
19517
|
function findPageWeight(url, pageWeights) {
|
|
19985
19518
|
let bestPattern = "";
|
|
19986
19519
|
let bestWeight = 1;
|
|
@@ -20035,6 +19568,61 @@ function aggregateByPage(ranked, config) {
|
|
|
20035
19568
|
return Number.isNaN(delta) ? 0 : delta;
|
|
20036
19569
|
});
|
|
20037
19570
|
}
|
|
19571
|
+
function mergePageAndChunkResults(pageHits, rankedChunks, config) {
|
|
19572
|
+
if (pageHits.length === 0) return rankedChunks;
|
|
19573
|
+
const w = config.search.pageSearchWeight;
|
|
19574
|
+
const pageScoreMap = /* @__PURE__ */ new Map();
|
|
19575
|
+
for (const ph of pageHits) {
|
|
19576
|
+
pageScoreMap.set(ph.url, ph);
|
|
19577
|
+
}
|
|
19578
|
+
const pagesWithChunks = /* @__PURE__ */ new Set();
|
|
19579
|
+
const merged = rankedChunks.map((ranked) => {
|
|
19580
|
+
const url = ranked.hit.metadata.url;
|
|
19581
|
+
const pageHit = pageScoreMap.get(url);
|
|
19582
|
+
if (pageHit) {
|
|
19583
|
+
pagesWithChunks.add(url);
|
|
19584
|
+
const blended = (1 - w) * ranked.finalScore + w * pageHit.score;
|
|
19585
|
+
return {
|
|
19586
|
+
hit: ranked.hit,
|
|
19587
|
+
finalScore: Number.isFinite(blended) ? blended : ranked.finalScore
|
|
19588
|
+
};
|
|
19589
|
+
}
|
|
19590
|
+
return ranked;
|
|
19591
|
+
});
|
|
19592
|
+
for (const [url, pageHit] of pageScoreMap) {
|
|
19593
|
+
if (pagesWithChunks.has(url)) continue;
|
|
19594
|
+
const syntheticScore = pageHit.score * w;
|
|
19595
|
+
const syntheticHit = {
|
|
19596
|
+
id: `page:${url}`,
|
|
19597
|
+
score: pageHit.score,
|
|
19598
|
+
metadata: {
|
|
19599
|
+
projectId: "",
|
|
19600
|
+
scopeName: "",
|
|
19601
|
+
url: pageHit.url,
|
|
19602
|
+
path: pageHit.url,
|
|
19603
|
+
title: pageHit.title,
|
|
19604
|
+
sectionTitle: "",
|
|
19605
|
+
headingPath: [],
|
|
19606
|
+
snippet: pageHit.description || pageHit.title,
|
|
19607
|
+
chunkText: pageHit.description || pageHit.title,
|
|
19608
|
+
ordinal: 0,
|
|
19609
|
+
contentHash: "",
|
|
19610
|
+
depth: pageHit.depth,
|
|
19611
|
+
incomingLinks: pageHit.incomingLinks,
|
|
19612
|
+
routeFile: pageHit.routeFile,
|
|
19613
|
+
tags: pageHit.tags
|
|
19614
|
+
}
|
|
19615
|
+
};
|
|
19616
|
+
merged.push({
|
|
19617
|
+
hit: syntheticHit,
|
|
19618
|
+
finalScore: Number.isFinite(syntheticScore) ? syntheticScore : 0
|
|
19619
|
+
});
|
|
19620
|
+
}
|
|
19621
|
+
return merged.sort((a, b) => {
|
|
19622
|
+
const delta = b.finalScore - a.finalScore;
|
|
19623
|
+
return Number.isNaN(delta) ? 0 : delta;
|
|
19624
|
+
});
|
|
19625
|
+
}
|
|
20038
19626
|
|
|
20039
19627
|
// src/utils/time.ts
|
|
20040
19628
|
function nowIso() {
|
|
@@ -20045,34 +19633,41 @@ function hrTimeMs(start) {
|
|
|
20045
19633
|
}
|
|
20046
19634
|
|
|
20047
19635
|
// src/indexing/pipeline.ts
|
|
20048
|
-
|
|
20049
|
-
|
|
20050
|
-
|
|
20051
|
-
|
|
20052
|
-
|
|
19636
|
+
function buildPageSummary(page, maxChars = 3500) {
|
|
19637
|
+
const parts = [page.title];
|
|
19638
|
+
if (page.description) {
|
|
19639
|
+
parts.push(page.description);
|
|
19640
|
+
}
|
|
19641
|
+
if (page.keywords && page.keywords.length > 0) {
|
|
19642
|
+
parts.push(page.keywords.join(", "));
|
|
19643
|
+
}
|
|
19644
|
+
const plainBody = page.markdown.replace(/```[\s\S]*?```/g, " ").replace(/`([^`]+)`/g, "$1").replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/^#{1,6}\s+/gm, "").replace(/[>*_|~\-]/g, " ").replace(/\s+/g, " ").trim();
|
|
19645
|
+
if (plainBody) {
|
|
19646
|
+
parts.push(plainBody);
|
|
19647
|
+
}
|
|
19648
|
+
const joined = parts.join("\n\n");
|
|
19649
|
+
if (joined.length <= maxChars) return joined;
|
|
19650
|
+
return joined.slice(0, maxChars).trim();
|
|
19651
|
+
}
|
|
20053
19652
|
var IndexPipeline = class _IndexPipeline {
|
|
20054
19653
|
cwd;
|
|
20055
19654
|
config;
|
|
20056
|
-
|
|
20057
|
-
vectorStore;
|
|
19655
|
+
store;
|
|
20058
19656
|
logger;
|
|
20059
19657
|
constructor(options) {
|
|
20060
19658
|
this.cwd = options.cwd;
|
|
20061
19659
|
this.config = options.config;
|
|
20062
|
-
this.
|
|
20063
|
-
this.vectorStore = options.vectorStore;
|
|
19660
|
+
this.store = options.store;
|
|
20064
19661
|
this.logger = options.logger;
|
|
20065
19662
|
}
|
|
20066
19663
|
static async create(options = {}) {
|
|
20067
19664
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
20068
19665
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
20069
|
-
const
|
|
20070
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
19666
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
20071
19667
|
return new _IndexPipeline({
|
|
20072
19668
|
cwd,
|
|
20073
19669
|
config,
|
|
20074
|
-
|
|
20075
|
-
vectorStore,
|
|
19670
|
+
store,
|
|
20076
19671
|
logger: options.logger ?? new Logger()
|
|
20077
19672
|
});
|
|
20078
19673
|
}
|
|
@@ -20092,25 +19687,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20092
19687
|
stageTimingsMs[name] = Math.round(hrTimeMs(start));
|
|
20093
19688
|
};
|
|
20094
19689
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
20095
|
-
|
|
19690
|
+
ensureStateDirs(this.cwd, this.config.state.dir);
|
|
20096
19691
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
20097
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode},
|
|
19692
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
|
|
20098
19693
|
if (options.force) {
|
|
20099
19694
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
20100
|
-
await cleanMirrorForScope(statePath, scope);
|
|
20101
19695
|
}
|
|
20102
19696
|
if (options.dryRun) {
|
|
20103
19697
|
this.logger.info("Dry run \u2014 no writes will be performed");
|
|
20104
19698
|
}
|
|
20105
19699
|
const manifestStart = stageStart();
|
|
20106
|
-
const existingHashes = await this.
|
|
20107
|
-
const existingModelId = await this.vectorStore.getScopeModelId(scope);
|
|
20108
|
-
if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
|
|
20109
|
-
throw new SearchSocketError(
|
|
20110
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
20111
|
-
`Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
|
|
20112
|
-
);
|
|
20113
|
-
}
|
|
19700
|
+
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
|
|
20114
19701
|
stageEnd("manifest", manifestStart);
|
|
20115
19702
|
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
|
|
20116
19703
|
const sourceStart = stageStart();
|
|
@@ -20239,9 +19826,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20239
19826
|
}
|
|
20240
19827
|
stageEnd("links", linkStart);
|
|
20241
19828
|
this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
|
|
20242
|
-
const
|
|
20243
|
-
this.logger.info("
|
|
20244
|
-
const
|
|
19829
|
+
const pagesStart = stageStart();
|
|
19830
|
+
this.logger.info("Building indexed pages...");
|
|
19831
|
+
const pages = [];
|
|
20245
19832
|
let routeExact = 0;
|
|
20246
19833
|
let routeBestEffort = 0;
|
|
20247
19834
|
const precomputedRoutes = /* @__PURE__ */ new Map();
|
|
@@ -20270,7 +19857,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20270
19857
|
} else {
|
|
20271
19858
|
routeExact += 1;
|
|
20272
19859
|
}
|
|
20273
|
-
const
|
|
19860
|
+
const indexedPage = {
|
|
20274
19861
|
url: page.url,
|
|
20275
19862
|
title: page.title,
|
|
20276
19863
|
scope: scope.scopeName,
|
|
@@ -20285,35 +19872,38 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20285
19872
|
description: page.description,
|
|
20286
19873
|
keywords: page.keywords
|
|
20287
19874
|
};
|
|
20288
|
-
|
|
20289
|
-
|
|
20290
|
-
await writeMirrorPage(statePath, scope, mirror);
|
|
20291
|
-
}
|
|
20292
|
-
this.logger.event("markdown_written", { url: page.url });
|
|
19875
|
+
pages.push(indexedPage);
|
|
19876
|
+
this.logger.event("page_indexed", { url: page.url });
|
|
20293
19877
|
}
|
|
20294
19878
|
if (!options.dryRun) {
|
|
20295
|
-
const pageRecords =
|
|
20296
|
-
|
|
20297
|
-
|
|
20298
|
-
|
|
20299
|
-
|
|
20300
|
-
|
|
20301
|
-
|
|
20302
|
-
|
|
20303
|
-
|
|
20304
|
-
|
|
20305
|
-
|
|
20306
|
-
|
|
20307
|
-
|
|
20308
|
-
|
|
20309
|
-
|
|
20310
|
-
|
|
19879
|
+
const pageRecords = pages.map((p) => {
|
|
19880
|
+
const summary = buildPageSummary(p);
|
|
19881
|
+
return {
|
|
19882
|
+
url: p.url,
|
|
19883
|
+
title: p.title,
|
|
19884
|
+
markdown: p.markdown,
|
|
19885
|
+
projectId: scope.projectId,
|
|
19886
|
+
scopeName: scope.scopeName,
|
|
19887
|
+
routeFile: p.routeFile,
|
|
19888
|
+
routeResolution: p.routeResolution,
|
|
19889
|
+
incomingLinks: p.incomingLinks,
|
|
19890
|
+
outgoingLinks: p.outgoingLinks,
|
|
19891
|
+
depth: p.depth,
|
|
19892
|
+
tags: p.tags,
|
|
19893
|
+
indexedAt: p.generatedAt,
|
|
19894
|
+
summary,
|
|
19895
|
+
description: p.description,
|
|
19896
|
+
keywords: p.keywords
|
|
19897
|
+
};
|
|
19898
|
+
});
|
|
19899
|
+
await this.store.deletePages(scope);
|
|
19900
|
+
await this.store.upsertPages(pageRecords, scope);
|
|
20311
19901
|
}
|
|
20312
|
-
stageEnd("
|
|
20313
|
-
this.logger.info(`
|
|
19902
|
+
stageEnd("pages", pagesStart);
|
|
19903
|
+
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
20314
19904
|
const chunkStart = stageStart();
|
|
20315
19905
|
this.logger.info("Chunking pages...");
|
|
20316
|
-
let chunks =
|
|
19906
|
+
let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
|
|
20317
19907
|
const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
|
|
20318
19908
|
if (typeof maxChunks === "number") {
|
|
20319
19909
|
chunks = chunks.slice(0, maxChunks);
|
|
@@ -20345,125 +19935,59 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20345
19935
|
});
|
|
20346
19936
|
const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
20347
19937
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
20348
|
-
const
|
|
20349
|
-
|
|
20350
|
-
for (const chunk of changedChunks) {
|
|
20351
|
-
chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
|
|
20352
|
-
}
|
|
20353
|
-
const estimatedTokens = changedChunks.reduce(
|
|
20354
|
-
(sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
|
|
20355
|
-
0
|
|
20356
|
-
);
|
|
20357
|
-
const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
|
|
20358
|
-
const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
|
|
20359
|
-
let newEmbeddings = 0;
|
|
20360
|
-
const vectorsByChunk = /* @__PURE__ */ new Map();
|
|
19938
|
+
const upsertStart = stageStart();
|
|
19939
|
+
let documentsUpserted = 0;
|
|
20361
19940
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
20362
|
-
this.logger.info(`
|
|
20363
|
-
const
|
|
20364
|
-
|
|
20365
|
-
|
|
20366
|
-
|
|
20367
|
-
|
|
20368
|
-
|
|
20369
|
-
|
|
20370
|
-
|
|
20371
|
-
|
|
20372
|
-
);
|
|
20373
|
-
|
|
20374
|
-
for (let i = 0; i < changedChunks.length; i += 1) {
|
|
20375
|
-
const chunk = changedChunks[i];
|
|
20376
|
-
const embedding = embeddings[i];
|
|
20377
|
-
if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
|
|
20378
|
-
throw new SearchSocketError(
|
|
20379
|
-
"VECTOR_BACKEND_UNAVAILABLE",
|
|
20380
|
-
`Embedding provider returned an invalid vector for chunk index ${i}.`
|
|
20381
|
-
);
|
|
20382
|
-
}
|
|
20383
|
-
vectorsByChunk.set(chunk.chunkKey, embedding);
|
|
20384
|
-
newEmbeddings += 1;
|
|
20385
|
-
this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
|
|
20386
|
-
}
|
|
20387
|
-
}
|
|
20388
|
-
stageEnd("embedding", embedStart);
|
|
20389
|
-
if (changedChunks.length > 0) {
|
|
20390
|
-
this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
|
|
20391
|
-
} else {
|
|
20392
|
-
this.logger.info("No chunks to embed \u2014 all up to date");
|
|
20393
|
-
}
|
|
20394
|
-
const syncStart = stageStart();
|
|
20395
|
-
if (!options.dryRun) {
|
|
20396
|
-
this.logger.info("Syncing vectors...");
|
|
20397
|
-
const upserts = [];
|
|
20398
|
-
for (const chunk of changedChunks) {
|
|
20399
|
-
const vector = vectorsByChunk.get(chunk.chunkKey);
|
|
20400
|
-
if (!vector) {
|
|
20401
|
-
continue;
|
|
20402
|
-
}
|
|
20403
|
-
upserts.push({
|
|
19941
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
|
|
19942
|
+
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
19943
|
+
const docs = changedChunks.map((chunk) => {
|
|
19944
|
+
const title = chunk.title;
|
|
19945
|
+
const sectionTitle = chunk.sectionTitle ?? "";
|
|
19946
|
+
const url = chunk.url;
|
|
19947
|
+
const tags = chunk.tags.join(",");
|
|
19948
|
+
const headingPath = chunk.headingPath.join(" > ");
|
|
19949
|
+
const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
|
|
19950
|
+
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
19951
|
+
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
19952
|
+
return {
|
|
20404
19953
|
id: chunk.chunkKey,
|
|
20405
|
-
|
|
19954
|
+
content: { title, sectionTitle, text, url, tags, headingPath },
|
|
20406
19955
|
metadata: {
|
|
20407
19956
|
projectId: scope.projectId,
|
|
20408
19957
|
scopeName: scope.scopeName,
|
|
20409
|
-
url: chunk.url,
|
|
20410
19958
|
path: chunk.path,
|
|
20411
|
-
title: chunk.title,
|
|
20412
|
-
sectionTitle: chunk.sectionTitle ?? "",
|
|
20413
|
-
headingPath: chunk.headingPath,
|
|
20414
19959
|
snippet: chunk.snippet,
|
|
20415
|
-
chunkText: chunk.chunkText.slice(0, 4e3),
|
|
20416
19960
|
ordinal: chunk.ordinal,
|
|
20417
19961
|
contentHash: chunk.contentHash,
|
|
20418
|
-
modelId: this.config.embeddings.model,
|
|
20419
19962
|
depth: chunk.depth,
|
|
20420
19963
|
incomingLinks: chunk.incomingLinks,
|
|
20421
19964
|
routeFile: chunk.routeFile,
|
|
20422
|
-
|
|
20423
|
-
|
|
20424
|
-
keywords: chunk.keywords
|
|
19965
|
+
description: chunk.description ?? "",
|
|
19966
|
+
keywords: (chunk.keywords ?? []).join(",")
|
|
20425
19967
|
}
|
|
20426
|
-
}
|
|
20427
|
-
}
|
|
20428
|
-
if (upserts.length > 0) {
|
|
20429
|
-
await this.vectorStore.upsert(upserts, scope);
|
|
20430
|
-
this.logger.event("upserted", { count: upserts.length });
|
|
20431
|
-
}
|
|
20432
|
-
if (deletes.length > 0) {
|
|
20433
|
-
await this.vectorStore.deleteByIds(deletes, scope);
|
|
20434
|
-
this.logger.event("deleted", { count: deletes.length });
|
|
20435
|
-
}
|
|
20436
|
-
}
|
|
20437
|
-
stageEnd("sync", syncStart);
|
|
20438
|
-
this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
|
|
20439
|
-
const finalizeStart = stageStart();
|
|
20440
|
-
if (!options.dryRun) {
|
|
20441
|
-
const scopeInfo = {
|
|
20442
|
-
projectId: scope.projectId,
|
|
20443
|
-
scopeName: scope.scopeName,
|
|
20444
|
-
modelId: this.config.embeddings.model,
|
|
20445
|
-
lastIndexedAt: nowIso(),
|
|
20446
|
-
vectorCount: chunks.length,
|
|
20447
|
-
lastEstimateTokens: estimatedTokens,
|
|
20448
|
-
lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
20449
|
-
lastEstimateChangedChunks: changedChunks.length
|
|
20450
|
-
};
|
|
20451
|
-
await this.vectorStore.recordScope(scopeInfo);
|
|
20452
|
-
this.logger.event("registry_updated", {
|
|
20453
|
-
scope: scope.scopeName,
|
|
20454
|
-
vectorCount: chunks.length
|
|
19968
|
+
};
|
|
20455
19969
|
});
|
|
19970
|
+
await this.store.upsertChunks(docs, scope);
|
|
19971
|
+
documentsUpserted = docs.length;
|
|
19972
|
+
this.logger.event("upserted", { count: docs.length });
|
|
19973
|
+
}
|
|
19974
|
+
if (!options.dryRun && deletes.length > 0) {
|
|
19975
|
+
await this.store.deleteByIds(deletes, scope);
|
|
19976
|
+
this.logger.event("deleted", { count: deletes.length });
|
|
19977
|
+
}
|
|
19978
|
+
stageEnd("upsert", upsertStart);
|
|
19979
|
+
if (changedChunks.length > 0) {
|
|
19980
|
+
this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
|
|
19981
|
+
} else {
|
|
19982
|
+
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
20456
19983
|
}
|
|
20457
|
-
stageEnd("finalize", finalizeStart);
|
|
20458
19984
|
this.logger.info("Done.");
|
|
20459
19985
|
return {
|
|
20460
|
-
pagesProcessed:
|
|
19986
|
+
pagesProcessed: pages.length,
|
|
20461
19987
|
chunksTotal: chunks.length,
|
|
20462
19988
|
chunksChanged: changedChunks.length,
|
|
20463
|
-
|
|
19989
|
+
documentsUpserted,
|
|
20464
19990
|
deletes: deletes.length,
|
|
20465
|
-
estimatedTokens,
|
|
20466
|
-
estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
20467
19991
|
routeExact,
|
|
20468
19992
|
routeBestEffort,
|
|
20469
19993
|
stageTimingsMs
|
|
@@ -20476,35 +20000,25 @@ var requestSchema = zod.z.object({
|
|
|
20476
20000
|
scope: zod.z.string().optional(),
|
|
20477
20001
|
pathPrefix: zod.z.string().optional(),
|
|
20478
20002
|
tags: zod.z.array(zod.z.string()).optional(),
|
|
20479
|
-
|
|
20480
|
-
groupBy: zod.z.enum(["page", "chunk"]).optional(),
|
|
20481
|
-
stream: zod.z.boolean().optional()
|
|
20003
|
+
groupBy: zod.z.enum(["page", "chunk"]).optional()
|
|
20482
20004
|
});
|
|
20483
20005
|
var SearchEngine = class _SearchEngine {
|
|
20484
20006
|
cwd;
|
|
20485
20007
|
config;
|
|
20486
|
-
|
|
20487
|
-
vectorStore;
|
|
20488
|
-
reranker;
|
|
20008
|
+
store;
|
|
20489
20009
|
constructor(options) {
|
|
20490
20010
|
this.cwd = options.cwd;
|
|
20491
20011
|
this.config = options.config;
|
|
20492
|
-
this.
|
|
20493
|
-
this.vectorStore = options.vectorStore;
|
|
20494
|
-
this.reranker = options.reranker;
|
|
20012
|
+
this.store = options.store;
|
|
20495
20013
|
}
|
|
20496
20014
|
static async create(options = {}) {
|
|
20497
20015
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
20498
20016
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
20499
|
-
const
|
|
20500
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
20501
|
-
const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
|
|
20017
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
20502
20018
|
return new _SearchEngine({
|
|
20503
20019
|
cwd,
|
|
20504
20020
|
config,
|
|
20505
|
-
|
|
20506
|
-
vectorStore,
|
|
20507
|
-
reranker
|
|
20021
|
+
store
|
|
20508
20022
|
});
|
|
20509
20023
|
}
|
|
20510
20024
|
getConfig() {
|
|
@@ -20518,142 +20032,90 @@ var SearchEngine = class _SearchEngine {
|
|
|
20518
20032
|
const input = parsed.data;
|
|
20519
20033
|
const totalStart = process.hrtime.bigint();
|
|
20520
20034
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
20521
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
20522
20035
|
const topK = input.topK ?? 10;
|
|
20523
|
-
const wantsRerank = Boolean(input.rerank);
|
|
20524
20036
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
20525
20037
|
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
20526
|
-
const
|
|
20527
|
-
|
|
20528
|
-
|
|
20529
|
-
|
|
20530
|
-
|
|
20531
|
-
|
|
20532
|
-
|
|
20533
|
-
|
|
20534
|
-
|
|
20535
|
-
|
|
20536
|
-
|
|
20537
|
-
|
|
20538
|
-
|
|
20539
|
-
|
|
20540
|
-
|
|
20541
|
-
|
|
20542
|
-
|
|
20543
|
-
|
|
20544
|
-
|
|
20545
|
-
|
|
20546
|
-
|
|
20547
|
-
|
|
20548
|
-
|
|
20549
|
-
|
|
20550
|
-
|
|
20551
|
-
|
|
20552
|
-
|
|
20038
|
+
const filterParts = [];
|
|
20039
|
+
if (input.pathPrefix) {
|
|
20040
|
+
const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
|
|
20041
|
+
filterParts.push(`url GLOB '${prefix}*'`);
|
|
20042
|
+
}
|
|
20043
|
+
if (input.tags && input.tags.length > 0) {
|
|
20044
|
+
for (const tag of input.tags) {
|
|
20045
|
+
filterParts.push(`tags GLOB '*${tag}*'`);
|
|
20046
|
+
}
|
|
20047
|
+
}
|
|
20048
|
+
const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
|
|
20049
|
+
const useDualSearch = this.config.search.dualSearch && groupByPage;
|
|
20050
|
+
const searchStart = process.hrtime.bigint();
|
|
20051
|
+
let ranked;
|
|
20052
|
+
if (useDualSearch) {
|
|
20053
|
+
const chunkLimit = Math.max(topK * 10, 100);
|
|
20054
|
+
const pageLimit = 20;
|
|
20055
|
+
const [pageHits, chunkHits] = await Promise.all([
|
|
20056
|
+
this.store.searchPages(
|
|
20057
|
+
input.q,
|
|
20058
|
+
{
|
|
20059
|
+
limit: pageLimit,
|
|
20060
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
20061
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
20062
|
+
filter
|
|
20063
|
+
},
|
|
20064
|
+
resolvedScope
|
|
20065
|
+
),
|
|
20066
|
+
this.store.search(
|
|
20067
|
+
input.q,
|
|
20068
|
+
{
|
|
20069
|
+
limit: chunkLimit,
|
|
20070
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
20071
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
20072
|
+
reranking: false,
|
|
20073
|
+
filter
|
|
20074
|
+
},
|
|
20075
|
+
resolvedScope
|
|
20076
|
+
)
|
|
20077
|
+
]);
|
|
20078
|
+
const rankedChunks = rankHits(chunkHits, this.config, input.q);
|
|
20079
|
+
ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
|
|
20080
|
+
} else {
|
|
20081
|
+
const hits = await this.store.search(
|
|
20082
|
+
input.q,
|
|
20083
|
+
{
|
|
20084
|
+
limit: candidateK,
|
|
20085
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
20086
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
20087
|
+
reranking: this.config.search.reranking,
|
|
20088
|
+
filter
|
|
20089
|
+
},
|
|
20090
|
+
resolvedScope
|
|
20091
|
+
);
|
|
20092
|
+
ranked = rankHits(hits, this.config, input.q);
|
|
20553
20093
|
}
|
|
20554
|
-
const
|
|
20094
|
+
const searchMs = hrTimeMs(searchStart);
|
|
20095
|
+
const results = this.buildResults(ranked, topK, groupByPage, input.q);
|
|
20555
20096
|
return {
|
|
20556
20097
|
q: input.q,
|
|
20557
20098
|
scope: resolvedScope.scopeName,
|
|
20558
20099
|
results,
|
|
20559
20100
|
meta: {
|
|
20560
20101
|
timingsMs: {
|
|
20561
|
-
|
|
20562
|
-
vector: Math.round(vectorMs),
|
|
20563
|
-
rerank: Math.round(rerankMs),
|
|
20102
|
+
search: Math.round(searchMs),
|
|
20564
20103
|
total: Math.round(hrTimeMs(totalStart))
|
|
20565
|
-
},
|
|
20566
|
-
usedRerank,
|
|
20567
|
-
modelId: this.config.embeddings.model
|
|
20568
|
-
}
|
|
20569
|
-
};
|
|
20570
|
-
}
|
|
20571
|
-
async *searchStreaming(request) {
|
|
20572
|
-
const parsed = requestSchema.safeParse(request);
|
|
20573
|
-
if (!parsed.success) {
|
|
20574
|
-
throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
|
|
20575
|
-
}
|
|
20576
|
-
const input = parsed.data;
|
|
20577
|
-
const wantsRerank = Boolean(input.rerank);
|
|
20578
|
-
if (!wantsRerank) {
|
|
20579
|
-
const response = await this.search(request);
|
|
20580
|
-
yield { phase: "initial", data: response };
|
|
20581
|
-
return;
|
|
20582
|
-
}
|
|
20583
|
-
const totalStart = process.hrtime.bigint();
|
|
20584
|
-
const resolvedScope = resolveScope(this.config, input.scope);
|
|
20585
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
20586
|
-
const topK = input.topK ?? 10;
|
|
20587
|
-
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
20588
|
-
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
20589
|
-
const embedStart = process.hrtime.bigint();
|
|
20590
|
-
const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
|
|
20591
|
-
const queryVector = queryEmbeddings[0];
|
|
20592
|
-
if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
|
|
20593
|
-
throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
|
|
20594
|
-
}
|
|
20595
|
-
const embedMs = hrTimeMs(embedStart);
|
|
20596
|
-
const vectorStart = process.hrtime.bigint();
|
|
20597
|
-
const hits = await this.vectorStore.query(
|
|
20598
|
-
queryVector,
|
|
20599
|
-
{
|
|
20600
|
-
topK: candidateK,
|
|
20601
|
-
pathPrefix: input.pathPrefix,
|
|
20602
|
-
tags: input.tags
|
|
20603
|
-
},
|
|
20604
|
-
resolvedScope
|
|
20605
|
-
);
|
|
20606
|
-
const vectorMs = hrTimeMs(vectorStart);
|
|
20607
|
-
const ranked = rankHits(hits, this.config);
|
|
20608
|
-
const initialResults = this.buildResults(ranked, topK, groupByPage);
|
|
20609
|
-
yield {
|
|
20610
|
-
phase: "initial",
|
|
20611
|
-
data: {
|
|
20612
|
-
q: input.q,
|
|
20613
|
-
scope: resolvedScope.scopeName,
|
|
20614
|
-
results: initialResults,
|
|
20615
|
-
meta: {
|
|
20616
|
-
timingsMs: {
|
|
20617
|
-
embed: Math.round(embedMs),
|
|
20618
|
-
vector: Math.round(vectorMs),
|
|
20619
|
-
rerank: 0,
|
|
20620
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
20621
|
-
},
|
|
20622
|
-
usedRerank: false,
|
|
20623
|
-
modelId: this.config.embeddings.model
|
|
20624
|
-
}
|
|
20625
|
-
}
|
|
20626
|
-
};
|
|
20627
|
-
const rerankStart = process.hrtime.bigint();
|
|
20628
|
-
const reranked = await this.rerankHits(input.q, ranked, topK);
|
|
20629
|
-
const rerankMs = hrTimeMs(rerankStart);
|
|
20630
|
-
const rerankedResults = this.buildResults(reranked, topK, groupByPage);
|
|
20631
|
-
yield {
|
|
20632
|
-
phase: "reranked",
|
|
20633
|
-
data: {
|
|
20634
|
-
q: input.q,
|
|
20635
|
-
scope: resolvedScope.scopeName,
|
|
20636
|
-
results: rerankedResults,
|
|
20637
|
-
meta: {
|
|
20638
|
-
timingsMs: {
|
|
20639
|
-
embed: Math.round(embedMs),
|
|
20640
|
-
vector: Math.round(vectorMs),
|
|
20641
|
-
rerank: Math.round(rerankMs),
|
|
20642
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
20643
|
-
},
|
|
20644
|
-
usedRerank: true,
|
|
20645
|
-
modelId: this.config.embeddings.model
|
|
20646
20104
|
}
|
|
20647
20105
|
}
|
|
20648
20106
|
};
|
|
20649
20107
|
}
|
|
20650
|
-
|
|
20651
|
-
const
|
|
20108
|
+
ensureSnippet(hit) {
|
|
20109
|
+
const snippet = hit.hit.metadata.snippet;
|
|
20110
|
+
if (snippet && snippet.length >= 30) return snippet;
|
|
20111
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
20112
|
+
if (chunkText) return toSnippet(chunkText);
|
|
20113
|
+
return snippet || "";
|
|
20114
|
+
}
|
|
20115
|
+
buildResults(ordered, topK, groupByPage, _query) {
|
|
20652
20116
|
if (groupByPage) {
|
|
20653
20117
|
let pages = aggregateByPage(ordered, this.config);
|
|
20654
|
-
|
|
20655
|
-
pages = pages.filter((p) => p.pageScore >= minScore);
|
|
20656
|
-
}
|
|
20118
|
+
pages = trimByScoreGap(pages, this.config);
|
|
20657
20119
|
const minRatio = this.config.ranking.minChunkScoreRatio;
|
|
20658
20120
|
return pages.slice(0, topK).map((page) => {
|
|
20659
20121
|
const bestScore = page.bestChunk.finalScore;
|
|
@@ -20663,12 +20125,12 @@ var SearchEngine = class _SearchEngine {
|
|
|
20663
20125
|
url: page.url,
|
|
20664
20126
|
title: page.title,
|
|
20665
20127
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
20666
|
-
snippet: page.bestChunk
|
|
20128
|
+
snippet: this.ensureSnippet(page.bestChunk),
|
|
20667
20129
|
score: Number(page.pageScore.toFixed(6)),
|
|
20668
20130
|
routeFile: page.routeFile,
|
|
20669
20131
|
chunks: meaningful.length > 1 ? meaningful.map((c) => ({
|
|
20670
20132
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
20671
|
-
snippet: c
|
|
20133
|
+
snippet: this.ensureSnippet(c),
|
|
20672
20134
|
headingPath: c.hit.metadata.headingPath,
|
|
20673
20135
|
score: Number(c.finalScore.toFixed(6))
|
|
20674
20136
|
})) : void 0
|
|
@@ -20676,6 +20138,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
20676
20138
|
});
|
|
20677
20139
|
} else {
|
|
20678
20140
|
let filtered = ordered;
|
|
20141
|
+
const minScore = this.config.ranking.minScore;
|
|
20679
20142
|
if (minScore > 0) {
|
|
20680
20143
|
filtered = ordered.filter((entry) => entry.finalScore >= minScore);
|
|
20681
20144
|
}
|
|
@@ -20683,7 +20146,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
20683
20146
|
url: hit.metadata.url,
|
|
20684
20147
|
title: hit.metadata.title,
|
|
20685
20148
|
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
20686
|
-
snippet: hit
|
|
20149
|
+
snippet: this.ensureSnippet({ hit, finalScore }),
|
|
20687
20150
|
score: Number(finalScore.toFixed(6)),
|
|
20688
20151
|
routeFile: hit.metadata.routeFile
|
|
20689
20152
|
}));
|
|
@@ -20692,7 +20155,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
20692
20155
|
async getPage(pathOrUrl, scope) {
|
|
20693
20156
|
const resolvedScope = resolveScope(this.config, scope);
|
|
20694
20157
|
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
20695
|
-
const page = await this.
|
|
20158
|
+
const page = await this.store.getPage(urlPath, resolvedScope);
|
|
20696
20159
|
if (!page) {
|
|
20697
20160
|
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
20698
20161
|
}
|
|
@@ -20713,7 +20176,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
20713
20176
|
};
|
|
20714
20177
|
}
|
|
20715
20178
|
async health() {
|
|
20716
|
-
return this.
|
|
20179
|
+
return this.store.health();
|
|
20717
20180
|
}
|
|
20718
20181
|
resolveInputPath(pathOrUrl) {
|
|
20719
20182
|
try {
|
|
@@ -20725,92 +20188,8 @@ var SearchEngine = class _SearchEngine {
|
|
|
20725
20188
|
const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
|
|
20726
20189
|
return normalizeUrlPath(withoutQueryOrHash);
|
|
20727
20190
|
}
|
|
20728
|
-
async assertModelCompatibility(scope) {
|
|
20729
|
-
const modelId = await this.vectorStore.getScopeModelId(scope);
|
|
20730
|
-
if (modelId && modelId !== this.config.embeddings.model) {
|
|
20731
|
-
throw new SearchSocketError(
|
|
20732
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
20733
|
-
`Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
|
|
20734
|
-
);
|
|
20735
|
-
}
|
|
20736
|
-
}
|
|
20737
|
-
async rerankHits(query, ranked, topK) {
|
|
20738
|
-
if (!this.config.rerank.enabled) {
|
|
20739
|
-
throw new SearchSocketError(
|
|
20740
|
-
"INVALID_REQUEST",
|
|
20741
|
-
"rerank=true requested but rerank.enabled is not set to true.",
|
|
20742
|
-
400
|
|
20743
|
-
);
|
|
20744
|
-
}
|
|
20745
|
-
if (!this.reranker) {
|
|
20746
|
-
throw new SearchSocketError(
|
|
20747
|
-
"CONFIG_MISSING",
|
|
20748
|
-
`rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
|
|
20749
|
-
400
|
|
20750
|
-
);
|
|
20751
|
-
}
|
|
20752
|
-
const pageGroups = /* @__PURE__ */ new Map();
|
|
20753
|
-
for (const entry of ranked) {
|
|
20754
|
-
const url = entry.hit.metadata.url;
|
|
20755
|
-
const group = pageGroups.get(url);
|
|
20756
|
-
if (group) group.push(entry);
|
|
20757
|
-
else pageGroups.set(url, [entry]);
|
|
20758
|
-
}
|
|
20759
|
-
const MAX_CHUNKS_PER_PAGE = 5;
|
|
20760
|
-
const MIN_CHUNKS_PER_PAGE = 1;
|
|
20761
|
-
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
20762
|
-
const MAX_DOC_CHARS = 2e3;
|
|
20763
|
-
const pageCandidates = [];
|
|
20764
|
-
for (const [url, chunks] of pageGroups) {
|
|
20765
|
-
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
20766
|
-
const bestScore = byScore[0].finalScore;
|
|
20767
|
-
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
20768
|
-
const selected = byScore.filter(
|
|
20769
|
-
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
20770
|
-
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
20771
|
-
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
20772
|
-
const first = selected[0].hit.metadata;
|
|
20773
|
-
const parts = [first.title];
|
|
20774
|
-
if (first.description) {
|
|
20775
|
-
parts.push(first.description);
|
|
20776
|
-
}
|
|
20777
|
-
if (first.keywords && first.keywords.length > 0) {
|
|
20778
|
-
parts.push(first.keywords.join(", "));
|
|
20779
|
-
}
|
|
20780
|
-
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
20781
|
-
parts.push(body);
|
|
20782
|
-
let text = parts.join("\n\n");
|
|
20783
|
-
if (text.length > MAX_DOC_CHARS) {
|
|
20784
|
-
text = text.slice(0, MAX_DOC_CHARS);
|
|
20785
|
-
}
|
|
20786
|
-
pageCandidates.push({ id: url, text });
|
|
20787
|
-
}
|
|
20788
|
-
const maxCandidates = Math.max(topK, this.config.rerank.topN);
|
|
20789
|
-
const cappedCandidates = pageCandidates.slice(0, maxCandidates);
|
|
20790
|
-
const reranked = await this.reranker.rerank(
|
|
20791
|
-
query,
|
|
20792
|
-
cappedCandidates,
|
|
20793
|
-
maxCandidates
|
|
20794
|
-
);
|
|
20795
|
-
const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
|
|
20796
|
-
return ranked.map((entry) => {
|
|
20797
|
-
const pageScore = scoreByUrl.get(entry.hit.metadata.url);
|
|
20798
|
-
const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
|
|
20799
|
-
if (pageScore === void 0 || !Number.isFinite(pageScore)) {
|
|
20800
|
-
return { ...entry, finalScore: base };
|
|
20801
|
-
}
|
|
20802
|
-
const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
|
|
20803
|
-
return {
|
|
20804
|
-
...entry,
|
|
20805
|
-
finalScore: Number.isFinite(combined) ? combined : base
|
|
20806
|
-
};
|
|
20807
|
-
}).sort((a, b) => {
|
|
20808
|
-
const delta = b.finalScore - a.finalScore;
|
|
20809
|
-
return Number.isNaN(delta) ? 0 : delta;
|
|
20810
|
-
});
|
|
20811
|
-
}
|
|
20812
20191
|
};
|
|
20813
|
-
function createServer(engine
|
|
20192
|
+
function createServer(engine) {
|
|
20814
20193
|
const server = new mcp_js.McpServer({
|
|
20815
20194
|
name: "searchsocket-mcp",
|
|
20816
20195
|
version: "0.1.0"
|
|
@@ -20818,15 +20197,14 @@ function createServer(engine, config) {
|
|
|
20818
20197
|
server.registerTool(
|
|
20819
20198
|
"search",
|
|
20820
20199
|
{
|
|
20821
|
-
description: "Semantic site search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and
|
|
20200
|
+
description: "Semantic site search powered by Upstash Search. Returns url/title/snippet/score/routeFile for each match. Supports optional scope, pathPrefix, tags, topK, and groupBy.",
|
|
20822
20201
|
inputSchema: {
|
|
20823
20202
|
query: zod.z.string().min(1),
|
|
20824
20203
|
scope: zod.z.string().optional(),
|
|
20825
20204
|
topK: zod.z.number().int().positive().max(100).optional(),
|
|
20826
20205
|
pathPrefix: zod.z.string().optional(),
|
|
20827
20206
|
tags: zod.z.array(zod.z.string()).optional(),
|
|
20828
|
-
groupBy: zod.z.enum(["page", "chunk"]).optional()
|
|
20829
|
-
rerank: zod.z.boolean().optional().describe("Enable reranking for better relevance (uses Jina Reranker). Defaults to true when rerank is enabled in config.")
|
|
20207
|
+
groupBy: zod.z.enum(["page", "chunk"]).optional()
|
|
20830
20208
|
}
|
|
20831
20209
|
},
|
|
20832
20210
|
async (input) => {
|
|
@@ -20836,8 +20214,7 @@ function createServer(engine, config) {
|
|
|
20836
20214
|
scope: input.scope,
|
|
20837
20215
|
pathPrefix: input.pathPrefix,
|
|
20838
20216
|
tags: input.tags,
|
|
20839
|
-
groupBy: input.groupBy
|
|
20840
|
-
rerank: input.rerank ?? config.rerank.enabled
|
|
20217
|
+
groupBy: input.groupBy
|
|
20841
20218
|
});
|
|
20842
20219
|
return {
|
|
20843
20220
|
content: [
|
|
@@ -20963,10 +20340,10 @@ async function runMcpServer(options = {}) {
|
|
|
20963
20340
|
config
|
|
20964
20341
|
});
|
|
20965
20342
|
if (resolvedTransport === "http") {
|
|
20966
|
-
await startHttpServer(() => createServer(engine
|
|
20343
|
+
await startHttpServer(() => createServer(engine), config, options);
|
|
20967
20344
|
return;
|
|
20968
20345
|
}
|
|
20969
|
-
const server = createServer(engine
|
|
20346
|
+
const server = createServer(engine);
|
|
20970
20347
|
const stdioTransport = new stdio_js.StdioServerTransport();
|
|
20971
20348
|
await server.connect(stdioTransport);
|
|
20972
20349
|
}
|
|
@@ -21123,42 +20500,6 @@ function searchsocketHandle(options = {}) {
|
|
|
21123
20500
|
}
|
|
21124
20501
|
const engine = await getEngine();
|
|
21125
20502
|
const searchRequest = body;
|
|
21126
|
-
if (searchRequest.stream && searchRequest.rerank) {
|
|
21127
|
-
const encoder = new TextEncoder();
|
|
21128
|
-
const stream = new ReadableStream({
|
|
21129
|
-
async start(controller) {
|
|
21130
|
-
try {
|
|
21131
|
-
for await (const event2 of engine.searchStreaming(searchRequest)) {
|
|
21132
|
-
const line = JSON.stringify(event2) + "\n";
|
|
21133
|
-
controller.enqueue(encoder.encode(line));
|
|
21134
|
-
}
|
|
21135
|
-
} catch (streamError) {
|
|
21136
|
-
const errorEvent = {
|
|
21137
|
-
phase: "error",
|
|
21138
|
-
data: {
|
|
21139
|
-
error: {
|
|
21140
|
-
code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
|
|
21141
|
-
message: streamError instanceof Error ? streamError.message : "Unknown error"
|
|
21142
|
-
}
|
|
21143
|
-
}
|
|
21144
|
-
};
|
|
21145
|
-
controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
|
|
21146
|
-
} finally {
|
|
21147
|
-
controller.close();
|
|
21148
|
-
}
|
|
21149
|
-
}
|
|
21150
|
-
});
|
|
21151
|
-
return withCors(
|
|
21152
|
-
new Response(stream, {
|
|
21153
|
-
status: 200,
|
|
21154
|
-
headers: {
|
|
21155
|
-
"content-type": "application/x-ndjson"
|
|
21156
|
-
}
|
|
21157
|
-
}),
|
|
21158
|
-
event.request,
|
|
21159
|
-
config
|
|
21160
|
-
);
|
|
21161
|
-
}
|
|
21162
20503
|
const result = await engine.search(searchRequest);
|
|
21163
20504
|
return withCors(
|
|
21164
20505
|
new Response(JSON.stringify(result), {
|
|
@@ -21244,13 +20585,6 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21244
20585
|
let running = false;
|
|
21245
20586
|
return {
|
|
21246
20587
|
name: "searchsocket:auto-index",
|
|
21247
|
-
config() {
|
|
21248
|
-
return {
|
|
21249
|
-
ssr: {
|
|
21250
|
-
external: ["@libsql/client", "libsql"]
|
|
21251
|
-
}
|
|
21252
|
-
};
|
|
21253
|
-
},
|
|
21254
20588
|
async closeBundle() {
|
|
21255
20589
|
if (executed || running) {
|
|
21256
20590
|
return;
|
|
@@ -21278,9 +20612,8 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21278
20612
|
verbose: options.verbose
|
|
21279
20613
|
});
|
|
21280
20614
|
logger3.info(
|
|
21281
|
-
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged}
|
|
20615
|
+
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
|
|
21282
20616
|
);
|
|
21283
|
-
logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
|
|
21284
20617
|
executed = true;
|
|
21285
20618
|
} finally {
|
|
21286
20619
|
running = false;
|
|
@@ -21289,60 +20622,6 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21289
20622
|
};
|
|
21290
20623
|
}
|
|
21291
20624
|
|
|
21292
|
-
// src/merge.ts
|
|
21293
|
-
function mergeSearchResults(initial, reranked, options) {
|
|
21294
|
-
const maxDisplacement = options?.maxDisplacement ?? 3;
|
|
21295
|
-
const initialUrls = initial.results.map((r) => r.url);
|
|
21296
|
-
const rerankedUrls = reranked.results.map((r) => r.url);
|
|
21297
|
-
const initialPos = /* @__PURE__ */ new Map();
|
|
21298
|
-
for (let i = 0; i < initialUrls.length; i++) {
|
|
21299
|
-
initialPos.set(initialUrls[i], i);
|
|
21300
|
-
}
|
|
21301
|
-
const rerankedPos = /* @__PURE__ */ new Map();
|
|
21302
|
-
for (let i = 0; i < rerankedUrls.length; i++) {
|
|
21303
|
-
rerankedPos.set(rerankedUrls[i], i);
|
|
21304
|
-
}
|
|
21305
|
-
const displacements = [];
|
|
21306
|
-
for (const url of initialUrls) {
|
|
21307
|
-
const iPos = initialPos.get(url);
|
|
21308
|
-
const rPos = rerankedPos.get(url);
|
|
21309
|
-
const displacement = rPos !== void 0 ? Math.abs(iPos - rPos) : 0;
|
|
21310
|
-
displacements.push({ url, displacement });
|
|
21311
|
-
}
|
|
21312
|
-
const totalResults = displacements.length;
|
|
21313
|
-
if (totalResults === 0) {
|
|
21314
|
-
return {
|
|
21315
|
-
response: reranked,
|
|
21316
|
-
usedRerankedOrder: true,
|
|
21317
|
-
displacements
|
|
21318
|
-
};
|
|
21319
|
-
}
|
|
21320
|
-
const hasLargeDisplacement = displacements.some((d) => d.displacement > maxDisplacement);
|
|
21321
|
-
if (hasLargeDisplacement) {
|
|
21322
|
-
return {
|
|
21323
|
-
response: reranked,
|
|
21324
|
-
usedRerankedOrder: true,
|
|
21325
|
-
displacements
|
|
21326
|
-
};
|
|
21327
|
-
}
|
|
21328
|
-
const rerankedScoreMap = /* @__PURE__ */ new Map();
|
|
21329
|
-
for (const result of reranked.results) {
|
|
21330
|
-
rerankedScoreMap.set(result.url, result.score);
|
|
21331
|
-
}
|
|
21332
|
-
const mergedResults = initial.results.map((result) => ({
|
|
21333
|
-
...result,
|
|
21334
|
-
score: rerankedScoreMap.get(result.url) ?? result.score
|
|
21335
|
-
}));
|
|
21336
|
-
return {
|
|
21337
|
-
response: {
|
|
21338
|
-
...reranked,
|
|
21339
|
-
results: mergedResults
|
|
21340
|
-
},
|
|
21341
|
-
usedRerankedOrder: false,
|
|
21342
|
-
displacements
|
|
21343
|
-
};
|
|
21344
|
-
}
|
|
21345
|
-
|
|
21346
20625
|
// src/client.ts
|
|
21347
20626
|
function createSearchClient(options = {}) {
|
|
21348
20627
|
const endpoint = options.endpoint ?? "/api/search";
|
|
@@ -21370,72 +20649,6 @@ function createSearchClient(options = {}) {
|
|
|
21370
20649
|
throw new Error(message);
|
|
21371
20650
|
}
|
|
21372
20651
|
return payload;
|
|
21373
|
-
},
|
|
21374
|
-
async streamSearch(request, onPhase) {
|
|
21375
|
-
const response = await fetchImpl(endpoint, {
|
|
21376
|
-
method: "POST",
|
|
21377
|
-
headers: {
|
|
21378
|
-
"content-type": "application/json"
|
|
21379
|
-
},
|
|
21380
|
-
body: JSON.stringify(request)
|
|
21381
|
-
});
|
|
21382
|
-
if (!response.ok) {
|
|
21383
|
-
let payload;
|
|
21384
|
-
try {
|
|
21385
|
-
payload = await response.json();
|
|
21386
|
-
} catch {
|
|
21387
|
-
throw new Error("Search failed");
|
|
21388
|
-
}
|
|
21389
|
-
const message = payload.error?.message ?? "Search failed";
|
|
21390
|
-
throw new Error(message);
|
|
21391
|
-
}
|
|
21392
|
-
const contentType = response.headers.get("content-type") ?? "";
|
|
21393
|
-
if (contentType.includes("application/json")) {
|
|
21394
|
-
const data = await response.json();
|
|
21395
|
-
onPhase({ phase: "initial", data });
|
|
21396
|
-
return data;
|
|
21397
|
-
}
|
|
21398
|
-
if (!response.body) {
|
|
21399
|
-
throw new Error("Response body is not readable");
|
|
21400
|
-
}
|
|
21401
|
-
const reader = response.body.getReader();
|
|
21402
|
-
const decoder = new TextDecoder();
|
|
21403
|
-
let buffer = "";
|
|
21404
|
-
let lastResponse = null;
|
|
21405
|
-
for (; ; ) {
|
|
21406
|
-
const { done, value } = await reader.read();
|
|
21407
|
-
if (done) break;
|
|
21408
|
-
buffer += decoder.decode(value, { stream: true });
|
|
21409
|
-
let newlineIdx;
|
|
21410
|
-
while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
|
|
21411
|
-
const line = buffer.slice(0, newlineIdx).trim();
|
|
21412
|
-
buffer = buffer.slice(newlineIdx + 1);
|
|
21413
|
-
if (line.length === 0) continue;
|
|
21414
|
-
const event = JSON.parse(line);
|
|
21415
|
-
if (event.phase === "error") {
|
|
21416
|
-
const errData = event.data;
|
|
21417
|
-
throw new Error(errData.error.message ?? "Streaming search error");
|
|
21418
|
-
}
|
|
21419
|
-
const searchEvent = event;
|
|
21420
|
-
onPhase(searchEvent);
|
|
21421
|
-
lastResponse = searchEvent.data;
|
|
21422
|
-
}
|
|
21423
|
-
}
|
|
21424
|
-
const remaining = buffer.trim();
|
|
21425
|
-
if (remaining.length > 0) {
|
|
21426
|
-
const event = JSON.parse(remaining);
|
|
21427
|
-
if (event.phase === "error") {
|
|
21428
|
-
const errData = event.data;
|
|
21429
|
-
throw new Error(errData.error.message ?? "Streaming search error");
|
|
21430
|
-
}
|
|
21431
|
-
const searchEvent = event;
|
|
21432
|
-
onPhase(searchEvent);
|
|
21433
|
-
lastResponse = searchEvent.data;
|
|
21434
|
-
}
|
|
21435
|
-
if (!lastResponse) {
|
|
21436
|
-
throw new Error("No search results received");
|
|
21437
|
-
}
|
|
21438
|
-
return lastResponse;
|
|
21439
20652
|
}
|
|
21440
20653
|
};
|
|
21441
20654
|
}
|
|
@@ -21452,17 +20665,14 @@ function createSearchClient(options = {}) {
|
|
|
21452
20665
|
*/
|
|
21453
20666
|
|
|
21454
20667
|
exports.IndexPipeline = IndexPipeline;
|
|
21455
|
-
exports.JinaReranker = JinaReranker;
|
|
21456
20668
|
exports.SearchEngine = SearchEngine;
|
|
21457
|
-
exports.
|
|
21458
|
-
exports.createReranker = createReranker;
|
|
20669
|
+
exports.UpstashSearchStore = UpstashSearchStore;
|
|
21459
20670
|
exports.createSearchClient = createSearchClient;
|
|
21460
|
-
exports.
|
|
20671
|
+
exports.createUpstashStore = createUpstashStore;
|
|
21461
20672
|
exports.isServerless = isServerless;
|
|
21462
20673
|
exports.loadConfig = loadConfig;
|
|
21463
20674
|
exports.mergeConfig = mergeConfig;
|
|
21464
20675
|
exports.mergeConfigServerless = mergeConfigServerless;
|
|
21465
|
-
exports.mergeSearchResults = mergeSearchResults;
|
|
21466
20676
|
exports.resolveScope = resolveScope;
|
|
21467
20677
|
exports.runMcpServer = runMcpServer;
|
|
21468
20678
|
exports.searchsocketHandle = searchsocketHandle;
|