searchsocket 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -31
- package/dist/cli.js +634 -1326
- package/dist/client.cjs +41 -117
- package/dist/client.d.cts +3 -17
- package/dist/client.d.ts +3 -17
- package/dist/client.js +41 -117
- package/dist/index.cjs +608 -1398
- package/dist/index.d.cts +73 -35
- package/dist/index.d.ts +73 -35
- package/dist/index.js +605 -1392
- package/dist/plugin-B_npJSux.d.cts +36 -0
- package/dist/plugin-M-aW0ev6.d.ts +36 -0
- package/dist/scroll.cjs +185 -0
- package/dist/scroll.d.cts +42 -0
- package/dist/scroll.d.ts +42 -0
- package/dist/scroll.js +183 -0
- package/dist/sveltekit.cjs +781 -1278
- package/dist/sveltekit.d.cts +3 -43
- package/dist/sveltekit.d.ts +3 -43
- package/dist/sveltekit.js +779 -1276
- package/dist/{types-z2dw3H6E.d.cts → types-Dk43uz25.d.cts} +46 -141
- package/dist/{types-z2dw3H6E.d.ts → types-Dk43uz25.d.ts} +46 -141
- package/package.json +10 -3
package/dist/sveltekit.cjs
CHANGED
|
@@ -4,13 +4,13 @@ var fs = require('fs');
|
|
|
4
4
|
var path = require('path');
|
|
5
5
|
var jiti = require('jiti');
|
|
6
6
|
var zod = require('zod');
|
|
7
|
-
var pLimit2 = require('p-limit');
|
|
8
7
|
var child_process = require('child_process');
|
|
9
8
|
var crypto = require('crypto');
|
|
10
9
|
var cheerio = require('cheerio');
|
|
11
10
|
var matter = require('gray-matter');
|
|
12
|
-
var fs4 = require('fs/promises');
|
|
13
11
|
var fg = require('fast-glob');
|
|
12
|
+
var pLimit = require('p-limit');
|
|
13
|
+
var fs3 = require('fs/promises');
|
|
14
14
|
var net = require('net');
|
|
15
15
|
var zlib = require('zlib');
|
|
16
16
|
|
|
@@ -18,10 +18,10 @@ function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
|
18
18
|
|
|
19
19
|
var fs__default = /*#__PURE__*/_interopDefault(fs);
|
|
20
20
|
var path__default = /*#__PURE__*/_interopDefault(path);
|
|
21
|
-
var pLimit2__default = /*#__PURE__*/_interopDefault(pLimit2);
|
|
22
21
|
var matter__default = /*#__PURE__*/_interopDefault(matter);
|
|
23
|
-
var fs4__default = /*#__PURE__*/_interopDefault(fs4);
|
|
24
22
|
var fg__default = /*#__PURE__*/_interopDefault(fg);
|
|
23
|
+
var pLimit__default = /*#__PURE__*/_interopDefault(pLimit);
|
|
24
|
+
var fs3__default = /*#__PURE__*/_interopDefault(fs3);
|
|
25
25
|
var net__default = /*#__PURE__*/_interopDefault(net);
|
|
26
26
|
|
|
27
27
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
@@ -2767,12 +2767,12 @@ var require_ChildNode = __commonJS({
|
|
|
2767
2767
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
|
|
2768
2768
|
var Node2 = require_Node();
|
|
2769
2769
|
var LinkedList = require_LinkedList();
|
|
2770
|
-
var createDocumentFragmentFromArguments = function(
|
|
2771
|
-
var docFrag =
|
|
2770
|
+
var createDocumentFragmentFromArguments = function(document2, args) {
|
|
2771
|
+
var docFrag = document2.createDocumentFragment();
|
|
2772
2772
|
for (var i = 0; i < args.length; i++) {
|
|
2773
2773
|
var argItem = args[i];
|
|
2774
2774
|
var isNode = argItem instanceof Node2;
|
|
2775
|
-
docFrag.appendChild(isNode ? argItem :
|
|
2775
|
+
docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
|
|
2776
2776
|
}
|
|
2777
2777
|
return docFrag;
|
|
2778
2778
|
};
|
|
@@ -2930,7 +2930,7 @@ var require_NamedNodeMap = __commonJS({
|
|
|
2930
2930
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
|
|
2931
2931
|
var require_Element = __commonJS({
|
|
2932
2932
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
|
|
2933
|
-
module.exports =
|
|
2933
|
+
module.exports = Element2;
|
|
2934
2934
|
var xml = require_xmlnames();
|
|
2935
2935
|
var utils = require_utils();
|
|
2936
2936
|
var NAMESPACE = utils.NAMESPACE;
|
|
@@ -2947,7 +2947,7 @@ var require_Element = __commonJS({
|
|
|
2947
2947
|
var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
|
|
2948
2948
|
var NamedNodeMap = require_NamedNodeMap();
|
|
2949
2949
|
var uppercaseCache = /* @__PURE__ */ Object.create(null);
|
|
2950
|
-
function
|
|
2950
|
+
function Element2(doc, localName, namespaceURI, prefix) {
|
|
2951
2951
|
ContainerNode.call(this);
|
|
2952
2952
|
this.nodeType = Node2.ELEMENT_NODE;
|
|
2953
2953
|
this.ownerDocument = doc;
|
|
@@ -2967,7 +2967,7 @@ var require_Element = __commonJS({
|
|
|
2967
2967
|
recursiveGetText(node.childNodes[i], a);
|
|
2968
2968
|
}
|
|
2969
2969
|
}
|
|
2970
|
-
|
|
2970
|
+
Element2.prototype = Object.create(ContainerNode.prototype, {
|
|
2971
2971
|
isHTML: { get: function isHTML() {
|
|
2972
2972
|
return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
|
|
2973
2973
|
} },
|
|
@@ -3037,7 +3037,7 @@ var require_Element = __commonJS({
|
|
|
3037
3037
|
return NodeUtils.serializeOne(this, { nodeType: 0 });
|
|
3038
3038
|
},
|
|
3039
3039
|
set: function(v) {
|
|
3040
|
-
var
|
|
3040
|
+
var document2 = this.ownerDocument;
|
|
3041
3041
|
var parent = this.parentNode;
|
|
3042
3042
|
if (parent === null) {
|
|
3043
3043
|
return;
|
|
@@ -3048,8 +3048,8 @@ var require_Element = __commonJS({
|
|
|
3048
3048
|
if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
|
|
3049
3049
|
parent = parent.ownerDocument.createElement("body");
|
|
3050
3050
|
}
|
|
3051
|
-
var parser =
|
|
3052
|
-
|
|
3051
|
+
var parser = document2.implementation.mozHTMLParser(
|
|
3052
|
+
document2._address,
|
|
3053
3053
|
parent
|
|
3054
3054
|
);
|
|
3055
3055
|
parser.parse(v === null ? "" : String(v), true);
|
|
@@ -3108,7 +3108,7 @@ var require_Element = __commonJS({
|
|
|
3108
3108
|
default:
|
|
3109
3109
|
utils.SyntaxError();
|
|
3110
3110
|
}
|
|
3111
|
-
if (!(context instanceof
|
|
3111
|
+
if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
|
|
3112
3112
|
context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
|
|
3113
3113
|
}
|
|
3114
3114
|
var parser = this.ownerDocument.implementation.mozHTMLParser(
|
|
@@ -3716,10 +3716,10 @@ var require_Element = __commonJS({
|
|
|
3716
3716
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
3717
3717
|
} }
|
|
3718
3718
|
});
|
|
3719
|
-
Object.defineProperties(
|
|
3720
|
-
Object.defineProperties(
|
|
3719
|
+
Object.defineProperties(Element2.prototype, ChildNode);
|
|
3720
|
+
Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
|
|
3721
3721
|
attributes.registerChangeHandler(
|
|
3722
|
-
|
|
3722
|
+
Element2,
|
|
3723
3723
|
"id",
|
|
3724
3724
|
function(element, lname, oldval, newval) {
|
|
3725
3725
|
if (element.rooted) {
|
|
@@ -3733,7 +3733,7 @@ var require_Element = __commonJS({
|
|
|
3733
3733
|
}
|
|
3734
3734
|
);
|
|
3735
3735
|
attributes.registerChangeHandler(
|
|
3736
|
-
|
|
3736
|
+
Element2,
|
|
3737
3737
|
"class",
|
|
3738
3738
|
function(element, lname, oldval, newval) {
|
|
3739
3739
|
if (element._classList) {
|
|
@@ -3832,7 +3832,7 @@ var require_Element = __commonJS({
|
|
|
3832
3832
|
}
|
|
3833
3833
|
}
|
|
3834
3834
|
});
|
|
3835
|
-
|
|
3835
|
+
Element2._Attr = Attr;
|
|
3836
3836
|
function AttributesArray(elt) {
|
|
3837
3837
|
NamedNodeMap.call(this, elt);
|
|
3838
3838
|
for (var name in elt._attrsByQName) {
|
|
@@ -4234,7 +4234,7 @@ var require_DocumentFragment = __commonJS({
|
|
|
4234
4234
|
var Node2 = require_Node();
|
|
4235
4235
|
var NodeList = require_NodeList();
|
|
4236
4236
|
var ContainerNode = require_ContainerNode();
|
|
4237
|
-
var
|
|
4237
|
+
var Element2 = require_Element();
|
|
4238
4238
|
var select = require_select();
|
|
4239
4239
|
var utils = require_utils();
|
|
4240
4240
|
function DocumentFragment(doc) {
|
|
@@ -4252,9 +4252,9 @@ var require_DocumentFragment = __commonJS({
|
|
|
4252
4252
|
}
|
|
4253
4253
|
},
|
|
4254
4254
|
// Copy the text content getter/setter from Element
|
|
4255
|
-
textContent: Object.getOwnPropertyDescriptor(
|
|
4255
|
+
textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
|
|
4256
4256
|
// Copy the text content getter/setter from Element
|
|
4257
|
-
innerText: Object.getOwnPropertyDescriptor(
|
|
4257
|
+
innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
|
|
4258
4258
|
querySelector: { value: function(selector) {
|
|
4259
4259
|
var nodes = this.querySelectorAll(selector);
|
|
4260
4260
|
return nodes.length ? nodes[0] : null;
|
|
@@ -4262,8 +4262,8 @@ var require_DocumentFragment = __commonJS({
|
|
|
4262
4262
|
querySelectorAll: { value: function(selector) {
|
|
4263
4263
|
var context = Object.create(this);
|
|
4264
4264
|
context.isHTML = true;
|
|
4265
|
-
context.getElementsByTagName =
|
|
4266
|
-
context.nextElement = Object.getOwnPropertyDescriptor(
|
|
4265
|
+
context.getElementsByTagName = Element2.prototype.getElementsByTagName;
|
|
4266
|
+
context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
|
|
4267
4267
|
var nodes = select(selector, context);
|
|
4268
4268
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
4269
4269
|
} },
|
|
@@ -4345,7 +4345,7 @@ var require_ProcessingInstruction = __commonJS({
|
|
|
4345
4345
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
|
|
4346
4346
|
var require_NodeFilter = __commonJS({
|
|
4347
4347
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
|
|
4348
|
-
var
|
|
4348
|
+
var NodeFilter2 = {
|
|
4349
4349
|
// Constants for acceptNode()
|
|
4350
4350
|
FILTER_ACCEPT: 1,
|
|
4351
4351
|
FILTER_REJECT: 2,
|
|
@@ -4370,7 +4370,7 @@ var require_NodeFilter = __commonJS({
|
|
|
4370
4370
|
SHOW_NOTATION: 2048
|
|
4371
4371
|
// historical
|
|
4372
4372
|
};
|
|
4373
|
-
module.exports =
|
|
4373
|
+
module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
|
|
4374
4374
|
}
|
|
4375
4375
|
});
|
|
4376
4376
|
|
|
@@ -4445,7 +4445,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4445
4445
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
|
|
4446
4446
|
module.exports = TreeWalker;
|
|
4447
4447
|
var Node2 = require_Node();
|
|
4448
|
-
var
|
|
4448
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4449
4449
|
var NodeTraversal = require_NodeTraversal();
|
|
4450
4450
|
var utils = require_utils();
|
|
4451
4451
|
var mapChild = {
|
|
@@ -4465,11 +4465,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4465
4465
|
node = tw._currentNode[mapChild[type]];
|
|
4466
4466
|
while (node !== null) {
|
|
4467
4467
|
result = tw._internalFilter(node);
|
|
4468
|
-
if (result ===
|
|
4468
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4469
4469
|
tw._currentNode = node;
|
|
4470
4470
|
return node;
|
|
4471
4471
|
}
|
|
4472
|
-
if (result ===
|
|
4472
|
+
if (result === NodeFilter2.FILTER_SKIP) {
|
|
4473
4473
|
child = node[mapChild[type]];
|
|
4474
4474
|
if (child !== null) {
|
|
4475
4475
|
node = child;
|
|
@@ -4503,12 +4503,12 @@ var require_TreeWalker = __commonJS({
|
|
|
4503
4503
|
while (sibling !== null) {
|
|
4504
4504
|
node = sibling;
|
|
4505
4505
|
result = tw._internalFilter(node);
|
|
4506
|
-
if (result ===
|
|
4506
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4507
4507
|
tw._currentNode = node;
|
|
4508
4508
|
return node;
|
|
4509
4509
|
}
|
|
4510
4510
|
sibling = node[mapChild[type]];
|
|
4511
|
-
if (result ===
|
|
4511
|
+
if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
|
|
4512
4512
|
sibling = node[mapSibling[type]];
|
|
4513
4513
|
}
|
|
4514
4514
|
}
|
|
@@ -4516,7 +4516,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4516
4516
|
if (node === null || node === tw.root) {
|
|
4517
4517
|
return null;
|
|
4518
4518
|
}
|
|
4519
|
-
if (tw._internalFilter(node) ===
|
|
4519
|
+
if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4520
4520
|
return null;
|
|
4521
4521
|
}
|
|
4522
4522
|
}
|
|
@@ -4564,11 +4564,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4564
4564
|
utils.InvalidStateError();
|
|
4565
4565
|
}
|
|
4566
4566
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4567
|
-
return
|
|
4567
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4568
4568
|
}
|
|
4569
4569
|
filter = this._filter;
|
|
4570
4570
|
if (filter === null) {
|
|
4571
|
-
result =
|
|
4571
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4572
4572
|
} else {
|
|
4573
4573
|
this._active = true;
|
|
4574
4574
|
try {
|
|
@@ -4597,7 +4597,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4597
4597
|
if (node === null) {
|
|
4598
4598
|
return null;
|
|
4599
4599
|
}
|
|
4600
|
-
if (this._internalFilter(node) ===
|
|
4600
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4601
4601
|
this._currentNode = node;
|
|
4602
4602
|
return node;
|
|
4603
4603
|
}
|
|
@@ -4650,17 +4650,17 @@ var require_TreeWalker = __commonJS({
|
|
|
4650
4650
|
for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
|
|
4651
4651
|
node = previousSibling;
|
|
4652
4652
|
result = this._internalFilter(node);
|
|
4653
|
-
if (result ===
|
|
4653
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4654
4654
|
continue;
|
|
4655
4655
|
}
|
|
4656
4656
|
for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
|
|
4657
4657
|
node = lastChild;
|
|
4658
4658
|
result = this._internalFilter(node);
|
|
4659
|
-
if (result ===
|
|
4659
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4660
4660
|
break;
|
|
4661
4661
|
}
|
|
4662
4662
|
}
|
|
4663
|
-
if (result ===
|
|
4663
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4664
4664
|
this._currentNode = node;
|
|
4665
4665
|
return node;
|
|
4666
4666
|
}
|
|
@@ -4669,7 +4669,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4669
4669
|
return null;
|
|
4670
4670
|
}
|
|
4671
4671
|
node = node.parentNode;
|
|
4672
|
-
if (this._internalFilter(node) ===
|
|
4672
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4673
4673
|
this._currentNode = node;
|
|
4674
4674
|
return node;
|
|
4675
4675
|
}
|
|
@@ -4686,26 +4686,26 @@ var require_TreeWalker = __commonJS({
|
|
|
4686
4686
|
nextNode: { value: function nextNode() {
|
|
4687
4687
|
var node, result, firstChild, nextSibling;
|
|
4688
4688
|
node = this._currentNode;
|
|
4689
|
-
result =
|
|
4689
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4690
4690
|
CHILDREN:
|
|
4691
4691
|
while (true) {
|
|
4692
4692
|
for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
|
|
4693
4693
|
node = firstChild;
|
|
4694
4694
|
result = this._internalFilter(node);
|
|
4695
|
-
if (result ===
|
|
4695
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4696
4696
|
this._currentNode = node;
|
|
4697
4697
|
return node;
|
|
4698
|
-
} else if (result ===
|
|
4698
|
+
} else if (result === NodeFilter2.FILTER_REJECT) {
|
|
4699
4699
|
break;
|
|
4700
4700
|
}
|
|
4701
4701
|
}
|
|
4702
4702
|
for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
|
|
4703
4703
|
node = nextSibling;
|
|
4704
4704
|
result = this._internalFilter(node);
|
|
4705
|
-
if (result ===
|
|
4705
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4706
4706
|
this._currentNode = node;
|
|
4707
4707
|
return node;
|
|
4708
|
-
} else if (result ===
|
|
4708
|
+
} else if (result === NodeFilter2.FILTER_SKIP) {
|
|
4709
4709
|
continue CHILDREN;
|
|
4710
4710
|
}
|
|
4711
4711
|
}
|
|
@@ -4724,7 +4724,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4724
4724
|
var require_NodeIterator = __commonJS({
|
|
4725
4725
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
|
|
4726
4726
|
module.exports = NodeIterator;
|
|
4727
|
-
var
|
|
4727
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4728
4728
|
var NodeTraversal = require_NodeTraversal();
|
|
4729
4729
|
var utils = require_utils();
|
|
4730
4730
|
function move(node, stayWithin, directionIsNext) {
|
|
@@ -4759,7 +4759,7 @@ var require_NodeIterator = __commonJS({
|
|
|
4759
4759
|
}
|
|
4760
4760
|
}
|
|
4761
4761
|
var result = ni._internalFilter(node);
|
|
4762
|
-
if (result ===
|
|
4762
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4763
4763
|
break;
|
|
4764
4764
|
}
|
|
4765
4765
|
}
|
|
@@ -4807,11 +4807,11 @@ var require_NodeIterator = __commonJS({
|
|
|
4807
4807
|
utils.InvalidStateError();
|
|
4808
4808
|
}
|
|
4809
4809
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4810
|
-
return
|
|
4810
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4811
4811
|
}
|
|
4812
4812
|
filter = this._filter;
|
|
4813
4813
|
if (filter === null) {
|
|
4814
|
-
result =
|
|
4814
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4815
4815
|
} else {
|
|
4816
4816
|
this._active = true;
|
|
4817
4817
|
try {
|
|
@@ -5021,32 +5021,32 @@ var require_URL = __commonJS({
|
|
|
5021
5021
|
else
|
|
5022
5022
|
return basepath.substring(0, lastslash + 1) + refpath;
|
|
5023
5023
|
}
|
|
5024
|
-
function remove_dot_segments(
|
|
5025
|
-
if (!
|
|
5024
|
+
function remove_dot_segments(path13) {
|
|
5025
|
+
if (!path13) return path13;
|
|
5026
5026
|
var output = "";
|
|
5027
|
-
while (
|
|
5028
|
-
if (
|
|
5029
|
-
|
|
5027
|
+
while (path13.length > 0) {
|
|
5028
|
+
if (path13 === "." || path13 === "..") {
|
|
5029
|
+
path13 = "";
|
|
5030
5030
|
break;
|
|
5031
5031
|
}
|
|
5032
|
-
var twochars =
|
|
5033
|
-
var threechars =
|
|
5034
|
-
var fourchars =
|
|
5032
|
+
var twochars = path13.substring(0, 2);
|
|
5033
|
+
var threechars = path13.substring(0, 3);
|
|
5034
|
+
var fourchars = path13.substring(0, 4);
|
|
5035
5035
|
if (threechars === "../") {
|
|
5036
|
-
|
|
5036
|
+
path13 = path13.substring(3);
|
|
5037
5037
|
} else if (twochars === "./") {
|
|
5038
|
-
|
|
5038
|
+
path13 = path13.substring(2);
|
|
5039
5039
|
} else if (threechars === "/./") {
|
|
5040
|
-
|
|
5041
|
-
} else if (twochars === "/." &&
|
|
5042
|
-
|
|
5043
|
-
} else if (fourchars === "/../" || threechars === "/.." &&
|
|
5044
|
-
|
|
5040
|
+
path13 = "/" + path13.substring(3);
|
|
5041
|
+
} else if (twochars === "/." && path13.length === 2) {
|
|
5042
|
+
path13 = "/";
|
|
5043
|
+
} else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
|
|
5044
|
+
path13 = "/" + path13.substring(4);
|
|
5045
5045
|
output = output.replace(/\/?[^\/]*$/, "");
|
|
5046
5046
|
} else {
|
|
5047
|
-
var segment =
|
|
5047
|
+
var segment = path13.match(/(\/?([^\/]*))/)[0];
|
|
5048
5048
|
output += segment;
|
|
5049
|
-
|
|
5049
|
+
path13 = path13.substring(segment.length);
|
|
5050
5050
|
}
|
|
5051
5051
|
}
|
|
5052
5052
|
return output;
|
|
@@ -5611,9 +5611,9 @@ var require_defineElement = __commonJS({
|
|
|
5611
5611
|
});
|
|
5612
5612
|
return c;
|
|
5613
5613
|
};
|
|
5614
|
-
function EventHandlerBuilder(body,
|
|
5614
|
+
function EventHandlerBuilder(body, document2, form, element) {
|
|
5615
5615
|
this.body = body;
|
|
5616
|
-
this.document =
|
|
5616
|
+
this.document = document2;
|
|
5617
5617
|
this.form = form;
|
|
5618
5618
|
this.element = element;
|
|
5619
5619
|
}
|
|
@@ -5647,7 +5647,7 @@ var require_defineElement = __commonJS({
|
|
|
5647
5647
|
var require_htmlelts = __commonJS({
|
|
5648
5648
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
|
|
5649
5649
|
var Node2 = require_Node();
|
|
5650
|
-
var
|
|
5650
|
+
var Element2 = require_Element();
|
|
5651
5651
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
5652
5652
|
var utils = require_utils();
|
|
5653
5653
|
var URLUtils = require_URLUtils();
|
|
@@ -5715,10 +5715,10 @@ var require_htmlelts = __commonJS({
|
|
|
5715
5715
|
this._form = null;
|
|
5716
5716
|
};
|
|
5717
5717
|
var HTMLElement = exports$1.HTMLElement = define({
|
|
5718
|
-
superclass:
|
|
5718
|
+
superclass: Element2,
|
|
5719
5719
|
name: "HTMLElement",
|
|
5720
5720
|
ctor: function HTMLElement2(doc, localName, prefix) {
|
|
5721
|
-
|
|
5721
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
|
|
5722
5722
|
},
|
|
5723
5723
|
props: {
|
|
5724
5724
|
dangerouslySetInnerHTML: {
|
|
@@ -7200,7 +7200,7 @@ var require_htmlelts = __commonJS({
|
|
|
7200
7200
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
|
|
7201
7201
|
var require_svg = __commonJS({
|
|
7202
7202
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
|
|
7203
|
-
var
|
|
7203
|
+
var Element2 = require_Element();
|
|
7204
7204
|
var defineElement = require_defineElement();
|
|
7205
7205
|
var utils = require_utils();
|
|
7206
7206
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
@@ -7214,10 +7214,10 @@ var require_svg = __commonJS({
|
|
|
7214
7214
|
return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
|
|
7215
7215
|
}
|
|
7216
7216
|
var SVGElement = define({
|
|
7217
|
-
superclass:
|
|
7217
|
+
superclass: Element2,
|
|
7218
7218
|
name: "SVGElement",
|
|
7219
7219
|
ctor: function SVGElement2(doc, localName, prefix) {
|
|
7220
|
-
|
|
7220
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
|
|
7221
7221
|
},
|
|
7222
7222
|
props: {
|
|
7223
7223
|
style: { get: function() {
|
|
@@ -7352,7 +7352,7 @@ var require_Document = __commonJS({
|
|
|
7352
7352
|
var Node2 = require_Node();
|
|
7353
7353
|
var NodeList = require_NodeList();
|
|
7354
7354
|
var ContainerNode = require_ContainerNode();
|
|
7355
|
-
var
|
|
7355
|
+
var Element2 = require_Element();
|
|
7356
7356
|
var Text = require_Text();
|
|
7357
7357
|
var Comment = require_Comment();
|
|
7358
7358
|
var Event = require_Event();
|
|
@@ -7361,7 +7361,7 @@ var require_Document = __commonJS({
|
|
|
7361
7361
|
var DOMImplementation = require_DOMImplementation();
|
|
7362
7362
|
var TreeWalker = require_TreeWalker();
|
|
7363
7363
|
var NodeIterator = require_NodeIterator();
|
|
7364
|
-
var
|
|
7364
|
+
var NodeFilter2 = require_NodeFilter();
|
|
7365
7365
|
var URL2 = require_URL();
|
|
7366
7366
|
var select = require_select();
|
|
7367
7367
|
var events = require_events();
|
|
@@ -7500,13 +7500,13 @@ var require_Document = __commonJS({
|
|
|
7500
7500
|
if (this.isHTML) {
|
|
7501
7501
|
localName = utils.toASCIILowerCase(localName);
|
|
7502
7502
|
}
|
|
7503
|
-
return new
|
|
7503
|
+
return new Element2._Attr(null, localName, null, null, "");
|
|
7504
7504
|
} },
|
|
7505
7505
|
createAttributeNS: { value: function(namespace, qualifiedName) {
|
|
7506
7506
|
namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
|
|
7507
7507
|
qualifiedName = String(qualifiedName);
|
|
7508
7508
|
var ve = validateAndExtract(namespace, qualifiedName);
|
|
7509
|
-
return new
|
|
7509
|
+
return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
|
|
7510
7510
|
} },
|
|
7511
7511
|
createElement: { value: function(localName) {
|
|
7512
7512
|
localName = String(localName);
|
|
@@ -7518,7 +7518,7 @@ var require_Document = __commonJS({
|
|
|
7518
7518
|
} else if (this.contentType === "application/xhtml+xml") {
|
|
7519
7519
|
return html.createElement(this, localName, null);
|
|
7520
7520
|
} else {
|
|
7521
|
-
return new
|
|
7521
|
+
return new Element2(this, localName, null, null);
|
|
7522
7522
|
}
|
|
7523
7523
|
}, writable: isApiWritable },
|
|
7524
7524
|
createElementNS: { value: function(namespace, qualifiedName) {
|
|
@@ -7535,7 +7535,7 @@ var require_Document = __commonJS({
|
|
|
7535
7535
|
} else if (namespace === NAMESPACE.SVG) {
|
|
7536
7536
|
return svg.createElement(this, localName, prefix);
|
|
7537
7537
|
}
|
|
7538
|
-
return new
|
|
7538
|
+
return new Element2(this, localName, namespace, prefix);
|
|
7539
7539
|
} },
|
|
7540
7540
|
createEvent: { value: function createEvent(interfaceName) {
|
|
7541
7541
|
interfaceName = interfaceName.toLowerCase();
|
|
@@ -7557,7 +7557,7 @@ var require_Document = __commonJS({
|
|
|
7557
7557
|
if (!(root3 instanceof Node2)) {
|
|
7558
7558
|
throw new TypeError("root not a node");
|
|
7559
7559
|
}
|
|
7560
|
-
whatToShow = whatToShow === void 0 ?
|
|
7560
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7561
7561
|
filter = filter === void 0 ? null : filter;
|
|
7562
7562
|
return new TreeWalker(root3, whatToShow, filter);
|
|
7563
7563
|
} },
|
|
@@ -7569,7 +7569,7 @@ var require_Document = __commonJS({
|
|
|
7569
7569
|
if (!(root3 instanceof Node2)) {
|
|
7570
7570
|
throw new TypeError("root not a node");
|
|
7571
7571
|
}
|
|
7572
|
-
whatToShow = whatToShow === void 0 ?
|
|
7572
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7573
7573
|
filter = filter === void 0 ? null : filter;
|
|
7574
7574
|
return new NodeIterator(root3, whatToShow, filter);
|
|
7575
7575
|
} },
|
|
@@ -7630,10 +7630,10 @@ var require_Document = __commonJS({
|
|
|
7630
7630
|
return this.byId[id] instanceof MultiId;
|
|
7631
7631
|
} },
|
|
7632
7632
|
// Just copy this method from the Element prototype
|
|
7633
|
-
getElementsByName: { value:
|
|
7634
|
-
getElementsByTagName: { value:
|
|
7635
|
-
getElementsByTagNameNS: { value:
|
|
7636
|
-
getElementsByClassName: { value:
|
|
7633
|
+
getElementsByName: { value: Element2.prototype.getElementsByName },
|
|
7634
|
+
getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
|
|
7635
|
+
getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
|
|
7636
|
+
getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
|
|
7637
7637
|
adoptNode: { value: function adoptNode(node) {
|
|
7638
7638
|
if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
|
|
7639
7639
|
if (node.nodeType === Node2.ATTRIBUTE_NODE) {
|
|
@@ -16459,8 +16459,8 @@ var require_Window = __commonJS({
|
|
|
16459
16459
|
var Location = require_Location();
|
|
16460
16460
|
var utils = require_utils();
|
|
16461
16461
|
module.exports = Window;
|
|
16462
|
-
function Window(
|
|
16463
|
-
this.document =
|
|
16462
|
+
function Window(document2) {
|
|
16463
|
+
this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
|
|
16464
16464
|
this.document._scripting_enabled = true;
|
|
16465
16465
|
this.document.defaultView = this;
|
|
16466
16466
|
this.location = new Location(this, this.document._address || "about:blank");
|
|
@@ -16590,11 +16590,11 @@ var require_lib = __commonJS({
|
|
|
16590
16590
|
};
|
|
16591
16591
|
};
|
|
16592
16592
|
exports$1.createWindow = function(html, address) {
|
|
16593
|
-
var
|
|
16593
|
+
var document2 = exports$1.createDocument(html);
|
|
16594
16594
|
if (address !== void 0) {
|
|
16595
|
-
|
|
16595
|
+
document2._address = address;
|
|
16596
16596
|
}
|
|
16597
|
-
return new impl.Window(
|
|
16597
|
+
return new impl.Window(document2);
|
|
16598
16598
|
};
|
|
16599
16599
|
exports$1.impl = impl;
|
|
16600
16600
|
}
|
|
@@ -16659,29 +16659,18 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16659
16659
|
prependTitle: zod.z.boolean().optional(),
|
|
16660
16660
|
pageSummaryChunk: zod.z.boolean().optional()
|
|
16661
16661
|
}).optional(),
|
|
16662
|
-
|
|
16663
|
-
|
|
16664
|
-
|
|
16665
|
-
|
|
16666
|
-
|
|
16667
|
-
batchSize: zod.z.number().int().positive().optional(),
|
|
16668
|
-
concurrency: zod.z.number().int().positive().optional(),
|
|
16669
|
-
pricePer1kTokens: zod.z.number().positive().optional()
|
|
16662
|
+
upstash: zod.z.object({
|
|
16663
|
+
url: zod.z.string().url().optional(),
|
|
16664
|
+
token: zod.z.string().min(1).optional(),
|
|
16665
|
+
urlEnv: zod.z.string().min(1).optional(),
|
|
16666
|
+
tokenEnv: zod.z.string().min(1).optional()
|
|
16670
16667
|
}).optional(),
|
|
16671
|
-
|
|
16672
|
-
|
|
16673
|
-
|
|
16674
|
-
|
|
16675
|
-
|
|
16676
|
-
|
|
16677
|
-
authTokenEnv: zod.z.string().optional(),
|
|
16678
|
-
localPath: zod.z.string().optional()
|
|
16679
|
-
}).optional()
|
|
16680
|
-
}).optional(),
|
|
16681
|
-
rerank: zod.z.object({
|
|
16682
|
-
enabled: zod.z.boolean().optional(),
|
|
16683
|
-
topN: zod.z.number().int().positive().optional(),
|
|
16684
|
-
model: zod.z.string().optional()
|
|
16668
|
+
search: zod.z.object({
|
|
16669
|
+
semanticWeight: zod.z.number().min(0).max(1).optional(),
|
|
16670
|
+
inputEnrichment: zod.z.boolean().optional(),
|
|
16671
|
+
reranking: zod.z.boolean().optional(),
|
|
16672
|
+
dualSearch: zod.z.boolean().optional(),
|
|
16673
|
+
pageSearchWeight: zod.z.number().min(0).max(1).optional()
|
|
16685
16674
|
}).optional(),
|
|
16686
16675
|
ranking: zod.z.object({
|
|
16687
16676
|
enableIncomingLinkBoost: zod.z.boolean().optional(),
|
|
@@ -16691,11 +16680,12 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16691
16680
|
aggregationDecay: zod.z.number().min(0).max(1).optional(),
|
|
16692
16681
|
minChunkScoreRatio: zod.z.number().min(0).max(1).optional(),
|
|
16693
16682
|
minScore: zod.z.number().min(0).max(1).optional(),
|
|
16683
|
+
scoreGapThreshold: zod.z.number().min(0).max(1).optional(),
|
|
16694
16684
|
weights: zod.z.object({
|
|
16695
16685
|
incomingLinks: zod.z.number().optional(),
|
|
16696
16686
|
depth: zod.z.number().optional(),
|
|
16697
|
-
|
|
16698
|
-
|
|
16687
|
+
aggregation: zod.z.number().optional(),
|
|
16688
|
+
titleMatch: zod.z.number().optional()
|
|
16699
16689
|
}).optional()
|
|
16700
16690
|
}).optional(),
|
|
16701
16691
|
api: zod.z.object({
|
|
@@ -16717,8 +16707,7 @@ var searchSocketConfigSchema = zod.z.object({
|
|
|
16717
16707
|
}).optional()
|
|
16718
16708
|
}).optional(),
|
|
16719
16709
|
state: zod.z.object({
|
|
16720
|
-
dir: zod.z.string().optional()
|
|
16721
|
-
writeMirror: zod.z.boolean().optional()
|
|
16710
|
+
dir: zod.z.string().optional()
|
|
16722
16711
|
}).optional()
|
|
16723
16712
|
});
|
|
16724
16713
|
|
|
@@ -16772,24 +16761,16 @@ function createDefaultConfig(projectId) {
|
|
|
16772
16761
|
prependTitle: true,
|
|
16773
16762
|
pageSummaryChunk: true
|
|
16774
16763
|
},
|
|
16775
|
-
|
|
16776
|
-
|
|
16777
|
-
|
|
16778
|
-
apiKeyEnv: "JINA_API_KEY",
|
|
16779
|
-
batchSize: 64,
|
|
16780
|
-
concurrency: 4
|
|
16781
|
-
},
|
|
16782
|
-
vector: {
|
|
16783
|
-
turso: {
|
|
16784
|
-
urlEnv: "TURSO_DATABASE_URL",
|
|
16785
|
-
authTokenEnv: "TURSO_AUTH_TOKEN",
|
|
16786
|
-
localPath: ".searchsocket/vectors.db"
|
|
16787
|
-
}
|
|
16764
|
+
upstash: {
|
|
16765
|
+
urlEnv: "UPSTASH_SEARCH_REST_URL",
|
|
16766
|
+
tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
|
|
16788
16767
|
},
|
|
16789
|
-
|
|
16790
|
-
|
|
16791
|
-
|
|
16792
|
-
|
|
16768
|
+
search: {
|
|
16769
|
+
semanticWeight: 0.75,
|
|
16770
|
+
inputEnrichment: true,
|
|
16771
|
+
reranking: true,
|
|
16772
|
+
dualSearch: true,
|
|
16773
|
+
pageSearchWeight: 0.3
|
|
16793
16774
|
},
|
|
16794
16775
|
ranking: {
|
|
16795
16776
|
enableIncomingLinkBoost: true,
|
|
@@ -16798,12 +16779,13 @@ function createDefaultConfig(projectId) {
|
|
|
16798
16779
|
aggregationCap: 5,
|
|
16799
16780
|
aggregationDecay: 0.5,
|
|
16800
16781
|
minChunkScoreRatio: 0.5,
|
|
16801
|
-
minScore: 0,
|
|
16782
|
+
minScore: 0.3,
|
|
16783
|
+
scoreGapThreshold: 0.4,
|
|
16802
16784
|
weights: {
|
|
16803
16785
|
incomingLinks: 0.05,
|
|
16804
16786
|
depth: 0.03,
|
|
16805
|
-
|
|
16806
|
-
|
|
16787
|
+
aggregation: 0.1,
|
|
16788
|
+
titleMatch: 0.15
|
|
16807
16789
|
}
|
|
16808
16790
|
},
|
|
16809
16791
|
api: {
|
|
@@ -16821,8 +16803,7 @@ function createDefaultConfig(projectId) {
|
|
|
16821
16803
|
}
|
|
16822
16804
|
},
|
|
16823
16805
|
state: {
|
|
16824
|
-
dir: ".searchsocket"
|
|
16825
|
-
writeMirror: false
|
|
16806
|
+
dir: ".searchsocket"
|
|
16826
16807
|
}
|
|
16827
16808
|
};
|
|
16828
16809
|
}
|
|
@@ -16946,21 +16927,13 @@ ${issues}`
|
|
|
16946
16927
|
...defaults.chunking,
|
|
16947
16928
|
...parsed.chunking
|
|
16948
16929
|
},
|
|
16949
|
-
|
|
16950
|
-
...defaults.
|
|
16951
|
-
...parsed.
|
|
16930
|
+
upstash: {
|
|
16931
|
+
...defaults.upstash,
|
|
16932
|
+
...parsed.upstash
|
|
16952
16933
|
},
|
|
16953
|
-
|
|
16954
|
-
...defaults.
|
|
16955
|
-
...parsed.
|
|
16956
|
-
turso: {
|
|
16957
|
-
...defaults.vector.turso,
|
|
16958
|
-
...parsed.vector?.turso
|
|
16959
|
-
}
|
|
16960
|
-
},
|
|
16961
|
-
rerank: {
|
|
16962
|
-
...defaults.rerank,
|
|
16963
|
-
...parsed.rerank
|
|
16934
|
+
search: {
|
|
16935
|
+
...defaults.search,
|
|
16936
|
+
...parsed.search
|
|
16964
16937
|
},
|
|
16965
16938
|
ranking: {
|
|
16966
16939
|
...defaults.ranking,
|
|
@@ -17051,128 +17024,6 @@ async function loadConfig(options = {}) {
|
|
|
17051
17024
|
function isServerless() {
|
|
17052
17025
|
return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
|
|
17053
17026
|
}
|
|
17054
|
-
function sleep(ms) {
|
|
17055
|
-
return new Promise((resolve) => {
|
|
17056
|
-
setTimeout(resolve, ms);
|
|
17057
|
-
});
|
|
17058
|
-
}
|
|
17059
|
-
var JinaEmbeddingsProvider = class {
|
|
17060
|
-
apiKey;
|
|
17061
|
-
batchSize;
|
|
17062
|
-
concurrency;
|
|
17063
|
-
defaultTask;
|
|
17064
|
-
constructor(options) {
|
|
17065
|
-
if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
|
|
17066
|
-
throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
|
|
17067
|
-
}
|
|
17068
|
-
if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
|
|
17069
|
-
throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
|
|
17070
|
-
}
|
|
17071
|
-
this.apiKey = options.apiKey;
|
|
17072
|
-
this.batchSize = options.batchSize;
|
|
17073
|
-
this.concurrency = options.concurrency;
|
|
17074
|
-
this.defaultTask = options.task ?? "retrieval.passage";
|
|
17075
|
-
}
|
|
17076
|
-
estimateTokens(text) {
|
|
17077
|
-
const normalized = text.trim();
|
|
17078
|
-
if (!normalized) {
|
|
17079
|
-
return 0;
|
|
17080
|
-
}
|
|
17081
|
-
const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
|
|
17082
|
-
const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
|
|
17083
|
-
const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
|
|
17084
|
-
const charEstimate = Math.ceil(normalized.length / 4);
|
|
17085
|
-
const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
|
|
17086
|
-
return Math.max(1, Math.max(charEstimate, lexicalEstimate));
|
|
17087
|
-
}
|
|
17088
|
-
async embedTexts(texts, modelId, task) {
|
|
17089
|
-
if (texts.length === 0) {
|
|
17090
|
-
return [];
|
|
17091
|
-
}
|
|
17092
|
-
const batches = [];
|
|
17093
|
-
for (let i = 0; i < texts.length; i += this.batchSize) {
|
|
17094
|
-
batches.push({
|
|
17095
|
-
index: i,
|
|
17096
|
-
values: texts.slice(i, i + this.batchSize)
|
|
17097
|
-
});
|
|
17098
|
-
}
|
|
17099
|
-
const outputs = new Array(batches.length);
|
|
17100
|
-
const limit = pLimit2__default.default(this.concurrency);
|
|
17101
|
-
await Promise.all(
|
|
17102
|
-
batches.map(
|
|
17103
|
-
(batch, position) => limit(async () => {
|
|
17104
|
-
outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
|
|
17105
|
-
})
|
|
17106
|
-
)
|
|
17107
|
-
);
|
|
17108
|
-
return outputs.flat();
|
|
17109
|
-
}
|
|
17110
|
-
async embedWithRetry(texts, modelId, task) {
|
|
17111
|
-
const maxAttempts = 5;
|
|
17112
|
-
let attempt = 0;
|
|
17113
|
-
while (attempt < maxAttempts) {
|
|
17114
|
-
attempt += 1;
|
|
17115
|
-
let response;
|
|
17116
|
-
try {
|
|
17117
|
-
response = await fetch("https://api.jina.ai/v1/embeddings", {
|
|
17118
|
-
method: "POST",
|
|
17119
|
-
headers: {
|
|
17120
|
-
"content-type": "application/json",
|
|
17121
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17122
|
-
},
|
|
17123
|
-
body: JSON.stringify({
|
|
17124
|
-
model: modelId,
|
|
17125
|
-
input: texts,
|
|
17126
|
-
task
|
|
17127
|
-
})
|
|
17128
|
-
});
|
|
17129
|
-
} catch (error) {
|
|
17130
|
-
if (attempt >= maxAttempts) {
|
|
17131
|
-
throw error;
|
|
17132
|
-
}
|
|
17133
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17134
|
-
continue;
|
|
17135
|
-
}
|
|
17136
|
-
if (!response.ok) {
|
|
17137
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17138
|
-
if (!retryable || attempt >= maxAttempts) {
|
|
17139
|
-
const errorBody = await response.text();
|
|
17140
|
-
throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
|
|
17141
|
-
}
|
|
17142
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17143
|
-
continue;
|
|
17144
|
-
}
|
|
17145
|
-
const payload = await response.json();
|
|
17146
|
-
if (!payload.data || !Array.isArray(payload.data)) {
|
|
17147
|
-
throw new Error("Invalid Jina embeddings response format");
|
|
17148
|
-
}
|
|
17149
|
-
return payload.data.map((entry) => entry.embedding);
|
|
17150
|
-
}
|
|
17151
|
-
throw new Error("Unreachable retry state");
|
|
17152
|
-
}
|
|
17153
|
-
};
|
|
17154
|
-
|
|
17155
|
-
// src/embeddings/factory.ts
|
|
17156
|
-
function createEmbeddingsProvider(config) {
|
|
17157
|
-
if (config.embeddings.provider !== "jina") {
|
|
17158
|
-
throw new SearchSocketError(
|
|
17159
|
-
"CONFIG_MISSING",
|
|
17160
|
-
`Unsupported embeddings provider ${config.embeddings.provider}`
|
|
17161
|
-
);
|
|
17162
|
-
}
|
|
17163
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17164
|
-
if (!apiKey) {
|
|
17165
|
-
throw new SearchSocketError(
|
|
17166
|
-
"CONFIG_MISSING",
|
|
17167
|
-
`Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
|
|
17168
|
-
);
|
|
17169
|
-
}
|
|
17170
|
-
return new JinaEmbeddingsProvider({
|
|
17171
|
-
apiKey,
|
|
17172
|
-
batchSize: config.embeddings.batchSize,
|
|
17173
|
-
concurrency: config.embeddings.concurrency
|
|
17174
|
-
});
|
|
17175
|
-
}
|
|
17176
17027
|
|
|
17177
17028
|
// src/utils/text.ts
|
|
17178
17029
|
function normalizeText(input) {
|
|
@@ -17247,103 +17098,6 @@ function resolveScope(config, override) {
|
|
|
17247
17098
|
};
|
|
17248
17099
|
}
|
|
17249
17100
|
|
|
17250
|
-
// src/rerank/jina.ts
|
|
17251
|
-
function sleep2(ms) {
|
|
17252
|
-
return new Promise((resolve) => {
|
|
17253
|
-
setTimeout(resolve, ms);
|
|
17254
|
-
});
|
|
17255
|
-
}
|
|
17256
|
-
var JinaReranker = class {
|
|
17257
|
-
apiKey;
|
|
17258
|
-
model;
|
|
17259
|
-
maxRetries;
|
|
17260
|
-
constructor(options) {
|
|
17261
|
-
this.apiKey = options.apiKey;
|
|
17262
|
-
this.model = options.model;
|
|
17263
|
-
this.maxRetries = options.maxRetries ?? 2;
|
|
17264
|
-
}
|
|
17265
|
-
async rerank(query, candidates, topN) {
|
|
17266
|
-
if (candidates.length === 0) {
|
|
17267
|
-
return [];
|
|
17268
|
-
}
|
|
17269
|
-
const body = {
|
|
17270
|
-
model: this.model,
|
|
17271
|
-
query,
|
|
17272
|
-
documents: candidates.map((candidate) => candidate.text),
|
|
17273
|
-
top_n: topN ?? candidates.length,
|
|
17274
|
-
return_documents: false
|
|
17275
|
-
};
|
|
17276
|
-
let attempt = 0;
|
|
17277
|
-
while (attempt <= this.maxRetries) {
|
|
17278
|
-
attempt += 1;
|
|
17279
|
-
let response;
|
|
17280
|
-
try {
|
|
17281
|
-
response = await fetch("https://api.jina.ai/v1/rerank", {
|
|
17282
|
-
method: "POST",
|
|
17283
|
-
headers: {
|
|
17284
|
-
"content-type": "application/json",
|
|
17285
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17286
|
-
},
|
|
17287
|
-
body: JSON.stringify(body)
|
|
17288
|
-
});
|
|
17289
|
-
} catch (error) {
|
|
17290
|
-
if (attempt <= this.maxRetries) {
|
|
17291
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17292
|
-
continue;
|
|
17293
|
-
}
|
|
17294
|
-
throw error;
|
|
17295
|
-
}
|
|
17296
|
-
if (!response.ok) {
|
|
17297
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17298
|
-
if (retryable && attempt <= this.maxRetries) {
|
|
17299
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17300
|
-
continue;
|
|
17301
|
-
}
|
|
17302
|
-
const errorBody = await response.text();
|
|
17303
|
-
throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
|
|
17304
|
-
}
|
|
17305
|
-
const payload = await response.json();
|
|
17306
|
-
const rawResults = payload.results ?? payload.data ?? [];
|
|
17307
|
-
if (!Array.isArray(rawResults)) {
|
|
17308
|
-
throw new Error("Invalid Jina rerank response format");
|
|
17309
|
-
}
|
|
17310
|
-
return rawResults.flatMap((item) => {
|
|
17311
|
-
const index = item.index;
|
|
17312
|
-
if (typeof index !== "number" || index < 0 || index >= candidates.length) {
|
|
17313
|
-
return [];
|
|
17314
|
-
}
|
|
17315
|
-
const candidate = candidates[index];
|
|
17316
|
-
if (!candidate) {
|
|
17317
|
-
return [];
|
|
17318
|
-
}
|
|
17319
|
-
const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
|
|
17320
|
-
return [
|
|
17321
|
-
{
|
|
17322
|
-
id: candidate.id,
|
|
17323
|
-
score
|
|
17324
|
-
}
|
|
17325
|
-
];
|
|
17326
|
-
}).sort((a, b) => b.score - a.score);
|
|
17327
|
-
}
|
|
17328
|
-
throw new Error("Jina rerank request failed after retries");
|
|
17329
|
-
}
|
|
17330
|
-
};
|
|
17331
|
-
|
|
17332
|
-
// src/rerank/factory.ts
|
|
17333
|
-
function createReranker(config) {
|
|
17334
|
-
if (!config.rerank.enabled) {
|
|
17335
|
-
return null;
|
|
17336
|
-
}
|
|
17337
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17338
|
-
if (!apiKey) {
|
|
17339
|
-
return null;
|
|
17340
|
-
}
|
|
17341
|
-
return new JinaReranker({
|
|
17342
|
-
apiKey,
|
|
17343
|
-
model: config.rerank.model
|
|
17344
|
-
});
|
|
17345
|
-
}
|
|
17346
|
-
|
|
17347
17101
|
// src/utils/time.ts
|
|
17348
17102
|
function nowIso() {
|
|
17349
17103
|
return (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -17362,13 +17116,6 @@ function normalizeUrlPath(rawPath) {
|
|
|
17362
17116
|
}
|
|
17363
17117
|
return out;
|
|
17364
17118
|
}
|
|
17365
|
-
function urlPathToMirrorRelative(urlPath) {
|
|
17366
|
-
const normalized = normalizeUrlPath(urlPath);
|
|
17367
|
-
if (normalized === "/") {
|
|
17368
|
-
return "index.md";
|
|
17369
|
-
}
|
|
17370
|
-
return `${normalized.slice(1)}.md`;
|
|
17371
|
-
}
|
|
17372
17119
|
function staticHtmlFileToUrl(filePath, rootDir) {
|
|
17373
17120
|
const relative = path__default.default.relative(rootDir, filePath).replace(/\\/g, "/");
|
|
17374
17121
|
if (relative === "index.html") {
|
|
@@ -17402,434 +17149,239 @@ function joinUrl(baseUrl, route) {
|
|
|
17402
17149
|
return `${base}${routePart}`;
|
|
17403
17150
|
}
|
|
17404
17151
|
|
|
17405
|
-
// src/vector/
|
|
17406
|
-
|
|
17152
|
+
// src/vector/upstash.ts
|
|
17153
|
+
function chunkIndexName(scope) {
|
|
17154
|
+
return `${scope.projectId}--${scope.scopeName}`;
|
|
17155
|
+
}
|
|
17156
|
+
function pageIndexName(scope) {
|
|
17157
|
+
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
17158
|
+
}
|
|
17159
|
+
var UpstashSearchStore = class {
|
|
17407
17160
|
client;
|
|
17408
|
-
dimension;
|
|
17409
|
-
chunksReady = false;
|
|
17410
|
-
registryReady = false;
|
|
17411
|
-
pagesReady = false;
|
|
17412
17161
|
constructor(opts) {
|
|
17413
17162
|
this.client = opts.client;
|
|
17414
|
-
this.dimension = opts.dimension;
|
|
17415
|
-
}
|
|
17416
|
-
async ensureRegistry() {
|
|
17417
|
-
if (this.registryReady) return;
|
|
17418
|
-
await this.client.execute(`
|
|
17419
|
-
CREATE TABLE IF NOT EXISTS registry (
|
|
17420
|
-
scope_key TEXT PRIMARY KEY,
|
|
17421
|
-
project_id TEXT NOT NULL,
|
|
17422
|
-
scope_name TEXT NOT NULL,
|
|
17423
|
-
model_id TEXT NOT NULL,
|
|
17424
|
-
last_indexed_at TEXT NOT NULL,
|
|
17425
|
-
vector_count INTEGER,
|
|
17426
|
-
last_estimate_tokens INTEGER,
|
|
17427
|
-
last_estimate_cost_usd REAL,
|
|
17428
|
-
last_estimate_changed_chunks INTEGER
|
|
17429
|
-
)
|
|
17430
|
-
`);
|
|
17431
|
-
const estimateCols = [
|
|
17432
|
-
{ name: "last_estimate_tokens", def: "INTEGER" },
|
|
17433
|
-
{ name: "last_estimate_cost_usd", def: "REAL" },
|
|
17434
|
-
{ name: "last_estimate_changed_chunks", def: "INTEGER" }
|
|
17435
|
-
];
|
|
17436
|
-
for (const col of estimateCols) {
|
|
17437
|
-
try {
|
|
17438
|
-
await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
|
|
17439
|
-
} catch (error) {
|
|
17440
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17441
|
-
throw error;
|
|
17442
|
-
}
|
|
17443
|
-
}
|
|
17444
|
-
}
|
|
17445
|
-
this.registryReady = true;
|
|
17446
|
-
}
|
|
17447
|
-
async ensureChunks(dim) {
|
|
17448
|
-
if (this.chunksReady) return;
|
|
17449
|
-
const exists = await this.chunksTableExists();
|
|
17450
|
-
if (exists) {
|
|
17451
|
-
const currentDim = await this.getChunksDimension();
|
|
17452
|
-
if (currentDim !== null && currentDim !== dim) {
|
|
17453
|
-
await this.client.batch([
|
|
17454
|
-
"DROP INDEX IF EXISTS idx",
|
|
17455
|
-
"DROP TABLE IF EXISTS chunks"
|
|
17456
|
-
]);
|
|
17457
|
-
}
|
|
17458
|
-
}
|
|
17459
|
-
await this.client.batch([
|
|
17460
|
-
`CREATE TABLE IF NOT EXISTS chunks (
|
|
17461
|
-
id TEXT PRIMARY KEY,
|
|
17462
|
-
project_id TEXT NOT NULL,
|
|
17463
|
-
scope_name TEXT NOT NULL,
|
|
17464
|
-
url TEXT NOT NULL,
|
|
17465
|
-
path TEXT NOT NULL,
|
|
17466
|
-
title TEXT NOT NULL,
|
|
17467
|
-
section_title TEXT NOT NULL DEFAULT '',
|
|
17468
|
-
heading_path TEXT NOT NULL DEFAULT '[]',
|
|
17469
|
-
snippet TEXT NOT NULL DEFAULT '',
|
|
17470
|
-
chunk_text TEXT NOT NULL DEFAULT '',
|
|
17471
|
-
ordinal INTEGER NOT NULL DEFAULT 0,
|
|
17472
|
-
content_hash TEXT NOT NULL DEFAULT '',
|
|
17473
|
-
model_id TEXT NOT NULL DEFAULT '',
|
|
17474
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17475
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17476
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17477
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17478
|
-
description TEXT NOT NULL DEFAULT '',
|
|
17479
|
-
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17480
|
-
embedding F32_BLOB(${dim})
|
|
17481
|
-
)`,
|
|
17482
|
-
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17483
|
-
]);
|
|
17484
|
-
this.chunksReady = true;
|
|
17485
|
-
}
|
|
17486
|
-
async ensurePages() {
|
|
17487
|
-
if (this.pagesReady) return;
|
|
17488
|
-
await this.client.execute(`
|
|
17489
|
-
CREATE TABLE IF NOT EXISTS pages (
|
|
17490
|
-
project_id TEXT NOT NULL,
|
|
17491
|
-
scope_name TEXT NOT NULL,
|
|
17492
|
-
url TEXT NOT NULL,
|
|
17493
|
-
title TEXT NOT NULL,
|
|
17494
|
-
markdown TEXT NOT NULL,
|
|
17495
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17496
|
-
route_resolution TEXT NOT NULL DEFAULT 'exact',
|
|
17497
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17498
|
-
outgoing_links INTEGER NOT NULL DEFAULT 0,
|
|
17499
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17500
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17501
|
-
indexed_at TEXT NOT NULL,
|
|
17502
|
-
PRIMARY KEY (project_id, scope_name, url)
|
|
17503
|
-
)
|
|
17504
|
-
`);
|
|
17505
|
-
this.pagesReady = true;
|
|
17506
17163
|
}
|
|
17507
|
-
|
|
17508
|
-
|
|
17509
|
-
await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
|
|
17510
|
-
return true;
|
|
17511
|
-
} catch (error) {
|
|
17512
|
-
if (error instanceof Error && error.message.includes("no such table")) {
|
|
17513
|
-
return false;
|
|
17514
|
-
}
|
|
17515
|
-
throw error;
|
|
17516
|
-
}
|
|
17164
|
+
chunkIndex(scope) {
|
|
17165
|
+
return this.client.index(chunkIndexName(scope));
|
|
17517
17166
|
}
|
|
17518
|
-
|
|
17519
|
-
|
|
17520
|
-
* Returns null if the table doesn't exist or the dimension can't be parsed.
|
|
17521
|
-
*/
|
|
17522
|
-
async getChunksDimension() {
|
|
17523
|
-
try {
|
|
17524
|
-
const rs = await this.client.execute(
|
|
17525
|
-
"SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
|
|
17526
|
-
);
|
|
17527
|
-
if (rs.rows.length === 0) return null;
|
|
17528
|
-
const sql = rs.rows[0].sql;
|
|
17529
|
-
const match = sql.match(/F32_BLOB\((\d+)\)/i);
|
|
17530
|
-
return match ? parseInt(match[1], 10) : null;
|
|
17531
|
-
} catch {
|
|
17532
|
-
return null;
|
|
17533
|
-
}
|
|
17167
|
+
pageIndex(scope) {
|
|
17168
|
+
return this.client.index(pageIndexName(scope));
|
|
17534
17169
|
}
|
|
17535
|
-
|
|
17536
|
-
|
|
17537
|
-
|
|
17538
|
-
*/
|
|
17539
|
-
async dropAllTables() {
|
|
17540
|
-
await this.client.batch([
|
|
17541
|
-
"DROP INDEX IF EXISTS idx",
|
|
17542
|
-
"DROP TABLE IF EXISTS chunks",
|
|
17543
|
-
"DROP TABLE IF EXISTS registry",
|
|
17544
|
-
"DROP TABLE IF EXISTS pages"
|
|
17545
|
-
]);
|
|
17546
|
-
this.chunksReady = false;
|
|
17547
|
-
this.registryReady = false;
|
|
17548
|
-
this.pagesReady = false;
|
|
17549
|
-
}
|
|
17550
|
-
async upsert(records, _scope) {
|
|
17551
|
-
if (records.length === 0) return;
|
|
17552
|
-
const dim = this.dimension ?? records[0].vector.length;
|
|
17553
|
-
await this.ensureChunks(dim);
|
|
17170
|
+
async upsertChunks(chunks, scope) {
|
|
17171
|
+
if (chunks.length === 0) return;
|
|
17172
|
+
const index = this.chunkIndex(scope);
|
|
17554
17173
|
const BATCH_SIZE = 100;
|
|
17555
|
-
for (let i = 0; i <
|
|
17556
|
-
const batch =
|
|
17557
|
-
|
|
17558
|
-
sql: `INSERT OR REPLACE INTO chunks
|
|
17559
|
-
(id, project_id, scope_name, url, path, title, section_title,
|
|
17560
|
-
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17561
|
-
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17562
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17563
|
-
args: [
|
|
17564
|
-
r.id,
|
|
17565
|
-
r.metadata.projectId,
|
|
17566
|
-
r.metadata.scopeName,
|
|
17567
|
-
r.metadata.url,
|
|
17568
|
-
r.metadata.path,
|
|
17569
|
-
r.metadata.title,
|
|
17570
|
-
r.metadata.sectionTitle,
|
|
17571
|
-
JSON.stringify(r.metadata.headingPath),
|
|
17572
|
-
r.metadata.snippet,
|
|
17573
|
-
r.metadata.chunkText,
|
|
17574
|
-
r.metadata.ordinal,
|
|
17575
|
-
r.metadata.contentHash,
|
|
17576
|
-
r.metadata.modelId,
|
|
17577
|
-
r.metadata.depth,
|
|
17578
|
-
r.metadata.incomingLinks,
|
|
17579
|
-
r.metadata.routeFile,
|
|
17580
|
-
JSON.stringify(r.metadata.tags),
|
|
17581
|
-
r.metadata.description ?? "",
|
|
17582
|
-
JSON.stringify(r.metadata.keywords ?? []),
|
|
17583
|
-
JSON.stringify(r.vector)
|
|
17584
|
-
]
|
|
17585
|
-
}));
|
|
17586
|
-
await this.client.batch(stmts);
|
|
17174
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
17175
|
+
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
17176
|
+
await index.upsert(batch);
|
|
17587
17177
|
}
|
|
17588
17178
|
}
|
|
17589
|
-
async query
|
|
17590
|
-
const
|
|
17591
|
-
await
|
|
17592
|
-
|
|
17593
|
-
|
|
17594
|
-
|
|
17595
|
-
|
|
17596
|
-
|
|
17597
|
-
|
|
17598
|
-
c.description, c.keywords,
|
|
17599
|
-
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17600
|
-
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17601
|
-
JOIN chunks AS c ON c.rowid = v.id`,
|
|
17602
|
-
args: [queryJson, queryJson, opts.topK]
|
|
17179
|
+
async search(query, opts, scope) {
|
|
17180
|
+
const index = this.chunkIndex(scope);
|
|
17181
|
+
const results = await index.search({
|
|
17182
|
+
query,
|
|
17183
|
+
limit: opts.limit,
|
|
17184
|
+
semanticWeight: opts.semanticWeight,
|
|
17185
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17186
|
+
reranking: opts.reranking,
|
|
17187
|
+
filter: opts.filter
|
|
17603
17188
|
});
|
|
17604
|
-
|
|
17605
|
-
|
|
17606
|
-
|
|
17607
|
-
|
|
17608
|
-
|
|
17609
|
-
|
|
17610
|
-
|
|
17611
|
-
|
|
17612
|
-
|
|
17613
|
-
|
|
17614
|
-
|
|
17615
|
-
|
|
17616
|
-
|
|
17617
|
-
|
|
17618
|
-
|
|
17619
|
-
|
|
17620
|
-
|
|
17621
|
-
|
|
17622
|
-
|
|
17623
|
-
|
|
17624
|
-
|
|
17625
|
-
}
|
|
17189
|
+
return results.map((doc) => ({
|
|
17190
|
+
id: doc.id,
|
|
17191
|
+
score: doc.score,
|
|
17192
|
+
metadata: {
|
|
17193
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
17194
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
17195
|
+
url: doc.content.url,
|
|
17196
|
+
path: doc.metadata?.path ?? "",
|
|
17197
|
+
title: doc.content.title,
|
|
17198
|
+
sectionTitle: doc.content.sectionTitle,
|
|
17199
|
+
headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
|
|
17200
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
17201
|
+
chunkText: doc.content.text,
|
|
17202
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
17203
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
17204
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17205
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17206
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
17207
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17208
|
+
description: doc.metadata?.description || void 0,
|
|
17209
|
+
keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
|
|
17626
17210
|
}
|
|
17627
|
-
|
|
17628
|
-
|
|
17629
|
-
|
|
17630
|
-
|
|
17631
|
-
|
|
17632
|
-
|
|
17633
|
-
|
|
17634
|
-
|
|
17635
|
-
|
|
17636
|
-
|
|
17637
|
-
|
|
17638
|
-
|
|
17639
|
-
|
|
17640
|
-
scopeName,
|
|
17641
|
-
url: row.url,
|
|
17642
|
-
path: rowPath,
|
|
17643
|
-
title: row.title,
|
|
17644
|
-
sectionTitle: row.section_title,
|
|
17645
|
-
headingPath: JSON.parse(row.heading_path || "[]"),
|
|
17646
|
-
snippet: row.snippet,
|
|
17647
|
-
chunkText: row.chunk_text || "",
|
|
17648
|
-
ordinal: row.ordinal || 0,
|
|
17649
|
-
contentHash: row.content_hash,
|
|
17650
|
-
modelId: row.model_id,
|
|
17651
|
-
depth: row.depth,
|
|
17652
|
-
incomingLinks: row.incoming_links,
|
|
17653
|
-
routeFile: row.route_file,
|
|
17654
|
-
tags,
|
|
17655
|
-
description,
|
|
17656
|
-
keywords
|
|
17657
|
-
}
|
|
17211
|
+
}));
|
|
17212
|
+
}
|
|
17213
|
+
async searchPages(query, opts, scope) {
|
|
17214
|
+
const index = this.pageIndex(scope);
|
|
17215
|
+
let results;
|
|
17216
|
+
try {
|
|
17217
|
+
results = await index.search({
|
|
17218
|
+
query,
|
|
17219
|
+
limit: opts.limit,
|
|
17220
|
+
semanticWeight: opts.semanticWeight,
|
|
17221
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17222
|
+
reranking: true,
|
|
17223
|
+
filter: opts.filter
|
|
17658
17224
|
});
|
|
17225
|
+
} catch {
|
|
17226
|
+
return [];
|
|
17659
17227
|
}
|
|
17660
|
-
|
|
17661
|
-
|
|
17228
|
+
return results.map((doc) => ({
|
|
17229
|
+
id: doc.id,
|
|
17230
|
+
score: doc.score,
|
|
17231
|
+
title: doc.content.title,
|
|
17232
|
+
url: doc.content.url,
|
|
17233
|
+
description: doc.content.description ?? "",
|
|
17234
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17235
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17236
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17237
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
17238
|
+
}));
|
|
17662
17239
|
}
|
|
17663
17240
|
async deleteByIds(ids, scope) {
|
|
17664
17241
|
if (ids.length === 0) return;
|
|
17242
|
+
const index = this.chunkIndex(scope);
|
|
17665
17243
|
const BATCH_SIZE = 500;
|
|
17666
17244
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
17667
17245
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
17668
|
-
|
|
17669
|
-
await this.client.execute({
|
|
17670
|
-
sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
|
|
17671
|
-
args: [scope.projectId, scope.scopeName, ...batch]
|
|
17672
|
-
});
|
|
17246
|
+
await index.delete(batch);
|
|
17673
17247
|
}
|
|
17674
17248
|
}
|
|
17675
17249
|
async deleteScope(scope) {
|
|
17676
|
-
await this.ensureRegistry();
|
|
17677
17250
|
try {
|
|
17678
|
-
|
|
17679
|
-
|
|
17680
|
-
|
|
17681
|
-
});
|
|
17682
|
-
} catch (error) {
|
|
17683
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17684
|
-
throw error;
|
|
17685
|
-
}
|
|
17251
|
+
const chunkIdx = this.chunkIndex(scope);
|
|
17252
|
+
await chunkIdx.deleteIndex();
|
|
17253
|
+
} catch {
|
|
17686
17254
|
}
|
|
17687
17255
|
try {
|
|
17688
|
-
|
|
17689
|
-
|
|
17690
|
-
|
|
17691
|
-
});
|
|
17692
|
-
} catch (error) {
|
|
17693
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17694
|
-
throw error;
|
|
17695
|
-
}
|
|
17256
|
+
const pageIdx = this.pageIndex(scope);
|
|
17257
|
+
await pageIdx.deleteIndex();
|
|
17258
|
+
} catch {
|
|
17696
17259
|
}
|
|
17697
|
-
await this.client.execute({
|
|
17698
|
-
sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17699
|
-
args: [scope.projectId, scope.scopeName]
|
|
17700
|
-
});
|
|
17701
|
-
}
|
|
17702
|
-
async listScopes(scopeProjectId) {
|
|
17703
|
-
await this.ensureRegistry();
|
|
17704
|
-
const rs = await this.client.execute({
|
|
17705
|
-
sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
|
|
17706
|
-
last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
|
|
17707
|
-
FROM registry WHERE project_id = ?`,
|
|
17708
|
-
args: [scopeProjectId]
|
|
17709
|
-
});
|
|
17710
|
-
return rs.rows.map((row) => ({
|
|
17711
|
-
projectId: row.project_id,
|
|
17712
|
-
scopeName: row.scope_name,
|
|
17713
|
-
modelId: row.model_id,
|
|
17714
|
-
lastIndexedAt: row.last_indexed_at,
|
|
17715
|
-
vectorCount: row.vector_count,
|
|
17716
|
-
lastEstimateTokens: row.last_estimate_tokens,
|
|
17717
|
-
lastEstimateCostUSD: row.last_estimate_cost_usd,
|
|
17718
|
-
lastEstimateChangedChunks: row.last_estimate_changed_chunks
|
|
17719
|
-
}));
|
|
17720
17260
|
}
|
|
17721
|
-
async
|
|
17722
|
-
await this.
|
|
17723
|
-
const
|
|
17724
|
-
|
|
17725
|
-
|
|
17726
|
-
|
|
17727
|
-
|
|
17728
|
-
|
|
17729
|
-
|
|
17730
|
-
|
|
17731
|
-
|
|
17732
|
-
|
|
17733
|
-
|
|
17734
|
-
|
|
17735
|
-
|
|
17736
|
-
|
|
17737
|
-
|
|
17738
|
-
|
|
17739
|
-
|
|
17740
|
-
|
|
17261
|
+
async listScopes(projectId) {
|
|
17262
|
+
const allIndexes = await this.client.listIndexes();
|
|
17263
|
+
const prefix = `${projectId}--`;
|
|
17264
|
+
const scopeNames = /* @__PURE__ */ new Set();
|
|
17265
|
+
for (const name of allIndexes) {
|
|
17266
|
+
if (name.startsWith(prefix) && !name.endsWith("--pages")) {
|
|
17267
|
+
const scopeName = name.slice(prefix.length);
|
|
17268
|
+
scopeNames.add(scopeName);
|
|
17269
|
+
}
|
|
17270
|
+
}
|
|
17271
|
+
const scopes = [];
|
|
17272
|
+
for (const scopeName of scopeNames) {
|
|
17273
|
+
const scope = {
|
|
17274
|
+
projectId,
|
|
17275
|
+
scopeName,
|
|
17276
|
+
scopeId: `${projectId}:${scopeName}`
|
|
17277
|
+
};
|
|
17278
|
+
try {
|
|
17279
|
+
const info = await this.chunkIndex(scope).info();
|
|
17280
|
+
scopes.push({
|
|
17281
|
+
projectId,
|
|
17282
|
+
scopeName,
|
|
17283
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
17284
|
+
documentCount: info.documentCount
|
|
17285
|
+
});
|
|
17286
|
+
} catch {
|
|
17287
|
+
scopes.push({
|
|
17288
|
+
projectId,
|
|
17289
|
+
scopeName,
|
|
17290
|
+
lastIndexedAt: "unknown",
|
|
17291
|
+
documentCount: 0
|
|
17292
|
+
});
|
|
17293
|
+
}
|
|
17294
|
+
}
|
|
17295
|
+
return scopes;
|
|
17741
17296
|
}
|
|
17742
17297
|
async getContentHashes(scope) {
|
|
17743
|
-
const exists = await this.chunksTableExists();
|
|
17744
|
-
if (!exists) return /* @__PURE__ */ new Map();
|
|
17745
|
-
const rs = await this.client.execute({
|
|
17746
|
-
sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
|
|
17747
|
-
args: [scope.projectId, scope.scopeName]
|
|
17748
|
-
});
|
|
17749
17298
|
const map = /* @__PURE__ */ new Map();
|
|
17750
|
-
|
|
17751
|
-
|
|
17299
|
+
const index = this.chunkIndex(scope);
|
|
17300
|
+
let cursor = "0";
|
|
17301
|
+
try {
|
|
17302
|
+
for (; ; ) {
|
|
17303
|
+
const result = await index.range({ cursor, limit: 100 });
|
|
17304
|
+
for (const doc of result.documents) {
|
|
17305
|
+
if (doc.metadata?.contentHash) {
|
|
17306
|
+
map.set(doc.id, doc.metadata.contentHash);
|
|
17307
|
+
}
|
|
17308
|
+
}
|
|
17309
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17310
|
+
cursor = result.nextCursor;
|
|
17311
|
+
}
|
|
17312
|
+
} catch {
|
|
17752
17313
|
}
|
|
17753
17314
|
return map;
|
|
17754
17315
|
}
|
|
17755
17316
|
async upsertPages(pages, scope) {
|
|
17756
17317
|
if (pages.length === 0) return;
|
|
17757
|
-
|
|
17758
|
-
|
|
17759
|
-
if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
|
|
17760
|
-
throw new Error(
|
|
17761
|
-
`Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
|
|
17762
|
-
);
|
|
17763
|
-
}
|
|
17764
|
-
}
|
|
17765
|
-
const BATCH_SIZE = 100;
|
|
17318
|
+
const index = this.pageIndex(scope);
|
|
17319
|
+
const BATCH_SIZE = 50;
|
|
17766
17320
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
17767
17321
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
17768
|
-
const
|
|
17769
|
-
|
|
17770
|
-
|
|
17771
|
-
|
|
17772
|
-
|
|
17773
|
-
|
|
17774
|
-
p.
|
|
17775
|
-
p.
|
|
17776
|
-
p.
|
|
17777
|
-
p.
|
|
17778
|
-
|
|
17779
|
-
|
|
17780
|
-
p.
|
|
17781
|
-
p.
|
|
17782
|
-
p.
|
|
17783
|
-
p.
|
|
17784
|
-
|
|
17785
|
-
p.
|
|
17786
|
-
|
|
17322
|
+
const docs = batch.map((p) => ({
|
|
17323
|
+
id: p.url,
|
|
17324
|
+
content: {
|
|
17325
|
+
title: p.title,
|
|
17326
|
+
url: p.url,
|
|
17327
|
+
type: "page",
|
|
17328
|
+
description: p.description ?? "",
|
|
17329
|
+
keywords: (p.keywords ?? []).join(","),
|
|
17330
|
+
summary: p.summary ?? "",
|
|
17331
|
+
tags: p.tags.join(",")
|
|
17332
|
+
},
|
|
17333
|
+
metadata: {
|
|
17334
|
+
markdown: p.markdown,
|
|
17335
|
+
projectId: p.projectId,
|
|
17336
|
+
scopeName: p.scopeName,
|
|
17337
|
+
routeFile: p.routeFile,
|
|
17338
|
+
routeResolution: p.routeResolution,
|
|
17339
|
+
incomingLinks: p.incomingLinks,
|
|
17340
|
+
outgoingLinks: p.outgoingLinks,
|
|
17341
|
+
depth: p.depth,
|
|
17342
|
+
indexedAt: p.indexedAt
|
|
17343
|
+
}
|
|
17787
17344
|
}));
|
|
17788
|
-
await
|
|
17345
|
+
await index.upsert(docs);
|
|
17789
17346
|
}
|
|
17790
17347
|
}
|
|
17791
17348
|
async getPage(url, scope) {
|
|
17792
|
-
|
|
17793
|
-
|
|
17794
|
-
|
|
17795
|
-
|
|
17796
|
-
|
|
17797
|
-
|
|
17798
|
-
|
|
17799
|
-
|
|
17800
|
-
|
|
17801
|
-
|
|
17802
|
-
|
|
17803
|
-
|
|
17804
|
-
|
|
17805
|
-
|
|
17806
|
-
|
|
17807
|
-
|
|
17808
|
-
|
|
17809
|
-
|
|
17810
|
-
|
|
17811
|
-
|
|
17812
|
-
|
|
17349
|
+
const index = this.pageIndex(scope);
|
|
17350
|
+
try {
|
|
17351
|
+
const results = await index.fetch([url]);
|
|
17352
|
+
const doc = results[0];
|
|
17353
|
+
if (!doc) return null;
|
|
17354
|
+
return {
|
|
17355
|
+
url: doc.content.url,
|
|
17356
|
+
title: doc.content.title,
|
|
17357
|
+
markdown: doc.metadata.markdown,
|
|
17358
|
+
projectId: doc.metadata.projectId,
|
|
17359
|
+
scopeName: doc.metadata.scopeName,
|
|
17360
|
+
routeFile: doc.metadata.routeFile,
|
|
17361
|
+
routeResolution: doc.metadata.routeResolution,
|
|
17362
|
+
incomingLinks: doc.metadata.incomingLinks,
|
|
17363
|
+
outgoingLinks: doc.metadata.outgoingLinks,
|
|
17364
|
+
depth: doc.metadata.depth,
|
|
17365
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17366
|
+
indexedAt: doc.metadata.indexedAt,
|
|
17367
|
+
summary: doc.content.summary || void 0,
|
|
17368
|
+
description: doc.content.description || void 0,
|
|
17369
|
+
keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
|
|
17370
|
+
};
|
|
17371
|
+
} catch {
|
|
17372
|
+
return null;
|
|
17373
|
+
}
|
|
17813
17374
|
}
|
|
17814
17375
|
async deletePages(scope) {
|
|
17815
|
-
|
|
17816
|
-
|
|
17817
|
-
|
|
17818
|
-
|
|
17819
|
-
}
|
|
17820
|
-
}
|
|
17821
|
-
async getScopeModelId(scope) {
|
|
17822
|
-
await this.ensureRegistry();
|
|
17823
|
-
const rs = await this.client.execute({
|
|
17824
|
-
sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17825
|
-
args: [scope.projectId, scope.scopeName]
|
|
17826
|
-
});
|
|
17827
|
-
if (rs.rows.length === 0) return null;
|
|
17828
|
-
return rs.rows[0].model_id;
|
|
17376
|
+
try {
|
|
17377
|
+
const index = this.pageIndex(scope);
|
|
17378
|
+
await index.reset();
|
|
17379
|
+
} catch {
|
|
17380
|
+
}
|
|
17829
17381
|
}
|
|
17830
17382
|
async health() {
|
|
17831
17383
|
try {
|
|
17832
|
-
await this.client.
|
|
17384
|
+
await this.client.info();
|
|
17833
17385
|
return { ok: true };
|
|
17834
17386
|
} catch (error) {
|
|
17835
17387
|
return {
|
|
@@ -17838,40 +17390,34 @@ var TursoVectorStore = class {
|
|
|
17838
17390
|
};
|
|
17839
17391
|
}
|
|
17840
17392
|
}
|
|
17393
|
+
async dropAllIndexes(projectId) {
|
|
17394
|
+
const allIndexes = await this.client.listIndexes();
|
|
17395
|
+
const prefix = `${projectId}--`;
|
|
17396
|
+
for (const name of allIndexes) {
|
|
17397
|
+
if (name.startsWith(prefix)) {
|
|
17398
|
+
try {
|
|
17399
|
+
const index = this.client.index(name);
|
|
17400
|
+
await index.deleteIndex();
|
|
17401
|
+
} catch {
|
|
17402
|
+
}
|
|
17403
|
+
}
|
|
17404
|
+
}
|
|
17405
|
+
}
|
|
17841
17406
|
};
|
|
17842
17407
|
|
|
17843
17408
|
// src/vector/factory.ts
|
|
17844
|
-
async function
|
|
17845
|
-
const
|
|
17846
|
-
const
|
|
17847
|
-
if (
|
|
17848
|
-
const { createClient: createClient2 } = await import('@libsql/client/http');
|
|
17849
|
-
const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
|
|
17850
|
-
const client2 = createClient2({
|
|
17851
|
-
url: remoteUrl,
|
|
17852
|
-
authToken
|
|
17853
|
-
});
|
|
17854
|
-
return new TursoVectorStore({
|
|
17855
|
-
client: client2,
|
|
17856
|
-
dimension: config.vector.dimension
|
|
17857
|
-
});
|
|
17858
|
-
}
|
|
17859
|
-
if (isServerless()) {
|
|
17409
|
+
/**
 * Build an `UpstashSearchStore` from the `upstash` section of the config.
 *
 * The URL and token are taken from `config.upstash.url` / `config.upstash.token`
 * when set, otherwise from the environment variables named by
 * `config.upstash.urlEnv` / `config.upstash.tokenEnv`.
 *
 * @throws SearchSocketError ("VECTOR_BACKEND_UNAVAILABLE") when either
 *         credential is missing or empty.
 */
async function createUpstashStore(config) {
  const { upstash } = config;
  const url = upstash.url ?? process.env[upstash.urlEnv];
  const token = upstash.token ?? process.env[upstash.tokenEnv];
  const hasCredentials = Boolean(url) && Boolean(token);
  if (!hasCredentials) {
    throw new SearchSocketError(
      "VECTOR_BACKEND_UNAVAILABLE",
      `Missing Upstash Search credentials. Set ${upstash.urlEnv} and ${upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
    );
  }
  // Loaded lazily, only once credentials are known to be present.
  const upstashSearch = await import('@upstash/search');
  const client = new upstashSearch.Search({ url, token });
  return new UpstashSearchStore({ client });
}
|
|
17876
17422
|
|
|
17877
17423
|
// src/utils/pattern.ts
|
|
@@ -17911,7 +17457,12 @@ function nonNegativeOrZero(value) {
|
|
|
17911
17457
|
}
|
|
17912
17458
|
return Math.max(0, value);
|
|
17913
17459
|
}
|
|
17914
|
-
function
|
|
17460
|
+
/**
 * Normalise a title or query for case- and punctuation-insensitive matching.
 *
 * Steps: Unicode NFC normalisation, lower-casing, removal of every character
 * that is not a letter, combining mark, digit or whitespace, collapsing of
 * whitespace runs to a single space, and trimming.
 *
 * Unlike an ASCII-only `[a-z0-9]` filter, this keeps letters and digits from
 * all scripts, so non-English titles do not collapse to an empty string.
 * Results for plain ASCII input are unchanged.
 *
 * @param text Raw title or query. Non-string values (e.g. a missing title)
 *             yield "" instead of throwing.
 * @returns The normalised string; may be empty.
 */
function normalizeForTitleMatch(text) {
  if (typeof text !== "string") {
    return "";
  }
  return text
    .normalize("NFC") // composed and decomposed forms must compare equal
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
17463
|
+
function rankHits(hits, config, query) {
|
|
17464
|
+
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
17465
|
+
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
17915
17466
|
return hits.map((hit) => {
|
|
17916
17467
|
let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
17917
17468
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
@@ -17922,6 +17473,12 @@ function rankHits(hits, config) {
|
|
|
17922
17473
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
17923
17474
|
score += depthBoost * config.ranking.weights.depth;
|
|
17924
17475
|
}
|
|
17476
|
+
if (normalizedQuery && titleMatchWeight > 0) {
|
|
17477
|
+
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
17478
|
+
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
17479
|
+
score += titleMatchWeight;
|
|
17480
|
+
}
|
|
17481
|
+
}
|
|
17925
17482
|
return {
|
|
17926
17483
|
hit,
|
|
17927
17484
|
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
@@ -17931,6 +17488,30 @@ function rankHits(hits, config) {
|
|
|
17931
17488
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17932
17489
|
});
|
|
17933
17490
|
}
|
|
17491
|
+
/**
 * Trim a ranked result list using two config-driven heuristics.
 *
 * 1. When `config.ranking.minScore > 0`, the median `pageScore` is computed;
 *    if it is below `minScore`, the whole list is discarded (`[]`).
 * 2. When `config.ranking.scoreGapThreshold > 0`, the list is cut just before
 *    the first entry whose relative drop from its predecessor,
 *    `(prev - current) / prev`, reaches the threshold. Predecessors with a
 *    non-positive score are skipped.
 *
 * @param results Entries exposing a numeric `pageScore`, in ranked order.
 * @param config  Object providing `ranking.scoreGapThreshold` and `ranking.minScore`.
 * @returns The input array itself when nothing is trimmed, otherwise a new array.
 */
function trimByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold: gapThreshold, minScore } = config.ranking;

  if (minScore > 0) {
    const ascending = results.map((entry) => entry.pageScore).sort((a, b) => a - b);
    const half = Math.floor(ascending.length / 2);
    let median;
    if (ascending.length % 2 === 0) {
      median = (ascending[half - 1] + ascending[half]) / 2;
    } else {
      median = ascending[half];
    }
    if (median < minScore) return [];
  }

  // Gap trimming needs a positive threshold and at least two entries.
  if (!(gapThreshold > 0) || results.length < 2) return results;

  const cutAt = results.findIndex((entry, position) => {
    if (position === 0) return false;
    const previous = results[position - 1].pageScore;
    return previous > 0 && (previous - entry.pageScore) / previous >= gapThreshold;
  });
  return cutAt === -1 ? results : results.slice(0, cutAt);
}
|
|
17934
17515
|
function findPageWeight(url, pageWeights) {
|
|
17935
17516
|
let bestPattern = "";
|
|
17936
17517
|
let bestWeight = 1;
|
|
@@ -17985,6 +17566,61 @@ function aggregateByPage(ranked, config) {
|
|
|
17985
17566
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17986
17567
|
});
|
|
17987
17568
|
}
|
|
17569
|
+
/**
 * Combine page-level hits with ranked chunk hits into one ranked list.
 *
 * With `w = config.search.pageSearchWeight`:
 * - a chunk whose URL also has a page hit gets
 *   `(1 - w) * chunk.finalScore + w * pageHit.score` (the chunk's own score
 *   is kept if that blend is not finite);
 * - a chunk without a page hit is passed through untouched;
 * - a page hit with no chunk at all is added as a synthetic entry with id
 *   `page:<url>`, scored `pageHit.score * w` (0 if not finite).
 *
 * The merged list is sorted by `finalScore`, highest first. When `pageHits`
 * is empty, `rankedChunks` is returned as-is.
 */
function mergePageAndChunkResults(pageHits, rankedChunks, config) {
  if (pageHits.length === 0) return rankedChunks;

  const pageWeight = config.search.pageSearchWeight;
  // A later page hit for the same URL replaces an earlier one.
  const pageHitByUrl = new Map(pageHits.map((pageHit) => [pageHit.url, pageHit]));
  const urlsCoveredByChunks = new Set();
  const combined = [];

  for (const chunk of rankedChunks) {
    const chunkUrl = chunk.hit.metadata.url;
    const matchingPage = pageHitByUrl.get(chunkUrl);
    if (!matchingPage) {
      combined.push(chunk);
      continue;
    }
    urlsCoveredByChunks.add(chunkUrl);
    const blendedScore = (1 - pageWeight) * chunk.finalScore + pageWeight * matchingPage.score;
    combined.push({
      hit: chunk.hit,
      finalScore: Number.isFinite(blendedScore) ? blendedScore : chunk.finalScore
    });
  }

  // Pages found only by the page-level search get a stand-in chunk entry.
  for (const [pageUrl, pageHit] of pageHitByUrl) {
    if (urlsCoveredByChunks.has(pageUrl)) continue;
    const weightedScore = pageHit.score * pageWeight;
    const fallbackText = pageHit.description || pageHit.title;
    combined.push({
      hit: {
        id: `page:${pageUrl}`,
        score: pageHit.score,
        metadata: {
          projectId: "",
          scopeName: "",
          url: pageHit.url,
          path: pageHit.url,
          title: pageHit.title,
          sectionTitle: "",
          headingPath: [],
          snippet: fallbackText,
          chunkText: fallbackText,
          ordinal: 0,
          contentHash: "",
          depth: pageHit.depth,
          incomingLinks: pageHit.incomingLinks,
          routeFile: pageHit.routeFile,
          tags: pageHit.tags
        }
      },
      finalScore: Number.isFinite(weightedScore) ? weightedScore : 0
    });
  }

  return combined.sort((left, right) => {
    const difference = right.finalScore - left.finalScore;
    // A NaN difference (e.g. two -Infinity scores) is treated as a tie.
    return Number.isNaN(difference) ? 0 : difference;
  });
}
|
|
17988
17624
|
|
|
17989
17625
|
// src/search/engine.ts
|
|
17990
17626
|
var requestSchema = zod.z.object({
|
|
@@ -17993,35 +17629,25 @@ var requestSchema = zod.z.object({
|
|
|
17993
17629
|
scope: zod.z.string().optional(),
|
|
17994
17630
|
pathPrefix: zod.z.string().optional(),
|
|
17995
17631
|
tags: zod.z.array(zod.z.string()).optional(),
|
|
17996
|
-
|
|
17997
|
-
groupBy: zod.z.enum(["page", "chunk"]).optional(),
|
|
17998
|
-
stream: zod.z.boolean().optional()
|
|
17632
|
+
groupBy: zod.z.enum(["page", "chunk"]).optional()
|
|
17999
17633
|
});
|
|
18000
17634
|
var SearchEngine = class _SearchEngine {
|
|
18001
17635
|
cwd;
|
|
18002
17636
|
config;
|
|
18003
|
-
|
|
18004
|
-
vectorStore;
|
|
18005
|
-
reranker;
|
|
17637
|
+
store;
|
|
18006
17638
|
constructor(options) {
|
|
18007
17639
|
this.cwd = options.cwd;
|
|
18008
17640
|
this.config = options.config;
|
|
18009
|
-
this.
|
|
18010
|
-
this.vectorStore = options.vectorStore;
|
|
18011
|
-
this.reranker = options.reranker;
|
|
17641
|
+
this.store = options.store;
|
|
18012
17642
|
}
|
|
18013
17643
|
static async create(options = {}) {
|
|
18014
17644
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
18015
17645
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
18016
|
-
const
|
|
18017
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
18018
|
-
const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
|
|
17646
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
18019
17647
|
return new _SearchEngine({
|
|
18020
17648
|
cwd,
|
|
18021
17649
|
config,
|
|
18022
|
-
|
|
18023
|
-
vectorStore,
|
|
18024
|
-
reranker
|
|
17650
|
+
store
|
|
18025
17651
|
});
|
|
18026
17652
|
}
|
|
18027
17653
|
getConfig() {
|
|
@@ -18035,142 +17661,90 @@ var SearchEngine = class _SearchEngine {
|
|
|
18035
17661
|
const input = parsed.data;
|
|
18036
17662
|
const totalStart = process.hrtime.bigint();
|
|
18037
17663
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
18038
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
18039
17664
|
const topK = input.topK ?? 10;
|
|
18040
|
-
const wantsRerank = Boolean(input.rerank);
|
|
18041
17665
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
18042
17666
|
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
18043
|
-
const
|
|
18044
|
-
|
|
18045
|
-
|
|
18046
|
-
|
|
18047
|
-
|
|
18048
|
-
|
|
18049
|
-
|
|
18050
|
-
|
|
18051
|
-
|
|
18052
|
-
|
|
18053
|
-
|
|
18054
|
-
|
|
18055
|
-
|
|
18056
|
-
|
|
18057
|
-
|
|
18058
|
-
|
|
18059
|
-
|
|
18060
|
-
|
|
18061
|
-
|
|
18062
|
-
|
|
18063
|
-
|
|
18064
|
-
|
|
18065
|
-
|
|
18066
|
-
|
|
18067
|
-
|
|
18068
|
-
|
|
18069
|
-
|
|
17667
|
+
const filterParts = [];
|
|
17668
|
+
if (input.pathPrefix) {
|
|
17669
|
+
const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
|
|
17670
|
+
filterParts.push(`url GLOB '${prefix}*'`);
|
|
17671
|
+
}
|
|
17672
|
+
if (input.tags && input.tags.length > 0) {
|
|
17673
|
+
for (const tag of input.tags) {
|
|
17674
|
+
filterParts.push(`tags GLOB '*${tag}*'`);
|
|
17675
|
+
}
|
|
17676
|
+
}
|
|
17677
|
+
const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
|
|
17678
|
+
const useDualSearch = this.config.search.dualSearch && groupByPage;
|
|
17679
|
+
const searchStart = process.hrtime.bigint();
|
|
17680
|
+
let ranked;
|
|
17681
|
+
if (useDualSearch) {
|
|
17682
|
+
const chunkLimit = Math.max(topK * 10, 100);
|
|
17683
|
+
const pageLimit = 20;
|
|
17684
|
+
const [pageHits, chunkHits] = await Promise.all([
|
|
17685
|
+
this.store.searchPages(
|
|
17686
|
+
input.q,
|
|
17687
|
+
{
|
|
17688
|
+
limit: pageLimit,
|
|
17689
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17690
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17691
|
+
filter
|
|
17692
|
+
},
|
|
17693
|
+
resolvedScope
|
|
17694
|
+
),
|
|
17695
|
+
this.store.search(
|
|
17696
|
+
input.q,
|
|
17697
|
+
{
|
|
17698
|
+
limit: chunkLimit,
|
|
17699
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17700
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17701
|
+
reranking: false,
|
|
17702
|
+
filter
|
|
17703
|
+
},
|
|
17704
|
+
resolvedScope
|
|
17705
|
+
)
|
|
17706
|
+
]);
|
|
17707
|
+
const rankedChunks = rankHits(chunkHits, this.config, input.q);
|
|
17708
|
+
ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
|
|
17709
|
+
} else {
|
|
17710
|
+
const hits = await this.store.search(
|
|
17711
|
+
input.q,
|
|
17712
|
+
{
|
|
17713
|
+
limit: candidateK,
|
|
17714
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17715
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17716
|
+
reranking: this.config.search.reranking,
|
|
17717
|
+
filter
|
|
17718
|
+
},
|
|
17719
|
+
resolvedScope
|
|
17720
|
+
);
|
|
17721
|
+
ranked = rankHits(hits, this.config, input.q);
|
|
18070
17722
|
}
|
|
18071
|
-
const
|
|
17723
|
+
const searchMs = hrTimeMs(searchStart);
|
|
17724
|
+
const results = this.buildResults(ranked, topK, groupByPage, input.q);
|
|
18072
17725
|
return {
|
|
18073
17726
|
q: input.q,
|
|
18074
17727
|
scope: resolvedScope.scopeName,
|
|
18075
17728
|
results,
|
|
18076
17729
|
meta: {
|
|
18077
17730
|
timingsMs: {
|
|
18078
|
-
|
|
18079
|
-
vector: Math.round(vectorMs),
|
|
18080
|
-
rerank: Math.round(rerankMs),
|
|
17731
|
+
search: Math.round(searchMs),
|
|
18081
17732
|
total: Math.round(hrTimeMs(totalStart))
|
|
18082
|
-
},
|
|
18083
|
-
usedRerank,
|
|
18084
|
-
modelId: this.config.embeddings.model
|
|
18085
|
-
}
|
|
18086
|
-
};
|
|
18087
|
-
}
|
|
18088
|
-
async *searchStreaming(request) {
|
|
18089
|
-
const parsed = requestSchema.safeParse(request);
|
|
18090
|
-
if (!parsed.success) {
|
|
18091
|
-
throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
|
|
18092
|
-
}
|
|
18093
|
-
const input = parsed.data;
|
|
18094
|
-
const wantsRerank = Boolean(input.rerank);
|
|
18095
|
-
if (!wantsRerank) {
|
|
18096
|
-
const response = await this.search(request);
|
|
18097
|
-
yield { phase: "initial", data: response };
|
|
18098
|
-
return;
|
|
18099
|
-
}
|
|
18100
|
-
const totalStart = process.hrtime.bigint();
|
|
18101
|
-
const resolvedScope = resolveScope(this.config, input.scope);
|
|
18102
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
18103
|
-
const topK = input.topK ?? 10;
|
|
18104
|
-
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
18105
|
-
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
18106
|
-
const embedStart = process.hrtime.bigint();
|
|
18107
|
-
const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
|
|
18108
|
-
const queryVector = queryEmbeddings[0];
|
|
18109
|
-
if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
|
|
18110
|
-
throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
|
|
18111
|
-
}
|
|
18112
|
-
const embedMs = hrTimeMs(embedStart);
|
|
18113
|
-
const vectorStart = process.hrtime.bigint();
|
|
18114
|
-
const hits = await this.vectorStore.query(
|
|
18115
|
-
queryVector,
|
|
18116
|
-
{
|
|
18117
|
-
topK: candidateK,
|
|
18118
|
-
pathPrefix: input.pathPrefix,
|
|
18119
|
-
tags: input.tags
|
|
18120
|
-
},
|
|
18121
|
-
resolvedScope
|
|
18122
|
-
);
|
|
18123
|
-
const vectorMs = hrTimeMs(vectorStart);
|
|
18124
|
-
const ranked = rankHits(hits, this.config);
|
|
18125
|
-
const initialResults = this.buildResults(ranked, topK, groupByPage);
|
|
18126
|
-
yield {
|
|
18127
|
-
phase: "initial",
|
|
18128
|
-
data: {
|
|
18129
|
-
q: input.q,
|
|
18130
|
-
scope: resolvedScope.scopeName,
|
|
18131
|
-
results: initialResults,
|
|
18132
|
-
meta: {
|
|
18133
|
-
timingsMs: {
|
|
18134
|
-
embed: Math.round(embedMs),
|
|
18135
|
-
vector: Math.round(vectorMs),
|
|
18136
|
-
rerank: 0,
|
|
18137
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
18138
|
-
},
|
|
18139
|
-
usedRerank: false,
|
|
18140
|
-
modelId: this.config.embeddings.model
|
|
18141
|
-
}
|
|
18142
|
-
}
|
|
18143
|
-
};
|
|
18144
|
-
const rerankStart = process.hrtime.bigint();
|
|
18145
|
-
const reranked = await this.rerankHits(input.q, ranked, topK);
|
|
18146
|
-
const rerankMs = hrTimeMs(rerankStart);
|
|
18147
|
-
const rerankedResults = this.buildResults(reranked, topK, groupByPage);
|
|
18148
|
-
yield {
|
|
18149
|
-
phase: "reranked",
|
|
18150
|
-
data: {
|
|
18151
|
-
q: input.q,
|
|
18152
|
-
scope: resolvedScope.scopeName,
|
|
18153
|
-
results: rerankedResults,
|
|
18154
|
-
meta: {
|
|
18155
|
-
timingsMs: {
|
|
18156
|
-
embed: Math.round(embedMs),
|
|
18157
|
-
vector: Math.round(vectorMs),
|
|
18158
|
-
rerank: Math.round(rerankMs),
|
|
18159
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
18160
|
-
},
|
|
18161
|
-
usedRerank: true,
|
|
18162
|
-
modelId: this.config.embeddings.model
|
|
18163
17733
|
}
|
|
18164
17734
|
}
|
|
18165
17735
|
};
|
|
18166
17736
|
}
|
|
18167
|
-
|
|
18168
|
-
const
|
|
17737
|
+
ensureSnippet(hit) {
|
|
17738
|
+
const snippet = hit.hit.metadata.snippet;
|
|
17739
|
+
if (snippet && snippet.length >= 30) return snippet;
|
|
17740
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
17741
|
+
if (chunkText) return toSnippet(chunkText);
|
|
17742
|
+
return snippet || "";
|
|
17743
|
+
}
|
|
17744
|
+
buildResults(ordered, topK, groupByPage, _query) {
|
|
18169
17745
|
if (groupByPage) {
|
|
18170
17746
|
let pages = aggregateByPage(ordered, this.config);
|
|
18171
|
-
|
|
18172
|
-
pages = pages.filter((p) => p.pageScore >= minScore);
|
|
18173
|
-
}
|
|
17747
|
+
pages = trimByScoreGap(pages, this.config);
|
|
18174
17748
|
const minRatio = this.config.ranking.minChunkScoreRatio;
|
|
18175
17749
|
return pages.slice(0, topK).map((page) => {
|
|
18176
17750
|
const bestScore = page.bestChunk.finalScore;
|
|
@@ -18180,12 +17754,12 @@ var SearchEngine = class _SearchEngine {
|
|
|
18180
17754
|
url: page.url,
|
|
18181
17755
|
title: page.title,
|
|
18182
17756
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
18183
|
-
snippet: page.bestChunk
|
|
17757
|
+
snippet: this.ensureSnippet(page.bestChunk),
|
|
18184
17758
|
score: Number(page.pageScore.toFixed(6)),
|
|
18185
17759
|
routeFile: page.routeFile,
|
|
18186
17760
|
chunks: meaningful.length > 1 ? meaningful.map((c) => ({
|
|
18187
17761
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
18188
|
-
snippet: c
|
|
17762
|
+
snippet: this.ensureSnippet(c),
|
|
18189
17763
|
headingPath: c.hit.metadata.headingPath,
|
|
18190
17764
|
score: Number(c.finalScore.toFixed(6))
|
|
18191
17765
|
})) : void 0
|
|
@@ -18193,6 +17767,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18193
17767
|
});
|
|
18194
17768
|
} else {
|
|
18195
17769
|
let filtered = ordered;
|
|
17770
|
+
const minScore = this.config.ranking.minScore;
|
|
18196
17771
|
if (minScore > 0) {
|
|
18197
17772
|
filtered = ordered.filter((entry) => entry.finalScore >= minScore);
|
|
18198
17773
|
}
|
|
@@ -18200,7 +17775,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18200
17775
|
url: hit.metadata.url,
|
|
18201
17776
|
title: hit.metadata.title,
|
|
18202
17777
|
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
18203
|
-
snippet: hit
|
|
17778
|
+
snippet: this.ensureSnippet({ hit, finalScore }),
|
|
18204
17779
|
score: Number(finalScore.toFixed(6)),
|
|
18205
17780
|
routeFile: hit.metadata.routeFile
|
|
18206
17781
|
}));
|
|
@@ -18209,7 +17784,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18209
17784
|
async getPage(pathOrUrl, scope) {
|
|
18210
17785
|
const resolvedScope = resolveScope(this.config, scope);
|
|
18211
17786
|
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
18212
|
-
const page = await this.
|
|
17787
|
+
const page = await this.store.getPage(urlPath, resolvedScope);
|
|
18213
17788
|
if (!page) {
|
|
18214
17789
|
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
18215
17790
|
}
|
|
@@ -18230,7 +17805,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18230
17805
|
};
|
|
18231
17806
|
}
|
|
18232
17807
|
async health() {
|
|
18233
|
-
return this.
|
|
17808
|
+
return this.store.health();
|
|
18234
17809
|
}
|
|
18235
17810
|
resolveInputPath(pathOrUrl) {
|
|
18236
17811
|
try {
|
|
@@ -18242,90 +17817,6 @@ var SearchEngine = class _SearchEngine {
|
|
|
18242
17817
|
const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
|
|
18243
17818
|
return normalizeUrlPath(withoutQueryOrHash);
|
|
18244
17819
|
}
|
|
18245
|
-
async assertModelCompatibility(scope) {
|
|
18246
|
-
const modelId = await this.vectorStore.getScopeModelId(scope);
|
|
18247
|
-
if (modelId && modelId !== this.config.embeddings.model) {
|
|
18248
|
-
throw new SearchSocketError(
|
|
18249
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
18250
|
-
`Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
|
|
18251
|
-
);
|
|
18252
|
-
}
|
|
18253
|
-
}
|
|
18254
|
-
async rerankHits(query, ranked, topK) {
|
|
18255
|
-
if (!this.config.rerank.enabled) {
|
|
18256
|
-
throw new SearchSocketError(
|
|
18257
|
-
"INVALID_REQUEST",
|
|
18258
|
-
"rerank=true requested but rerank.enabled is not set to true.",
|
|
18259
|
-
400
|
|
18260
|
-
);
|
|
18261
|
-
}
|
|
18262
|
-
if (!this.reranker) {
|
|
18263
|
-
throw new SearchSocketError(
|
|
18264
|
-
"CONFIG_MISSING",
|
|
18265
|
-
`rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
|
|
18266
|
-
400
|
|
18267
|
-
);
|
|
18268
|
-
}
|
|
18269
|
-
const pageGroups = /* @__PURE__ */ new Map();
|
|
18270
|
-
for (const entry of ranked) {
|
|
18271
|
-
const url = entry.hit.metadata.url;
|
|
18272
|
-
const group = pageGroups.get(url);
|
|
18273
|
-
if (group) group.push(entry);
|
|
18274
|
-
else pageGroups.set(url, [entry]);
|
|
18275
|
-
}
|
|
18276
|
-
const MAX_CHUNKS_PER_PAGE = 5;
|
|
18277
|
-
const MIN_CHUNKS_PER_PAGE = 1;
|
|
18278
|
-
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
18279
|
-
const MAX_DOC_CHARS = 2e3;
|
|
18280
|
-
const pageCandidates = [];
|
|
18281
|
-
for (const [url, chunks] of pageGroups) {
|
|
18282
|
-
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
18283
|
-
const bestScore = byScore[0].finalScore;
|
|
18284
|
-
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
18285
|
-
const selected = byScore.filter(
|
|
18286
|
-
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
18287
|
-
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
18288
|
-
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
18289
|
-
const first = selected[0].hit.metadata;
|
|
18290
|
-
const parts = [first.title];
|
|
18291
|
-
if (first.description) {
|
|
18292
|
-
parts.push(first.description);
|
|
18293
|
-
}
|
|
18294
|
-
if (first.keywords && first.keywords.length > 0) {
|
|
18295
|
-
parts.push(first.keywords.join(", "));
|
|
18296
|
-
}
|
|
18297
|
-
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
18298
|
-
parts.push(body);
|
|
18299
|
-
let text = parts.join("\n\n");
|
|
18300
|
-
if (text.length > MAX_DOC_CHARS) {
|
|
18301
|
-
text = text.slice(0, MAX_DOC_CHARS);
|
|
18302
|
-
}
|
|
18303
|
-
pageCandidates.push({ id: url, text });
|
|
18304
|
-
}
|
|
18305
|
-
const maxCandidates = Math.max(topK, this.config.rerank.topN);
|
|
18306
|
-
const cappedCandidates = pageCandidates.slice(0, maxCandidates);
|
|
18307
|
-
const reranked = await this.reranker.rerank(
|
|
18308
|
-
query,
|
|
18309
|
-
cappedCandidates,
|
|
18310
|
-
maxCandidates
|
|
18311
|
-
);
|
|
18312
|
-
const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
|
|
18313
|
-
return ranked.map((entry) => {
|
|
18314
|
-
const pageScore = scoreByUrl.get(entry.hit.metadata.url);
|
|
18315
|
-
const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
|
|
18316
|
-
if (pageScore === void 0 || !Number.isFinite(pageScore)) {
|
|
18317
|
-
return { ...entry, finalScore: base };
|
|
18318
|
-
}
|
|
18319
|
-
const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
|
|
18320
|
-
return {
|
|
18321
|
-
...entry,
|
|
18322
|
-
finalScore: Number.isFinite(combined) ? combined : base
|
|
18323
|
-
};
|
|
18324
|
-
}).sort((a, b) => {
|
|
18325
|
-
const delta = b.finalScore - a.finalScore;
|
|
18326
|
-
return Number.isNaN(delta) ? 0 : delta;
|
|
18327
|
-
});
|
|
18328
|
-
}
|
|
18329
17820
|
};
|
|
18330
17821
|
|
|
18331
17822
|
// src/sveltekit/handle.ts
|
|
@@ -18480,42 +17971,6 @@ function searchsocketHandle(options = {}) {
|
|
|
18480
17971
|
}
|
|
18481
17972
|
const engine = await getEngine();
|
|
18482
17973
|
const searchRequest = body;
|
|
18483
|
-
if (searchRequest.stream && searchRequest.rerank) {
|
|
18484
|
-
const encoder = new TextEncoder();
|
|
18485
|
-
const stream = new ReadableStream({
|
|
18486
|
-
async start(controller) {
|
|
18487
|
-
try {
|
|
18488
|
-
for await (const event2 of engine.searchStreaming(searchRequest)) {
|
|
18489
|
-
const line = JSON.stringify(event2) + "\n";
|
|
18490
|
-
controller.enqueue(encoder.encode(line));
|
|
18491
|
-
}
|
|
18492
|
-
} catch (streamError) {
|
|
18493
|
-
const errorEvent = {
|
|
18494
|
-
phase: "error",
|
|
18495
|
-
data: {
|
|
18496
|
-
error: {
|
|
18497
|
-
code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
|
|
18498
|
-
message: streamError instanceof Error ? streamError.message : "Unknown error"
|
|
18499
|
-
}
|
|
18500
|
-
}
|
|
18501
|
-
};
|
|
18502
|
-
controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
|
|
18503
|
-
} finally {
|
|
18504
|
-
controller.close();
|
|
18505
|
-
}
|
|
18506
|
-
}
|
|
18507
|
-
});
|
|
18508
|
-
return withCors(
|
|
18509
|
-
new Response(stream, {
|
|
18510
|
-
status: 200,
|
|
18511
|
-
headers: {
|
|
18512
|
-
"content-type": "application/x-ndjson"
|
|
18513
|
-
}
|
|
18514
|
-
}),
|
|
18515
|
-
event.request,
|
|
18516
|
-
config
|
|
18517
|
-
);
|
|
18518
|
-
}
|
|
18519
17974
|
const result = await engine.search(searchRequest);
|
|
18520
17975
|
return withCors(
|
|
18521
17976
|
new Response(JSON.stringify(result), {
|
|
@@ -18576,9 +18031,8 @@ function withCors(response, request, config) {
|
|
|
18576
18031
|
}
|
|
18577
18032
|
/**
 * Resolve the state directory against `cwd`, create it (including missing
 * parents) and return its absolute path.
 *
 * `scope` is accepted for signature compatibility but is not used here.
 *
 * @returns `{ statePath }`, the absolute path of the state directory.
 */
function ensureStateDirs(cwd, stateDir, scope) {
  const nodePath = path__default.default;
  const statePath = nodePath.resolve(cwd, stateDir);
  const nodeFs = fs__default.default;
  nodeFs.mkdirSync(statePath, { recursive: true });
  return { statePath };
}
|
|
18583
18037
|
function sha1(input) {
|
|
18584
18038
|
return crypto.createHash("sha1").update(input).digest("hex");
|
|
@@ -18828,7 +18282,7 @@ function buildEmbeddingText(chunk, prependTitle) {
|
|
|
18828
18282
|
|
|
18829
18283
|
${chunk.chunkText}`;
|
|
18830
18284
|
}
|
|
18831
|
-
function
|
|
18285
|
+
function chunkPage(page, config, scope) {
|
|
18832
18286
|
const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
|
|
18833
18287
|
const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
|
|
18834
18288
|
const chunks = [];
|
|
@@ -19859,53 +19313,6 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
19859
19313
|
weight: mdWeight
|
|
19860
19314
|
};
|
|
19861
19315
|
}
|
|
19862
|
-
function yamlString(value) {
|
|
19863
|
-
return JSON.stringify(value);
|
|
19864
|
-
}
|
|
19865
|
-
function yamlArray(values) {
|
|
19866
|
-
return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
|
|
19867
|
-
}
|
|
19868
|
-
function buildMirrorMarkdown(page) {
|
|
19869
|
-
const frontmatterLines = [
|
|
19870
|
-
"---",
|
|
19871
|
-
`url: ${yamlString(page.url)}`,
|
|
19872
|
-
`title: ${yamlString(page.title)}`,
|
|
19873
|
-
`scope: ${yamlString(page.scope)}`,
|
|
19874
|
-
`routeFile: ${yamlString(page.routeFile)}`,
|
|
19875
|
-
`routeResolution: ${yamlString(page.routeResolution)}`,
|
|
19876
|
-
`generatedAt: ${yamlString(page.generatedAt)}`,
|
|
19877
|
-
`incomingLinks: ${page.incomingLinks}`,
|
|
19878
|
-
`outgoingLinks: ${page.outgoingLinks}`,
|
|
19879
|
-
`depth: ${page.depth}`,
|
|
19880
|
-
`tags: ${yamlArray(page.tags)}`,
|
|
19881
|
-
"---",
|
|
19882
|
-
""
|
|
19883
|
-
];
|
|
19884
|
-
return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
|
|
19885
|
-
}
|
|
19886
|
-
function stripGeneratedAt(content) {
|
|
19887
|
-
return content.replace(/^generatedAt: .*$/m, "");
|
|
19888
|
-
}
|
|
19889
|
-
async function writeMirrorPage(statePath, scope, page) {
|
|
19890
|
-
const relative = urlPathToMirrorRelative(page.url);
|
|
19891
|
-
const outputPath = path__default.default.join(statePath, "pages", scope.scopeName, relative);
|
|
19892
|
-
await fs4__default.default.mkdir(path__default.default.dirname(outputPath), { recursive: true });
|
|
19893
|
-
const newContent = buildMirrorMarkdown(page);
|
|
19894
|
-
try {
|
|
19895
|
-
const existing = await fs4__default.default.readFile(outputPath, "utf8");
|
|
19896
|
-
if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
|
|
19897
|
-
return outputPath;
|
|
19898
|
-
}
|
|
19899
|
-
} catch {
|
|
19900
|
-
}
|
|
19901
|
-
await fs4__default.default.writeFile(outputPath, newContent, "utf8");
|
|
19902
|
-
return outputPath;
|
|
19903
|
-
}
|
|
19904
|
-
async function cleanMirrorForScope(statePath, scope) {
|
|
19905
|
-
const target = path__default.default.join(statePath, "pages", scope.scopeName);
|
|
19906
|
-
await fs4__default.default.rm(target, { recursive: true, force: true });
|
|
19907
|
-
await fs4__default.default.mkdir(target, { recursive: true });
|
|
19908
|
-
}
|
|
19909
19316
|
function segmentToRegex(segment) {
|
|
19910
19317
|
if (segment.startsWith("(") && segment.endsWith(")")) {
|
|
19911
19318
|
return { regex: "", score: 0 };
|
|
@@ -20066,7 +19473,7 @@ async function parseManifest(cwd, outputDir) {
|
|
|
20066
19473
|
const manifestPath = path__default.default.resolve(cwd, outputDir, "server", "manifest-full.js");
|
|
20067
19474
|
let content;
|
|
20068
19475
|
try {
|
|
20069
|
-
content = await
|
|
19476
|
+
content = await fs3__default.default.readFile(manifestPath, "utf8");
|
|
20070
19477
|
} catch {
|
|
20071
19478
|
throw new SearchSocketError(
|
|
20072
19479
|
"BUILD_MANIFEST_NOT_FOUND",
|
|
@@ -20239,7 +19646,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
|
|
|
20239
19646
|
const visited = /* @__PURE__ */ new Set();
|
|
20240
19647
|
const pages = [];
|
|
20241
19648
|
const queue = [];
|
|
20242
|
-
const limit =
|
|
19649
|
+
const limit = pLimit__default.default(8);
|
|
20243
19650
|
for (const seed of seedUrls) {
|
|
20244
19651
|
const normalized = normalizeUrlPath(seed);
|
|
20245
19652
|
if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
|
|
@@ -20321,7 +19728,7 @@ async function loadBuildPages(cwd, config, maxPages) {
|
|
|
20321
19728
|
const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
|
|
20322
19729
|
const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
|
|
20323
19730
|
try {
|
|
20324
|
-
const concurrencyLimit =
|
|
19731
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
20325
19732
|
const results = await Promise.allSettled(
|
|
20326
19733
|
selected.map(
|
|
20327
19734
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20395,7 +19802,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
20395
19802
|
const selected = typeof limit === "number" ? files.slice(0, limit) : files;
|
|
20396
19803
|
const pages = [];
|
|
20397
19804
|
for (const filePath of selected) {
|
|
20398
|
-
const raw = await
|
|
19805
|
+
const raw = await fs3__default.default.readFile(filePath, "utf8");
|
|
20399
19806
|
const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
|
|
20400
19807
|
pages.push({
|
|
20401
19808
|
url: filePathToUrl(filePath, baseDir),
|
|
@@ -20490,7 +19897,7 @@ async function loadCrawledPages(config, maxPages) {
|
|
|
20490
19897
|
const routes = await resolveRoutes(config);
|
|
20491
19898
|
const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
|
|
20492
19899
|
const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
|
|
20493
|
-
const concurrencyLimit =
|
|
19900
|
+
const concurrencyLimit = pLimit__default.default(8);
|
|
20494
19901
|
const results = await Promise.allSettled(
|
|
20495
19902
|
selected.map(
|
|
20496
19903
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20531,7 +19938,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
|
|
|
20531
19938
|
const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
|
|
20532
19939
|
const pages = [];
|
|
20533
19940
|
for (const filePath of selected) {
|
|
20534
|
-
const html = await
|
|
19941
|
+
const html = await fs3__default.default.readFile(filePath, "utf8");
|
|
20535
19942
|
pages.push({
|
|
20536
19943
|
url: staticHtmlFileToUrl(filePath, outputDir),
|
|
20537
19944
|
html,
|
|
@@ -20594,7 +20001,7 @@ function isBlockedByRobots(urlPath, rules3) {
|
|
|
20594
20001
|
}
|
|
20595
20002
|
async function loadRobotsTxtFromDir(dir) {
|
|
20596
20003
|
try {
|
|
20597
|
-
const content = await
|
|
20004
|
+
const content = await fs3__default.default.readFile(path__default.default.join(dir, "robots.txt"), "utf8");
|
|
20598
20005
|
return parseRobotsTxt(content);
|
|
20599
20006
|
} catch {
|
|
20600
20007
|
return null;
|
|
@@ -20613,34 +20020,41 @@ async function fetchRobotsTxt(baseUrl) {
|
|
|
20613
20020
|
}
|
|
20614
20021
|
|
|
20615
20022
|
// src/indexing/pipeline.ts
|
|
20616
|
-
|
|
20617
|
-
|
|
20618
|
-
|
|
20619
|
-
|
|
20620
|
-
|
|
20023
|
+
/**
 * Build a plain-text summary for a page: title, then description and keywords
 * when present, then the markdown body with its markup stripped.
 *
 * @param {{ title: string, description?: string, keywords?: string[], markdown: string }} page
 * @param {number} [maxChars=3500] - Upper bound on the returned string length.
 * @returns {string} Sections joined by blank lines, cut to `maxChars` and trimmed.
 */
function buildPageSummary(page, maxChars = 3500) {
  const sections = [page.title];
  if (page.description) {
    sections.push(page.description);
  }
  if (page.keywords && page.keywords.length > 0) {
    sections.push(page.keywords.join(", "));
  }
  // Applied in order; later rules rely on earlier ones having already run.
  const rules = [
    [/```[\s\S]*?```/g, " "], // fenced code blocks are dropped entirely
    [/`([^`]+)`/g, "$1"], // inline code keeps its text
    [/!?\[([^\]]*)\]\([^)]*\)/g, "$1"], // links and images keep their label
    [/^#{1,6}\s+/gm, ""], // heading markers
    [/[>*_|~\-]/g, " "], // leftover markdown punctuation
    [/\s+/g, " "] // collapse whitespace runs
  ];
  const body = rules
    .reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), page.markdown)
    .trim();
  if (body) {
    sections.push(body);
  }
  const summary = sections.join("\n\n");
  return summary.length <= maxChars ? summary : summary.slice(0, maxChars).trim();
}
|
|
20621
20039
|
var IndexPipeline = class _IndexPipeline {
|
|
20622
20040
|
cwd;
|
|
20623
20041
|
config;
|
|
20624
|
-
|
|
20625
|
-
vectorStore;
|
|
20042
|
+
store;
|
|
20626
20043
|
logger;
|
|
20627
20044
|
constructor(options) {
|
|
20628
20045
|
this.cwd = options.cwd;
|
|
20629
20046
|
this.config = options.config;
|
|
20630
|
-
this.
|
|
20631
|
-
this.vectorStore = options.vectorStore;
|
|
20047
|
+
this.store = options.store;
|
|
20632
20048
|
this.logger = options.logger;
|
|
20633
20049
|
}
|
|
20634
20050
|
static async create(options = {}) {
|
|
20635
20051
|
const cwd = path__default.default.resolve(options.cwd ?? process.cwd());
|
|
20636
20052
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
20637
|
-
const
|
|
20638
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
20053
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
20639
20054
|
return new _IndexPipeline({
|
|
20640
20055
|
cwd,
|
|
20641
20056
|
config,
|
|
20642
|
-
|
|
20643
|
-
vectorStore,
|
|
20057
|
+
store,
|
|
20644
20058
|
logger: options.logger ?? new Logger()
|
|
20645
20059
|
});
|
|
20646
20060
|
}
|
|
@@ -20660,25 +20074,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20660
20074
|
stageTimingsMs[name] = Math.round(hrTimeMs(start));
|
|
20661
20075
|
};
|
|
20662
20076
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
20663
|
-
|
|
20077
|
+
ensureStateDirs(this.cwd, this.config.state.dir);
|
|
20664
20078
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
20665
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode},
|
|
20079
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
|
|
20666
20080
|
if (options.force) {
|
|
20667
20081
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
20668
|
-
await cleanMirrorForScope(statePath, scope);
|
|
20669
20082
|
}
|
|
20670
20083
|
if (options.dryRun) {
|
|
20671
20084
|
this.logger.info("Dry run \u2014 no writes will be performed");
|
|
20672
20085
|
}
|
|
20673
20086
|
const manifestStart = stageStart();
|
|
20674
|
-
const existingHashes = await this.
|
|
20675
|
-
const existingModelId = await this.vectorStore.getScopeModelId(scope);
|
|
20676
|
-
if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
|
|
20677
|
-
throw new SearchSocketError(
|
|
20678
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
20679
|
-
`Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
|
|
20680
|
-
);
|
|
20681
|
-
}
|
|
20087
|
+
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
|
|
20682
20088
|
stageEnd("manifest", manifestStart);
|
|
20683
20089
|
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
|
|
20684
20090
|
const sourceStart = stageStart();
|
|
@@ -20807,9 +20213,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20807
20213
|
}
|
|
20808
20214
|
stageEnd("links", linkStart);
|
|
20809
20215
|
this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
|
|
20810
|
-
const
|
|
20811
|
-
this.logger.info("
|
|
20812
|
-
const
|
|
20216
|
+
const pagesStart = stageStart();
|
|
20217
|
+
this.logger.info("Building indexed pages...");
|
|
20218
|
+
const pages = [];
|
|
20813
20219
|
let routeExact = 0;
|
|
20814
20220
|
let routeBestEffort = 0;
|
|
20815
20221
|
const precomputedRoutes = /* @__PURE__ */ new Map();
|
|
@@ -20838,7 +20244,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20838
20244
|
} else {
|
|
20839
20245
|
routeExact += 1;
|
|
20840
20246
|
}
|
|
20841
|
-
const
|
|
20247
|
+
const indexedPage = {
|
|
20842
20248
|
url: page.url,
|
|
20843
20249
|
title: page.title,
|
|
20844
20250
|
scope: scope.scopeName,
|
|
@@ -20853,35 +20259,38 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20853
20259
|
description: page.description,
|
|
20854
20260
|
keywords: page.keywords
|
|
20855
20261
|
};
|
|
20856
|
-
|
|
20857
|
-
|
|
20858
|
-
await writeMirrorPage(statePath, scope, mirror);
|
|
20859
|
-
}
|
|
20860
|
-
this.logger.event("markdown_written", { url: page.url });
|
|
20262
|
+
pages.push(indexedPage);
|
|
20263
|
+
this.logger.event("page_indexed", { url: page.url });
|
|
20861
20264
|
}
|
|
20862
20265
|
if (!options.dryRun) {
|
|
20863
|
-
const pageRecords =
|
|
20864
|
-
|
|
20865
|
-
|
|
20866
|
-
|
|
20867
|
-
|
|
20868
|
-
|
|
20869
|
-
|
|
20870
|
-
|
|
20871
|
-
|
|
20872
|
-
|
|
20873
|
-
|
|
20874
|
-
|
|
20875
|
-
|
|
20876
|
-
|
|
20877
|
-
|
|
20878
|
-
|
|
20266
|
+
const pageRecords = pages.map((p) => {
|
|
20267
|
+
const summary = buildPageSummary(p);
|
|
20268
|
+
return {
|
|
20269
|
+
url: p.url,
|
|
20270
|
+
title: p.title,
|
|
20271
|
+
markdown: p.markdown,
|
|
20272
|
+
projectId: scope.projectId,
|
|
20273
|
+
scopeName: scope.scopeName,
|
|
20274
|
+
routeFile: p.routeFile,
|
|
20275
|
+
routeResolution: p.routeResolution,
|
|
20276
|
+
incomingLinks: p.incomingLinks,
|
|
20277
|
+
outgoingLinks: p.outgoingLinks,
|
|
20278
|
+
depth: p.depth,
|
|
20279
|
+
tags: p.tags,
|
|
20280
|
+
indexedAt: p.generatedAt,
|
|
20281
|
+
summary,
|
|
20282
|
+
description: p.description,
|
|
20283
|
+
keywords: p.keywords
|
|
20284
|
+
};
|
|
20285
|
+
});
|
|
20286
|
+
await this.store.deletePages(scope);
|
|
20287
|
+
await this.store.upsertPages(pageRecords, scope);
|
|
20879
20288
|
}
|
|
20880
|
-
stageEnd("
|
|
20881
|
-
this.logger.info(`
|
|
20289
|
+
stageEnd("pages", pagesStart);
|
|
20290
|
+
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
20882
20291
|
const chunkStart = stageStart();
|
|
20883
20292
|
this.logger.info("Chunking pages...");
|
|
20884
|
-
let chunks =
|
|
20293
|
+
let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
|
|
20885
20294
|
const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
|
|
20886
20295
|
if (typeof maxChunks === "number") {
|
|
20887
20296
|
chunks = chunks.slice(0, maxChunks);
|
|
@@ -20913,125 +20322,59 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20913
20322
|
});
|
|
20914
20323
|
const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
20915
20324
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
20916
|
-
const
|
|
20917
|
-
|
|
20918
|
-
for (const chunk of changedChunks) {
|
|
20919
|
-
chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
|
|
20920
|
-
}
|
|
20921
|
-
const estimatedTokens = changedChunks.reduce(
|
|
20922
|
-
(sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
|
|
20923
|
-
0
|
|
20924
|
-
);
|
|
20925
|
-
const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
|
|
20926
|
-
const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
|
|
20927
|
-
let newEmbeddings = 0;
|
|
20928
|
-
const vectorsByChunk = /* @__PURE__ */ new Map();
|
|
20325
|
+
const upsertStart = stageStart();
|
|
20326
|
+
let documentsUpserted = 0;
|
|
20929
20327
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
20930
|
-
this.logger.info(`
|
|
20931
|
-
const
|
|
20932
|
-
|
|
20933
|
-
|
|
20934
|
-
|
|
20935
|
-
|
|
20936
|
-
|
|
20937
|
-
|
|
20938
|
-
|
|
20939
|
-
|
|
20940
|
-
);
|
|
20941
|
-
|
|
20942
|
-
for (let i = 0; i < changedChunks.length; i += 1) {
|
|
20943
|
-
const chunk = changedChunks[i];
|
|
20944
|
-
const embedding = embeddings[i];
|
|
20945
|
-
if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
|
|
20946
|
-
throw new SearchSocketError(
|
|
20947
|
-
"VECTOR_BACKEND_UNAVAILABLE",
|
|
20948
|
-
`Embedding provider returned an invalid vector for chunk index ${i}.`
|
|
20949
|
-
);
|
|
20950
|
-
}
|
|
20951
|
-
vectorsByChunk.set(chunk.chunkKey, embedding);
|
|
20952
|
-
newEmbeddings += 1;
|
|
20953
|
-
this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
|
|
20954
|
-
}
|
|
20955
|
-
}
|
|
20956
|
-
stageEnd("embedding", embedStart);
|
|
20957
|
-
if (changedChunks.length > 0) {
|
|
20958
|
-
this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
|
|
20959
|
-
} else {
|
|
20960
|
-
this.logger.info("No chunks to embed \u2014 all up to date");
|
|
20961
|
-
}
|
|
20962
|
-
const syncStart = stageStart();
|
|
20963
|
-
if (!options.dryRun) {
|
|
20964
|
-
this.logger.info("Syncing vectors...");
|
|
20965
|
-
const upserts = [];
|
|
20966
|
-
for (const chunk of changedChunks) {
|
|
20967
|
-
const vector = vectorsByChunk.get(chunk.chunkKey);
|
|
20968
|
-
if (!vector) {
|
|
20969
|
-
continue;
|
|
20970
|
-
}
|
|
20971
|
-
upserts.push({
|
|
20328
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
|
|
20329
|
+
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
20330
|
+
const docs = changedChunks.map((chunk) => {
|
|
20331
|
+
const title = chunk.title;
|
|
20332
|
+
const sectionTitle = chunk.sectionTitle ?? "";
|
|
20333
|
+
const url = chunk.url;
|
|
20334
|
+
const tags = chunk.tags.join(",");
|
|
20335
|
+
const headingPath = chunk.headingPath.join(" > ");
|
|
20336
|
+
const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
|
|
20337
|
+
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
20338
|
+
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
20339
|
+
return {
|
|
20972
20340
|
id: chunk.chunkKey,
|
|
20973
|
-
|
|
20341
|
+
content: { title, sectionTitle, text, url, tags, headingPath },
|
|
20974
20342
|
metadata: {
|
|
20975
20343
|
projectId: scope.projectId,
|
|
20976
20344
|
scopeName: scope.scopeName,
|
|
20977
|
-
url: chunk.url,
|
|
20978
20345
|
path: chunk.path,
|
|
20979
|
-
title: chunk.title,
|
|
20980
|
-
sectionTitle: chunk.sectionTitle ?? "",
|
|
20981
|
-
headingPath: chunk.headingPath,
|
|
20982
20346
|
snippet: chunk.snippet,
|
|
20983
|
-
chunkText: chunk.chunkText.slice(0, 4e3),
|
|
20984
20347
|
ordinal: chunk.ordinal,
|
|
20985
20348
|
contentHash: chunk.contentHash,
|
|
20986
|
-
modelId: this.config.embeddings.model,
|
|
20987
20349
|
depth: chunk.depth,
|
|
20988
20350
|
incomingLinks: chunk.incomingLinks,
|
|
20989
20351
|
routeFile: chunk.routeFile,
|
|
20990
|
-
|
|
20991
|
-
|
|
20992
|
-
keywords: chunk.keywords
|
|
20352
|
+
description: chunk.description ?? "",
|
|
20353
|
+
keywords: (chunk.keywords ?? []).join(",")
|
|
20993
20354
|
}
|
|
20994
|
-
}
|
|
20995
|
-
}
|
|
20996
|
-
if (upserts.length > 0) {
|
|
20997
|
-
await this.vectorStore.upsert(upserts, scope);
|
|
20998
|
-
this.logger.event("upserted", { count: upserts.length });
|
|
20999
|
-
}
|
|
21000
|
-
if (deletes.length > 0) {
|
|
21001
|
-
await this.vectorStore.deleteByIds(deletes, scope);
|
|
21002
|
-
this.logger.event("deleted", { count: deletes.length });
|
|
21003
|
-
}
|
|
21004
|
-
}
|
|
21005
|
-
stageEnd("sync", syncStart);
|
|
21006
|
-
this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
|
|
21007
|
-
const finalizeStart = stageStart();
|
|
21008
|
-
if (!options.dryRun) {
|
|
21009
|
-
const scopeInfo = {
|
|
21010
|
-
projectId: scope.projectId,
|
|
21011
|
-
scopeName: scope.scopeName,
|
|
21012
|
-
modelId: this.config.embeddings.model,
|
|
21013
|
-
lastIndexedAt: nowIso(),
|
|
21014
|
-
vectorCount: chunks.length,
|
|
21015
|
-
lastEstimateTokens: estimatedTokens,
|
|
21016
|
-
lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
21017
|
-
lastEstimateChangedChunks: changedChunks.length
|
|
21018
|
-
};
|
|
21019
|
-
await this.vectorStore.recordScope(scopeInfo);
|
|
21020
|
-
this.logger.event("registry_updated", {
|
|
21021
|
-
scope: scope.scopeName,
|
|
21022
|
-
vectorCount: chunks.length
|
|
20355
|
+
};
|
|
21023
20356
|
});
|
|
20357
|
+
await this.store.upsertChunks(docs, scope);
|
|
20358
|
+
documentsUpserted = docs.length;
|
|
20359
|
+
this.logger.event("upserted", { count: docs.length });
|
|
20360
|
+
}
|
|
20361
|
+
if (!options.dryRun && deletes.length > 0) {
|
|
20362
|
+
await this.store.deleteByIds(deletes, scope);
|
|
20363
|
+
this.logger.event("deleted", { count: deletes.length });
|
|
20364
|
+
}
|
|
20365
|
+
stageEnd("upsert", upsertStart);
|
|
20366
|
+
if (changedChunks.length > 0) {
|
|
20367
|
+
this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
|
|
20368
|
+
} else {
|
|
20369
|
+
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
21024
20370
|
}
|
|
21025
|
-
stageEnd("finalize", finalizeStart);
|
|
21026
20371
|
this.logger.info("Done.");
|
|
21027
20372
|
return {
|
|
21028
|
-
pagesProcessed:
|
|
20373
|
+
pagesProcessed: pages.length,
|
|
21029
20374
|
chunksTotal: chunks.length,
|
|
21030
20375
|
chunksChanged: changedChunks.length,
|
|
21031
|
-
|
|
20376
|
+
documentsUpserted,
|
|
21032
20377
|
deletes: deletes.length,
|
|
21033
|
-
estimatedTokens,
|
|
21034
|
-
estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
21035
20378
|
routeExact,
|
|
21036
20379
|
routeBestEffort,
|
|
21037
20380
|
stageTimingsMs
|
|
@@ -21062,30 +20405,11 @@ function shouldRunAutoIndex(options) {
|
|
|
21062
20405
|
}
|
|
21063
20406
|
return false;
|
|
21064
20407
|
}
|
|
21065
|
-
function searchsocketViteConfig() {
|
|
21066
|
-
return {
|
|
21067
|
-
name: "searchsocket:config",
|
|
21068
|
-
config() {
|
|
21069
|
-
return {
|
|
21070
|
-
ssr: {
|
|
21071
|
-
external: ["@libsql/client", "libsql"]
|
|
21072
|
-
}
|
|
21073
|
-
};
|
|
21074
|
-
}
|
|
21075
|
-
};
|
|
21076
|
-
}
|
|
21077
20408
|
function searchsocketVitePlugin(options = {}) {
|
|
21078
20409
|
let executed = false;
|
|
21079
20410
|
let running = false;
|
|
21080
20411
|
return {
|
|
21081
20412
|
name: "searchsocket:auto-index",
|
|
21082
|
-
config() {
|
|
21083
|
-
return {
|
|
21084
|
-
ssr: {
|
|
21085
|
-
external: ["@libsql/client", "libsql"]
|
|
21086
|
-
}
|
|
21087
|
-
};
|
|
21088
|
-
},
|
|
21089
20413
|
async closeBundle() {
|
|
21090
20414
|
if (executed || running) {
|
|
21091
20415
|
return;
|
|
@@ -21113,9 +20437,8 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21113
20437
|
verbose: options.verbose
|
|
21114
20438
|
});
|
|
21115
20439
|
logger3.info(
|
|
21116
|
-
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged}
|
|
20440
|
+
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
|
|
21117
20441
|
);
|
|
21118
|
-
logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
|
|
21119
20442
|
executed = true;
|
|
21120
20443
|
} finally {
|
|
21121
20444
|
running = false;
|
|
@@ -21123,6 +20446,186 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21123
20446
|
}
|
|
21124
20447
|
};
|
|
21125
20448
|
}
|
|
20449
|
+
|
|
20450
|
+
// src/sveltekit/scroll-to-text.ts
|
|
20451
|
+
// Class added to elements highlighted through the DOM fallback.
var HIGHLIGHT_CLASS = "ssk-highlight";
// Highlight lifetime in milliseconds; also used as the CSS animation duration.
var HIGHLIGHT_DURATION = 2e3;
// Attribute set on the wrapper <span> created by the DOM fallback.
var HIGHLIGHT_MARKER_ATTR = "data-ssk-highlight-marker";
// Name under which the range is registered in CSS.highlights.
var HIGHLIGHT_NAME = "ssk-search-match";
// Set once the <style> element has been appended, so it is added only once.
var styleInjected = false;
/**
 * Append the highlight stylesheet to document.head the first time it is
 * needed. Does nothing when there is no `document` or when the style was
 * already injected.
 */
function ensureHighlightStyle() {
  const canInject = !styleInjected && typeof document !== "undefined";
  if (!canInject) return;
  styleInjected = true;
  const sheet = document.createElement("style");
  sheet.textContent = [
    "",
    "@keyframes ssk-highlight-fade {",
    "0% { background-color: rgba(16, 185, 129, 0.18); }",
    "100% { background-color: transparent; }",
    "}",
    `.${HIGHLIGHT_CLASS} {`,
    `animation: ssk-highlight-fade ${HIGHLIGHT_DURATION}ms ease-out forwards;`,
    "border-radius: 4px;",
    "}",
    `::highlight(${HIGHLIGHT_NAME}) {`,
    "background-color: rgba(16, 185, 129, 0.18);",
    "}",
    ""
  ].join("\n");
  document.head.appendChild(sheet);
}
|
|
20475
|
+
// Text nodes whose parent element has one of these tag names are left out of the text map.
var IGNORED_TAGS = /* @__PURE__ */ new Set(["SCRIPT", "STYLE", "NOSCRIPT", "TEMPLATE"]);
/**
 * Concatenate the text nodes under `root2` into one string and record which
 * slice of that string each node contributed.
 *
 * @param {Node} root2 - Root of the subtree to walk.
 * @returns {{ text: string, chunks: Array<{ node: Node, start: number, end: number }> }}
 *   `start`/`end` are offsets into `text`; nodes with an empty value are omitted.
 */
function buildTextMap(root2) {
  const filter = {
    acceptNode(node) {
      const owner = node.parentElement;
      if (owner && !IGNORED_TAGS.has(owner.tagName)) {
        return NodeFilter.FILTER_ACCEPT;
      }
      return NodeFilter.FILTER_REJECT;
    }
  };
  const walker = document.createTreeWalker(root2, NodeFilter.SHOW_TEXT, filter);
  const chunks = [];
  let text = "";
  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    const value = node.nodeValue ?? "";
    if (value.length === 0) continue;
    const start = text.length;
    text += value;
    chunks.push({ node, start, end: text.length });
  }
  return { text, chunks };
}
|
|
20495
|
+
/**
 * Lower-case `text`, collapse every whitespace run to a single space, and
 * trim both ends.
 *
 * @param {string} text
 * @returns {string}
 */
function normalize(text) {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
20498
|
+
/**
 * Backslash-escape the regular-expression metacharacters in `value` so it can
 * be embedded in a pattern and matched literally.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
20501
|
+
/**
 * Build the strict, case-insensitive matcher for a search needle.
 *
 * The needle is split into runs of Unicode letters/digits. With at least one
 * such token, the tokens must appear in order, separated by one or more
 * non-letter/non-digit characters. A needle with no tokens is matched
 * literally, with whitespace runs matching any whitespace run.
 *
 * @param {string} needle
 * @returns {RegExp | null} `null` only when the needle is empty.
 */
function buildNeedleRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter((part) => part.length > 0);
  if (words.length > 0) {
    // Joining a single token adds no separator, so one code path covers both cases.
    const separator = "[^\\p{L}\\p{N}]+";
    const source = words.map((word) => escapeRegExp(word)).join(separator);
    return new RegExp(source, "iu");
  }
  if (!needle) return null;
  const literal = escapeRegExp(needle).replace(/\s+/g, "\\s+");
  return new RegExp(literal, "i");
}
|
|
20513
|
+
/**
 * Build a looser, case-insensitive matcher for a multi-token needle: tokens
 * must appear in order but may be separated by zero or more
 * non-letter/non-digit characters.
 *
 * @param {string} needle
 * @returns {RegExp | null} `null` when the needle has fewer than two tokens.
 */
function buildLenientRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter((part) => part !== "");
  if (words.length < 2) return null;
  const optionalGap = "[^\\p{L}\\p{N}]*";
  const source = words.map((word) => escapeRegExp(word)).join(optionalGap);
  return new RegExp(source, "iu");
}
|
|
20519
|
+
/**
 * Locate the first occurrence of `needle` in `fullText`, trying the strict
 * matcher first and the lenient matcher only if the strict one finds nothing.
 *
 * @param {string} fullText
 * @param {string} needle
 * @returns {{ start: number, end: number } | null} Offsets into `fullText`
 *   (`end` exclusive), or `null` when neither matcher hits.
 */
function findMatch(fullText, needle) {
  // Builders are invoked lazily so the lenient regex is only built when needed.
  for (const buildRegex of [buildNeedleRegex, buildLenientRegex]) {
    const hit = buildRegex(needle)?.exec(fullText);
    if (hit && typeof hit.index === "number") {
      return { start: hit.index, end: hit.index + hit[0].length };
    }
  }
  return null;
}
|
|
20536
|
+
/**
 * Convert offsets into a text map's concatenated string into a DOM Range.
 *
 * @param {{ chunks: Array<{ node: Node, start: number, end: number }> }} map
 * @param {{ start: number, end: number }} offsets - `end` is exclusive.
 * @returns {Range | null} `null` when either offset falls outside every chunk.
 */
function resolveRange(map, offsets) {
  let first;
  let last;
  for (const piece of map.chunks) {
    const holdsStart = offsets.start >= piece.start && offsets.start < piece.end;
    const holdsEnd = offsets.end > piece.start && offsets.end <= piece.end;
    if (!first && holdsStart) first = piece;
    if (holdsEnd) last = piece;
    if (first && last) break;
  }
  if (!first || !last) return null;
  const range = document.createRange();
  // Range boundaries are relative to each text node, not to the full string.
  range.setStart(first.node, offsets.start - first.start);
  range.setEnd(last.node, offsets.end - last.start);
  return range;
}
|
|
20554
|
+
/**
 * Report whether the CSS Custom Highlight API can be used here.
 *
 * Both parts of the API are checked: the `CSS.highlights` registry and the
 * global `Highlight` constructor. The CSS highlighting path needs both, so if
 * either is missing the caller should use the DOM-based fallback.
 *
 * @returns {boolean}
 */
function hasCustomHighlightAPI() {
  return (
    typeof CSS !== "undefined" &&
    typeof CSS.highlights !== "undefined" &&
    typeof globalThis.Highlight === "function"
  );
}
|
|
20557
|
+
// Handle of the pending "remove highlight" timeout, or null when none is scheduled.
var highlightTimer = null;
/**
 * Highlight `range` by registering it under HIGHLIGHT_NAME in CSS.highlights,
 * then remove it after HIGHLIGHT_DURATION milliseconds. A removal still
 * pending from an earlier call is cancelled first.
 *
 * @param {Range} range
 */
function highlightWithCSS(range) {
  ensureHighlightStyle();
  CSS.highlights.set(HIGHLIGHT_NAME, new globalThis.Highlight(range));
  if (highlightTimer) clearTimeout(highlightTimer);
  const clearHighlight = () => {
    CSS.highlights.delete(HIGHLIGHT_NAME);
    highlightTimer = null;
  };
  highlightTimer = setTimeout(clearHighlight, HIGHLIGHT_DURATION);
}
|
|
20568
|
+
/**
 * Remove a highlight wrapper element while keeping its contents in place.
 * Each child is moved in front of the marker, the marker is removed, and the
 * parent is normalized when it is an Element. Does nothing if the marker is
 * disconnected or has no parent.
 *
 * @param {Element} marker
 */
function unwrapMarker(marker) {
  const host = marker.isConnected ? marker.parentNode : null;
  if (!host) return;
  for (let child = marker.firstChild; child; child = marker.firstChild) {
    host.insertBefore(child, marker);
  }
  host.removeChild(marker);
  if (host instanceof Element) host.normalize();
}
|
|
20576
|
+
/**
 * Highlight `range` by editing the DOM.
 *
 * First tries to wrap the range in a <span> carrying HIGHLIGHT_CLASS, which is
 * unwrapped again after HIGHLIGHT_DURATION milliseconds. If anything in that
 * attempt throws, the class is instead toggled on the range's common ancestor
 * element for the same duration.
 *
 * @param {Range} range
 * @returns {Element} The wrapper span, the fallback ancestor element, or
 *   document.body when no ancestor element is available.
 */
function highlightWithDOM(range) {
  ensureHighlightStyle();
  try {
    const wrapper = document.createElement("span");
    wrapper.classList.add(HIGHLIGHT_CLASS);
    wrapper.setAttribute(HIGHLIGHT_MARKER_ATTR, "true");
    range.surroundContents(wrapper);
    setTimeout(() => unwrapMarker(wrapper), HIGHLIGHT_DURATION);
    return wrapper;
  } catch {
    const container = range.commonAncestorContainer;
    const host = container instanceof Element ? container : container.parentElement;
    if (!host) return document.body;
    host.classList.add(HIGHLIGHT_CLASS);
    setTimeout(() => host.classList.remove(HIGHLIGHT_CLASS), HIGHLIGHT_DURATION);
    return host;
  }
}
|
|
20596
|
+
/**
 * Smooth-scroll the window so the top of `range` ends up one third of the
 * viewport height below the top edge.
 *
 * @param {Range} range
 */
function scrollToRange(range) {
  const { top: rangeTop } = range.getBoundingClientRect();
  const targetTop = window.scrollY + rangeTop - window.innerHeight / 3;
  window.scrollTo({ top: targetTop, behavior: "smooth" });
}
|
|
20603
|
+
/**
 * Smooth-scroll `el` to the top of the viewport when it has a
 * `scrollIntoView` method; otherwise do nothing.
 *
 * @param {Element} el
 */
function scrollIntoViewIfPossible(el) {
  if (typeof el.scrollIntoView !== "function") return;
  el.scrollIntoView({ behavior: "smooth", block: "start" });
}
|
|
20608
|
+
/**
 * After a navigation, find the text named by the destination URL's `_sskt`
 * query parameter (or `_ssk` when `_sskt` is absent) inside document.body,
 * then highlight it and scroll to it.
 *
 * Returns without doing anything when there is no `document`, no parameter
 * value, no match in the page text, or the match cannot be mapped to a Range.
 *
 * @param {{ to?: { url: URL } | null }} navigation - Object whose `to.url`
 *   holds the destination URL (presumably a SvelteKit navigation object —
 *   confirm against the caller).
 */
function searchsocketScrollToText(navigation) {
  if (typeof document === "undefined") return;
  const query = navigation.to?.url.searchParams;
  const requested = query?.get("_sskt") ?? query?.get("_ssk");
  const needle = requested ? normalize(requested) : "";
  if (!needle) return;
  const textMap = buildTextMap(document.body);
  const span = findMatch(textMap.text, needle);
  const range = span ? resolveRange(textMap, span) : null;
  if (!range) return;
  if (!hasCustomHighlightAPI()) {
    // DOM fallback: scroll to the highlighted element, or to its parent if it cannot scroll.
    const marker = highlightWithDOM(range);
    const target = typeof marker.scrollIntoView === "function" ? marker : marker.parentElement;
    if (target) scrollIntoViewIfPossible(target);
    return;
  }
  highlightWithCSS(range);
  scrollToRange(range);
}
|
|
21126
20629
|
/*! Bundled license information:
|
|
21127
20630
|
|
|
21128
20631
|
@mixmark-io/domino/lib/style_parser.js:
|
|
@@ -21136,7 +20639,7 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21136
20639
|
*/
|
|
21137
20640
|
|
|
21138
20641
|
exports.searchsocketHandle = searchsocketHandle;
|
|
21139
|
-
exports.
|
|
20642
|
+
exports.searchsocketScrollToText = searchsocketScrollToText;
|
|
21140
20643
|
exports.searchsocketVitePlugin = searchsocketVitePlugin;
|
|
21141
20644
|
//# sourceMappingURL=sveltekit.cjs.map
|
|
21142
20645
|
//# sourceMappingURL=sveltekit.cjs.map
|