searchsocket 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -31
- package/dist/cli.js +634 -1326
- package/dist/client.cjs +41 -117
- package/dist/client.d.cts +3 -17
- package/dist/client.d.ts +3 -17
- package/dist/client.js +41 -117
- package/dist/index.cjs +608 -1398
- package/dist/index.d.cts +73 -35
- package/dist/index.d.ts +73 -35
- package/dist/index.js +605 -1392
- package/dist/plugin-B_npJSux.d.cts +36 -0
- package/dist/plugin-M-aW0ev6.d.ts +36 -0
- package/dist/scroll.cjs +185 -0
- package/dist/scroll.d.cts +42 -0
- package/dist/scroll.d.ts +42 -0
- package/dist/scroll.js +183 -0
- package/dist/sveltekit.cjs +781 -1278
- package/dist/sveltekit.d.cts +3 -43
- package/dist/sveltekit.d.ts +3 -43
- package/dist/sveltekit.js +779 -1276
- package/dist/{types-z2dw3H6E.d.cts → types-Dk43uz25.d.cts} +46 -141
- package/dist/{types-z2dw3H6E.d.ts → types-Dk43uz25.d.ts} +46 -141
- package/package.json +10 -3
package/dist/sveltekit.js
CHANGED
|
@@ -2,13 +2,13 @@ import fs from 'fs';
|
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import { createJiti } from 'jiti';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
-
import pLimit2 from 'p-limit';
|
|
6
5
|
import { execSync, spawn } from 'child_process';
|
|
7
6
|
import { createHash } from 'crypto';
|
|
8
7
|
import { load } from 'cheerio';
|
|
9
8
|
import matter from 'gray-matter';
|
|
10
|
-
import fs4 from 'fs/promises';
|
|
11
9
|
import fg from 'fast-glob';
|
|
10
|
+
import pLimit from 'p-limit';
|
|
11
|
+
import fs3 from 'fs/promises';
|
|
12
12
|
import net from 'net';
|
|
13
13
|
import { gunzipSync } from 'zlib';
|
|
14
14
|
|
|
@@ -2755,12 +2755,12 @@ var require_ChildNode = __commonJS({
|
|
|
2755
2755
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/ChildNode.js"(exports$1, module) {
|
|
2756
2756
|
var Node2 = require_Node();
|
|
2757
2757
|
var LinkedList = require_LinkedList();
|
|
2758
|
-
var createDocumentFragmentFromArguments = function(
|
|
2759
|
-
var docFrag =
|
|
2758
|
+
var createDocumentFragmentFromArguments = function(document2, args) {
|
|
2759
|
+
var docFrag = document2.createDocumentFragment();
|
|
2760
2760
|
for (var i = 0; i < args.length; i++) {
|
|
2761
2761
|
var argItem = args[i];
|
|
2762
2762
|
var isNode = argItem instanceof Node2;
|
|
2763
|
-
docFrag.appendChild(isNode ? argItem :
|
|
2763
|
+
docFrag.appendChild(isNode ? argItem : document2.createTextNode(String(argItem)));
|
|
2764
2764
|
}
|
|
2765
2765
|
return docFrag;
|
|
2766
2766
|
};
|
|
@@ -2918,7 +2918,7 @@ var require_NamedNodeMap = __commonJS({
|
|
|
2918
2918
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js
|
|
2919
2919
|
var require_Element = __commonJS({
|
|
2920
2920
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/Element.js"(exports$1, module) {
|
|
2921
|
-
module.exports =
|
|
2921
|
+
module.exports = Element2;
|
|
2922
2922
|
var xml = require_xmlnames();
|
|
2923
2923
|
var utils = require_utils();
|
|
2924
2924
|
var NAMESPACE = utils.NAMESPACE;
|
|
@@ -2935,7 +2935,7 @@ var require_Element = __commonJS({
|
|
|
2935
2935
|
var NonDocumentTypeChildNode = require_NonDocumentTypeChildNode();
|
|
2936
2936
|
var NamedNodeMap = require_NamedNodeMap();
|
|
2937
2937
|
var uppercaseCache = /* @__PURE__ */ Object.create(null);
|
|
2938
|
-
function
|
|
2938
|
+
function Element2(doc, localName, namespaceURI, prefix) {
|
|
2939
2939
|
ContainerNode.call(this);
|
|
2940
2940
|
this.nodeType = Node2.ELEMENT_NODE;
|
|
2941
2941
|
this.ownerDocument = doc;
|
|
@@ -2955,7 +2955,7 @@ var require_Element = __commonJS({
|
|
|
2955
2955
|
recursiveGetText(node.childNodes[i], a);
|
|
2956
2956
|
}
|
|
2957
2957
|
}
|
|
2958
|
-
|
|
2958
|
+
Element2.prototype = Object.create(ContainerNode.prototype, {
|
|
2959
2959
|
isHTML: { get: function isHTML() {
|
|
2960
2960
|
return this.namespaceURI === NAMESPACE.HTML && this.ownerDocument.isHTML;
|
|
2961
2961
|
} },
|
|
@@ -3025,7 +3025,7 @@ var require_Element = __commonJS({
|
|
|
3025
3025
|
return NodeUtils.serializeOne(this, { nodeType: 0 });
|
|
3026
3026
|
},
|
|
3027
3027
|
set: function(v) {
|
|
3028
|
-
var
|
|
3028
|
+
var document2 = this.ownerDocument;
|
|
3029
3029
|
var parent = this.parentNode;
|
|
3030
3030
|
if (parent === null) {
|
|
3031
3031
|
return;
|
|
@@ -3036,8 +3036,8 @@ var require_Element = __commonJS({
|
|
|
3036
3036
|
if (parent.nodeType === Node2.DOCUMENT_FRAGMENT_NODE) {
|
|
3037
3037
|
parent = parent.ownerDocument.createElement("body");
|
|
3038
3038
|
}
|
|
3039
|
-
var parser =
|
|
3040
|
-
|
|
3039
|
+
var parser = document2.implementation.mozHTMLParser(
|
|
3040
|
+
document2._address,
|
|
3041
3041
|
parent
|
|
3042
3042
|
);
|
|
3043
3043
|
parser.parse(v === null ? "" : String(v), true);
|
|
@@ -3096,7 +3096,7 @@ var require_Element = __commonJS({
|
|
|
3096
3096
|
default:
|
|
3097
3097
|
utils.SyntaxError();
|
|
3098
3098
|
}
|
|
3099
|
-
if (!(context instanceof
|
|
3099
|
+
if (!(context instanceof Element2) || context.ownerDocument.isHTML && context.localName === "html" && context.namespaceURI === NAMESPACE.HTML) {
|
|
3100
3100
|
context = context.ownerDocument.createElementNS(NAMESPACE.HTML, "body");
|
|
3101
3101
|
}
|
|
3102
3102
|
var parser = this.ownerDocument.implementation.mozHTMLParser(
|
|
@@ -3704,10 +3704,10 @@ var require_Element = __commonJS({
|
|
|
3704
3704
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
3705
3705
|
} }
|
|
3706
3706
|
});
|
|
3707
|
-
Object.defineProperties(
|
|
3708
|
-
Object.defineProperties(
|
|
3707
|
+
Object.defineProperties(Element2.prototype, ChildNode);
|
|
3708
|
+
Object.defineProperties(Element2.prototype, NonDocumentTypeChildNode);
|
|
3709
3709
|
attributes.registerChangeHandler(
|
|
3710
|
-
|
|
3710
|
+
Element2,
|
|
3711
3711
|
"id",
|
|
3712
3712
|
function(element, lname, oldval, newval) {
|
|
3713
3713
|
if (element.rooted) {
|
|
@@ -3721,7 +3721,7 @@ var require_Element = __commonJS({
|
|
|
3721
3721
|
}
|
|
3722
3722
|
);
|
|
3723
3723
|
attributes.registerChangeHandler(
|
|
3724
|
-
|
|
3724
|
+
Element2,
|
|
3725
3725
|
"class",
|
|
3726
3726
|
function(element, lname, oldval, newval) {
|
|
3727
3727
|
if (element._classList) {
|
|
@@ -3820,7 +3820,7 @@ var require_Element = __commonJS({
|
|
|
3820
3820
|
}
|
|
3821
3821
|
}
|
|
3822
3822
|
});
|
|
3823
|
-
|
|
3823
|
+
Element2._Attr = Attr;
|
|
3824
3824
|
function AttributesArray(elt) {
|
|
3825
3825
|
NamedNodeMap.call(this, elt);
|
|
3826
3826
|
for (var name in elt._attrsByQName) {
|
|
@@ -4222,7 +4222,7 @@ var require_DocumentFragment = __commonJS({
|
|
|
4222
4222
|
var Node2 = require_Node();
|
|
4223
4223
|
var NodeList = require_NodeList();
|
|
4224
4224
|
var ContainerNode = require_ContainerNode();
|
|
4225
|
-
var
|
|
4225
|
+
var Element2 = require_Element();
|
|
4226
4226
|
var select = require_select();
|
|
4227
4227
|
var utils = require_utils();
|
|
4228
4228
|
function DocumentFragment(doc) {
|
|
@@ -4240,9 +4240,9 @@ var require_DocumentFragment = __commonJS({
|
|
|
4240
4240
|
}
|
|
4241
4241
|
},
|
|
4242
4242
|
// Copy the text content getter/setter from Element
|
|
4243
|
-
textContent: Object.getOwnPropertyDescriptor(
|
|
4243
|
+
textContent: Object.getOwnPropertyDescriptor(Element2.prototype, "textContent"),
|
|
4244
4244
|
// Copy the text content getter/setter from Element
|
|
4245
|
-
innerText: Object.getOwnPropertyDescriptor(
|
|
4245
|
+
innerText: Object.getOwnPropertyDescriptor(Element2.prototype, "innerText"),
|
|
4246
4246
|
querySelector: { value: function(selector) {
|
|
4247
4247
|
var nodes = this.querySelectorAll(selector);
|
|
4248
4248
|
return nodes.length ? nodes[0] : null;
|
|
@@ -4250,8 +4250,8 @@ var require_DocumentFragment = __commonJS({
|
|
|
4250
4250
|
querySelectorAll: { value: function(selector) {
|
|
4251
4251
|
var context = Object.create(this);
|
|
4252
4252
|
context.isHTML = true;
|
|
4253
|
-
context.getElementsByTagName =
|
|
4254
|
-
context.nextElement = Object.getOwnPropertyDescriptor(
|
|
4253
|
+
context.getElementsByTagName = Element2.prototype.getElementsByTagName;
|
|
4254
|
+
context.nextElement = Object.getOwnPropertyDescriptor(Element2.prototype, "firstElementChild").get;
|
|
4255
4255
|
var nodes = select(selector, context);
|
|
4256
4256
|
return nodes.item ? nodes : new NodeList(nodes);
|
|
4257
4257
|
} },
|
|
@@ -4333,7 +4333,7 @@ var require_ProcessingInstruction = __commonJS({
|
|
|
4333
4333
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js
|
|
4334
4334
|
var require_NodeFilter = __commonJS({
|
|
4335
4335
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeFilter.js"(exports$1, module) {
|
|
4336
|
-
var
|
|
4336
|
+
var NodeFilter2 = {
|
|
4337
4337
|
// Constants for acceptNode()
|
|
4338
4338
|
FILTER_ACCEPT: 1,
|
|
4339
4339
|
FILTER_REJECT: 2,
|
|
@@ -4358,7 +4358,7 @@ var require_NodeFilter = __commonJS({
|
|
|
4358
4358
|
SHOW_NOTATION: 2048
|
|
4359
4359
|
// historical
|
|
4360
4360
|
};
|
|
4361
|
-
module.exports =
|
|
4361
|
+
module.exports = NodeFilter2.constructor = NodeFilter2.prototype = NodeFilter2;
|
|
4362
4362
|
}
|
|
4363
4363
|
});
|
|
4364
4364
|
|
|
@@ -4433,7 +4433,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4433
4433
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/TreeWalker.js"(exports$1, module) {
|
|
4434
4434
|
module.exports = TreeWalker;
|
|
4435
4435
|
var Node2 = require_Node();
|
|
4436
|
-
var
|
|
4436
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4437
4437
|
var NodeTraversal = require_NodeTraversal();
|
|
4438
4438
|
var utils = require_utils();
|
|
4439
4439
|
var mapChild = {
|
|
@@ -4453,11 +4453,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4453
4453
|
node = tw._currentNode[mapChild[type]];
|
|
4454
4454
|
while (node !== null) {
|
|
4455
4455
|
result = tw._internalFilter(node);
|
|
4456
|
-
if (result ===
|
|
4456
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4457
4457
|
tw._currentNode = node;
|
|
4458
4458
|
return node;
|
|
4459
4459
|
}
|
|
4460
|
-
if (result ===
|
|
4460
|
+
if (result === NodeFilter2.FILTER_SKIP) {
|
|
4461
4461
|
child = node[mapChild[type]];
|
|
4462
4462
|
if (child !== null) {
|
|
4463
4463
|
node = child;
|
|
@@ -4491,12 +4491,12 @@ var require_TreeWalker = __commonJS({
|
|
|
4491
4491
|
while (sibling !== null) {
|
|
4492
4492
|
node = sibling;
|
|
4493
4493
|
result = tw._internalFilter(node);
|
|
4494
|
-
if (result ===
|
|
4494
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4495
4495
|
tw._currentNode = node;
|
|
4496
4496
|
return node;
|
|
4497
4497
|
}
|
|
4498
4498
|
sibling = node[mapChild[type]];
|
|
4499
|
-
if (result ===
|
|
4499
|
+
if (result === NodeFilter2.FILTER_REJECT || sibling === null) {
|
|
4500
4500
|
sibling = node[mapSibling[type]];
|
|
4501
4501
|
}
|
|
4502
4502
|
}
|
|
@@ -4504,7 +4504,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4504
4504
|
if (node === null || node === tw.root) {
|
|
4505
4505
|
return null;
|
|
4506
4506
|
}
|
|
4507
|
-
if (tw._internalFilter(node) ===
|
|
4507
|
+
if (tw._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4508
4508
|
return null;
|
|
4509
4509
|
}
|
|
4510
4510
|
}
|
|
@@ -4552,11 +4552,11 @@ var require_TreeWalker = __commonJS({
|
|
|
4552
4552
|
utils.InvalidStateError();
|
|
4553
4553
|
}
|
|
4554
4554
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4555
|
-
return
|
|
4555
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4556
4556
|
}
|
|
4557
4557
|
filter = this._filter;
|
|
4558
4558
|
if (filter === null) {
|
|
4559
|
-
result =
|
|
4559
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4560
4560
|
} else {
|
|
4561
4561
|
this._active = true;
|
|
4562
4562
|
try {
|
|
@@ -4585,7 +4585,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4585
4585
|
if (node === null) {
|
|
4586
4586
|
return null;
|
|
4587
4587
|
}
|
|
4588
|
-
if (this._internalFilter(node) ===
|
|
4588
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4589
4589
|
this._currentNode = node;
|
|
4590
4590
|
return node;
|
|
4591
4591
|
}
|
|
@@ -4638,17 +4638,17 @@ var require_TreeWalker = __commonJS({
|
|
|
4638
4638
|
for (previousSibling = node.previousSibling; previousSibling; previousSibling = node.previousSibling) {
|
|
4639
4639
|
node = previousSibling;
|
|
4640
4640
|
result = this._internalFilter(node);
|
|
4641
|
-
if (result ===
|
|
4641
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4642
4642
|
continue;
|
|
4643
4643
|
}
|
|
4644
4644
|
for (lastChild = node.lastChild; lastChild; lastChild = node.lastChild) {
|
|
4645
4645
|
node = lastChild;
|
|
4646
4646
|
result = this._internalFilter(node);
|
|
4647
|
-
if (result ===
|
|
4647
|
+
if (result === NodeFilter2.FILTER_REJECT) {
|
|
4648
4648
|
break;
|
|
4649
4649
|
}
|
|
4650
4650
|
}
|
|
4651
|
-
if (result ===
|
|
4651
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4652
4652
|
this._currentNode = node;
|
|
4653
4653
|
return node;
|
|
4654
4654
|
}
|
|
@@ -4657,7 +4657,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4657
4657
|
return null;
|
|
4658
4658
|
}
|
|
4659
4659
|
node = node.parentNode;
|
|
4660
|
-
if (this._internalFilter(node) ===
|
|
4660
|
+
if (this._internalFilter(node) === NodeFilter2.FILTER_ACCEPT) {
|
|
4661
4661
|
this._currentNode = node;
|
|
4662
4662
|
return node;
|
|
4663
4663
|
}
|
|
@@ -4674,26 +4674,26 @@ var require_TreeWalker = __commonJS({
|
|
|
4674
4674
|
nextNode: { value: function nextNode() {
|
|
4675
4675
|
var node, result, firstChild, nextSibling;
|
|
4676
4676
|
node = this._currentNode;
|
|
4677
|
-
result =
|
|
4677
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4678
4678
|
CHILDREN:
|
|
4679
4679
|
while (true) {
|
|
4680
4680
|
for (firstChild = node.firstChild; firstChild; firstChild = node.firstChild) {
|
|
4681
4681
|
node = firstChild;
|
|
4682
4682
|
result = this._internalFilter(node);
|
|
4683
|
-
if (result ===
|
|
4683
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4684
4684
|
this._currentNode = node;
|
|
4685
4685
|
return node;
|
|
4686
|
-
} else if (result ===
|
|
4686
|
+
} else if (result === NodeFilter2.FILTER_REJECT) {
|
|
4687
4687
|
break;
|
|
4688
4688
|
}
|
|
4689
4689
|
}
|
|
4690
4690
|
for (nextSibling = NodeTraversal.nextSkippingChildren(node, this.root); nextSibling; nextSibling = NodeTraversal.nextSkippingChildren(node, this.root)) {
|
|
4691
4691
|
node = nextSibling;
|
|
4692
4692
|
result = this._internalFilter(node);
|
|
4693
|
-
if (result ===
|
|
4693
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4694
4694
|
this._currentNode = node;
|
|
4695
4695
|
return node;
|
|
4696
|
-
} else if (result ===
|
|
4696
|
+
} else if (result === NodeFilter2.FILTER_SKIP) {
|
|
4697
4697
|
continue CHILDREN;
|
|
4698
4698
|
}
|
|
4699
4699
|
}
|
|
@@ -4712,7 +4712,7 @@ var require_TreeWalker = __commonJS({
|
|
|
4712
4712
|
var require_NodeIterator = __commonJS({
|
|
4713
4713
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/NodeIterator.js"(exports$1, module) {
|
|
4714
4714
|
module.exports = NodeIterator;
|
|
4715
|
-
var
|
|
4715
|
+
var NodeFilter2 = require_NodeFilter();
|
|
4716
4716
|
var NodeTraversal = require_NodeTraversal();
|
|
4717
4717
|
var utils = require_utils();
|
|
4718
4718
|
function move(node, stayWithin, directionIsNext) {
|
|
@@ -4747,7 +4747,7 @@ var require_NodeIterator = __commonJS({
|
|
|
4747
4747
|
}
|
|
4748
4748
|
}
|
|
4749
4749
|
var result = ni._internalFilter(node);
|
|
4750
|
-
if (result ===
|
|
4750
|
+
if (result === NodeFilter2.FILTER_ACCEPT) {
|
|
4751
4751
|
break;
|
|
4752
4752
|
}
|
|
4753
4753
|
}
|
|
@@ -4795,11 +4795,11 @@ var require_NodeIterator = __commonJS({
|
|
|
4795
4795
|
utils.InvalidStateError();
|
|
4796
4796
|
}
|
|
4797
4797
|
if (!(1 << node.nodeType - 1 & this._whatToShow)) {
|
|
4798
|
-
return
|
|
4798
|
+
return NodeFilter2.FILTER_SKIP;
|
|
4799
4799
|
}
|
|
4800
4800
|
filter = this._filter;
|
|
4801
4801
|
if (filter === null) {
|
|
4802
|
-
result =
|
|
4802
|
+
result = NodeFilter2.FILTER_ACCEPT;
|
|
4803
4803
|
} else {
|
|
4804
4804
|
this._active = true;
|
|
4805
4805
|
try {
|
|
@@ -5009,32 +5009,32 @@ var require_URL = __commonJS({
|
|
|
5009
5009
|
else
|
|
5010
5010
|
return basepath.substring(0, lastslash + 1) + refpath;
|
|
5011
5011
|
}
|
|
5012
|
-
function remove_dot_segments(
|
|
5013
|
-
if (!
|
|
5012
|
+
function remove_dot_segments(path13) {
|
|
5013
|
+
if (!path13) return path13;
|
|
5014
5014
|
var output = "";
|
|
5015
|
-
while (
|
|
5016
|
-
if (
|
|
5017
|
-
|
|
5015
|
+
while (path13.length > 0) {
|
|
5016
|
+
if (path13 === "." || path13 === "..") {
|
|
5017
|
+
path13 = "";
|
|
5018
5018
|
break;
|
|
5019
5019
|
}
|
|
5020
|
-
var twochars =
|
|
5021
|
-
var threechars =
|
|
5022
|
-
var fourchars =
|
|
5020
|
+
var twochars = path13.substring(0, 2);
|
|
5021
|
+
var threechars = path13.substring(0, 3);
|
|
5022
|
+
var fourchars = path13.substring(0, 4);
|
|
5023
5023
|
if (threechars === "../") {
|
|
5024
|
-
|
|
5024
|
+
path13 = path13.substring(3);
|
|
5025
5025
|
} else if (twochars === "./") {
|
|
5026
|
-
|
|
5026
|
+
path13 = path13.substring(2);
|
|
5027
5027
|
} else if (threechars === "/./") {
|
|
5028
|
-
|
|
5029
|
-
} else if (twochars === "/." &&
|
|
5030
|
-
|
|
5031
|
-
} else if (fourchars === "/../" || threechars === "/.." &&
|
|
5032
|
-
|
|
5028
|
+
path13 = "/" + path13.substring(3);
|
|
5029
|
+
} else if (twochars === "/." && path13.length === 2) {
|
|
5030
|
+
path13 = "/";
|
|
5031
|
+
} else if (fourchars === "/../" || threechars === "/.." && path13.length === 3) {
|
|
5032
|
+
path13 = "/" + path13.substring(4);
|
|
5033
5033
|
output = output.replace(/\/?[^\/]*$/, "");
|
|
5034
5034
|
} else {
|
|
5035
|
-
var segment =
|
|
5035
|
+
var segment = path13.match(/(\/?([^\/]*))/)[0];
|
|
5036
5036
|
output += segment;
|
|
5037
|
-
|
|
5037
|
+
path13 = path13.substring(segment.length);
|
|
5038
5038
|
}
|
|
5039
5039
|
}
|
|
5040
5040
|
return output;
|
|
@@ -5599,9 +5599,9 @@ var require_defineElement = __commonJS({
|
|
|
5599
5599
|
});
|
|
5600
5600
|
return c;
|
|
5601
5601
|
};
|
|
5602
|
-
function EventHandlerBuilder(body,
|
|
5602
|
+
function EventHandlerBuilder(body, document2, form, element) {
|
|
5603
5603
|
this.body = body;
|
|
5604
|
-
this.document =
|
|
5604
|
+
this.document = document2;
|
|
5605
5605
|
this.form = form;
|
|
5606
5606
|
this.element = element;
|
|
5607
5607
|
}
|
|
@@ -5635,7 +5635,7 @@ var require_defineElement = __commonJS({
|
|
|
5635
5635
|
var require_htmlelts = __commonJS({
|
|
5636
5636
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/htmlelts.js"(exports$1) {
|
|
5637
5637
|
var Node2 = require_Node();
|
|
5638
|
-
var
|
|
5638
|
+
var Element2 = require_Element();
|
|
5639
5639
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
5640
5640
|
var utils = require_utils();
|
|
5641
5641
|
var URLUtils = require_URLUtils();
|
|
@@ -5703,10 +5703,10 @@ var require_htmlelts = __commonJS({
|
|
|
5703
5703
|
this._form = null;
|
|
5704
5704
|
};
|
|
5705
5705
|
var HTMLElement = exports$1.HTMLElement = define({
|
|
5706
|
-
superclass:
|
|
5706
|
+
superclass: Element2,
|
|
5707
5707
|
name: "HTMLElement",
|
|
5708
5708
|
ctor: function HTMLElement2(doc, localName, prefix) {
|
|
5709
|
-
|
|
5709
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.HTML, prefix);
|
|
5710
5710
|
},
|
|
5711
5711
|
props: {
|
|
5712
5712
|
dangerouslySetInnerHTML: {
|
|
@@ -7188,7 +7188,7 @@ var require_htmlelts = __commonJS({
|
|
|
7188
7188
|
// node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js
|
|
7189
7189
|
var require_svg = __commonJS({
|
|
7190
7190
|
"node_modules/.pnpm/@mixmark-io+domino@2.2.0/node_modules/@mixmark-io/domino/lib/svg.js"(exports$1) {
|
|
7191
|
-
var
|
|
7191
|
+
var Element2 = require_Element();
|
|
7192
7192
|
var defineElement = require_defineElement();
|
|
7193
7193
|
var utils = require_utils();
|
|
7194
7194
|
var CSSStyleDeclaration = require_CSSStyleDeclaration();
|
|
@@ -7202,10 +7202,10 @@ var require_svg = __commonJS({
|
|
|
7202
7202
|
return defineElement(spec, SVGElement, svgElements, svgNameToImpl);
|
|
7203
7203
|
}
|
|
7204
7204
|
var SVGElement = define({
|
|
7205
|
-
superclass:
|
|
7205
|
+
superclass: Element2,
|
|
7206
7206
|
name: "SVGElement",
|
|
7207
7207
|
ctor: function SVGElement2(doc, localName, prefix) {
|
|
7208
|
-
|
|
7208
|
+
Element2.call(this, doc, localName, utils.NAMESPACE.SVG, prefix);
|
|
7209
7209
|
},
|
|
7210
7210
|
props: {
|
|
7211
7211
|
style: { get: function() {
|
|
@@ -7340,7 +7340,7 @@ var require_Document = __commonJS({
|
|
|
7340
7340
|
var Node2 = require_Node();
|
|
7341
7341
|
var NodeList = require_NodeList();
|
|
7342
7342
|
var ContainerNode = require_ContainerNode();
|
|
7343
|
-
var
|
|
7343
|
+
var Element2 = require_Element();
|
|
7344
7344
|
var Text = require_Text();
|
|
7345
7345
|
var Comment = require_Comment();
|
|
7346
7346
|
var Event = require_Event();
|
|
@@ -7349,7 +7349,7 @@ var require_Document = __commonJS({
|
|
|
7349
7349
|
var DOMImplementation = require_DOMImplementation();
|
|
7350
7350
|
var TreeWalker = require_TreeWalker();
|
|
7351
7351
|
var NodeIterator = require_NodeIterator();
|
|
7352
|
-
var
|
|
7352
|
+
var NodeFilter2 = require_NodeFilter();
|
|
7353
7353
|
var URL2 = require_URL();
|
|
7354
7354
|
var select = require_select();
|
|
7355
7355
|
var events = require_events();
|
|
@@ -7488,13 +7488,13 @@ var require_Document = __commonJS({
|
|
|
7488
7488
|
if (this.isHTML) {
|
|
7489
7489
|
localName = utils.toASCIILowerCase(localName);
|
|
7490
7490
|
}
|
|
7491
|
-
return new
|
|
7491
|
+
return new Element2._Attr(null, localName, null, null, "");
|
|
7492
7492
|
} },
|
|
7493
7493
|
createAttributeNS: { value: function(namespace, qualifiedName) {
|
|
7494
7494
|
namespace = namespace === null || namespace === void 0 || namespace === "" ? null : String(namespace);
|
|
7495
7495
|
qualifiedName = String(qualifiedName);
|
|
7496
7496
|
var ve = validateAndExtract(namespace, qualifiedName);
|
|
7497
|
-
return new
|
|
7497
|
+
return new Element2._Attr(null, ve.localName, ve.prefix, ve.namespace, "");
|
|
7498
7498
|
} },
|
|
7499
7499
|
createElement: { value: function(localName) {
|
|
7500
7500
|
localName = String(localName);
|
|
@@ -7506,7 +7506,7 @@ var require_Document = __commonJS({
|
|
|
7506
7506
|
} else if (this.contentType === "application/xhtml+xml") {
|
|
7507
7507
|
return html.createElement(this, localName, null);
|
|
7508
7508
|
} else {
|
|
7509
|
-
return new
|
|
7509
|
+
return new Element2(this, localName, null, null);
|
|
7510
7510
|
}
|
|
7511
7511
|
}, writable: isApiWritable },
|
|
7512
7512
|
createElementNS: { value: function(namespace, qualifiedName) {
|
|
@@ -7523,7 +7523,7 @@ var require_Document = __commonJS({
|
|
|
7523
7523
|
} else if (namespace === NAMESPACE.SVG) {
|
|
7524
7524
|
return svg.createElement(this, localName, prefix);
|
|
7525
7525
|
}
|
|
7526
|
-
return new
|
|
7526
|
+
return new Element2(this, localName, namespace, prefix);
|
|
7527
7527
|
} },
|
|
7528
7528
|
createEvent: { value: function createEvent(interfaceName) {
|
|
7529
7529
|
interfaceName = interfaceName.toLowerCase();
|
|
@@ -7545,7 +7545,7 @@ var require_Document = __commonJS({
|
|
|
7545
7545
|
if (!(root3 instanceof Node2)) {
|
|
7546
7546
|
throw new TypeError("root not a node");
|
|
7547
7547
|
}
|
|
7548
|
-
whatToShow = whatToShow === void 0 ?
|
|
7548
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7549
7549
|
filter = filter === void 0 ? null : filter;
|
|
7550
7550
|
return new TreeWalker(root3, whatToShow, filter);
|
|
7551
7551
|
} },
|
|
@@ -7557,7 +7557,7 @@ var require_Document = __commonJS({
|
|
|
7557
7557
|
if (!(root3 instanceof Node2)) {
|
|
7558
7558
|
throw new TypeError("root not a node");
|
|
7559
7559
|
}
|
|
7560
|
-
whatToShow = whatToShow === void 0 ?
|
|
7560
|
+
whatToShow = whatToShow === void 0 ? NodeFilter2.SHOW_ALL : +whatToShow;
|
|
7561
7561
|
filter = filter === void 0 ? null : filter;
|
|
7562
7562
|
return new NodeIterator(root3, whatToShow, filter);
|
|
7563
7563
|
} },
|
|
@@ -7618,10 +7618,10 @@ var require_Document = __commonJS({
|
|
|
7618
7618
|
return this.byId[id] instanceof MultiId;
|
|
7619
7619
|
} },
|
|
7620
7620
|
// Just copy this method from the Element prototype
|
|
7621
|
-
getElementsByName: { value:
|
|
7622
|
-
getElementsByTagName: { value:
|
|
7623
|
-
getElementsByTagNameNS: { value:
|
|
7624
|
-
getElementsByClassName: { value:
|
|
7621
|
+
getElementsByName: { value: Element2.prototype.getElementsByName },
|
|
7622
|
+
getElementsByTagName: { value: Element2.prototype.getElementsByTagName },
|
|
7623
|
+
getElementsByTagNameNS: { value: Element2.prototype.getElementsByTagNameNS },
|
|
7624
|
+
getElementsByClassName: { value: Element2.prototype.getElementsByClassName },
|
|
7625
7625
|
adoptNode: { value: function adoptNode(node) {
|
|
7626
7626
|
if (node.nodeType === Node2.DOCUMENT_NODE) utils.NotSupportedError();
|
|
7627
7627
|
if (node.nodeType === Node2.ATTRIBUTE_NODE) {
|
|
@@ -16447,8 +16447,8 @@ var require_Window = __commonJS({
|
|
|
16447
16447
|
var Location = require_Location();
|
|
16448
16448
|
var utils = require_utils();
|
|
16449
16449
|
module.exports = Window;
|
|
16450
|
-
function Window(
|
|
16451
|
-
this.document =
|
|
16450
|
+
function Window(document2) {
|
|
16451
|
+
this.document = document2 || new DOMImplementation(null).createHTMLDocument("");
|
|
16452
16452
|
this.document._scripting_enabled = true;
|
|
16453
16453
|
this.document.defaultView = this;
|
|
16454
16454
|
this.location = new Location(this, this.document._address || "about:blank");
|
|
@@ -16578,11 +16578,11 @@ var require_lib = __commonJS({
|
|
|
16578
16578
|
};
|
|
16579
16579
|
};
|
|
16580
16580
|
exports$1.createWindow = function(html, address) {
|
|
16581
|
-
var
|
|
16581
|
+
var document2 = exports$1.createDocument(html);
|
|
16582
16582
|
if (address !== void 0) {
|
|
16583
|
-
|
|
16583
|
+
document2._address = address;
|
|
16584
16584
|
}
|
|
16585
|
-
return new impl.Window(
|
|
16585
|
+
return new impl.Window(document2);
|
|
16586
16586
|
};
|
|
16587
16587
|
exports$1.impl = impl;
|
|
16588
16588
|
}
|
|
@@ -16647,29 +16647,18 @@ var searchSocketConfigSchema = z.object({
|
|
|
16647
16647
|
prependTitle: z.boolean().optional(),
|
|
16648
16648
|
pageSummaryChunk: z.boolean().optional()
|
|
16649
16649
|
}).optional(),
|
|
16650
|
-
|
|
16651
|
-
|
|
16652
|
-
|
|
16653
|
-
|
|
16654
|
-
|
|
16655
|
-
batchSize: z.number().int().positive().optional(),
|
|
16656
|
-
concurrency: z.number().int().positive().optional(),
|
|
16657
|
-
pricePer1kTokens: z.number().positive().optional()
|
|
16650
|
+
upstash: z.object({
|
|
16651
|
+
url: z.string().url().optional(),
|
|
16652
|
+
token: z.string().min(1).optional(),
|
|
16653
|
+
urlEnv: z.string().min(1).optional(),
|
|
16654
|
+
tokenEnv: z.string().min(1).optional()
|
|
16658
16655
|
}).optional(),
|
|
16659
|
-
|
|
16660
|
-
|
|
16661
|
-
|
|
16662
|
-
|
|
16663
|
-
|
|
16664
|
-
|
|
16665
|
-
authTokenEnv: z.string().optional(),
|
|
16666
|
-
localPath: z.string().optional()
|
|
16667
|
-
}).optional()
|
|
16668
|
-
}).optional(),
|
|
16669
|
-
rerank: z.object({
|
|
16670
|
-
enabled: z.boolean().optional(),
|
|
16671
|
-
topN: z.number().int().positive().optional(),
|
|
16672
|
-
model: z.string().optional()
|
|
16656
|
+
search: z.object({
|
|
16657
|
+
semanticWeight: z.number().min(0).max(1).optional(),
|
|
16658
|
+
inputEnrichment: z.boolean().optional(),
|
|
16659
|
+
reranking: z.boolean().optional(),
|
|
16660
|
+
dualSearch: z.boolean().optional(),
|
|
16661
|
+
pageSearchWeight: z.number().min(0).max(1).optional()
|
|
16673
16662
|
}).optional(),
|
|
16674
16663
|
ranking: z.object({
|
|
16675
16664
|
enableIncomingLinkBoost: z.boolean().optional(),
|
|
@@ -16679,11 +16668,12 @@ var searchSocketConfigSchema = z.object({
|
|
|
16679
16668
|
aggregationDecay: z.number().min(0).max(1).optional(),
|
|
16680
16669
|
minChunkScoreRatio: z.number().min(0).max(1).optional(),
|
|
16681
16670
|
minScore: z.number().min(0).max(1).optional(),
|
|
16671
|
+
scoreGapThreshold: z.number().min(0).max(1).optional(),
|
|
16682
16672
|
weights: z.object({
|
|
16683
16673
|
incomingLinks: z.number().optional(),
|
|
16684
16674
|
depth: z.number().optional(),
|
|
16685
|
-
|
|
16686
|
-
|
|
16675
|
+
aggregation: z.number().optional(),
|
|
16676
|
+
titleMatch: z.number().optional()
|
|
16687
16677
|
}).optional()
|
|
16688
16678
|
}).optional(),
|
|
16689
16679
|
api: z.object({
|
|
@@ -16705,8 +16695,7 @@ var searchSocketConfigSchema = z.object({
|
|
|
16705
16695
|
}).optional()
|
|
16706
16696
|
}).optional(),
|
|
16707
16697
|
state: z.object({
|
|
16708
|
-
dir: z.string().optional()
|
|
16709
|
-
writeMirror: z.boolean().optional()
|
|
16698
|
+
dir: z.string().optional()
|
|
16710
16699
|
}).optional()
|
|
16711
16700
|
});
|
|
16712
16701
|
|
|
@@ -16760,24 +16749,16 @@ function createDefaultConfig(projectId) {
|
|
|
16760
16749
|
prependTitle: true,
|
|
16761
16750
|
pageSummaryChunk: true
|
|
16762
16751
|
},
|
|
16763
|
-
|
|
16764
|
-
|
|
16765
|
-
|
|
16766
|
-
apiKeyEnv: "JINA_API_KEY",
|
|
16767
|
-
batchSize: 64,
|
|
16768
|
-
concurrency: 4
|
|
16769
|
-
},
|
|
16770
|
-
vector: {
|
|
16771
|
-
turso: {
|
|
16772
|
-
urlEnv: "TURSO_DATABASE_URL",
|
|
16773
|
-
authTokenEnv: "TURSO_AUTH_TOKEN",
|
|
16774
|
-
localPath: ".searchsocket/vectors.db"
|
|
16775
|
-
}
|
|
16752
|
+
upstash: {
|
|
16753
|
+
urlEnv: "UPSTASH_SEARCH_REST_URL",
|
|
16754
|
+
tokenEnv: "UPSTASH_SEARCH_REST_TOKEN"
|
|
16776
16755
|
},
|
|
16777
|
-
|
|
16778
|
-
|
|
16779
|
-
|
|
16780
|
-
|
|
16756
|
+
search: {
|
|
16757
|
+
semanticWeight: 0.75,
|
|
16758
|
+
inputEnrichment: true,
|
|
16759
|
+
reranking: true,
|
|
16760
|
+
dualSearch: true,
|
|
16761
|
+
pageSearchWeight: 0.3
|
|
16781
16762
|
},
|
|
16782
16763
|
ranking: {
|
|
16783
16764
|
enableIncomingLinkBoost: true,
|
|
@@ -16786,12 +16767,13 @@ function createDefaultConfig(projectId) {
|
|
|
16786
16767
|
aggregationCap: 5,
|
|
16787
16768
|
aggregationDecay: 0.5,
|
|
16788
16769
|
minChunkScoreRatio: 0.5,
|
|
16789
|
-
minScore: 0,
|
|
16770
|
+
minScore: 0.3,
|
|
16771
|
+
scoreGapThreshold: 0.4,
|
|
16790
16772
|
weights: {
|
|
16791
16773
|
incomingLinks: 0.05,
|
|
16792
16774
|
depth: 0.03,
|
|
16793
|
-
|
|
16794
|
-
|
|
16775
|
+
aggregation: 0.1,
|
|
16776
|
+
titleMatch: 0.15
|
|
16795
16777
|
}
|
|
16796
16778
|
},
|
|
16797
16779
|
api: {
|
|
@@ -16809,8 +16791,7 @@ function createDefaultConfig(projectId) {
|
|
|
16809
16791
|
}
|
|
16810
16792
|
},
|
|
16811
16793
|
state: {
|
|
16812
|
-
dir: ".searchsocket"
|
|
16813
|
-
writeMirror: false
|
|
16794
|
+
dir: ".searchsocket"
|
|
16814
16795
|
}
|
|
16815
16796
|
};
|
|
16816
16797
|
}
|
|
@@ -16934,21 +16915,13 @@ ${issues}`
|
|
|
16934
16915
|
...defaults.chunking,
|
|
16935
16916
|
...parsed.chunking
|
|
16936
16917
|
},
|
|
16937
|
-
|
|
16938
|
-
...defaults.
|
|
16939
|
-
...parsed.
|
|
16918
|
+
upstash: {
|
|
16919
|
+
...defaults.upstash,
|
|
16920
|
+
...parsed.upstash
|
|
16940
16921
|
},
|
|
16941
|
-
|
|
16942
|
-
...defaults.
|
|
16943
|
-
...parsed.
|
|
16944
|
-
turso: {
|
|
16945
|
-
...defaults.vector.turso,
|
|
16946
|
-
...parsed.vector?.turso
|
|
16947
|
-
}
|
|
16948
|
-
},
|
|
16949
|
-
rerank: {
|
|
16950
|
-
...defaults.rerank,
|
|
16951
|
-
...parsed.rerank
|
|
16922
|
+
search: {
|
|
16923
|
+
...defaults.search,
|
|
16924
|
+
...parsed.search
|
|
16952
16925
|
},
|
|
16953
16926
|
ranking: {
|
|
16954
16927
|
...defaults.ranking,
|
|
@@ -17039,128 +17012,6 @@ async function loadConfig(options = {}) {
|
|
|
17039
17012
|
function isServerless() {
|
|
17040
17013
|
return !!(process.env.VERCEL || process.env.NETLIFY || process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.FUNCTIONS_WORKER || process.env.CF_PAGES);
|
|
17041
17014
|
}
|
|
17042
|
-
function sleep(ms) {
|
|
17043
|
-
return new Promise((resolve) => {
|
|
17044
|
-
setTimeout(resolve, ms);
|
|
17045
|
-
});
|
|
17046
|
-
}
|
|
17047
|
-
var JinaEmbeddingsProvider = class {
|
|
17048
|
-
apiKey;
|
|
17049
|
-
batchSize;
|
|
17050
|
-
concurrency;
|
|
17051
|
-
defaultTask;
|
|
17052
|
-
constructor(options) {
|
|
17053
|
-
if (!Number.isInteger(options.batchSize) || options.batchSize <= 0) {
|
|
17054
|
-
throw new Error(`Invalid batchSize: ${options.batchSize}. batchSize must be a positive integer.`);
|
|
17055
|
-
}
|
|
17056
|
-
if (!Number.isInteger(options.concurrency) || options.concurrency <= 0) {
|
|
17057
|
-
throw new Error(`Invalid concurrency: ${options.concurrency}. concurrency must be a positive integer.`);
|
|
17058
|
-
}
|
|
17059
|
-
this.apiKey = options.apiKey;
|
|
17060
|
-
this.batchSize = options.batchSize;
|
|
17061
|
-
this.concurrency = options.concurrency;
|
|
17062
|
-
this.defaultTask = options.task ?? "retrieval.passage";
|
|
17063
|
-
}
|
|
17064
|
-
estimateTokens(text) {
|
|
17065
|
-
const normalized = text.trim();
|
|
17066
|
-
if (!normalized) {
|
|
17067
|
-
return 0;
|
|
17068
|
-
}
|
|
17069
|
-
const wordCount = normalized.match(/[A-Za-z0-9_]+/g)?.length ?? 0;
|
|
17070
|
-
const punctuationCount = normalized.match(/[^\s\w]/g)?.length ?? 0;
|
|
17071
|
-
const cjkCount = normalized.match(/[\u3400-\u9fff]/g)?.length ?? 0;
|
|
17072
|
-
const charEstimate = Math.ceil(normalized.length / 4);
|
|
17073
|
-
const lexicalEstimate = Math.ceil(wordCount * 1.25 + punctuationCount * 0.45 + cjkCount * 1.6);
|
|
17074
|
-
return Math.max(1, Math.max(charEstimate, lexicalEstimate));
|
|
17075
|
-
}
|
|
17076
|
-
async embedTexts(texts, modelId, task) {
|
|
17077
|
-
if (texts.length === 0) {
|
|
17078
|
-
return [];
|
|
17079
|
-
}
|
|
17080
|
-
const batches = [];
|
|
17081
|
-
for (let i = 0; i < texts.length; i += this.batchSize) {
|
|
17082
|
-
batches.push({
|
|
17083
|
-
index: i,
|
|
17084
|
-
values: texts.slice(i, i + this.batchSize)
|
|
17085
|
-
});
|
|
17086
|
-
}
|
|
17087
|
-
const outputs = new Array(batches.length);
|
|
17088
|
-
const limit = pLimit2(this.concurrency);
|
|
17089
|
-
await Promise.all(
|
|
17090
|
-
batches.map(
|
|
17091
|
-
(batch, position) => limit(async () => {
|
|
17092
|
-
outputs[position] = await this.embedWithRetry(batch.values, modelId, task ?? this.defaultTask);
|
|
17093
|
-
})
|
|
17094
|
-
)
|
|
17095
|
-
);
|
|
17096
|
-
return outputs.flat();
|
|
17097
|
-
}
|
|
17098
|
-
async embedWithRetry(texts, modelId, task) {
|
|
17099
|
-
const maxAttempts = 5;
|
|
17100
|
-
let attempt = 0;
|
|
17101
|
-
while (attempt < maxAttempts) {
|
|
17102
|
-
attempt += 1;
|
|
17103
|
-
let response;
|
|
17104
|
-
try {
|
|
17105
|
-
response = await fetch("https://api.jina.ai/v1/embeddings", {
|
|
17106
|
-
method: "POST",
|
|
17107
|
-
headers: {
|
|
17108
|
-
"content-type": "application/json",
|
|
17109
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17110
|
-
},
|
|
17111
|
-
body: JSON.stringify({
|
|
17112
|
-
model: modelId,
|
|
17113
|
-
input: texts,
|
|
17114
|
-
task
|
|
17115
|
-
})
|
|
17116
|
-
});
|
|
17117
|
-
} catch (error) {
|
|
17118
|
-
if (attempt >= maxAttempts) {
|
|
17119
|
-
throw error;
|
|
17120
|
-
}
|
|
17121
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17122
|
-
continue;
|
|
17123
|
-
}
|
|
17124
|
-
if (!response.ok) {
|
|
17125
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17126
|
-
if (!retryable || attempt >= maxAttempts) {
|
|
17127
|
-
const errorBody = await response.text();
|
|
17128
|
-
throw new Error(`Jina embeddings failed (${response.status}): ${errorBody}`);
|
|
17129
|
-
}
|
|
17130
|
-
await sleep(Math.min(2 ** attempt * 300, 5e3));
|
|
17131
|
-
continue;
|
|
17132
|
-
}
|
|
17133
|
-
const payload = await response.json();
|
|
17134
|
-
if (!payload.data || !Array.isArray(payload.data)) {
|
|
17135
|
-
throw new Error("Invalid Jina embeddings response format");
|
|
17136
|
-
}
|
|
17137
|
-
return payload.data.map((entry) => entry.embedding);
|
|
17138
|
-
}
|
|
17139
|
-
throw new Error("Unreachable retry state");
|
|
17140
|
-
}
|
|
17141
|
-
};
|
|
17142
|
-
|
|
17143
|
-
// src/embeddings/factory.ts
|
|
17144
|
-
function createEmbeddingsProvider(config) {
|
|
17145
|
-
if (config.embeddings.provider !== "jina") {
|
|
17146
|
-
throw new SearchSocketError(
|
|
17147
|
-
"CONFIG_MISSING",
|
|
17148
|
-
`Unsupported embeddings provider ${config.embeddings.provider}`
|
|
17149
|
-
);
|
|
17150
|
-
}
|
|
17151
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17152
|
-
if (!apiKey) {
|
|
17153
|
-
throw new SearchSocketError(
|
|
17154
|
-
"CONFIG_MISSING",
|
|
17155
|
-
`Missing embeddings API key: provide embeddings.apiKey or set env var ${config.embeddings.apiKeyEnv}`
|
|
17156
|
-
);
|
|
17157
|
-
}
|
|
17158
|
-
return new JinaEmbeddingsProvider({
|
|
17159
|
-
apiKey,
|
|
17160
|
-
batchSize: config.embeddings.batchSize,
|
|
17161
|
-
concurrency: config.embeddings.concurrency
|
|
17162
|
-
});
|
|
17163
|
-
}
|
|
17164
17015
|
|
|
17165
17016
|
// src/utils/text.ts
|
|
17166
17017
|
function normalizeText(input) {
|
|
@@ -17235,103 +17086,6 @@ function resolveScope(config, override) {
|
|
|
17235
17086
|
};
|
|
17236
17087
|
}
|
|
17237
17088
|
|
|
17238
|
-
// src/rerank/jina.ts
|
|
17239
|
-
function sleep2(ms) {
|
|
17240
|
-
return new Promise((resolve) => {
|
|
17241
|
-
setTimeout(resolve, ms);
|
|
17242
|
-
});
|
|
17243
|
-
}
|
|
17244
|
-
var JinaReranker = class {
|
|
17245
|
-
apiKey;
|
|
17246
|
-
model;
|
|
17247
|
-
maxRetries;
|
|
17248
|
-
constructor(options) {
|
|
17249
|
-
this.apiKey = options.apiKey;
|
|
17250
|
-
this.model = options.model;
|
|
17251
|
-
this.maxRetries = options.maxRetries ?? 2;
|
|
17252
|
-
}
|
|
17253
|
-
async rerank(query, candidates, topN) {
|
|
17254
|
-
if (candidates.length === 0) {
|
|
17255
|
-
return [];
|
|
17256
|
-
}
|
|
17257
|
-
const body = {
|
|
17258
|
-
model: this.model,
|
|
17259
|
-
query,
|
|
17260
|
-
documents: candidates.map((candidate) => candidate.text),
|
|
17261
|
-
top_n: topN ?? candidates.length,
|
|
17262
|
-
return_documents: false
|
|
17263
|
-
};
|
|
17264
|
-
let attempt = 0;
|
|
17265
|
-
while (attempt <= this.maxRetries) {
|
|
17266
|
-
attempt += 1;
|
|
17267
|
-
let response;
|
|
17268
|
-
try {
|
|
17269
|
-
response = await fetch("https://api.jina.ai/v1/rerank", {
|
|
17270
|
-
method: "POST",
|
|
17271
|
-
headers: {
|
|
17272
|
-
"content-type": "application/json",
|
|
17273
|
-
authorization: `Bearer ${this.apiKey}`
|
|
17274
|
-
},
|
|
17275
|
-
body: JSON.stringify(body)
|
|
17276
|
-
});
|
|
17277
|
-
} catch (error) {
|
|
17278
|
-
if (attempt <= this.maxRetries) {
|
|
17279
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17280
|
-
continue;
|
|
17281
|
-
}
|
|
17282
|
-
throw error;
|
|
17283
|
-
}
|
|
17284
|
-
if (!response.ok) {
|
|
17285
|
-
const retryable = response.status === 429 || response.status >= 500;
|
|
17286
|
-
if (retryable && attempt <= this.maxRetries) {
|
|
17287
|
-
await sleep2(Math.min(300 * 2 ** attempt, 4e3));
|
|
17288
|
-
continue;
|
|
17289
|
-
}
|
|
17290
|
-
const errorBody = await response.text();
|
|
17291
|
-
throw new Error(`Jina rerank failed (${response.status}): ${errorBody}`);
|
|
17292
|
-
}
|
|
17293
|
-
const payload = await response.json();
|
|
17294
|
-
const rawResults = payload.results ?? payload.data ?? [];
|
|
17295
|
-
if (!Array.isArray(rawResults)) {
|
|
17296
|
-
throw new Error("Invalid Jina rerank response format");
|
|
17297
|
-
}
|
|
17298
|
-
return rawResults.flatMap((item) => {
|
|
17299
|
-
const index = item.index;
|
|
17300
|
-
if (typeof index !== "number" || index < 0 || index >= candidates.length) {
|
|
17301
|
-
return [];
|
|
17302
|
-
}
|
|
17303
|
-
const candidate = candidates[index];
|
|
17304
|
-
if (!candidate) {
|
|
17305
|
-
return [];
|
|
17306
|
-
}
|
|
17307
|
-
const score = typeof item.relevance_score === "number" ? item.relevance_score : item.score ?? 0;
|
|
17308
|
-
return [
|
|
17309
|
-
{
|
|
17310
|
-
id: candidate.id,
|
|
17311
|
-
score
|
|
17312
|
-
}
|
|
17313
|
-
];
|
|
17314
|
-
}).sort((a, b) => b.score - a.score);
|
|
17315
|
-
}
|
|
17316
|
-
throw new Error("Jina rerank request failed after retries");
|
|
17317
|
-
}
|
|
17318
|
-
};
|
|
17319
|
-
|
|
17320
|
-
// src/rerank/factory.ts
|
|
17321
|
-
function createReranker(config) {
|
|
17322
|
-
if (!config.rerank.enabled) {
|
|
17323
|
-
return null;
|
|
17324
|
-
}
|
|
17325
|
-
const apiKey = config.embeddings.apiKey ?? process.env[config.embeddings.apiKeyEnv];
|
|
17326
|
-
if (!apiKey) {
|
|
17327
|
-
return null;
|
|
17328
|
-
}
|
|
17329
|
-
return new JinaReranker({
|
|
17330
|
-
apiKey,
|
|
17331
|
-
model: config.rerank.model
|
|
17332
|
-
});
|
|
17333
|
-
}
|
|
17334
|
-
|
|
17335
17089
|
// src/utils/time.ts
|
|
17336
17090
|
function nowIso() {
|
|
17337
17091
|
return (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -17350,13 +17104,6 @@ function normalizeUrlPath(rawPath) {
|
|
|
17350
17104
|
}
|
|
17351
17105
|
return out;
|
|
17352
17106
|
}
|
|
17353
|
-
function urlPathToMirrorRelative(urlPath) {
|
|
17354
|
-
const normalized = normalizeUrlPath(urlPath);
|
|
17355
|
-
if (normalized === "/") {
|
|
17356
|
-
return "index.md";
|
|
17357
|
-
}
|
|
17358
|
-
return `${normalized.slice(1)}.md`;
|
|
17359
|
-
}
|
|
17360
17107
|
function staticHtmlFileToUrl(filePath, rootDir) {
|
|
17361
17108
|
const relative = path.relative(rootDir, filePath).replace(/\\/g, "/");
|
|
17362
17109
|
if (relative === "index.html") {
|
|
@@ -17390,434 +17137,239 @@ function joinUrl(baseUrl, route) {
|
|
|
17390
17137
|
return `${base}${routePart}`;
|
|
17391
17138
|
}
|
|
17392
17139
|
|
|
17393
|
-
// src/vector/
|
|
17394
|
-
|
|
17140
|
+
// src/vector/upstash.ts
|
|
17141
|
+
function chunkIndexName(scope) {
|
|
17142
|
+
return `${scope.projectId}--${scope.scopeName}`;
|
|
17143
|
+
}
|
|
17144
|
+
function pageIndexName(scope) {
|
|
17145
|
+
return `${scope.projectId}--${scope.scopeName}--pages`;
|
|
17146
|
+
}
|
|
17147
|
+
var UpstashSearchStore = class {
|
|
17395
17148
|
client;
|
|
17396
|
-
dimension;
|
|
17397
|
-
chunksReady = false;
|
|
17398
|
-
registryReady = false;
|
|
17399
|
-
pagesReady = false;
|
|
17400
17149
|
constructor(opts) {
|
|
17401
17150
|
this.client = opts.client;
|
|
17402
|
-
this.dimension = opts.dimension;
|
|
17403
|
-
}
|
|
17404
|
-
async ensureRegistry() {
|
|
17405
|
-
if (this.registryReady) return;
|
|
17406
|
-
await this.client.execute(`
|
|
17407
|
-
CREATE TABLE IF NOT EXISTS registry (
|
|
17408
|
-
scope_key TEXT PRIMARY KEY,
|
|
17409
|
-
project_id TEXT NOT NULL,
|
|
17410
|
-
scope_name TEXT NOT NULL,
|
|
17411
|
-
model_id TEXT NOT NULL,
|
|
17412
|
-
last_indexed_at TEXT NOT NULL,
|
|
17413
|
-
vector_count INTEGER,
|
|
17414
|
-
last_estimate_tokens INTEGER,
|
|
17415
|
-
last_estimate_cost_usd REAL,
|
|
17416
|
-
last_estimate_changed_chunks INTEGER
|
|
17417
|
-
)
|
|
17418
|
-
`);
|
|
17419
|
-
const estimateCols = [
|
|
17420
|
-
{ name: "last_estimate_tokens", def: "INTEGER" },
|
|
17421
|
-
{ name: "last_estimate_cost_usd", def: "REAL" },
|
|
17422
|
-
{ name: "last_estimate_changed_chunks", def: "INTEGER" }
|
|
17423
|
-
];
|
|
17424
|
-
for (const col of estimateCols) {
|
|
17425
|
-
try {
|
|
17426
|
-
await this.client.execute(`ALTER TABLE registry ADD COLUMN ${col.name} ${col.def}`);
|
|
17427
|
-
} catch (error) {
|
|
17428
|
-
if (error instanceof Error && !error.message.includes("duplicate column")) {
|
|
17429
|
-
throw error;
|
|
17430
|
-
}
|
|
17431
|
-
}
|
|
17432
|
-
}
|
|
17433
|
-
this.registryReady = true;
|
|
17434
|
-
}
|
|
17435
|
-
async ensureChunks(dim) {
|
|
17436
|
-
if (this.chunksReady) return;
|
|
17437
|
-
const exists = await this.chunksTableExists();
|
|
17438
|
-
if (exists) {
|
|
17439
|
-
const currentDim = await this.getChunksDimension();
|
|
17440
|
-
if (currentDim !== null && currentDim !== dim) {
|
|
17441
|
-
await this.client.batch([
|
|
17442
|
-
"DROP INDEX IF EXISTS idx",
|
|
17443
|
-
"DROP TABLE IF EXISTS chunks"
|
|
17444
|
-
]);
|
|
17445
|
-
}
|
|
17446
|
-
}
|
|
17447
|
-
await this.client.batch([
|
|
17448
|
-
`CREATE TABLE IF NOT EXISTS chunks (
|
|
17449
|
-
id TEXT PRIMARY KEY,
|
|
17450
|
-
project_id TEXT NOT NULL,
|
|
17451
|
-
scope_name TEXT NOT NULL,
|
|
17452
|
-
url TEXT NOT NULL,
|
|
17453
|
-
path TEXT NOT NULL,
|
|
17454
|
-
title TEXT NOT NULL,
|
|
17455
|
-
section_title TEXT NOT NULL DEFAULT '',
|
|
17456
|
-
heading_path TEXT NOT NULL DEFAULT '[]',
|
|
17457
|
-
snippet TEXT NOT NULL DEFAULT '',
|
|
17458
|
-
chunk_text TEXT NOT NULL DEFAULT '',
|
|
17459
|
-
ordinal INTEGER NOT NULL DEFAULT 0,
|
|
17460
|
-
content_hash TEXT NOT NULL DEFAULT '',
|
|
17461
|
-
model_id TEXT NOT NULL DEFAULT '',
|
|
17462
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17463
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17464
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17465
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17466
|
-
description TEXT NOT NULL DEFAULT '',
|
|
17467
|
-
keywords TEXT NOT NULL DEFAULT '[]',
|
|
17468
|
-
embedding F32_BLOB(${dim})
|
|
17469
|
-
)`,
|
|
17470
|
-
`CREATE INDEX IF NOT EXISTS idx ON chunks (libsql_vector_idx(embedding, 'metric=cosine'))`
|
|
17471
|
-
]);
|
|
17472
|
-
this.chunksReady = true;
|
|
17473
|
-
}
|
|
17474
|
-
async ensurePages() {
|
|
17475
|
-
if (this.pagesReady) return;
|
|
17476
|
-
await this.client.execute(`
|
|
17477
|
-
CREATE TABLE IF NOT EXISTS pages (
|
|
17478
|
-
project_id TEXT NOT NULL,
|
|
17479
|
-
scope_name TEXT NOT NULL,
|
|
17480
|
-
url TEXT NOT NULL,
|
|
17481
|
-
title TEXT NOT NULL,
|
|
17482
|
-
markdown TEXT NOT NULL,
|
|
17483
|
-
route_file TEXT NOT NULL DEFAULT '',
|
|
17484
|
-
route_resolution TEXT NOT NULL DEFAULT 'exact',
|
|
17485
|
-
incoming_links INTEGER NOT NULL DEFAULT 0,
|
|
17486
|
-
outgoing_links INTEGER NOT NULL DEFAULT 0,
|
|
17487
|
-
depth INTEGER NOT NULL DEFAULT 0,
|
|
17488
|
-
tags TEXT NOT NULL DEFAULT '[]',
|
|
17489
|
-
indexed_at TEXT NOT NULL,
|
|
17490
|
-
PRIMARY KEY (project_id, scope_name, url)
|
|
17491
|
-
)
|
|
17492
|
-
`);
|
|
17493
|
-
this.pagesReady = true;
|
|
17494
17151
|
}
|
|
17495
|
-
|
|
17496
|
-
|
|
17497
|
-
await this.client.execute("SELECT 1 FROM chunks LIMIT 0");
|
|
17498
|
-
return true;
|
|
17499
|
-
} catch (error) {
|
|
17500
|
-
if (error instanceof Error && error.message.includes("no such table")) {
|
|
17501
|
-
return false;
|
|
17502
|
-
}
|
|
17503
|
-
throw error;
|
|
17504
|
-
}
|
|
17152
|
+
chunkIndex(scope) {
|
|
17153
|
+
return this.client.index(chunkIndexName(scope));
|
|
17505
17154
|
}
|
|
17506
|
-
|
|
17507
|
-
|
|
17508
|
-
* Returns null if the table doesn't exist or the dimension can't be parsed.
|
|
17509
|
-
*/
|
|
17510
|
-
async getChunksDimension() {
|
|
17511
|
-
try {
|
|
17512
|
-
const rs = await this.client.execute(
|
|
17513
|
-
"SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'"
|
|
17514
|
-
);
|
|
17515
|
-
if (rs.rows.length === 0) return null;
|
|
17516
|
-
const sql = rs.rows[0].sql;
|
|
17517
|
-
const match = sql.match(/F32_BLOB\((\d+)\)/i);
|
|
17518
|
-
return match ? parseInt(match[1], 10) : null;
|
|
17519
|
-
} catch {
|
|
17520
|
-
return null;
|
|
17521
|
-
}
|
|
17155
|
+
pageIndex(scope) {
|
|
17156
|
+
return this.client.index(pageIndexName(scope));
|
|
17522
17157
|
}
|
|
17523
|
-
|
|
17524
|
-
|
|
17525
|
-
|
|
17526
|
-
*/
|
|
17527
|
-
async dropAllTables() {
|
|
17528
|
-
await this.client.batch([
|
|
17529
|
-
"DROP INDEX IF EXISTS idx",
|
|
17530
|
-
"DROP TABLE IF EXISTS chunks",
|
|
17531
|
-
"DROP TABLE IF EXISTS registry",
|
|
17532
|
-
"DROP TABLE IF EXISTS pages"
|
|
17533
|
-
]);
|
|
17534
|
-
this.chunksReady = false;
|
|
17535
|
-
this.registryReady = false;
|
|
17536
|
-
this.pagesReady = false;
|
|
17537
|
-
}
|
|
17538
|
-
async upsert(records, _scope) {
|
|
17539
|
-
if (records.length === 0) return;
|
|
17540
|
-
const dim = this.dimension ?? records[0].vector.length;
|
|
17541
|
-
await this.ensureChunks(dim);
|
|
17158
|
+
async upsertChunks(chunks, scope) {
|
|
17159
|
+
if (chunks.length === 0) return;
|
|
17160
|
+
const index = this.chunkIndex(scope);
|
|
17542
17161
|
const BATCH_SIZE = 100;
|
|
17543
|
-
for (let i = 0; i <
|
|
17544
|
-
const batch =
|
|
17545
|
-
|
|
17546
|
-
sql: `INSERT OR REPLACE INTO chunks
|
|
17547
|
-
(id, project_id, scope_name, url, path, title, section_title,
|
|
17548
|
-
heading_path, snippet, chunk_text, ordinal, content_hash, model_id, depth,
|
|
17549
|
-
incoming_links, route_file, tags, description, keywords, embedding)
|
|
17550
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, vector(?))`,
|
|
17551
|
-
args: [
|
|
17552
|
-
r.id,
|
|
17553
|
-
r.metadata.projectId,
|
|
17554
|
-
r.metadata.scopeName,
|
|
17555
|
-
r.metadata.url,
|
|
17556
|
-
r.metadata.path,
|
|
17557
|
-
r.metadata.title,
|
|
17558
|
-
r.metadata.sectionTitle,
|
|
17559
|
-
JSON.stringify(r.metadata.headingPath),
|
|
17560
|
-
r.metadata.snippet,
|
|
17561
|
-
r.metadata.chunkText,
|
|
17562
|
-
r.metadata.ordinal,
|
|
17563
|
-
r.metadata.contentHash,
|
|
17564
|
-
r.metadata.modelId,
|
|
17565
|
-
r.metadata.depth,
|
|
17566
|
-
r.metadata.incomingLinks,
|
|
17567
|
-
r.metadata.routeFile,
|
|
17568
|
-
JSON.stringify(r.metadata.tags),
|
|
17569
|
-
r.metadata.description ?? "",
|
|
17570
|
-
JSON.stringify(r.metadata.keywords ?? []),
|
|
17571
|
-
JSON.stringify(r.vector)
|
|
17572
|
-
]
|
|
17573
|
-
}));
|
|
17574
|
-
await this.client.batch(stmts);
|
|
17162
|
+
for (let i = 0; i < chunks.length; i += BATCH_SIZE) {
|
|
17163
|
+
const batch = chunks.slice(i, i + BATCH_SIZE);
|
|
17164
|
+
await index.upsert(batch);
|
|
17575
17165
|
}
|
|
17576
17166
|
}
|
|
17577
|
-
async query
|
|
17578
|
-
const
|
|
17579
|
-
await
|
|
17580
|
-
|
|
17581
|
-
|
|
17582
|
-
|
|
17583
|
-
|
|
17584
|
-
|
|
17585
|
-
|
|
17586
|
-
c.description, c.keywords,
|
|
17587
|
-
vector_distance_cos(c.embedding, vector(?)) AS distance
|
|
17588
|
-
FROM vector_top_k('idx', vector(?), ?) AS v
|
|
17589
|
-
JOIN chunks AS c ON c.rowid = v.id`,
|
|
17590
|
-
args: [queryJson, queryJson, opts.topK]
|
|
17167
|
+
async search(query, opts, scope) {
|
|
17168
|
+
const index = this.chunkIndex(scope);
|
|
17169
|
+
const results = await index.search({
|
|
17170
|
+
query,
|
|
17171
|
+
limit: opts.limit,
|
|
17172
|
+
semanticWeight: opts.semanticWeight,
|
|
17173
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17174
|
+
reranking: opts.reranking,
|
|
17175
|
+
filter: opts.filter
|
|
17591
17176
|
});
|
|
17592
|
-
|
|
17593
|
-
|
|
17594
|
-
|
|
17595
|
-
|
|
17596
|
-
|
|
17597
|
-
|
|
17598
|
-
|
|
17599
|
-
|
|
17600
|
-
|
|
17601
|
-
|
|
17602
|
-
|
|
17603
|
-
|
|
17604
|
-
|
|
17605
|
-
|
|
17606
|
-
|
|
17607
|
-
|
|
17608
|
-
|
|
17609
|
-
|
|
17610
|
-
|
|
17611
|
-
|
|
17612
|
-
|
|
17613
|
-
}
|
|
17177
|
+
return results.map((doc) => ({
|
|
17178
|
+
id: doc.id,
|
|
17179
|
+
score: doc.score,
|
|
17180
|
+
metadata: {
|
|
17181
|
+
projectId: doc.metadata?.projectId ?? "",
|
|
17182
|
+
scopeName: doc.metadata?.scopeName ?? "",
|
|
17183
|
+
url: doc.content.url,
|
|
17184
|
+
path: doc.metadata?.path ?? "",
|
|
17185
|
+
title: doc.content.title,
|
|
17186
|
+
sectionTitle: doc.content.sectionTitle,
|
|
17187
|
+
headingPath: doc.content.headingPath ? doc.content.headingPath.split(" > ").filter(Boolean) : [],
|
|
17188
|
+
snippet: doc.metadata?.snippet ?? "",
|
|
17189
|
+
chunkText: doc.content.text,
|
|
17190
|
+
ordinal: doc.metadata?.ordinal ?? 0,
|
|
17191
|
+
contentHash: doc.metadata?.contentHash ?? "",
|
|
17192
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17193
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17194
|
+
routeFile: doc.metadata?.routeFile ?? "",
|
|
17195
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17196
|
+
description: doc.metadata?.description || void 0,
|
|
17197
|
+
keywords: doc.metadata?.keywords ? doc.metadata.keywords.split(",").filter(Boolean) : void 0
|
|
17614
17198
|
}
|
|
17615
|
-
|
|
17616
|
-
|
|
17617
|
-
|
|
17618
|
-
|
|
17619
|
-
|
|
17620
|
-
|
|
17621
|
-
|
|
17622
|
-
|
|
17623
|
-
|
|
17624
|
-
|
|
17625
|
-
|
|
17626
|
-
|
|
17627
|
-
|
|
17628
|
-
scopeName,
|
|
17629
|
-
url: row.url,
|
|
17630
|
-
path: rowPath,
|
|
17631
|
-
title: row.title,
|
|
17632
|
-
sectionTitle: row.section_title,
|
|
17633
|
-
headingPath: JSON.parse(row.heading_path || "[]"),
|
|
17634
|
-
snippet: row.snippet,
|
|
17635
|
-
chunkText: row.chunk_text || "",
|
|
17636
|
-
ordinal: row.ordinal || 0,
|
|
17637
|
-
contentHash: row.content_hash,
|
|
17638
|
-
modelId: row.model_id,
|
|
17639
|
-
depth: row.depth,
|
|
17640
|
-
incomingLinks: row.incoming_links,
|
|
17641
|
-
routeFile: row.route_file,
|
|
17642
|
-
tags,
|
|
17643
|
-
description,
|
|
17644
|
-
keywords
|
|
17645
|
-
}
|
|
17199
|
+
}));
|
|
17200
|
+
}
|
|
17201
|
+
async searchPages(query, opts, scope) {
|
|
17202
|
+
const index = this.pageIndex(scope);
|
|
17203
|
+
let results;
|
|
17204
|
+
try {
|
|
17205
|
+
results = await index.search({
|
|
17206
|
+
query,
|
|
17207
|
+
limit: opts.limit,
|
|
17208
|
+
semanticWeight: opts.semanticWeight,
|
|
17209
|
+
inputEnrichment: opts.inputEnrichment,
|
|
17210
|
+
reranking: true,
|
|
17211
|
+
filter: opts.filter
|
|
17646
17212
|
});
|
|
17213
|
+
} catch {
|
|
17214
|
+
return [];
|
|
17647
17215
|
}
|
|
17648
|
-
|
|
17649
|
-
|
|
17216
|
+
return results.map((doc) => ({
|
|
17217
|
+
id: doc.id,
|
|
17218
|
+
score: doc.score,
|
|
17219
|
+
title: doc.content.title,
|
|
17220
|
+
url: doc.content.url,
|
|
17221
|
+
description: doc.content.description ?? "",
|
|
17222
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17223
|
+
depth: doc.metadata?.depth ?? 0,
|
|
17224
|
+
incomingLinks: doc.metadata?.incomingLinks ?? 0,
|
|
17225
|
+
routeFile: doc.metadata?.routeFile ?? ""
|
|
17226
|
+
}));
|
|
17650
17227
|
}
|
|
17651
17228
|
async deleteByIds(ids, scope) {
|
|
17652
17229
|
if (ids.length === 0) return;
|
|
17230
|
+
const index = this.chunkIndex(scope);
|
|
17653
17231
|
const BATCH_SIZE = 500;
|
|
17654
17232
|
for (let i = 0; i < ids.length; i += BATCH_SIZE) {
|
|
17655
17233
|
const batch = ids.slice(i, i + BATCH_SIZE);
|
|
17656
|
-
|
|
17657
|
-
await this.client.execute({
|
|
17658
|
-
sql: `DELETE FROM chunks WHERE project_id = ? AND scope_name = ? AND id IN (${placeholders})`,
|
|
17659
|
-
args: [scope.projectId, scope.scopeName, ...batch]
|
|
17660
|
-
});
|
|
17234
|
+
await index.delete(batch);
|
|
17661
17235
|
}
|
|
17662
17236
|
}
|
|
17663
17237
|
async deleteScope(scope) {
|
|
17664
|
-
await this.ensureRegistry();
|
|
17665
17238
|
try {
|
|
17666
|
-
|
|
17667
|
-
|
|
17668
|
-
|
|
17669
|
-
});
|
|
17670
|
-
} catch (error) {
|
|
17671
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17672
|
-
throw error;
|
|
17673
|
-
}
|
|
17239
|
+
const chunkIdx = this.chunkIndex(scope);
|
|
17240
|
+
await chunkIdx.deleteIndex();
|
|
17241
|
+
} catch {
|
|
17674
17242
|
}
|
|
17675
17243
|
try {
|
|
17676
|
-
|
|
17677
|
-
|
|
17678
|
-
|
|
17679
|
-
});
|
|
17680
|
-
} catch (error) {
|
|
17681
|
-
if (error instanceof Error && !error.message.includes("no such table")) {
|
|
17682
|
-
throw error;
|
|
17683
|
-
}
|
|
17244
|
+
const pageIdx = this.pageIndex(scope);
|
|
17245
|
+
await pageIdx.deleteIndex();
|
|
17246
|
+
} catch {
|
|
17684
17247
|
}
|
|
17685
|
-
await this.client.execute({
|
|
17686
|
-
sql: `DELETE FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17687
|
-
args: [scope.projectId, scope.scopeName]
|
|
17688
|
-
});
|
|
17689
|
-
}
|
|
17690
|
-
async listScopes(scopeProjectId) {
|
|
17691
|
-
await this.ensureRegistry();
|
|
17692
|
-
const rs = await this.client.execute({
|
|
17693
|
-
sql: `SELECT project_id, scope_name, model_id, last_indexed_at, vector_count,
|
|
17694
|
-
last_estimate_tokens, last_estimate_cost_usd, last_estimate_changed_chunks
|
|
17695
|
-
FROM registry WHERE project_id = ?`,
|
|
17696
|
-
args: [scopeProjectId]
|
|
17697
|
-
});
|
|
17698
|
-
return rs.rows.map((row) => ({
|
|
17699
|
-
projectId: row.project_id,
|
|
17700
|
-
scopeName: row.scope_name,
|
|
17701
|
-
modelId: row.model_id,
|
|
17702
|
-
lastIndexedAt: row.last_indexed_at,
|
|
17703
|
-
vectorCount: row.vector_count,
|
|
17704
|
-
lastEstimateTokens: row.last_estimate_tokens,
|
|
17705
|
-
lastEstimateCostUSD: row.last_estimate_cost_usd,
|
|
17706
|
-
lastEstimateChangedChunks: row.last_estimate_changed_chunks
|
|
17707
|
-
}));
|
|
17708
17248
|
}
|
|
17709
|
-
async
|
|
17710
|
-
await this.
|
|
17711
|
-
const
|
|
17712
|
-
|
|
17713
|
-
|
|
17714
|
-
|
|
17715
|
-
|
|
17716
|
-
|
|
17717
|
-
|
|
17718
|
-
|
|
17719
|
-
|
|
17720
|
-
|
|
17721
|
-
|
|
17722
|
-
|
|
17723
|
-
|
|
17724
|
-
|
|
17725
|
-
|
|
17726
|
-
|
|
17727
|
-
|
|
17728
|
-
|
|
17249
|
+
async listScopes(projectId) {
|
|
17250
|
+
const allIndexes = await this.client.listIndexes();
|
|
17251
|
+
const prefix = `${projectId}--`;
|
|
17252
|
+
const scopeNames = /* @__PURE__ */ new Set();
|
|
17253
|
+
for (const name of allIndexes) {
|
|
17254
|
+
if (name.startsWith(prefix) && !name.endsWith("--pages")) {
|
|
17255
|
+
const scopeName = name.slice(prefix.length);
|
|
17256
|
+
scopeNames.add(scopeName);
|
|
17257
|
+
}
|
|
17258
|
+
}
|
|
17259
|
+
const scopes = [];
|
|
17260
|
+
for (const scopeName of scopeNames) {
|
|
17261
|
+
const scope = {
|
|
17262
|
+
projectId,
|
|
17263
|
+
scopeName,
|
|
17264
|
+
scopeId: `${projectId}:${scopeName}`
|
|
17265
|
+
};
|
|
17266
|
+
try {
|
|
17267
|
+
const info = await this.chunkIndex(scope).info();
|
|
17268
|
+
scopes.push({
|
|
17269
|
+
projectId,
|
|
17270
|
+
scopeName,
|
|
17271
|
+
lastIndexedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
17272
|
+
documentCount: info.documentCount
|
|
17273
|
+
});
|
|
17274
|
+
} catch {
|
|
17275
|
+
scopes.push({
|
|
17276
|
+
projectId,
|
|
17277
|
+
scopeName,
|
|
17278
|
+
lastIndexedAt: "unknown",
|
|
17279
|
+
documentCount: 0
|
|
17280
|
+
});
|
|
17281
|
+
}
|
|
17282
|
+
}
|
|
17283
|
+
return scopes;
|
|
17729
17284
|
}
|
|
17730
17285
|
async getContentHashes(scope) {
|
|
17731
|
-
const exists = await this.chunksTableExists();
|
|
17732
|
-
if (!exists) return /* @__PURE__ */ new Map();
|
|
17733
|
-
const rs = await this.client.execute({
|
|
17734
|
-
sql: `SELECT id, content_hash FROM chunks WHERE project_id = ? AND scope_name = ?`,
|
|
17735
|
-
args: [scope.projectId, scope.scopeName]
|
|
17736
|
-
});
|
|
17737
17286
|
const map = /* @__PURE__ */ new Map();
|
|
17738
|
-
|
|
17739
|
-
|
|
17287
|
+
const index = this.chunkIndex(scope);
|
|
17288
|
+
let cursor = "0";
|
|
17289
|
+
try {
|
|
17290
|
+
for (; ; ) {
|
|
17291
|
+
const result = await index.range({ cursor, limit: 100 });
|
|
17292
|
+
for (const doc of result.documents) {
|
|
17293
|
+
if (doc.metadata?.contentHash) {
|
|
17294
|
+
map.set(doc.id, doc.metadata.contentHash);
|
|
17295
|
+
}
|
|
17296
|
+
}
|
|
17297
|
+
if (!result.nextCursor || result.nextCursor === "0") break;
|
|
17298
|
+
cursor = result.nextCursor;
|
|
17299
|
+
}
|
|
17300
|
+
} catch {
|
|
17740
17301
|
}
|
|
17741
17302
|
return map;
|
|
17742
17303
|
}
|
|
17743
17304
|
async upsertPages(pages, scope) {
|
|
17744
17305
|
if (pages.length === 0) return;
|
|
17745
|
-
|
|
17746
|
-
|
|
17747
|
-
if (page.projectId !== scope.projectId || page.scopeName !== scope.scopeName) {
|
|
17748
|
-
throw new Error(
|
|
17749
|
-
`Page scope mismatch: page has ${page.projectId}:${page.scopeName} but scope is ${scope.projectId}:${scope.scopeName}`
|
|
17750
|
-
);
|
|
17751
|
-
}
|
|
17752
|
-
}
|
|
17753
|
-
const BATCH_SIZE = 100;
|
|
17306
|
+
const index = this.pageIndex(scope);
|
|
17307
|
+
const BATCH_SIZE = 50;
|
|
17754
17308
|
for (let i = 0; i < pages.length; i += BATCH_SIZE) {
|
|
17755
17309
|
const batch = pages.slice(i, i + BATCH_SIZE);
|
|
17756
|
-
const
|
|
17757
|
-
|
|
17758
|
-
|
|
17759
|
-
|
|
17760
|
-
|
|
17761
|
-
|
|
17762
|
-
p.
|
|
17763
|
-
p.
|
|
17764
|
-
p.
|
|
17765
|
-
p.
|
|
17766
|
-
|
|
17767
|
-
|
|
17768
|
-
p.
|
|
17769
|
-
p.
|
|
17770
|
-
p.
|
|
17771
|
-
p.
|
|
17772
|
-
|
|
17773
|
-
p.
|
|
17774
|
-
|
|
17310
|
+
const docs = batch.map((p) => ({
|
|
17311
|
+
id: p.url,
|
|
17312
|
+
content: {
|
|
17313
|
+
title: p.title,
|
|
17314
|
+
url: p.url,
|
|
17315
|
+
type: "page",
|
|
17316
|
+
description: p.description ?? "",
|
|
17317
|
+
keywords: (p.keywords ?? []).join(","),
|
|
17318
|
+
summary: p.summary ?? "",
|
|
17319
|
+
tags: p.tags.join(",")
|
|
17320
|
+
},
|
|
17321
|
+
metadata: {
|
|
17322
|
+
markdown: p.markdown,
|
|
17323
|
+
projectId: p.projectId,
|
|
17324
|
+
scopeName: p.scopeName,
|
|
17325
|
+
routeFile: p.routeFile,
|
|
17326
|
+
routeResolution: p.routeResolution,
|
|
17327
|
+
incomingLinks: p.incomingLinks,
|
|
17328
|
+
outgoingLinks: p.outgoingLinks,
|
|
17329
|
+
depth: p.depth,
|
|
17330
|
+
indexedAt: p.indexedAt
|
|
17331
|
+
}
|
|
17775
17332
|
}));
|
|
17776
|
-
await
|
|
17333
|
+
await index.upsert(docs);
|
|
17777
17334
|
}
|
|
17778
17335
|
}
|
|
17779
17336
|
async getPage(url, scope) {
|
|
17780
|
-
|
|
17781
|
-
|
|
17782
|
-
|
|
17783
|
-
|
|
17784
|
-
|
|
17785
|
-
|
|
17786
|
-
|
|
17787
|
-
|
|
17788
|
-
|
|
17789
|
-
|
|
17790
|
-
|
|
17791
|
-
|
|
17792
|
-
|
|
17793
|
-
|
|
17794
|
-
|
|
17795
|
-
|
|
17796
|
-
|
|
17797
|
-
|
|
17798
|
-
|
|
17799
|
-
|
|
17800
|
-
|
|
17337
|
+
const index = this.pageIndex(scope);
|
|
17338
|
+
try {
|
|
17339
|
+
const results = await index.fetch([url]);
|
|
17340
|
+
const doc = results[0];
|
|
17341
|
+
if (!doc) return null;
|
|
17342
|
+
return {
|
|
17343
|
+
url: doc.content.url,
|
|
17344
|
+
title: doc.content.title,
|
|
17345
|
+
markdown: doc.metadata.markdown,
|
|
17346
|
+
projectId: doc.metadata.projectId,
|
|
17347
|
+
scopeName: doc.metadata.scopeName,
|
|
17348
|
+
routeFile: doc.metadata.routeFile,
|
|
17349
|
+
routeResolution: doc.metadata.routeResolution,
|
|
17350
|
+
incomingLinks: doc.metadata.incomingLinks,
|
|
17351
|
+
outgoingLinks: doc.metadata.outgoingLinks,
|
|
17352
|
+
depth: doc.metadata.depth,
|
|
17353
|
+
tags: doc.content.tags ? doc.content.tags.split(",").filter(Boolean) : [],
|
|
17354
|
+
indexedAt: doc.metadata.indexedAt,
|
|
17355
|
+
summary: doc.content.summary || void 0,
|
|
17356
|
+
description: doc.content.description || void 0,
|
|
17357
|
+
keywords: doc.content.keywords ? doc.content.keywords.split(",").filter(Boolean) : void 0
|
|
17358
|
+
};
|
|
17359
|
+
} catch {
|
|
17360
|
+
return null;
|
|
17361
|
+
}
|
|
17801
17362
|
}
|
|
17802
17363
|
async deletePages(scope) {
|
|
17803
|
-
|
|
17804
|
-
|
|
17805
|
-
|
|
17806
|
-
|
|
17807
|
-
}
|
|
17808
|
-
}
|
|
17809
|
-
async getScopeModelId(scope) {
|
|
17810
|
-
await this.ensureRegistry();
|
|
17811
|
-
const rs = await this.client.execute({
|
|
17812
|
-
sql: `SELECT model_id FROM registry WHERE project_id = ? AND scope_name = ?`,
|
|
17813
|
-
args: [scope.projectId, scope.scopeName]
|
|
17814
|
-
});
|
|
17815
|
-
if (rs.rows.length === 0) return null;
|
|
17816
|
-
return rs.rows[0].model_id;
|
|
17364
|
+
try {
|
|
17365
|
+
const index = this.pageIndex(scope);
|
|
17366
|
+
await index.reset();
|
|
17367
|
+
} catch {
|
|
17368
|
+
}
|
|
17817
17369
|
}
|
|
17818
17370
|
async health() {
|
|
17819
17371
|
try {
|
|
17820
|
-
await this.client.
|
|
17372
|
+
await this.client.info();
|
|
17821
17373
|
return { ok: true };
|
|
17822
17374
|
} catch (error) {
|
|
17823
17375
|
return {
|
|
@@ -17826,40 +17378,34 @@ var TursoVectorStore = class {
|
|
|
17826
17378
|
};
|
|
17827
17379
|
}
|
|
17828
17380
|
}
|
|
17381
|
+
async dropAllIndexes(projectId) {
|
|
17382
|
+
const allIndexes = await this.client.listIndexes();
|
|
17383
|
+
const prefix = `${projectId}--`;
|
|
17384
|
+
for (const name of allIndexes) {
|
|
17385
|
+
if (name.startsWith(prefix)) {
|
|
17386
|
+
try {
|
|
17387
|
+
const index = this.client.index(name);
|
|
17388
|
+
await index.deleteIndex();
|
|
17389
|
+
} catch {
|
|
17390
|
+
}
|
|
17391
|
+
}
|
|
17392
|
+
}
|
|
17393
|
+
}
|
|
17829
17394
|
};
|
|
17830
17395
|
|
|
17831
17396
|
// src/vector/factory.ts
|
|
17832
|
-
async function
|
|
17833
|
-
const
|
|
17834
|
-
const
|
|
17835
|
-
if (
|
|
17836
|
-
const { createClient: createClient2 } = await import('@libsql/client/http');
|
|
17837
|
-
const authToken = turso.authToken ?? process.env[turso.authTokenEnv];
|
|
17838
|
-
const client2 = createClient2({
|
|
17839
|
-
url: remoteUrl,
|
|
17840
|
-
authToken
|
|
17841
|
-
});
|
|
17842
|
-
return new TursoVectorStore({
|
|
17843
|
-
client: client2,
|
|
17844
|
-
dimension: config.vector.dimension
|
|
17845
|
-
});
|
|
17846
|
-
}
|
|
17847
|
-
if (isServerless()) {
|
|
17397
|
+
async function createUpstashStore(config) {
|
|
17398
|
+
const url = config.upstash.url ?? process.env[config.upstash.urlEnv];
|
|
17399
|
+
const token = config.upstash.token ?? process.env[config.upstash.tokenEnv];
|
|
17400
|
+
if (!url || !token) {
|
|
17848
17401
|
throw new SearchSocketError(
|
|
17849
17402
|
"VECTOR_BACKEND_UNAVAILABLE",
|
|
17850
|
-
`
|
|
17403
|
+
`Missing Upstash Search credentials. Set ${config.upstash.urlEnv} and ${config.upstash.tokenEnv} environment variables, or pass upstash.url and upstash.token in your config.`
|
|
17851
17404
|
);
|
|
17852
17405
|
}
|
|
17853
|
-
const {
|
|
17854
|
-
const
|
|
17855
|
-
|
|
17856
|
-
const client = createClient({
|
|
17857
|
-
url: `file:${localPath}`
|
|
17858
|
-
});
|
|
17859
|
-
return new TursoVectorStore({
|
|
17860
|
-
client,
|
|
17861
|
-
dimension: config.vector.dimension
|
|
17862
|
-
});
|
|
17406
|
+
const { Search } = await import('@upstash/search');
|
|
17407
|
+
const client = new Search({ url, token });
|
|
17408
|
+
return new UpstashSearchStore({ client });
|
|
17863
17409
|
}
|
|
17864
17410
|
|
|
17865
17411
|
// src/utils/pattern.ts
|
|
@@ -17899,7 +17445,12 @@ function nonNegativeOrZero(value) {
|
|
|
17899
17445
|
}
|
|
17900
17446
|
return Math.max(0, value);
|
|
17901
17447
|
}
|
|
17902
|
-
function
|
|
17448
|
+
/**
 * Normalizes free text so a query and a page title can be compared with a
 * plain substring check.
 *
 * The text is lower-cased, everything that is not a letter, combining mark,
 * digit or whitespace is removed, runs of whitespace collapse to one space,
 * and the result is trimmed.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * `a-z0-9`, so non-Latin and accented titles are kept instead of being
 * reduced to an empty or truncated string. ASCII input is unaffected.
 *
 * @param {string} text - Raw query or title.
 * @returns {string} Normalized text, or "" when `text` is not a string.
 */
function normalizeForTitleMatch(text) {
  // A hit without a title must not throw; "" simply never matches.
  if (typeof text !== "string") return "";
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s]/gu, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
17451
|
+
function rankHits(hits, config, query) {
|
|
17452
|
+
const normalizedQuery = query ? normalizeForTitleMatch(query) : "";
|
|
17453
|
+
const titleMatchWeight = config.ranking.weights.titleMatch;
|
|
17903
17454
|
return hits.map((hit) => {
|
|
17904
17455
|
let score = Number.isFinite(hit.score) ? hit.score : Number.NEGATIVE_INFINITY;
|
|
17905
17456
|
if (config.ranking.enableIncomingLinkBoost) {
|
|
@@ -17910,6 +17461,12 @@ function rankHits(hits, config) {
|
|
|
17910
17461
|
const depthBoost = 1 / (1 + nonNegativeOrZero(hit.metadata.depth));
|
|
17911
17462
|
score += depthBoost * config.ranking.weights.depth;
|
|
17912
17463
|
}
|
|
17464
|
+
if (normalizedQuery && titleMatchWeight > 0) {
|
|
17465
|
+
const normalizedTitle = normalizeForTitleMatch(hit.metadata.title);
|
|
17466
|
+
if (normalizedQuery.length > 0 && normalizedTitle.length > 0 && (normalizedTitle.includes(normalizedQuery) || normalizedQuery.includes(normalizedTitle))) {
|
|
17467
|
+
score += titleMatchWeight;
|
|
17468
|
+
}
|
|
17469
|
+
}
|
|
17913
17470
|
return {
|
|
17914
17471
|
hit,
|
|
17915
17472
|
finalScore: Number.isFinite(score) ? score : Number.NEGATIVE_INFINITY
|
|
@@ -17919,6 +17476,30 @@ function rankHits(hits, config) {
|
|
|
17919
17476
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17920
17477
|
});
|
|
17921
17478
|
}
|
|
17479
|
+
/**
 * Trims a list of page results using two settings from `config.ranking`.
 *
 * 1. `minScore`: when positive, the median `pageScore` of the list is
 *    computed; if it is below `minScore` the whole list is dropped.
 * 2. `scoreGapThreshold`: when positive, the list is walked in its given
 *    order and cut just before the first entry whose relative drop from the
 *    previous entry, `(prev - current) / prev`, reaches the threshold.
 *    Pairs whose previous score is not positive are ignored.
 *
 * @param {Array<{pageScore: number}>} results - Page results in ranked order.
 * @param {{ranking: {scoreGapThreshold: number, minScore: number}}} config
 * @returns {Array} `results` itself when nothing is trimmed, otherwise a new array.
 */
function trimByScoreGap(results, config) {
  if (results.length === 0) return results;
  const { scoreGapThreshold, minScore } = config.ranking;

  if (minScore > 0) {
    const ascending = results.map((entry) => entry.pageScore).sort((a, b) => a - b);
    const half = Math.floor(ascending.length / 2);
    const hasEvenCount = ascending.length % 2 === 0;
    const median = hasEvenCount ? (ascending[half - 1] + ascending[half]) / 2 : ascending[half];
    if (median < minScore) return [];
  }

  if (!(scoreGapThreshold > 0) || results.length <= 1) return results;

  // Index of the first entry that falls off a relative score cliff.
  const cutIndex = results.findIndex((entry, position) => {
    if (position === 0) return false;
    const previousScore = results[position - 1].pageScore;
    if (!(previousScore > 0)) return false;
    return (previousScore - entry.pageScore) / previousScore >= scoreGapThreshold;
  });
  return cutIndex === -1 ? results : results.slice(0, cutIndex);
}
|
|
17922
17503
|
function findPageWeight(url, pageWeights) {
|
|
17923
17504
|
let bestPattern = "";
|
|
17924
17505
|
let bestWeight = 1;
|
|
@@ -17973,6 +17554,61 @@ function aggregateByPage(ranked, config) {
|
|
|
17973
17554
|
return Number.isNaN(delta) ? 0 : delta;
|
|
17974
17555
|
});
|
|
17975
17556
|
}
|
|
17557
|
+
/**
 * Combines page-level hits with ranked chunk hits into one ranked list.
 *
 * - With no page hits, `rankedChunks` is returned untouched.
 * - A chunk whose URL also appears in `pageHits` gets a blended score:
 *   `(1 - w) * chunk.finalScore + w * page.score`, where `w` is
 *   `config.search.pageSearchWeight`. A non-finite blend falls back to the
 *   chunk's own score.
 * - A page hit with no chunk is added as a synthetic entry with id
 *   `page:<url>` and score `page.score * w` (0 when that is not finite).
 *
 * The merged list is sorted by `finalScore`, highest first.
 *
 * @param {Array<object>} pageHits - Page-level hits (url, score, title, ...).
 * @param {Array<{hit: object, finalScore: number}>} rankedChunks - Ranked chunk hits.
 * @param {{search: {pageSearchWeight: number}}} config
 * @returns {Array<{hit: object, finalScore: number}>} Merged, sorted entries.
 */
function mergePageAndChunkResults(pageHits, rankedChunks, config) {
  if (pageHits.length === 0) return rankedChunks;

  const pageWeight = config.search.pageSearchWeight;
  // For a repeated URL the last page hit wins.
  const pagesByUrl = new Map(pageHits.map((page) => [page.url, page]));
  const matchedUrls = new Set();
  const combined = [];

  for (const chunk of rankedChunks) {
    const chunkUrl = chunk.hit.metadata.url;
    const page = pagesByUrl.get(chunkUrl);
    if (!page) {
      combined.push(chunk);
      continue;
    }
    matchedUrls.add(chunkUrl);
    const blendedScore = (1 - pageWeight) * chunk.finalScore + pageWeight * page.score;
    combined.push({
      hit: chunk.hit,
      finalScore: Number.isFinite(blendedScore) ? blendedScore : chunk.finalScore
    });
  }

  for (const [pageUrl, page] of pagesByUrl) {
    if (matchedUrls.has(pageUrl)) continue;
    const weightedScore = page.score * pageWeight;
    // The page has no chunk text, so its description (or title) stands in.
    const fallbackText = page.description || page.title;
    combined.push({
      hit: {
        id: `page:${pageUrl}`,
        score: page.score,
        metadata: {
          projectId: "",
          scopeName: "",
          url: page.url,
          path: page.url,
          title: page.title,
          sectionTitle: "",
          headingPath: [],
          snippet: fallbackText,
          chunkText: fallbackText,
          ordinal: 0,
          contentHash: "",
          depth: page.depth,
          incomingLinks: page.incomingLinks,
          routeFile: page.routeFile,
          tags: page.tags
        }
      },
      finalScore: Number.isFinite(weightedScore) ? weightedScore : 0
    });
  }

  combined.sort((left, right) => {
    const difference = right.finalScore - left.finalScore;
    return Number.isNaN(difference) ? 0 : difference;
  });
  return combined;
}
|
|
17976
17612
|
|
|
17977
17613
|
// src/search/engine.ts
|
|
17978
17614
|
var requestSchema = z.object({
|
|
@@ -17981,35 +17617,25 @@ var requestSchema = z.object({
|
|
|
17981
17617
|
scope: z.string().optional(),
|
|
17982
17618
|
pathPrefix: z.string().optional(),
|
|
17983
17619
|
tags: z.array(z.string()).optional(),
|
|
17984
|
-
|
|
17985
|
-
groupBy: z.enum(["page", "chunk"]).optional(),
|
|
17986
|
-
stream: z.boolean().optional()
|
|
17620
|
+
groupBy: z.enum(["page", "chunk"]).optional()
|
|
17987
17621
|
});
|
|
17988
17622
|
var SearchEngine = class _SearchEngine {
|
|
17989
17623
|
cwd;
|
|
17990
17624
|
config;
|
|
17991
|
-
|
|
17992
|
-
vectorStore;
|
|
17993
|
-
reranker;
|
|
17625
|
+
store;
|
|
17994
17626
|
constructor(options) {
|
|
17995
17627
|
this.cwd = options.cwd;
|
|
17996
17628
|
this.config = options.config;
|
|
17997
|
-
this.
|
|
17998
|
-
this.vectorStore = options.vectorStore;
|
|
17999
|
-
this.reranker = options.reranker;
|
|
17629
|
+
this.store = options.store;
|
|
18000
17630
|
}
|
|
18001
17631
|
static async create(options = {}) {
|
|
18002
17632
|
const cwd = path.resolve(options.cwd ?? process.cwd());
|
|
18003
17633
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
18004
|
-
const
|
|
18005
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
18006
|
-
const reranker = options.reranker === void 0 ? createReranker(config) : options.reranker;
|
|
17634
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
18007
17635
|
return new _SearchEngine({
|
|
18008
17636
|
cwd,
|
|
18009
17637
|
config,
|
|
18010
|
-
|
|
18011
|
-
vectorStore,
|
|
18012
|
-
reranker
|
|
17638
|
+
store
|
|
18013
17639
|
});
|
|
18014
17640
|
}
|
|
18015
17641
|
getConfig() {
|
|
@@ -18023,142 +17649,90 @@ var SearchEngine = class _SearchEngine {
|
|
|
18023
17649
|
const input = parsed.data;
|
|
18024
17650
|
const totalStart = process.hrtime.bigint();
|
|
18025
17651
|
const resolvedScope = resolveScope(this.config, input.scope);
|
|
18026
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
18027
17652
|
const topK = input.topK ?? 10;
|
|
18028
|
-
const wantsRerank = Boolean(input.rerank);
|
|
18029
17653
|
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
18030
17654
|
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
18031
|
-
const
|
|
18032
|
-
|
|
18033
|
-
|
|
18034
|
-
|
|
18035
|
-
|
|
18036
|
-
|
|
18037
|
-
|
|
18038
|
-
|
|
18039
|
-
|
|
18040
|
-
|
|
18041
|
-
|
|
18042
|
-
|
|
18043
|
-
|
|
18044
|
-
|
|
18045
|
-
|
|
18046
|
-
|
|
18047
|
-
|
|
18048
|
-
|
|
18049
|
-
|
|
18050
|
-
|
|
18051
|
-
|
|
18052
|
-
|
|
18053
|
-
|
|
18054
|
-
|
|
18055
|
-
|
|
18056
|
-
|
|
18057
|
-
|
|
17655
|
+
const filterParts = [];
|
|
17656
|
+
if (input.pathPrefix) {
|
|
17657
|
+
const prefix = input.pathPrefix.startsWith("/") ? input.pathPrefix : `/${input.pathPrefix}`;
|
|
17658
|
+
filterParts.push(`url GLOB '${prefix}*'`);
|
|
17659
|
+
}
|
|
17660
|
+
if (input.tags && input.tags.length > 0) {
|
|
17661
|
+
for (const tag of input.tags) {
|
|
17662
|
+
filterParts.push(`tags GLOB '*${tag}*'`);
|
|
17663
|
+
}
|
|
17664
|
+
}
|
|
17665
|
+
const filter = filterParts.length > 0 ? filterParts.join(" AND ") : void 0;
|
|
17666
|
+
const useDualSearch = this.config.search.dualSearch && groupByPage;
|
|
17667
|
+
const searchStart = process.hrtime.bigint();
|
|
17668
|
+
let ranked;
|
|
17669
|
+
if (useDualSearch) {
|
|
17670
|
+
const chunkLimit = Math.max(topK * 10, 100);
|
|
17671
|
+
const pageLimit = 20;
|
|
17672
|
+
const [pageHits, chunkHits] = await Promise.all([
|
|
17673
|
+
this.store.searchPages(
|
|
17674
|
+
input.q,
|
|
17675
|
+
{
|
|
17676
|
+
limit: pageLimit,
|
|
17677
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17678
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17679
|
+
filter
|
|
17680
|
+
},
|
|
17681
|
+
resolvedScope
|
|
17682
|
+
),
|
|
17683
|
+
this.store.search(
|
|
17684
|
+
input.q,
|
|
17685
|
+
{
|
|
17686
|
+
limit: chunkLimit,
|
|
17687
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17688
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17689
|
+
reranking: false,
|
|
17690
|
+
filter
|
|
17691
|
+
},
|
|
17692
|
+
resolvedScope
|
|
17693
|
+
)
|
|
17694
|
+
]);
|
|
17695
|
+
const rankedChunks = rankHits(chunkHits, this.config, input.q);
|
|
17696
|
+
ranked = mergePageAndChunkResults(pageHits, rankedChunks, this.config);
|
|
17697
|
+
} else {
|
|
17698
|
+
const hits = await this.store.search(
|
|
17699
|
+
input.q,
|
|
17700
|
+
{
|
|
17701
|
+
limit: candidateK,
|
|
17702
|
+
semanticWeight: this.config.search.semanticWeight,
|
|
17703
|
+
inputEnrichment: this.config.search.inputEnrichment,
|
|
17704
|
+
reranking: this.config.search.reranking,
|
|
17705
|
+
filter
|
|
17706
|
+
},
|
|
17707
|
+
resolvedScope
|
|
17708
|
+
);
|
|
17709
|
+
ranked = rankHits(hits, this.config, input.q);
|
|
18058
17710
|
}
|
|
18059
|
-
const
|
|
17711
|
+
const searchMs = hrTimeMs(searchStart);
|
|
17712
|
+
const results = this.buildResults(ranked, topK, groupByPage, input.q);
|
|
18060
17713
|
return {
|
|
18061
17714
|
q: input.q,
|
|
18062
17715
|
scope: resolvedScope.scopeName,
|
|
18063
17716
|
results,
|
|
18064
17717
|
meta: {
|
|
18065
17718
|
timingsMs: {
|
|
18066
|
-
|
|
18067
|
-
vector: Math.round(vectorMs),
|
|
18068
|
-
rerank: Math.round(rerankMs),
|
|
17719
|
+
search: Math.round(searchMs),
|
|
18069
17720
|
total: Math.round(hrTimeMs(totalStart))
|
|
18070
|
-
},
|
|
18071
|
-
usedRerank,
|
|
18072
|
-
modelId: this.config.embeddings.model
|
|
18073
|
-
}
|
|
18074
|
-
};
|
|
18075
|
-
}
|
|
18076
|
-
async *searchStreaming(request) {
|
|
18077
|
-
const parsed = requestSchema.safeParse(request);
|
|
18078
|
-
if (!parsed.success) {
|
|
18079
|
-
throw new SearchSocketError("INVALID_REQUEST", parsed.error.issues[0]?.message ?? "Invalid request", 400);
|
|
18080
|
-
}
|
|
18081
|
-
const input = parsed.data;
|
|
18082
|
-
const wantsRerank = Boolean(input.rerank);
|
|
18083
|
-
if (!wantsRerank) {
|
|
18084
|
-
const response = await this.search(request);
|
|
18085
|
-
yield { phase: "initial", data: response };
|
|
18086
|
-
return;
|
|
18087
|
-
}
|
|
18088
|
-
const totalStart = process.hrtime.bigint();
|
|
18089
|
-
const resolvedScope = resolveScope(this.config, input.scope);
|
|
18090
|
-
await this.assertModelCompatibility(resolvedScope);
|
|
18091
|
-
const topK = input.topK ?? 10;
|
|
18092
|
-
const groupByPage = (input.groupBy ?? "page") === "page";
|
|
18093
|
-
const candidateK = groupByPage ? Math.max(topK * 10, 50) : Math.max(50, topK);
|
|
18094
|
-
const embedStart = process.hrtime.bigint();
|
|
18095
|
-
const queryEmbeddings = await this.embeddings.embedTexts([input.q], this.config.embeddings.model, "retrieval.query");
|
|
18096
|
-
const queryVector = queryEmbeddings[0];
|
|
18097
|
-
if (!queryVector || queryVector.length === 0 || queryVector.some((value) => !Number.isFinite(value))) {
|
|
18098
|
-
throw new SearchSocketError("VECTOR_BACKEND_UNAVAILABLE", "Unable to create query embedding.");
|
|
18099
|
-
}
|
|
18100
|
-
const embedMs = hrTimeMs(embedStart);
|
|
18101
|
-
const vectorStart = process.hrtime.bigint();
|
|
18102
|
-
const hits = await this.vectorStore.query(
|
|
18103
|
-
queryVector,
|
|
18104
|
-
{
|
|
18105
|
-
topK: candidateK,
|
|
18106
|
-
pathPrefix: input.pathPrefix,
|
|
18107
|
-
tags: input.tags
|
|
18108
|
-
},
|
|
18109
|
-
resolvedScope
|
|
18110
|
-
);
|
|
18111
|
-
const vectorMs = hrTimeMs(vectorStart);
|
|
18112
|
-
const ranked = rankHits(hits, this.config);
|
|
18113
|
-
const initialResults = this.buildResults(ranked, topK, groupByPage);
|
|
18114
|
-
yield {
|
|
18115
|
-
phase: "initial",
|
|
18116
|
-
data: {
|
|
18117
|
-
q: input.q,
|
|
18118
|
-
scope: resolvedScope.scopeName,
|
|
18119
|
-
results: initialResults,
|
|
18120
|
-
meta: {
|
|
18121
|
-
timingsMs: {
|
|
18122
|
-
embed: Math.round(embedMs),
|
|
18123
|
-
vector: Math.round(vectorMs),
|
|
18124
|
-
rerank: 0,
|
|
18125
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
18126
|
-
},
|
|
18127
|
-
usedRerank: false,
|
|
18128
|
-
modelId: this.config.embeddings.model
|
|
18129
|
-
}
|
|
18130
|
-
}
|
|
18131
|
-
};
|
|
18132
|
-
const rerankStart = process.hrtime.bigint();
|
|
18133
|
-
const reranked = await this.rerankHits(input.q, ranked, topK);
|
|
18134
|
-
const rerankMs = hrTimeMs(rerankStart);
|
|
18135
|
-
const rerankedResults = this.buildResults(reranked, topK, groupByPage);
|
|
18136
|
-
yield {
|
|
18137
|
-
phase: "reranked",
|
|
18138
|
-
data: {
|
|
18139
|
-
q: input.q,
|
|
18140
|
-
scope: resolvedScope.scopeName,
|
|
18141
|
-
results: rerankedResults,
|
|
18142
|
-
meta: {
|
|
18143
|
-
timingsMs: {
|
|
18144
|
-
embed: Math.round(embedMs),
|
|
18145
|
-
vector: Math.round(vectorMs),
|
|
18146
|
-
rerank: Math.round(rerankMs),
|
|
18147
|
-
total: Math.round(hrTimeMs(totalStart))
|
|
18148
|
-
},
|
|
18149
|
-
usedRerank: true,
|
|
18150
|
-
modelId: this.config.embeddings.model
|
|
18151
17721
|
}
|
|
18152
17722
|
}
|
|
18153
17723
|
};
|
|
18154
17724
|
}
|
|
18155
|
-
|
|
18156
|
-
const
|
|
17725
|
+
ensureSnippet(hit) {
|
|
17726
|
+
const snippet = hit.hit.metadata.snippet;
|
|
17727
|
+
if (snippet && snippet.length >= 30) return snippet;
|
|
17728
|
+
const chunkText = hit.hit.metadata.chunkText;
|
|
17729
|
+
if (chunkText) return toSnippet(chunkText);
|
|
17730
|
+
return snippet || "";
|
|
17731
|
+
}
|
|
17732
|
+
buildResults(ordered, topK, groupByPage, _query) {
|
|
18157
17733
|
if (groupByPage) {
|
|
18158
17734
|
let pages = aggregateByPage(ordered, this.config);
|
|
18159
|
-
|
|
18160
|
-
pages = pages.filter((p) => p.pageScore >= minScore);
|
|
18161
|
-
}
|
|
17735
|
+
pages = trimByScoreGap(pages, this.config);
|
|
18162
17736
|
const minRatio = this.config.ranking.minChunkScoreRatio;
|
|
18163
17737
|
return pages.slice(0, topK).map((page) => {
|
|
18164
17738
|
const bestScore = page.bestChunk.finalScore;
|
|
@@ -18168,12 +17742,12 @@ var SearchEngine = class _SearchEngine {
|
|
|
18168
17742
|
url: page.url,
|
|
18169
17743
|
title: page.title,
|
|
18170
17744
|
sectionTitle: page.bestChunk.hit.metadata.sectionTitle || void 0,
|
|
18171
|
-
snippet: page.bestChunk
|
|
17745
|
+
snippet: this.ensureSnippet(page.bestChunk),
|
|
18172
17746
|
score: Number(page.pageScore.toFixed(6)),
|
|
18173
17747
|
routeFile: page.routeFile,
|
|
18174
17748
|
chunks: meaningful.length > 1 ? meaningful.map((c) => ({
|
|
18175
17749
|
sectionTitle: c.hit.metadata.sectionTitle || void 0,
|
|
18176
|
-
snippet: c
|
|
17750
|
+
snippet: this.ensureSnippet(c),
|
|
18177
17751
|
headingPath: c.hit.metadata.headingPath,
|
|
18178
17752
|
score: Number(c.finalScore.toFixed(6))
|
|
18179
17753
|
})) : void 0
|
|
@@ -18181,6 +17755,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18181
17755
|
});
|
|
18182
17756
|
} else {
|
|
18183
17757
|
let filtered = ordered;
|
|
17758
|
+
const minScore = this.config.ranking.minScore;
|
|
18184
17759
|
if (minScore > 0) {
|
|
18185
17760
|
filtered = ordered.filter((entry) => entry.finalScore >= minScore);
|
|
18186
17761
|
}
|
|
@@ -18188,7 +17763,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18188
17763
|
url: hit.metadata.url,
|
|
18189
17764
|
title: hit.metadata.title,
|
|
18190
17765
|
sectionTitle: hit.metadata.sectionTitle || void 0,
|
|
18191
|
-
snippet: hit
|
|
17766
|
+
snippet: this.ensureSnippet({ hit, finalScore }),
|
|
18192
17767
|
score: Number(finalScore.toFixed(6)),
|
|
18193
17768
|
routeFile: hit.metadata.routeFile
|
|
18194
17769
|
}));
|
|
@@ -18197,7 +17772,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18197
17772
|
async getPage(pathOrUrl, scope) {
|
|
18198
17773
|
const resolvedScope = resolveScope(this.config, scope);
|
|
18199
17774
|
const urlPath = this.resolveInputPath(pathOrUrl);
|
|
18200
|
-
const page = await this.
|
|
17775
|
+
const page = await this.store.getPage(urlPath, resolvedScope);
|
|
18201
17776
|
if (!page) {
|
|
18202
17777
|
throw new SearchSocketError("INVALID_REQUEST", `Indexed page not found for ${urlPath}`, 404);
|
|
18203
17778
|
}
|
|
@@ -18218,7 +17793,7 @@ var SearchEngine = class _SearchEngine {
|
|
|
18218
17793
|
};
|
|
18219
17794
|
}
|
|
18220
17795
|
async health() {
|
|
18221
|
-
return this.
|
|
17796
|
+
return this.store.health();
|
|
18222
17797
|
}
|
|
18223
17798
|
resolveInputPath(pathOrUrl) {
|
|
18224
17799
|
try {
|
|
@@ -18230,90 +17805,6 @@ var SearchEngine = class _SearchEngine {
|
|
|
18230
17805
|
const withoutQueryOrHash = pathOrUrl.split(/[?#]/)[0] ?? pathOrUrl;
|
|
18231
17806
|
return normalizeUrlPath(withoutQueryOrHash);
|
|
18232
17807
|
}
|
|
18233
|
-
async assertModelCompatibility(scope) {
|
|
18234
|
-
const modelId = await this.vectorStore.getScopeModelId(scope);
|
|
18235
|
-
if (modelId && modelId !== this.config.embeddings.model) {
|
|
18236
|
-
throw new SearchSocketError(
|
|
18237
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
18238
|
-
`Scope ${scope.scopeName} was indexed with ${modelId}. Current config uses ${this.config.embeddings.model}. Re-index with --force.`
|
|
18239
|
-
);
|
|
18240
|
-
}
|
|
18241
|
-
}
|
|
18242
|
-
async rerankHits(query, ranked, topK) {
|
|
18243
|
-
if (!this.config.rerank.enabled) {
|
|
18244
|
-
throw new SearchSocketError(
|
|
18245
|
-
"INVALID_REQUEST",
|
|
18246
|
-
"rerank=true requested but rerank.enabled is not set to true.",
|
|
18247
|
-
400
|
|
18248
|
-
);
|
|
18249
|
-
}
|
|
18250
|
-
if (!this.reranker) {
|
|
18251
|
-
throw new SearchSocketError(
|
|
18252
|
-
"CONFIG_MISSING",
|
|
18253
|
-
`rerank=true requested but ${this.config.embeddings.apiKeyEnv} is not set.`,
|
|
18254
|
-
400
|
|
18255
|
-
);
|
|
18256
|
-
}
|
|
18257
|
-
const pageGroups = /* @__PURE__ */ new Map();
|
|
18258
|
-
for (const entry of ranked) {
|
|
18259
|
-
const url = entry.hit.metadata.url;
|
|
18260
|
-
const group = pageGroups.get(url);
|
|
18261
|
-
if (group) group.push(entry);
|
|
18262
|
-
else pageGroups.set(url, [entry]);
|
|
18263
|
-
}
|
|
18264
|
-
const MAX_CHUNKS_PER_PAGE = 5;
|
|
18265
|
-
const MIN_CHUNKS_PER_PAGE = 1;
|
|
18266
|
-
const MIN_CHUNK_SCORE_RATIO = 0.5;
|
|
18267
|
-
const MAX_DOC_CHARS = 2e3;
|
|
18268
|
-
const pageCandidates = [];
|
|
18269
|
-
for (const [url, chunks] of pageGroups) {
|
|
18270
|
-
const byScore = [...chunks].sort((a, b) => b.finalScore - a.finalScore);
|
|
18271
|
-
const bestScore = byScore[0].finalScore;
|
|
18272
|
-
const scoreFloor = Number.isFinite(bestScore) ? bestScore * MIN_CHUNK_SCORE_RATIO : Number.NEGATIVE_INFINITY;
|
|
18273
|
-
const selected = byScore.filter(
|
|
18274
|
-
(c, i) => i < MIN_CHUNKS_PER_PAGE || c.finalScore >= scoreFloor
|
|
18275
|
-
).slice(0, MAX_CHUNKS_PER_PAGE);
|
|
18276
|
-
selected.sort((a, b) => (a.hit.metadata.ordinal ?? 0) - (b.hit.metadata.ordinal ?? 0));
|
|
18277
|
-
const first = selected[0].hit.metadata;
|
|
18278
|
-
const parts = [first.title];
|
|
18279
|
-
if (first.description) {
|
|
18280
|
-
parts.push(first.description);
|
|
18281
|
-
}
|
|
18282
|
-
if (first.keywords && first.keywords.length > 0) {
|
|
18283
|
-
parts.push(first.keywords.join(", "));
|
|
18284
|
-
}
|
|
18285
|
-
const body = selected.map((c) => c.hit.metadata.chunkText || c.hit.metadata.snippet).join("\n\n");
|
|
18286
|
-
parts.push(body);
|
|
18287
|
-
let text = parts.join("\n\n");
|
|
18288
|
-
if (text.length > MAX_DOC_CHARS) {
|
|
18289
|
-
text = text.slice(0, MAX_DOC_CHARS);
|
|
18290
|
-
}
|
|
18291
|
-
pageCandidates.push({ id: url, text });
|
|
18292
|
-
}
|
|
18293
|
-
const maxCandidates = Math.max(topK, this.config.rerank.topN);
|
|
18294
|
-
const cappedCandidates = pageCandidates.slice(0, maxCandidates);
|
|
18295
|
-
const reranked = await this.reranker.rerank(
|
|
18296
|
-
query,
|
|
18297
|
-
cappedCandidates,
|
|
18298
|
-
maxCandidates
|
|
18299
|
-
);
|
|
18300
|
-
const scoreByUrl = new Map(reranked.map((e) => [e.id, e.score]));
|
|
18301
|
-
return ranked.map((entry) => {
|
|
18302
|
-
const pageScore = scoreByUrl.get(entry.hit.metadata.url);
|
|
18303
|
-
const base = Number.isFinite(entry.finalScore) ? entry.finalScore : Number.NEGATIVE_INFINITY;
|
|
18304
|
-
if (pageScore === void 0 || !Number.isFinite(pageScore)) {
|
|
18305
|
-
return { ...entry, finalScore: base };
|
|
18306
|
-
}
|
|
18307
|
-
const combined = pageScore * this.config.ranking.weights.rerank + base * 1e-3;
|
|
18308
|
-
return {
|
|
18309
|
-
...entry,
|
|
18310
|
-
finalScore: Number.isFinite(combined) ? combined : base
|
|
18311
|
-
};
|
|
18312
|
-
}).sort((a, b) => {
|
|
18313
|
-
const delta = b.finalScore - a.finalScore;
|
|
18314
|
-
return Number.isNaN(delta) ? 0 : delta;
|
|
18315
|
-
});
|
|
18316
|
-
}
|
|
18317
17808
|
};
|
|
18318
17809
|
|
|
18319
17810
|
// src/sveltekit/handle.ts
|
|
@@ -18468,42 +17959,6 @@ function searchsocketHandle(options = {}) {
|
|
|
18468
17959
|
}
|
|
18469
17960
|
const engine = await getEngine();
|
|
18470
17961
|
const searchRequest = body;
|
|
18471
|
-
if (searchRequest.stream && searchRequest.rerank) {
|
|
18472
|
-
const encoder = new TextEncoder();
|
|
18473
|
-
const stream = new ReadableStream({
|
|
18474
|
-
async start(controller) {
|
|
18475
|
-
try {
|
|
18476
|
-
for await (const event2 of engine.searchStreaming(searchRequest)) {
|
|
18477
|
-
const line = JSON.stringify(event2) + "\n";
|
|
18478
|
-
controller.enqueue(encoder.encode(line));
|
|
18479
|
-
}
|
|
18480
|
-
} catch (streamError) {
|
|
18481
|
-
const errorEvent = {
|
|
18482
|
-
phase: "error",
|
|
18483
|
-
data: {
|
|
18484
|
-
error: {
|
|
18485
|
-
code: streamError instanceof SearchSocketError ? streamError.code : "INTERNAL_ERROR",
|
|
18486
|
-
message: streamError instanceof Error ? streamError.message : "Unknown error"
|
|
18487
|
-
}
|
|
18488
|
-
}
|
|
18489
|
-
};
|
|
18490
|
-
controller.enqueue(encoder.encode(JSON.stringify(errorEvent) + "\n"));
|
|
18491
|
-
} finally {
|
|
18492
|
-
controller.close();
|
|
18493
|
-
}
|
|
18494
|
-
}
|
|
18495
|
-
});
|
|
18496
|
-
return withCors(
|
|
18497
|
-
new Response(stream, {
|
|
18498
|
-
status: 200,
|
|
18499
|
-
headers: {
|
|
18500
|
-
"content-type": "application/x-ndjson"
|
|
18501
|
-
}
|
|
18502
|
-
}),
|
|
18503
|
-
event.request,
|
|
18504
|
-
config
|
|
18505
|
-
);
|
|
18506
|
-
}
|
|
18507
17962
|
const result = await engine.search(searchRequest);
|
|
18508
17963
|
return withCors(
|
|
18509
17964
|
new Response(JSON.stringify(result), {
|
|
@@ -18564,9 +18019,8 @@ function withCors(response, request, config) {
|
|
|
18564
18019
|
}
|
|
18565
18020
|
function ensureStateDirs(cwd, stateDir, scope) {
|
|
18566
18021
|
const statePath = path.resolve(cwd, stateDir);
|
|
18567
|
-
|
|
18568
|
-
|
|
18569
|
-
return { statePath, pagesPath };
|
|
18022
|
+
fs.mkdirSync(statePath, { recursive: true });
|
|
18023
|
+
return { statePath };
|
|
18570
18024
|
}
|
|
18571
18025
|
function sha1(input) {
|
|
18572
18026
|
return createHash("sha1").update(input).digest("hex");
|
|
@@ -18816,7 +18270,7 @@ function buildEmbeddingText(chunk, prependTitle) {
|
|
|
18816
18270
|
|
|
18817
18271
|
${chunk.chunkText}`;
|
|
18818
18272
|
}
|
|
18819
|
-
function
|
|
18273
|
+
function chunkPage(page, config, scope) {
|
|
18820
18274
|
const sections = parseHeadingSections(page.markdown, config.chunking.headingPathDepth);
|
|
18821
18275
|
const rawChunks = sections.flatMap((section) => splitSection(section, config.chunking));
|
|
18822
18276
|
const chunks = [];
|
|
@@ -19847,53 +19301,6 @@ function extractFromMarkdown(url, markdown, title) {
|
|
|
19847
19301
|
weight: mdWeight
|
|
19848
19302
|
};
|
|
19849
19303
|
}
|
|
19850
|
-
function yamlString(value) {
|
|
19851
|
-
return JSON.stringify(value);
|
|
19852
|
-
}
|
|
19853
|
-
function yamlArray(values) {
|
|
19854
|
-
return `[${values.map((v) => JSON.stringify(v)).join(", ")}]`;
|
|
19855
|
-
}
|
|
19856
|
-
function buildMirrorMarkdown(page) {
|
|
19857
|
-
const frontmatterLines = [
|
|
19858
|
-
"---",
|
|
19859
|
-
`url: ${yamlString(page.url)}`,
|
|
19860
|
-
`title: ${yamlString(page.title)}`,
|
|
19861
|
-
`scope: ${yamlString(page.scope)}`,
|
|
19862
|
-
`routeFile: ${yamlString(page.routeFile)}`,
|
|
19863
|
-
`routeResolution: ${yamlString(page.routeResolution)}`,
|
|
19864
|
-
`generatedAt: ${yamlString(page.generatedAt)}`,
|
|
19865
|
-
`incomingLinks: ${page.incomingLinks}`,
|
|
19866
|
-
`outgoingLinks: ${page.outgoingLinks}`,
|
|
19867
|
-
`depth: ${page.depth}`,
|
|
19868
|
-
`tags: ${yamlArray(page.tags)}`,
|
|
19869
|
-
"---",
|
|
19870
|
-
""
|
|
19871
|
-
];
|
|
19872
|
-
return `${frontmatterLines.join("\n")}${normalizeMarkdown(page.markdown)}`;
|
|
19873
|
-
}
|
|
19874
|
-
function stripGeneratedAt(content) {
|
|
19875
|
-
return content.replace(/^generatedAt: .*$/m, "");
|
|
19876
|
-
}
|
|
19877
|
-
async function writeMirrorPage(statePath, scope, page) {
|
|
19878
|
-
const relative = urlPathToMirrorRelative(page.url);
|
|
19879
|
-
const outputPath = path.join(statePath, "pages", scope.scopeName, relative);
|
|
19880
|
-
await fs4.mkdir(path.dirname(outputPath), { recursive: true });
|
|
19881
|
-
const newContent = buildMirrorMarkdown(page);
|
|
19882
|
-
try {
|
|
19883
|
-
const existing = await fs4.readFile(outputPath, "utf8");
|
|
19884
|
-
if (stripGeneratedAt(existing) === stripGeneratedAt(newContent)) {
|
|
19885
|
-
return outputPath;
|
|
19886
|
-
}
|
|
19887
|
-
} catch {
|
|
19888
|
-
}
|
|
19889
|
-
await fs4.writeFile(outputPath, newContent, "utf8");
|
|
19890
|
-
return outputPath;
|
|
19891
|
-
}
|
|
19892
|
-
async function cleanMirrorForScope(statePath, scope) {
|
|
19893
|
-
const target = path.join(statePath, "pages", scope.scopeName);
|
|
19894
|
-
await fs4.rm(target, { recursive: true, force: true });
|
|
19895
|
-
await fs4.mkdir(target, { recursive: true });
|
|
19896
|
-
}
|
|
19897
19304
|
function segmentToRegex(segment) {
|
|
19898
19305
|
if (segment.startsWith("(") && segment.endsWith(")")) {
|
|
19899
19306
|
return { regex: "", score: 0 };
|
|
@@ -20054,7 +19461,7 @@ async function parseManifest(cwd, outputDir) {
|
|
|
20054
19461
|
const manifestPath = path.resolve(cwd, outputDir, "server", "manifest-full.js");
|
|
20055
19462
|
let content;
|
|
20056
19463
|
try {
|
|
20057
|
-
content = await
|
|
19464
|
+
content = await fs3.readFile(manifestPath, "utf8");
|
|
20058
19465
|
} catch {
|
|
20059
19466
|
throw new SearchSocketError(
|
|
20060
19467
|
"BUILD_MANIFEST_NOT_FOUND",
|
|
@@ -20227,7 +19634,7 @@ async function discoverPages(server, buildConfig, pipelineMaxPages) {
|
|
|
20227
19634
|
const visited = /* @__PURE__ */ new Set();
|
|
20228
19635
|
const pages = [];
|
|
20229
19636
|
const queue = [];
|
|
20230
|
-
const limit =
|
|
19637
|
+
const limit = pLimit(8);
|
|
20231
19638
|
for (const seed of seedUrls) {
|
|
20232
19639
|
const normalized = normalizeUrlPath(seed);
|
|
20233
19640
|
if (!visited.has(normalized) && !isExcluded(normalized, exclude)) {
|
|
@@ -20309,7 +19716,7 @@ async function loadBuildPages(cwd, config, maxPages) {
|
|
|
20309
19716
|
const selected = typeof maxCount === "number" ? expanded.slice(0, maxCount) : expanded;
|
|
20310
19717
|
const server = await startPreviewServer(cwd, { previewTimeout: buildConfig.previewTimeout }, logger);
|
|
20311
19718
|
try {
|
|
20312
|
-
const concurrencyLimit =
|
|
19719
|
+
const concurrencyLimit = pLimit(8);
|
|
20313
19720
|
const results = await Promise.allSettled(
|
|
20314
19721
|
selected.map(
|
|
20315
19722
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20383,7 +19790,7 @@ async function loadContentFilesPages(cwd, config, maxPages) {
|
|
|
20383
19790
|
const selected = typeof limit === "number" ? files.slice(0, limit) : files;
|
|
20384
19791
|
const pages = [];
|
|
20385
19792
|
for (const filePath of selected) {
|
|
20386
|
-
const raw = await
|
|
19793
|
+
const raw = await fs3.readFile(filePath, "utf8");
|
|
20387
19794
|
const markdown = filePath.endsWith(".md") ? raw : normalizeSvelteToMarkdown(raw);
|
|
20388
19795
|
pages.push({
|
|
20389
19796
|
url: filePathToUrl(filePath, baseDir),
|
|
@@ -20478,7 +19885,7 @@ async function loadCrawledPages(config, maxPages) {
|
|
|
20478
19885
|
const routes = await resolveRoutes(config);
|
|
20479
19886
|
const maxCount = typeof maxPages === "number" ? Math.max(0, Math.floor(maxPages)) : void 0;
|
|
20480
19887
|
const selected = typeof maxCount === "number" ? routes.slice(0, maxCount) : routes;
|
|
20481
|
-
const concurrencyLimit =
|
|
19888
|
+
const concurrencyLimit = pLimit(8);
|
|
20482
19889
|
const results = await Promise.allSettled(
|
|
20483
19890
|
selected.map(
|
|
20484
19891
|
(route) => concurrencyLimit(async () => {
|
|
@@ -20519,7 +19926,7 @@ async function loadStaticOutputPages(cwd, config, maxPages) {
|
|
|
20519
19926
|
const selected = typeof limit === "number" ? htmlFiles.slice(0, limit) : htmlFiles;
|
|
20520
19927
|
const pages = [];
|
|
20521
19928
|
for (const filePath of selected) {
|
|
20522
|
-
const html = await
|
|
19929
|
+
const html = await fs3.readFile(filePath, "utf8");
|
|
20523
19930
|
pages.push({
|
|
20524
19931
|
url: staticHtmlFileToUrl(filePath, outputDir),
|
|
20525
19932
|
html,
|
|
@@ -20582,7 +19989,7 @@ function isBlockedByRobots(urlPath, rules3) {
|
|
|
20582
19989
|
}
|
|
20583
19990
|
async function loadRobotsTxtFromDir(dir) {
|
|
20584
19991
|
try {
|
|
20585
|
-
const content = await
|
|
19992
|
+
const content = await fs3.readFile(path.join(dir, "robots.txt"), "utf8");
|
|
20586
19993
|
return parseRobotsTxt(content);
|
|
20587
19994
|
} catch {
|
|
20588
19995
|
return null;
|
|
@@ -20601,34 +20008,41 @@ async function fetchRobotsTxt(baseUrl) {
|
|
|
20601
20008
|
}
|
|
20602
20009
|
|
|
20603
20010
|
// src/indexing/pipeline.ts
|
|
20604
|
-
|
|
20605
|
-
|
|
20606
|
-
|
|
20607
|
-
|
|
20608
|
-
|
|
20011
|
+
/**
 * Build a plain-text summary of a page.
 *
 * The summary is made of, in order: `page.title`, `page.description` (when truthy),
 * `page.keywords` joined with ", " (when non-empty), and `page.markdown` with fenced
 * code blocks removed and common markdown syntax stripped. The parts are joined with
 * blank lines and the result is truncated to at most `maxChars` UTF-16 code units.
 *
 * @param {object} page - Object providing `title`, `markdown`, and optionally
 *   `description` and `keywords` (an array).
 * @param {number} [maxChars=3500] - Maximum length of the returned string.
 * @returns {string} The (possibly truncated and trimmed) summary.
 */
function buildPageSummary(page, maxChars = 3500) {
  const parts = [page.title];
  if (page.description) {
    parts.push(page.description);
  }
  if (page.keywords && page.keywords.length > 0) {
    parts.push(page.keywords.join(", "));
  }
  const plainBody = page.markdown
    .replace(/```[\s\S]*?```/g, " ") // drop fenced code blocks entirely
    .replace(/`([^`]+)`/g, "$1") // inline code -> its text
    .replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1") // links/images -> label text
    .replace(/^#{1,6}\s+/gm, "") // heading markers
    .replace(/[>*_|~\-]/g, " ") // remaining markdown punctuation
    .replace(/\s+/g, " ")
    .trim();
  if (plainBody) {
    parts.push(plainBody);
  }
  const joined = parts.join("\n\n");
  if (joined.length <= maxChars) return joined;
  let cut = maxChars;
  if (cut > 0) {
    // Never cut between the two halves of a surrogate pair: that would leave a
    // lone high surrogate (an invalid character) at the end of the summary.
    const before = joined.charCodeAt(cut - 1);
    const after = joined.charCodeAt(cut);
    const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (splitsPair) cut -= 1;
  }
  return joined.slice(0, cut).trim();
}
|
|
20609
20027
|
var IndexPipeline = class _IndexPipeline {
|
|
20610
20028
|
cwd;
|
|
20611
20029
|
config;
|
|
20612
|
-
|
|
20613
|
-
vectorStore;
|
|
20030
|
+
store;
|
|
20614
20031
|
logger;
|
|
20615
20032
|
constructor(options) {
|
|
20616
20033
|
this.cwd = options.cwd;
|
|
20617
20034
|
this.config = options.config;
|
|
20618
|
-
this.
|
|
20619
|
-
this.vectorStore = options.vectorStore;
|
|
20035
|
+
this.store = options.store;
|
|
20620
20036
|
this.logger = options.logger;
|
|
20621
20037
|
}
|
|
20622
20038
|
static async create(options = {}) {
|
|
20623
20039
|
const cwd = path.resolve(options.cwd ?? process.cwd());
|
|
20624
20040
|
const config = options.config ?? await loadConfig({ cwd, configPath: options.configPath });
|
|
20625
|
-
const
|
|
20626
|
-
const vectorStore = options.vectorStore ?? await createVectorStore(config, cwd);
|
|
20041
|
+
const store = options.store ?? await createUpstashStore(config);
|
|
20627
20042
|
return new _IndexPipeline({
|
|
20628
20043
|
cwd,
|
|
20629
20044
|
config,
|
|
20630
|
-
|
|
20631
|
-
vectorStore,
|
|
20045
|
+
store,
|
|
20632
20046
|
logger: options.logger ?? new Logger()
|
|
20633
20047
|
});
|
|
20634
20048
|
}
|
|
@@ -20648,25 +20062,17 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20648
20062
|
stageTimingsMs[name] = Math.round(hrTimeMs(start));
|
|
20649
20063
|
};
|
|
20650
20064
|
const scope = resolveScope(this.config, options.scopeOverride);
|
|
20651
|
-
|
|
20065
|
+
ensureStateDirs(this.cwd, this.config.state.dir);
|
|
20652
20066
|
const sourceMode = options.sourceOverride ?? this.config.source.mode;
|
|
20653
|
-
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode},
|
|
20067
|
+
this.logger.info(`Indexing scope "${scope.scopeName}" (source: ${sourceMode}, backend: upstash-search)`);
|
|
20654
20068
|
if (options.force) {
|
|
20655
20069
|
this.logger.info("Force mode enabled \u2014 full rebuild");
|
|
20656
|
-
await cleanMirrorForScope(statePath, scope);
|
|
20657
20070
|
}
|
|
20658
20071
|
if (options.dryRun) {
|
|
20659
20072
|
this.logger.info("Dry run \u2014 no writes will be performed");
|
|
20660
20073
|
}
|
|
20661
20074
|
const manifestStart = stageStart();
|
|
20662
|
-
const existingHashes = await this.
|
|
20663
|
-
const existingModelId = await this.vectorStore.getScopeModelId(scope);
|
|
20664
|
-
if (existingModelId && existingModelId !== this.config.embeddings.model && !options.force) {
|
|
20665
|
-
throw new SearchSocketError(
|
|
20666
|
-
"EMBEDDING_MODEL_MISMATCH",
|
|
20667
|
-
`Scope ${scope.scopeName} uses model ${existingModelId}. Re-run with --force to migrate.`
|
|
20668
|
-
);
|
|
20669
|
-
}
|
|
20075
|
+
const existingHashes = options.force ? /* @__PURE__ */ new Map() : await this.store.getContentHashes(scope);
|
|
20670
20076
|
stageEnd("manifest", manifestStart);
|
|
20671
20077
|
this.logger.debug(`Manifest: ${existingHashes.size} existing chunk hashes loaded`);
|
|
20672
20078
|
const sourceStart = stageStart();
|
|
@@ -20795,9 +20201,9 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20795
20201
|
}
|
|
20796
20202
|
stageEnd("links", linkStart);
|
|
20797
20203
|
this.logger.debug(`Link analysis: computed incoming links for ${incomingLinkCount.size} pages (${stageTimingsMs["links"]}ms)`);
|
|
20798
|
-
const
|
|
20799
|
-
this.logger.info("
|
|
20800
|
-
const
|
|
20204
|
+
const pagesStart = stageStart();
|
|
20205
|
+
this.logger.info("Building indexed pages...");
|
|
20206
|
+
const pages = [];
|
|
20801
20207
|
let routeExact = 0;
|
|
20802
20208
|
let routeBestEffort = 0;
|
|
20803
20209
|
const precomputedRoutes = /* @__PURE__ */ new Map();
|
|
@@ -20826,7 +20232,7 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20826
20232
|
} else {
|
|
20827
20233
|
routeExact += 1;
|
|
20828
20234
|
}
|
|
20829
|
-
const
|
|
20235
|
+
const indexedPage = {
|
|
20830
20236
|
url: page.url,
|
|
20831
20237
|
title: page.title,
|
|
20832
20238
|
scope: scope.scopeName,
|
|
@@ -20841,35 +20247,38 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20841
20247
|
description: page.description,
|
|
20842
20248
|
keywords: page.keywords
|
|
20843
20249
|
};
|
|
20844
|
-
|
|
20845
|
-
|
|
20846
|
-
await writeMirrorPage(statePath, scope, mirror);
|
|
20847
|
-
}
|
|
20848
|
-
this.logger.event("markdown_written", { url: page.url });
|
|
20250
|
+
pages.push(indexedPage);
|
|
20251
|
+
this.logger.event("page_indexed", { url: page.url });
|
|
20849
20252
|
}
|
|
20850
20253
|
if (!options.dryRun) {
|
|
20851
|
-
const pageRecords =
|
|
20852
|
-
|
|
20853
|
-
|
|
20854
|
-
|
|
20855
|
-
|
|
20856
|
-
|
|
20857
|
-
|
|
20858
|
-
|
|
20859
|
-
|
|
20860
|
-
|
|
20861
|
-
|
|
20862
|
-
|
|
20863
|
-
|
|
20864
|
-
|
|
20865
|
-
|
|
20866
|
-
|
|
20254
|
+
const pageRecords = pages.map((p) => {
|
|
20255
|
+
const summary = buildPageSummary(p);
|
|
20256
|
+
return {
|
|
20257
|
+
url: p.url,
|
|
20258
|
+
title: p.title,
|
|
20259
|
+
markdown: p.markdown,
|
|
20260
|
+
projectId: scope.projectId,
|
|
20261
|
+
scopeName: scope.scopeName,
|
|
20262
|
+
routeFile: p.routeFile,
|
|
20263
|
+
routeResolution: p.routeResolution,
|
|
20264
|
+
incomingLinks: p.incomingLinks,
|
|
20265
|
+
outgoingLinks: p.outgoingLinks,
|
|
20266
|
+
depth: p.depth,
|
|
20267
|
+
tags: p.tags,
|
|
20268
|
+
indexedAt: p.generatedAt,
|
|
20269
|
+
summary,
|
|
20270
|
+
description: p.description,
|
|
20271
|
+
keywords: p.keywords
|
|
20272
|
+
};
|
|
20273
|
+
});
|
|
20274
|
+
await this.store.deletePages(scope);
|
|
20275
|
+
await this.store.upsertPages(pageRecords, scope);
|
|
20867
20276
|
}
|
|
20868
|
-
stageEnd("
|
|
20869
|
-
this.logger.info(`
|
|
20277
|
+
stageEnd("pages", pagesStart);
|
|
20278
|
+
this.logger.info(`Indexed ${pages.length} page${pages.length === 1 ? "" : "s"} (${routeExact} exact, ${routeBestEffort} best-effort) (${stageTimingsMs["pages"]}ms)`);
|
|
20870
20279
|
const chunkStart = stageStart();
|
|
20871
20280
|
this.logger.info("Chunking pages...");
|
|
20872
|
-
let chunks =
|
|
20281
|
+
let chunks = pages.flatMap((page) => chunkPage(page, this.config, scope));
|
|
20873
20282
|
const maxChunks = typeof options.maxChunks === "number" ? Math.max(0, Math.floor(options.maxChunks)) : void 0;
|
|
20874
20283
|
if (typeof maxChunks === "number") {
|
|
20875
20284
|
chunks = chunks.slice(0, maxChunks);
|
|
@@ -20901,125 +20310,59 @@ var IndexPipeline = class _IndexPipeline {
|
|
|
20901
20310
|
});
|
|
20902
20311
|
const deletes = [...existingHashes.keys()].filter((chunkKey) => !currentChunkMap.has(chunkKey));
|
|
20903
20312
|
this.logger.info(`Changes detected: ${changedChunks.length} changed, ${deletes.length} deleted, ${chunks.length - changedChunks.length} unchanged`);
|
|
20904
|
-
const
|
|
20905
|
-
|
|
20906
|
-
for (const chunk of changedChunks) {
|
|
20907
|
-
chunkTokenEstimates.set(chunk.chunkKey, this.embeddings.estimateTokens(buildEmbeddingText(chunk, this.config.chunking.prependTitle)));
|
|
20908
|
-
}
|
|
20909
|
-
const estimatedTokens = changedChunks.reduce(
|
|
20910
|
-
(sum, chunk) => sum + (chunkTokenEstimates.get(chunk.chunkKey) ?? 0),
|
|
20911
|
-
0
|
|
20912
|
-
);
|
|
20913
|
-
const pricePer1k = this.config.embeddings.pricePer1kTokens ?? EMBEDDING_PRICE_PER_1K_TOKENS_USD[this.config.embeddings.model] ?? DEFAULT_EMBEDDING_PRICE_PER_1K;
|
|
20914
|
-
const estimatedCostUSD = estimatedTokens / 1e3 * pricePer1k;
|
|
20915
|
-
let newEmbeddings = 0;
|
|
20916
|
-
const vectorsByChunk = /* @__PURE__ */ new Map();
|
|
20313
|
+
const upsertStart = stageStart();
|
|
20314
|
+
let documentsUpserted = 0;
|
|
20917
20315
|
if (!options.dryRun && changedChunks.length > 0) {
|
|
20918
|
-
this.logger.info(`
|
|
20919
|
-
const
|
|
20920
|
-
|
|
20921
|
-
|
|
20922
|
-
|
|
20923
|
-
|
|
20924
|
-
|
|
20925
|
-
|
|
20926
|
-
|
|
20927
|
-
|
|
20928
|
-
);
|
|
20929
|
-
|
|
20930
|
-
for (let i = 0; i < changedChunks.length; i += 1) {
|
|
20931
|
-
const chunk = changedChunks[i];
|
|
20932
|
-
const embedding = embeddings[i];
|
|
20933
|
-
if (!chunk || !embedding || embedding.length === 0 || embedding.some((value) => !Number.isFinite(value))) {
|
|
20934
|
-
throw new SearchSocketError(
|
|
20935
|
-
"VECTOR_BACKEND_UNAVAILABLE",
|
|
20936
|
-
`Embedding provider returned an invalid vector for chunk index ${i}.`
|
|
20937
|
-
);
|
|
20938
|
-
}
|
|
20939
|
-
vectorsByChunk.set(chunk.chunkKey, embedding);
|
|
20940
|
-
newEmbeddings += 1;
|
|
20941
|
-
this.logger.event("embedded_new", { chunkKey: chunk.chunkKey });
|
|
20942
|
-
}
|
|
20943
|
-
}
|
|
20944
|
-
stageEnd("embedding", embedStart);
|
|
20945
|
-
if (changedChunks.length > 0) {
|
|
20946
|
-
this.logger.info(`Embedded ${newEmbeddings} chunk${newEmbeddings === 1 ? "" : "s"} (${stageTimingsMs["embedding"]}ms)`);
|
|
20947
|
-
} else {
|
|
20948
|
-
this.logger.info("No chunks to embed \u2014 all up to date");
|
|
20949
|
-
}
|
|
20950
|
-
const syncStart = stageStart();
|
|
20951
|
-
if (!options.dryRun) {
|
|
20952
|
-
this.logger.info("Syncing vectors...");
|
|
20953
|
-
const upserts = [];
|
|
20954
|
-
for (const chunk of changedChunks) {
|
|
20955
|
-
const vector = vectorsByChunk.get(chunk.chunkKey);
|
|
20956
|
-
if (!vector) {
|
|
20957
|
-
continue;
|
|
20958
|
-
}
|
|
20959
|
-
upserts.push({
|
|
20316
|
+
this.logger.info(`Upserting ${changedChunks.length} chunk${changedChunks.length === 1 ? "" : "s"} to Upstash Search...`);
|
|
20317
|
+
const UPSTASH_CONTENT_LIMIT = 4096;
|
|
20318
|
+
const docs = changedChunks.map((chunk) => {
|
|
20319
|
+
const title = chunk.title;
|
|
20320
|
+
const sectionTitle = chunk.sectionTitle ?? "";
|
|
20321
|
+
const url = chunk.url;
|
|
20322
|
+
const tags = chunk.tags.join(",");
|
|
20323
|
+
const headingPath = chunk.headingPath.join(" > ");
|
|
20324
|
+
const otherFieldsLen = title.length + sectionTitle.length + url.length + tags.length + headingPath.length;
|
|
20325
|
+
const textBudget = Math.max(500, UPSTASH_CONTENT_LIMIT - otherFieldsLen - 50);
|
|
20326
|
+
const text = buildEmbeddingText(chunk, this.config.chunking.prependTitle).slice(0, textBudget);
|
|
20327
|
+
return {
|
|
20960
20328
|
id: chunk.chunkKey,
|
|
20961
|
-
|
|
20329
|
+
content: { title, sectionTitle, text, url, tags, headingPath },
|
|
20962
20330
|
metadata: {
|
|
20963
20331
|
projectId: scope.projectId,
|
|
20964
20332
|
scopeName: scope.scopeName,
|
|
20965
|
-
url: chunk.url,
|
|
20966
20333
|
path: chunk.path,
|
|
20967
|
-
title: chunk.title,
|
|
20968
|
-
sectionTitle: chunk.sectionTitle ?? "",
|
|
20969
|
-
headingPath: chunk.headingPath,
|
|
20970
20334
|
snippet: chunk.snippet,
|
|
20971
|
-
chunkText: chunk.chunkText.slice(0, 4e3),
|
|
20972
20335
|
ordinal: chunk.ordinal,
|
|
20973
20336
|
contentHash: chunk.contentHash,
|
|
20974
|
-
modelId: this.config.embeddings.model,
|
|
20975
20337
|
depth: chunk.depth,
|
|
20976
20338
|
incomingLinks: chunk.incomingLinks,
|
|
20977
20339
|
routeFile: chunk.routeFile,
|
|
20978
|
-
|
|
20979
|
-
|
|
20980
|
-
keywords: chunk.keywords
|
|
20340
|
+
description: chunk.description ?? "",
|
|
20341
|
+
keywords: (chunk.keywords ?? []).join(",")
|
|
20981
20342
|
}
|
|
20982
|
-
}
|
|
20983
|
-
}
|
|
20984
|
-
if (upserts.length > 0) {
|
|
20985
|
-
await this.vectorStore.upsert(upserts, scope);
|
|
20986
|
-
this.logger.event("upserted", { count: upserts.length });
|
|
20987
|
-
}
|
|
20988
|
-
if (deletes.length > 0) {
|
|
20989
|
-
await this.vectorStore.deleteByIds(deletes, scope);
|
|
20990
|
-
this.logger.event("deleted", { count: deletes.length });
|
|
20991
|
-
}
|
|
20992
|
-
}
|
|
20993
|
-
stageEnd("sync", syncStart);
|
|
20994
|
-
this.logger.debug(`Sync complete (${stageTimingsMs["sync"]}ms)`);
|
|
20995
|
-
const finalizeStart = stageStart();
|
|
20996
|
-
if (!options.dryRun) {
|
|
20997
|
-
const scopeInfo = {
|
|
20998
|
-
projectId: scope.projectId,
|
|
20999
|
-
scopeName: scope.scopeName,
|
|
21000
|
-
modelId: this.config.embeddings.model,
|
|
21001
|
-
lastIndexedAt: nowIso(),
|
|
21002
|
-
vectorCount: chunks.length,
|
|
21003
|
-
lastEstimateTokens: estimatedTokens,
|
|
21004
|
-
lastEstimateCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
21005
|
-
lastEstimateChangedChunks: changedChunks.length
|
|
21006
|
-
};
|
|
21007
|
-
await this.vectorStore.recordScope(scopeInfo);
|
|
21008
|
-
this.logger.event("registry_updated", {
|
|
21009
|
-
scope: scope.scopeName,
|
|
21010
|
-
vectorCount: chunks.length
|
|
20343
|
+
};
|
|
21011
20344
|
});
|
|
20345
|
+
await this.store.upsertChunks(docs, scope);
|
|
20346
|
+
documentsUpserted = docs.length;
|
|
20347
|
+
this.logger.event("upserted", { count: docs.length });
|
|
20348
|
+
}
|
|
20349
|
+
if (!options.dryRun && deletes.length > 0) {
|
|
20350
|
+
await this.store.deleteByIds(deletes, scope);
|
|
20351
|
+
this.logger.event("deleted", { count: deletes.length });
|
|
20352
|
+
}
|
|
20353
|
+
stageEnd("upsert", upsertStart);
|
|
20354
|
+
if (changedChunks.length > 0) {
|
|
20355
|
+
this.logger.info(`Upserted ${documentsUpserted} document${documentsUpserted === 1 ? "" : "s"} (${stageTimingsMs["upsert"]}ms)`);
|
|
20356
|
+
} else {
|
|
20357
|
+
this.logger.info("No chunks to upsert \u2014 all up to date");
|
|
21012
20358
|
}
|
|
21013
|
-
stageEnd("finalize", finalizeStart);
|
|
21014
20359
|
this.logger.info("Done.");
|
|
21015
20360
|
return {
|
|
21016
|
-
pagesProcessed:
|
|
20361
|
+
pagesProcessed: pages.length,
|
|
21017
20362
|
chunksTotal: chunks.length,
|
|
21018
20363
|
chunksChanged: changedChunks.length,
|
|
21019
|
-
|
|
20364
|
+
documentsUpserted,
|
|
21020
20365
|
deletes: deletes.length,
|
|
21021
|
-
estimatedTokens,
|
|
21022
|
-
estimatedCostUSD: Number(estimatedCostUSD.toFixed(8)),
|
|
21023
20366
|
routeExact,
|
|
21024
20367
|
routeBestEffort,
|
|
21025
20368
|
stageTimingsMs
|
|
@@ -21050,30 +20393,11 @@ function shouldRunAutoIndex(options) {
|
|
|
21050
20393
|
}
|
|
21051
20394
|
return false;
|
|
21052
20395
|
}
|
|
21053
|
-
function searchsocketViteConfig() {
|
|
21054
|
-
return {
|
|
21055
|
-
name: "searchsocket:config",
|
|
21056
|
-
config() {
|
|
21057
|
-
return {
|
|
21058
|
-
ssr: {
|
|
21059
|
-
external: ["@libsql/client", "libsql"]
|
|
21060
|
-
}
|
|
21061
|
-
};
|
|
21062
|
-
}
|
|
21063
|
-
};
|
|
21064
|
-
}
|
|
21065
20396
|
function searchsocketVitePlugin(options = {}) {
|
|
21066
20397
|
let executed = false;
|
|
21067
20398
|
let running = false;
|
|
21068
20399
|
return {
|
|
21069
20400
|
name: "searchsocket:auto-index",
|
|
21070
|
-
config() {
|
|
21071
|
-
return {
|
|
21072
|
-
ssr: {
|
|
21073
|
-
external: ["@libsql/client", "libsql"]
|
|
21074
|
-
}
|
|
21075
|
-
};
|
|
21076
|
-
},
|
|
21077
20401
|
async closeBundle() {
|
|
21078
20402
|
if (executed || running) {
|
|
21079
20403
|
return;
|
|
@@ -21101,9 +20425,8 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21101
20425
|
verbose: options.verbose
|
|
21102
20426
|
});
|
|
21103
20427
|
logger3.info(
|
|
21104
|
-
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged}
|
|
20428
|
+
`[searchsocket] indexed pages=${stats.pagesProcessed} chunks=${stats.chunksTotal} changed=${stats.chunksChanged} upserted=${stats.documentsUpserted}`
|
|
21105
20429
|
);
|
|
21106
|
-
logger3.info("[searchsocket] markdown mirror written under .searchsocket/pages/<scope> (safe to commit for content workflows).");
|
|
21107
20430
|
executed = true;
|
|
21108
20431
|
} finally {
|
|
21109
20432
|
running = false;
|
|
@@ -21111,6 +20434,186 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21111
20434
|
}
|
|
21112
20435
|
};
|
|
21113
20436
|
}
|
|
20437
|
+
|
|
20438
|
+
// src/sveltekit/scroll-to-text.ts
|
|
20439
|
+
var HIGHLIGHT_CLASS = "ssk-highlight";
|
|
20440
|
+
var HIGHLIGHT_DURATION = 2e3;
|
|
20441
|
+
var HIGHLIGHT_MARKER_ATTR = "data-ssk-highlight-marker";
|
|
20442
|
+
var HIGHLIGHT_NAME = "ssk-search-match";
|
|
20443
|
+
var styleInjected = false;
|
|
20444
|
+
function ensureHighlightStyle() {
|
|
20445
|
+
if (styleInjected || typeof document === "undefined") return;
|
|
20446
|
+
styleInjected = true;
|
|
20447
|
+
const style = document.createElement("style");
|
|
20448
|
+
style.textContent = `
|
|
20449
|
+
@keyframes ssk-highlight-fade {
|
|
20450
|
+
0% { background-color: rgba(16, 185, 129, 0.18); }
|
|
20451
|
+
100% { background-color: transparent; }
|
|
20452
|
+
}
|
|
20453
|
+
.${HIGHLIGHT_CLASS} {
|
|
20454
|
+
animation: ssk-highlight-fade ${HIGHLIGHT_DURATION}ms ease-out forwards;
|
|
20455
|
+
border-radius: 4px;
|
|
20456
|
+
}
|
|
20457
|
+
::highlight(${HIGHLIGHT_NAME}) {
|
|
20458
|
+
background-color: rgba(16, 185, 129, 0.18);
|
|
20459
|
+
}
|
|
20460
|
+
`;
|
|
20461
|
+
document.head.appendChild(style);
|
|
20462
|
+
}
|
|
20463
|
+
var IGNORED_TAGS = /* @__PURE__ */ new Set(["SCRIPT", "STYLE", "NOSCRIPT", "TEMPLATE"]);
|
|
20464
|
+
function buildTextMap(root2) {
|
|
20465
|
+
const walker = document.createTreeWalker(root2, NodeFilter.SHOW_TEXT, {
|
|
20466
|
+
acceptNode(node) {
|
|
20467
|
+
const parent = node.parentElement;
|
|
20468
|
+
if (!parent || IGNORED_TAGS.has(parent.tagName)) return NodeFilter.FILTER_REJECT;
|
|
20469
|
+
return NodeFilter.FILTER_ACCEPT;
|
|
20470
|
+
}
|
|
20471
|
+
});
|
|
20472
|
+
const chunks = [];
|
|
20473
|
+
let text = "";
|
|
20474
|
+
let current;
|
|
20475
|
+
while (current = walker.nextNode()) {
|
|
20476
|
+
const value = current.nodeValue ?? "";
|
|
20477
|
+
if (!value) continue;
|
|
20478
|
+
chunks.push({ node: current, start: text.length, end: text.length + value.length });
|
|
20479
|
+
text += value;
|
|
20480
|
+
}
|
|
20481
|
+
return { text, chunks };
|
|
20482
|
+
}
|
|
20483
|
+
/**
 * Lower-case `text`, collapse every whitespace run to a single space, and trim the ends.
 *
 * @param {string} text
 * @returns {string}
 */
function normalize(text) {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
20486
|
+
/**
 * Backslash-escape regular-expression metacharacters in `value` so it can be
 * embedded in a RegExp source as literal text.
 *
 * @param {string} value
 * @returns {string}
 */
function escapeRegExp(value) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
20489
|
+
/**
 * Build the strict matcher for a search needle.
 *
 * The needle is split into runs of letters/digits. When at least one such
 * token exists, the regex matches the tokens in order, separated by one or
 * more non-letter/non-digit characters (flags "iu"). When the needle has no
 * tokens but is non-empty, the escaped needle itself is matched with its
 * whitespace runs loosened to `\s+` (flag "i").
 *
 * @param {string} needle
 * @returns {RegExp | null} null only for an empty needle.
 */
function buildNeedleRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter((part) => part.length > 0);
  if (words.length > 0) {
    // A single word joins to itself, so one branch covers both cases.
    const source = words.map((word) => escapeRegExp(word)).join("[^\\p{L}\\p{N}]+");
    return new RegExp(source, "iu");
  }
  return needle
    ? new RegExp(escapeRegExp(needle).replace(/\s+/g, "\\s+"), "i")
    : null;
}
|
|
20501
|
+
/**
 * Build the fallback matcher for a multi-token needle.
 *
 * Same tokenisation as the strict matcher, but the separator between tokens
 * is optional (zero or more non-letter/non-digit characters).
 *
 * @param {string} needle
 * @returns {RegExp | null} null when the needle has fewer than two tokens.
 */
function buildLenientRegex(needle) {
  const words = needle.split(/[^\p{L}\p{N}]+/u).filter((part) => part.length > 0);
  return words.length > 1
    ? new RegExp(words.map((word) => escapeRegExp(word)).join("[^\\p{L}\\p{N}]*"), "iu")
    : null;
}
|
|
20507
|
+
/**
 * Locate `needle` inside `fullText`.
 *
 * Tries the strict regex first and only builds/tries the lenient regex when
 * the strict one is unavailable or does not match.
 *
 * @param {string} fullText
 * @param {string} needle
 * @returns {{ start: number, end: number } | null} Offsets of the first match
 *   (end exclusive), or null when neither regex matches.
 */
function findMatch(fullText, needle) {
  const builders = [buildNeedleRegex, buildLenientRegex];
  for (const build of builders) {
    const regex = build(needle);
    if (!regex) continue;
    const hit = regex.exec(fullText);
    if (hit && typeof hit.index === "number") {
      return { start: hit.index, end: hit.index + hit[0].length };
    }
  }
  return null;
}
|
|
20524
|
+
/**
 * Convert text-map offsets into a DOM Range.
 *
 * The start chunk is the first chunk whose `[start, end)` contains
 * `offsets.start`; the end chunk is one whose `(start, end]` contains
 * `offsets.end`. Scanning stops as soon as both are known.
 *
 * @param {{ chunks: Array<{ node: Node, start: number, end: number }> }} map
 * @param {{ start: number, end: number }} offsets
 * @returns {Range | null} null when either boundary falls outside every chunk.
 */
function resolveRange(map, offsets) {
  const { start, end } = offsets;
  let first;
  let last;
  for (let i = 0; i < map.chunks.length && !(first && last); i += 1) {
    const chunk = map.chunks[i];
    const holdsStart = start >= chunk.start && start < chunk.end;
    const holdsEnd = end > chunk.start && end <= chunk.end;
    if (!first && holdsStart) first = chunk;
    if (holdsEnd) last = chunk;
  }
  if (!first || !last) return null;
  const range = document.createRange();
  // Range boundaries are relative to the owning text node, not the full text.
  range.setStart(first.node, start - first.start);
  range.setEnd(last.node, end - last.start);
  return range;
}
|
|
20542
|
+
/**
 * Report whether the CSS Custom Highlight API can be used.
 *
 * Both pieces used by the CSS highlight path are checked: the `CSS.highlights`
 * registry and the global `Highlight` constructor. Checking only the registry
 * would let a partial implementation through and make `new Highlight(...)`
 * throw instead of falling back to DOM-based highlighting.
 *
 * @returns {boolean}
 */
function hasCustomHighlightAPI() {
  return (
    typeof CSS !== "undefined" &&
    typeof CSS.highlights !== "undefined" &&
    typeof globalThis.Highlight === "function"
  );
}
|
|
20545
|
+
// Handle of the pending "remove highlight" timeout, or null when none is scheduled.
var highlightTimer = null;

/**
 * Highlight `range` through the CSS Custom Highlight API.
 *
 * Registers a Highlight for the range under HIGHLIGHT_NAME, cancels any
 * previously scheduled removal, and schedules removal of the registration
 * after HIGHLIGHT_DURATION milliseconds.
 *
 * @param {Range} range
 */
function highlightWithCSS(range) {
  ensureHighlightStyle();
  CSS.highlights.set(HIGHLIGHT_NAME, new globalThis.Highlight(range));
  if (highlightTimer) {
    clearTimeout(highlightTimer);
  }
  const clearHighlight = () => {
    CSS.highlights.delete(HIGHLIGHT_NAME);
    highlightTimer = null;
  };
  highlightTimer = setTimeout(clearHighlight, HIGHLIGHT_DURATION);
}
|
|
20556
|
+
/**
 * Remove a highlight wrapper element while keeping its contents in place.
 *
 * Each child of `marker` is moved in front of it, the marker is removed, and,
 * when the parent is an Element, `normalize()` is called on the parent.
 * Does nothing if the marker is no longer connected or has no parent.
 *
 * @param {Element} marker
 */
function unwrapMarker(marker) {
  const parent = marker.isConnected ? marker.parentNode : null;
  if (!parent) return;
  for (let child = marker.firstChild; child; child = marker.firstChild) {
    parent.insertBefore(child, marker);
  }
  parent.removeChild(marker);
  if (parent instanceof Element) {
    parent.normalize();
  }
}
|
|
20564
|
+
/**
 * Highlight `range` by modifying the DOM.
 *
 * First tries to wrap the range in a temporary <span> carrying HIGHLIGHT_CLASS
 * and HIGHLIGHT_MARKER_ATTR; the wrapper is unwrapped after HIGHLIGHT_DURATION
 * ms. If that throws, HIGHLIGHT_CLASS is instead added to the range's common
 * ancestor element (or that node's parent element) and removed after the same
 * delay.
 *
 * @param {Range} range
 * @returns {Element} The wrapper span, the fallback element that received the
 *   class, or `document.body` when no fallback element exists.
 */
function highlightWithDOM(range) {
  ensureHighlightStyle();
  try {
    const wrapper = document.createElement("span");
    wrapper.classList.add(HIGHLIGHT_CLASS);
    wrapper.setAttribute(HIGHLIGHT_MARKER_ATTR, "true");
    range.surroundContents(wrapper);
    setTimeout(() => unwrapMarker(wrapper), HIGHLIGHT_DURATION);
    return wrapper;
  } catch {
    const container = range.commonAncestorContainer;
    const host = container instanceof Element ? container : container.parentElement;
    if (!host) {
      return document.body;
    }
    host.classList.add(HIGHLIGHT_CLASS);
    setTimeout(() => host.classList.remove(HIGHLIGHT_CLASS), HIGHLIGHT_DURATION);
    return host;
  }
}
|
|
20584
|
+
/**
 * Smooth-scroll the window so the top of `range` sits one third of the
 * viewport height below the top of the viewport.
 *
 * @param {Range} range
 */
function scrollToRange(range) {
  const { top } = range.getBoundingClientRect();
  const target = window.scrollY + top - window.innerHeight / 3;
  window.scrollTo({ top: target, behavior: "smooth" });
}
|
|
20591
|
+
/**
 * Smooth-scroll `el` to the start of the viewport, provided it exposes a
 * `scrollIntoView` function; otherwise do nothing.
 *
 * @param {Element} el
 */
function scrollIntoViewIfPossible(el) {
  if (typeof el.scrollIntoView !== "function") return;
  el.scrollIntoView({ behavior: "smooth", block: "start" });
}
|
|
20596
|
+
/**
 * Scroll to and briefly highlight the text named in the destination URL.
 *
 * Reads the `_sskt` query parameter (falling back to `_ssk`) from
 * `navigation.to.url`, normalises it, and looks for it in the text of
 * `document.body`. On a match, the range is highlighted with the CSS Custom
 * Highlight API when available, otherwise via the DOM, and scrolled into view.
 * Returns silently when there is no `document`, no parameter, or no match.
 *
 * @param {{ to?: { url: URL } | null }} navigation - Object whose `to.url.searchParams` is read.
 * @returns {void}
 */
function searchsocketScrollToText(navigation) {
  if (typeof document === "undefined") return;

  const params = navigation.to?.url.searchParams;
  const raw = params?.get("_sskt") ?? params?.get("_ssk");
  const needle = raw ? normalize(raw) : "";
  if (!needle) return;

  const map = buildTextMap(document.body);
  const offsets = findMatch(map.text, needle);
  const range = offsets ? resolveRange(map, offsets) : null;
  if (!range) return;

  if (!hasCustomHighlightAPI()) {
    const marker = highlightWithDOM(range);
    const target = typeof marker.scrollIntoView === "function" ? marker : marker.parentElement;
    if (target) scrollIntoViewIfPossible(target);
    return;
  }
  highlightWithCSS(range);
  scrollToRange(range);
}
|
|
21114
20617
|
/*! Bundled license information:
|
|
21115
20618
|
|
|
21116
20619
|
@mixmark-io/domino/lib/style_parser.js:
|
|
@@ -21123,6 +20626,6 @@ function searchsocketVitePlugin(options = {}) {
|
|
|
21123
20626
|
*)
|
|
21124
20627
|
*/
|
|
21125
20628
|
|
|
21126
|
-
export { searchsocketHandle,
|
|
20629
|
+
export { searchsocketHandle, searchsocketScrollToText, searchsocketVitePlugin };
|
|
21127
20630
|
//# sourceMappingURL=sveltekit.js.map
|
|
21128
20631
|
//# sourceMappingURL=sveltekit.js.map
|