searchsocket 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -42
- package/dist/cli.js +348 -111
- package/dist/client.d.cts +1 -1
- package/dist/client.d.ts +1 -1
- package/dist/index.cjs +367 -104
- package/dist/index.d.cts +20 -3
- package/dist/index.d.ts +20 -3
- package/dist/index.js +365 -103
- package/dist/sveltekit.cjs +350 -104
- package/dist/sveltekit.d.cts +8 -2
- package/dist/sveltekit.d.ts +8 -2
- package/dist/sveltekit.js +349 -102
- package/dist/{types-D1K46vwd.d.cts → types-DAXk6A3Y.d.cts} +25 -13
- package/dist/{types-D1K46vwd.d.ts → types-DAXk6A3Y.d.ts} +25 -13
- package/package.json +3 -3
- package/dist/cli.js.map +0 -1
- package/dist/client.cjs.map +0 -1
- package/dist/client.js.map +0 -1
- package/dist/index.cjs.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/sveltekit.cjs.map +0 -1
- package/dist/sveltekit.js.map +0 -1
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
type ScopeMode = "fixed" | "git" | "env";
|
|
2
2
|
type SourceMode = "static-output" | "crawl" | "content-files" | "build";
|
|
3
|
-
type EmbeddingProvider = "
|
|
4
|
-
type RerankProvider = "none" | "jina";
|
|
3
|
+
type EmbeddingProvider = "jina";
|
|
5
4
|
interface SearchSocketConfig {
|
|
6
5
|
project?: {
|
|
7
6
|
id?: string;
|
|
@@ -31,6 +30,10 @@ interface SearchSocketConfig {
|
|
|
31
30
|
paramValues?: Record<string, string[]>;
|
|
32
31
|
exclude?: string[];
|
|
33
32
|
previewTimeout?: number;
|
|
33
|
+
discover?: boolean;
|
|
34
|
+
seedUrls?: string[];
|
|
35
|
+
maxPages?: number;
|
|
36
|
+
maxDepth?: number;
|
|
34
37
|
};
|
|
35
38
|
};
|
|
36
39
|
extract?: {
|
|
@@ -59,6 +62,7 @@ interface SearchSocketConfig {
|
|
|
59
62
|
embeddings?: {
|
|
60
63
|
provider?: EmbeddingProvider;
|
|
61
64
|
model?: string;
|
|
65
|
+
apiKey?: string;
|
|
62
66
|
apiKeyEnv?: string;
|
|
63
67
|
batchSize?: number;
|
|
64
68
|
concurrency?: number;
|
|
@@ -67,18 +71,17 @@ interface SearchSocketConfig {
|
|
|
67
71
|
vector?: {
|
|
68
72
|
dimension?: number;
|
|
69
73
|
turso?: {
|
|
74
|
+
url?: string;
|
|
75
|
+
authToken?: string;
|
|
70
76
|
urlEnv?: string;
|
|
71
77
|
authTokenEnv?: string;
|
|
72
78
|
localPath?: string;
|
|
73
79
|
};
|
|
74
80
|
};
|
|
75
81
|
rerank?: {
|
|
76
|
-
|
|
82
|
+
enabled?: boolean;
|
|
77
83
|
topN?: number;
|
|
78
|
-
|
|
79
|
-
apiKeyEnv?: string;
|
|
80
|
-
model?: string;
|
|
81
|
-
};
|
|
84
|
+
model?: string;
|
|
82
85
|
};
|
|
83
86
|
ranking?: {
|
|
84
87
|
enableIncomingLinkBoost?: boolean;
|
|
@@ -87,6 +90,7 @@ interface SearchSocketConfig {
|
|
|
87
90
|
aggregationCap?: number;
|
|
88
91
|
aggregationDecay?: number;
|
|
89
92
|
minChunkScoreRatio?: number;
|
|
93
|
+
minScore?: number;
|
|
90
94
|
weights?: {
|
|
91
95
|
incomingLinks?: number;
|
|
92
96
|
depth?: number;
|
|
@@ -146,6 +150,10 @@ interface ResolvedSearchSocketConfig {
|
|
|
146
150
|
paramValues: Record<string, string[]>;
|
|
147
151
|
exclude: string[];
|
|
148
152
|
previewTimeout: number;
|
|
153
|
+
discover: boolean;
|
|
154
|
+
seedUrls: string[];
|
|
155
|
+
maxPages: number;
|
|
156
|
+
maxDepth: number;
|
|
149
157
|
};
|
|
150
158
|
};
|
|
151
159
|
extract: {
|
|
@@ -174,6 +182,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
174
182
|
embeddings: {
|
|
175
183
|
provider: EmbeddingProvider;
|
|
176
184
|
model: string;
|
|
185
|
+
apiKey?: string;
|
|
177
186
|
apiKeyEnv: string;
|
|
178
187
|
batchSize: number;
|
|
179
188
|
concurrency: number;
|
|
@@ -182,18 +191,17 @@ interface ResolvedSearchSocketConfig {
|
|
|
182
191
|
vector: {
|
|
183
192
|
dimension?: number;
|
|
184
193
|
turso: {
|
|
194
|
+
url?: string;
|
|
195
|
+
authToken?: string;
|
|
185
196
|
urlEnv: string;
|
|
186
197
|
authTokenEnv: string;
|
|
187
198
|
localPath: string;
|
|
188
199
|
};
|
|
189
200
|
};
|
|
190
201
|
rerank: {
|
|
191
|
-
|
|
202
|
+
enabled: boolean;
|
|
192
203
|
topN: number;
|
|
193
|
-
|
|
194
|
-
apiKeyEnv: string;
|
|
195
|
-
model: string;
|
|
196
|
-
};
|
|
204
|
+
model: string;
|
|
197
205
|
};
|
|
198
206
|
ranking: {
|
|
199
207
|
enableIncomingLinkBoost: boolean;
|
|
@@ -202,6 +210,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
202
210
|
aggregationCap: number;
|
|
203
211
|
aggregationDecay: number;
|
|
204
212
|
minChunkScoreRatio: number;
|
|
213
|
+
minScore: number;
|
|
205
214
|
weights: {
|
|
206
215
|
incomingLinks: number;
|
|
207
216
|
depth: number;
|
|
@@ -265,6 +274,8 @@ interface VectorRecord {
|
|
|
265
274
|
sectionTitle: string;
|
|
266
275
|
headingPath: string[];
|
|
267
276
|
snippet: string;
|
|
277
|
+
chunkText: string;
|
|
278
|
+
ordinal: number;
|
|
268
279
|
contentHash: string;
|
|
269
280
|
modelId: string;
|
|
270
281
|
depth: number;
|
|
@@ -323,9 +334,10 @@ interface VectorStore {
|
|
|
323
334
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
324
335
|
deletePages(scope: Scope): Promise<void>;
|
|
325
336
|
getScopeModelId(scope: Scope): Promise<string | null>;
|
|
337
|
+
dropAllTables(): Promise<void>;
|
|
326
338
|
}
|
|
327
339
|
interface EmbeddingsProvider {
|
|
328
|
-
embedTexts(texts: string[], modelId: string): Promise<number[][]>;
|
|
340
|
+
embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
|
|
329
341
|
estimateTokens(text: string): number;
|
|
330
342
|
}
|
|
331
343
|
interface RerankCandidate {
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
type ScopeMode = "fixed" | "git" | "env";
|
|
2
2
|
type SourceMode = "static-output" | "crawl" | "content-files" | "build";
|
|
3
|
-
type EmbeddingProvider = "
|
|
4
|
-
type RerankProvider = "none" | "jina";
|
|
3
|
+
type EmbeddingProvider = "jina";
|
|
5
4
|
interface SearchSocketConfig {
|
|
6
5
|
project?: {
|
|
7
6
|
id?: string;
|
|
@@ -31,6 +30,10 @@ interface SearchSocketConfig {
|
|
|
31
30
|
paramValues?: Record<string, string[]>;
|
|
32
31
|
exclude?: string[];
|
|
33
32
|
previewTimeout?: number;
|
|
33
|
+
discover?: boolean;
|
|
34
|
+
seedUrls?: string[];
|
|
35
|
+
maxPages?: number;
|
|
36
|
+
maxDepth?: number;
|
|
34
37
|
};
|
|
35
38
|
};
|
|
36
39
|
extract?: {
|
|
@@ -59,6 +62,7 @@ interface SearchSocketConfig {
|
|
|
59
62
|
embeddings?: {
|
|
60
63
|
provider?: EmbeddingProvider;
|
|
61
64
|
model?: string;
|
|
65
|
+
apiKey?: string;
|
|
62
66
|
apiKeyEnv?: string;
|
|
63
67
|
batchSize?: number;
|
|
64
68
|
concurrency?: number;
|
|
@@ -67,18 +71,17 @@ interface SearchSocketConfig {
|
|
|
67
71
|
vector?: {
|
|
68
72
|
dimension?: number;
|
|
69
73
|
turso?: {
|
|
74
|
+
url?: string;
|
|
75
|
+
authToken?: string;
|
|
70
76
|
urlEnv?: string;
|
|
71
77
|
authTokenEnv?: string;
|
|
72
78
|
localPath?: string;
|
|
73
79
|
};
|
|
74
80
|
};
|
|
75
81
|
rerank?: {
|
|
76
|
-
|
|
82
|
+
enabled?: boolean;
|
|
77
83
|
topN?: number;
|
|
78
|
-
|
|
79
|
-
apiKeyEnv?: string;
|
|
80
|
-
model?: string;
|
|
81
|
-
};
|
|
84
|
+
model?: string;
|
|
82
85
|
};
|
|
83
86
|
ranking?: {
|
|
84
87
|
enableIncomingLinkBoost?: boolean;
|
|
@@ -87,6 +90,7 @@ interface SearchSocketConfig {
|
|
|
87
90
|
aggregationCap?: number;
|
|
88
91
|
aggregationDecay?: number;
|
|
89
92
|
minChunkScoreRatio?: number;
|
|
93
|
+
minScore?: number;
|
|
90
94
|
weights?: {
|
|
91
95
|
incomingLinks?: number;
|
|
92
96
|
depth?: number;
|
|
@@ -146,6 +150,10 @@ interface ResolvedSearchSocketConfig {
|
|
|
146
150
|
paramValues: Record<string, string[]>;
|
|
147
151
|
exclude: string[];
|
|
148
152
|
previewTimeout: number;
|
|
153
|
+
discover: boolean;
|
|
154
|
+
seedUrls: string[];
|
|
155
|
+
maxPages: number;
|
|
156
|
+
maxDepth: number;
|
|
149
157
|
};
|
|
150
158
|
};
|
|
151
159
|
extract: {
|
|
@@ -174,6 +182,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
174
182
|
embeddings: {
|
|
175
183
|
provider: EmbeddingProvider;
|
|
176
184
|
model: string;
|
|
185
|
+
apiKey?: string;
|
|
177
186
|
apiKeyEnv: string;
|
|
178
187
|
batchSize: number;
|
|
179
188
|
concurrency: number;
|
|
@@ -182,18 +191,17 @@ interface ResolvedSearchSocketConfig {
|
|
|
182
191
|
vector: {
|
|
183
192
|
dimension?: number;
|
|
184
193
|
turso: {
|
|
194
|
+
url?: string;
|
|
195
|
+
authToken?: string;
|
|
185
196
|
urlEnv: string;
|
|
186
197
|
authTokenEnv: string;
|
|
187
198
|
localPath: string;
|
|
188
199
|
};
|
|
189
200
|
};
|
|
190
201
|
rerank: {
|
|
191
|
-
|
|
202
|
+
enabled: boolean;
|
|
192
203
|
topN: number;
|
|
193
|
-
|
|
194
|
-
apiKeyEnv: string;
|
|
195
|
-
model: string;
|
|
196
|
-
};
|
|
204
|
+
model: string;
|
|
197
205
|
};
|
|
198
206
|
ranking: {
|
|
199
207
|
enableIncomingLinkBoost: boolean;
|
|
@@ -202,6 +210,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
202
210
|
aggregationCap: number;
|
|
203
211
|
aggregationDecay: number;
|
|
204
212
|
minChunkScoreRatio: number;
|
|
213
|
+
minScore: number;
|
|
205
214
|
weights: {
|
|
206
215
|
incomingLinks: number;
|
|
207
216
|
depth: number;
|
|
@@ -265,6 +274,8 @@ interface VectorRecord {
|
|
|
265
274
|
sectionTitle: string;
|
|
266
275
|
headingPath: string[];
|
|
267
276
|
snippet: string;
|
|
277
|
+
chunkText: string;
|
|
278
|
+
ordinal: number;
|
|
268
279
|
contentHash: string;
|
|
269
280
|
modelId: string;
|
|
270
281
|
depth: number;
|
|
@@ -323,9 +334,10 @@ interface VectorStore {
|
|
|
323
334
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
324
335
|
deletePages(scope: Scope): Promise<void>;
|
|
325
336
|
getScopeModelId(scope: Scope): Promise<string | null>;
|
|
337
|
+
dropAllTables(): Promise<void>;
|
|
326
338
|
}
|
|
327
339
|
interface EmbeddingsProvider {
|
|
328
|
-
embedTexts(texts: string[], modelId: string): Promise<number[][]>;
|
|
340
|
+
embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
|
|
329
341
|
estimateTokens(text: string): number;
|
|
330
342
|
}
|
|
331
343
|
interface RerankCandidate {
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "searchsocket",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Greg Priday <greg@siteorigin.com>",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
9
|
-
"url": "https://github.com/gregpriday/searchsocket.git"
|
|
9
|
+
"url": "git+https://github.com/gregpriday/searchsocket.git"
|
|
10
10
|
},
|
|
11
11
|
"homepage": "https://github.com/gregpriday/searchsocket",
|
|
12
12
|
"bugs": {
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"type": "module",
|
|
26
26
|
"files": [
|
|
27
27
|
"dist",
|
|
28
|
+
"!dist/**/*.map",
|
|
28
29
|
"README.md"
|
|
29
30
|
],
|
|
30
31
|
"bin": {
|
|
@@ -69,7 +70,6 @@
|
|
|
69
70
|
"fast-glob": "^3.3.3",
|
|
70
71
|
"gray-matter": "^4.0.3",
|
|
71
72
|
"jiti": "^2.6.1",
|
|
72
|
-
"openai": "^6.19.0",
|
|
73
73
|
"p-limit": "^7.3.0",
|
|
74
74
|
"turndown": "^7.2.2",
|
|
75
75
|
"turndown-plugin-gfm": "^1.0.2",
|