searchsocket 0.2.1 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +120 -42
- package/dist/cli.js +370 -115
- package/dist/client.d.cts +1 -1
- package/dist/client.d.ts +1 -1
- package/dist/index.cjs +391 -109
- package/dist/index.d.cts +20 -3
- package/dist/index.d.ts +20 -3
- package/dist/index.js +389 -108
- package/dist/sveltekit.cjs +374 -109
- package/dist/sveltekit.d.cts +8 -2
- package/dist/sveltekit.d.ts +8 -2
- package/dist/sveltekit.js +373 -107
- package/dist/{types-D1K46vwd.d.cts → types-BrG6XTUU.d.cts} +29 -13
- package/dist/{types-D1K46vwd.d.ts → types-BrG6XTUU.d.ts} +29 -13
- package/package.json +1 -2
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
type ScopeMode = "fixed" | "git" | "env";
|
|
2
2
|
type SourceMode = "static-output" | "crawl" | "content-files" | "build";
|
|
3
|
-
type EmbeddingProvider = "
|
|
4
|
-
type RerankProvider = "none" | "jina";
|
|
3
|
+
type EmbeddingProvider = "jina";
|
|
5
4
|
interface SearchSocketConfig {
|
|
6
5
|
project?: {
|
|
7
6
|
id?: string;
|
|
@@ -31,6 +30,10 @@ interface SearchSocketConfig {
|
|
|
31
30
|
paramValues?: Record<string, string[]>;
|
|
32
31
|
exclude?: string[];
|
|
33
32
|
previewTimeout?: number;
|
|
33
|
+
discover?: boolean;
|
|
34
|
+
seedUrls?: string[];
|
|
35
|
+
maxPages?: number;
|
|
36
|
+
maxDepth?: number;
|
|
34
37
|
};
|
|
35
38
|
};
|
|
36
39
|
extract?: {
|
|
@@ -59,6 +62,7 @@ interface SearchSocketConfig {
|
|
|
59
62
|
embeddings?: {
|
|
60
63
|
provider?: EmbeddingProvider;
|
|
61
64
|
model?: string;
|
|
65
|
+
apiKey?: string;
|
|
62
66
|
apiKeyEnv?: string;
|
|
63
67
|
batchSize?: number;
|
|
64
68
|
concurrency?: number;
|
|
@@ -67,18 +71,17 @@ interface SearchSocketConfig {
|
|
|
67
71
|
vector?: {
|
|
68
72
|
dimension?: number;
|
|
69
73
|
turso?: {
|
|
74
|
+
url?: string;
|
|
75
|
+
authToken?: string;
|
|
70
76
|
urlEnv?: string;
|
|
71
77
|
authTokenEnv?: string;
|
|
72
78
|
localPath?: string;
|
|
73
79
|
};
|
|
74
80
|
};
|
|
75
81
|
rerank?: {
|
|
76
|
-
|
|
82
|
+
enabled?: boolean;
|
|
77
83
|
topN?: number;
|
|
78
|
-
|
|
79
|
-
apiKeyEnv?: string;
|
|
80
|
-
model?: string;
|
|
81
|
-
};
|
|
84
|
+
model?: string;
|
|
82
85
|
};
|
|
83
86
|
ranking?: {
|
|
84
87
|
enableIncomingLinkBoost?: boolean;
|
|
@@ -87,6 +90,7 @@ interface SearchSocketConfig {
|
|
|
87
90
|
aggregationCap?: number;
|
|
88
91
|
aggregationDecay?: number;
|
|
89
92
|
minChunkScoreRatio?: number;
|
|
93
|
+
minScore?: number;
|
|
90
94
|
weights?: {
|
|
91
95
|
incomingLinks?: number;
|
|
92
96
|
depth?: number;
|
|
@@ -146,6 +150,10 @@ interface ResolvedSearchSocketConfig {
|
|
|
146
150
|
paramValues: Record<string, string[]>;
|
|
147
151
|
exclude: string[];
|
|
148
152
|
previewTimeout: number;
|
|
153
|
+
discover: boolean;
|
|
154
|
+
seedUrls: string[];
|
|
155
|
+
maxPages: number;
|
|
156
|
+
maxDepth: number;
|
|
149
157
|
};
|
|
150
158
|
};
|
|
151
159
|
extract: {
|
|
@@ -174,6 +182,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
174
182
|
embeddings: {
|
|
175
183
|
provider: EmbeddingProvider;
|
|
176
184
|
model: string;
|
|
185
|
+
apiKey?: string;
|
|
177
186
|
apiKeyEnv: string;
|
|
178
187
|
batchSize: number;
|
|
179
188
|
concurrency: number;
|
|
@@ -182,18 +191,17 @@ interface ResolvedSearchSocketConfig {
|
|
|
182
191
|
vector: {
|
|
183
192
|
dimension?: number;
|
|
184
193
|
turso: {
|
|
194
|
+
url?: string;
|
|
195
|
+
authToken?: string;
|
|
185
196
|
urlEnv: string;
|
|
186
197
|
authTokenEnv: string;
|
|
187
198
|
localPath: string;
|
|
188
199
|
};
|
|
189
200
|
};
|
|
190
201
|
rerank: {
|
|
191
|
-
|
|
202
|
+
enabled: boolean;
|
|
192
203
|
topN: number;
|
|
193
|
-
|
|
194
|
-
apiKeyEnv: string;
|
|
195
|
-
model: string;
|
|
196
|
-
};
|
|
204
|
+
model: string;
|
|
197
205
|
};
|
|
198
206
|
ranking: {
|
|
199
207
|
enableIncomingLinkBoost: boolean;
|
|
@@ -202,6 +210,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
202
210
|
aggregationCap: number;
|
|
203
211
|
aggregationDecay: number;
|
|
204
212
|
minChunkScoreRatio: number;
|
|
213
|
+
minScore: number;
|
|
205
214
|
weights: {
|
|
206
215
|
incomingLinks: number;
|
|
207
216
|
depth: number;
|
|
@@ -252,6 +261,8 @@ interface Chunk {
|
|
|
252
261
|
routeFile: string;
|
|
253
262
|
tags: string[];
|
|
254
263
|
contentHash: string;
|
|
264
|
+
description?: string;
|
|
265
|
+
keywords?: string[];
|
|
255
266
|
}
|
|
256
267
|
interface VectorRecord {
|
|
257
268
|
id: string;
|
|
@@ -265,12 +276,16 @@ interface VectorRecord {
|
|
|
265
276
|
sectionTitle: string;
|
|
266
277
|
headingPath: string[];
|
|
267
278
|
snippet: string;
|
|
279
|
+
chunkText: string;
|
|
280
|
+
ordinal: number;
|
|
268
281
|
contentHash: string;
|
|
269
282
|
modelId: string;
|
|
270
283
|
depth: number;
|
|
271
284
|
incomingLinks: number;
|
|
272
285
|
routeFile: string;
|
|
273
286
|
tags: string[];
|
|
287
|
+
description?: string;
|
|
288
|
+
keywords?: string[];
|
|
274
289
|
};
|
|
275
290
|
}
|
|
276
291
|
interface QueryOpts {
|
|
@@ -323,9 +338,10 @@ interface VectorStore {
|
|
|
323
338
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
324
339
|
deletePages(scope: Scope): Promise<void>;
|
|
325
340
|
getScopeModelId(scope: Scope): Promise<string | null>;
|
|
341
|
+
dropAllTables(): Promise<void>;
|
|
326
342
|
}
|
|
327
343
|
interface EmbeddingsProvider {
|
|
328
|
-
embedTexts(texts: string[], modelId: string): Promise<number[][]>;
|
|
344
|
+
embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
|
|
329
345
|
estimateTokens(text: string): number;
|
|
330
346
|
}
|
|
331
347
|
interface RerankCandidate {
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
type ScopeMode = "fixed" | "git" | "env";
|
|
2
2
|
type SourceMode = "static-output" | "crawl" | "content-files" | "build";
|
|
3
|
-
type EmbeddingProvider = "
|
|
4
|
-
type RerankProvider = "none" | "jina";
|
|
3
|
+
type EmbeddingProvider = "jina";
|
|
5
4
|
interface SearchSocketConfig {
|
|
6
5
|
project?: {
|
|
7
6
|
id?: string;
|
|
@@ -31,6 +30,10 @@ interface SearchSocketConfig {
|
|
|
31
30
|
paramValues?: Record<string, string[]>;
|
|
32
31
|
exclude?: string[];
|
|
33
32
|
previewTimeout?: number;
|
|
33
|
+
discover?: boolean;
|
|
34
|
+
seedUrls?: string[];
|
|
35
|
+
maxPages?: number;
|
|
36
|
+
maxDepth?: number;
|
|
34
37
|
};
|
|
35
38
|
};
|
|
36
39
|
extract?: {
|
|
@@ -59,6 +62,7 @@ interface SearchSocketConfig {
|
|
|
59
62
|
embeddings?: {
|
|
60
63
|
provider?: EmbeddingProvider;
|
|
61
64
|
model?: string;
|
|
65
|
+
apiKey?: string;
|
|
62
66
|
apiKeyEnv?: string;
|
|
63
67
|
batchSize?: number;
|
|
64
68
|
concurrency?: number;
|
|
@@ -67,18 +71,17 @@ interface SearchSocketConfig {
|
|
|
67
71
|
vector?: {
|
|
68
72
|
dimension?: number;
|
|
69
73
|
turso?: {
|
|
74
|
+
url?: string;
|
|
75
|
+
authToken?: string;
|
|
70
76
|
urlEnv?: string;
|
|
71
77
|
authTokenEnv?: string;
|
|
72
78
|
localPath?: string;
|
|
73
79
|
};
|
|
74
80
|
};
|
|
75
81
|
rerank?: {
|
|
76
|
-
|
|
82
|
+
enabled?: boolean;
|
|
77
83
|
topN?: number;
|
|
78
|
-
|
|
79
|
-
apiKeyEnv?: string;
|
|
80
|
-
model?: string;
|
|
81
|
-
};
|
|
84
|
+
model?: string;
|
|
82
85
|
};
|
|
83
86
|
ranking?: {
|
|
84
87
|
enableIncomingLinkBoost?: boolean;
|
|
@@ -87,6 +90,7 @@ interface SearchSocketConfig {
|
|
|
87
90
|
aggregationCap?: number;
|
|
88
91
|
aggregationDecay?: number;
|
|
89
92
|
minChunkScoreRatio?: number;
|
|
93
|
+
minScore?: number;
|
|
90
94
|
weights?: {
|
|
91
95
|
incomingLinks?: number;
|
|
92
96
|
depth?: number;
|
|
@@ -146,6 +150,10 @@ interface ResolvedSearchSocketConfig {
|
|
|
146
150
|
paramValues: Record<string, string[]>;
|
|
147
151
|
exclude: string[];
|
|
148
152
|
previewTimeout: number;
|
|
153
|
+
discover: boolean;
|
|
154
|
+
seedUrls: string[];
|
|
155
|
+
maxPages: number;
|
|
156
|
+
maxDepth: number;
|
|
149
157
|
};
|
|
150
158
|
};
|
|
151
159
|
extract: {
|
|
@@ -174,6 +182,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
174
182
|
embeddings: {
|
|
175
183
|
provider: EmbeddingProvider;
|
|
176
184
|
model: string;
|
|
185
|
+
apiKey?: string;
|
|
177
186
|
apiKeyEnv: string;
|
|
178
187
|
batchSize: number;
|
|
179
188
|
concurrency: number;
|
|
@@ -182,18 +191,17 @@ interface ResolvedSearchSocketConfig {
|
|
|
182
191
|
vector: {
|
|
183
192
|
dimension?: number;
|
|
184
193
|
turso: {
|
|
194
|
+
url?: string;
|
|
195
|
+
authToken?: string;
|
|
185
196
|
urlEnv: string;
|
|
186
197
|
authTokenEnv: string;
|
|
187
198
|
localPath: string;
|
|
188
199
|
};
|
|
189
200
|
};
|
|
190
201
|
rerank: {
|
|
191
|
-
|
|
202
|
+
enabled: boolean;
|
|
192
203
|
topN: number;
|
|
193
|
-
|
|
194
|
-
apiKeyEnv: string;
|
|
195
|
-
model: string;
|
|
196
|
-
};
|
|
204
|
+
model: string;
|
|
197
205
|
};
|
|
198
206
|
ranking: {
|
|
199
207
|
enableIncomingLinkBoost: boolean;
|
|
@@ -202,6 +210,7 @@ interface ResolvedSearchSocketConfig {
|
|
|
202
210
|
aggregationCap: number;
|
|
203
211
|
aggregationDecay: number;
|
|
204
212
|
minChunkScoreRatio: number;
|
|
213
|
+
minScore: number;
|
|
205
214
|
weights: {
|
|
206
215
|
incomingLinks: number;
|
|
207
216
|
depth: number;
|
|
@@ -252,6 +261,8 @@ interface Chunk {
|
|
|
252
261
|
routeFile: string;
|
|
253
262
|
tags: string[];
|
|
254
263
|
contentHash: string;
|
|
264
|
+
description?: string;
|
|
265
|
+
keywords?: string[];
|
|
255
266
|
}
|
|
256
267
|
interface VectorRecord {
|
|
257
268
|
id: string;
|
|
@@ -265,12 +276,16 @@ interface VectorRecord {
|
|
|
265
276
|
sectionTitle: string;
|
|
266
277
|
headingPath: string[];
|
|
267
278
|
snippet: string;
|
|
279
|
+
chunkText: string;
|
|
280
|
+
ordinal: number;
|
|
268
281
|
contentHash: string;
|
|
269
282
|
modelId: string;
|
|
270
283
|
depth: number;
|
|
271
284
|
incomingLinks: number;
|
|
272
285
|
routeFile: string;
|
|
273
286
|
tags: string[];
|
|
287
|
+
description?: string;
|
|
288
|
+
keywords?: string[];
|
|
274
289
|
};
|
|
275
290
|
}
|
|
276
291
|
interface QueryOpts {
|
|
@@ -323,9 +338,10 @@ interface VectorStore {
|
|
|
323
338
|
getPage(url: string, scope: Scope): Promise<PageRecord | null>;
|
|
324
339
|
deletePages(scope: Scope): Promise<void>;
|
|
325
340
|
getScopeModelId(scope: Scope): Promise<string | null>;
|
|
341
|
+
dropAllTables(): Promise<void>;
|
|
326
342
|
}
|
|
327
343
|
interface EmbeddingsProvider {
|
|
328
|
-
embedTexts(texts: string[], modelId: string): Promise<number[][]>;
|
|
344
|
+
embedTexts(texts: string[], modelId: string, task?: string): Promise<number[][]>;
|
|
329
345
|
estimateTokens(text: string): number;
|
|
330
346
|
}
|
|
331
347
|
interface RerankCandidate {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "searchsocket",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Semantic site search and MCP retrieval for SvelteKit static sites",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Greg Priday <greg@siteorigin.com>",
|
|
@@ -70,7 +70,6 @@
|
|
|
70
70
|
"fast-glob": "^3.3.3",
|
|
71
71
|
"gray-matter": "^4.0.3",
|
|
72
72
|
"jiti": "^2.6.1",
|
|
73
|
-
"openai": "^6.19.0",
|
|
74
73
|
"p-limit": "^7.3.0",
|
|
75
74
|
"turndown": "^7.2.2",
|
|
76
75
|
"turndown-plugin-gfm": "^1.0.2",
|