@agentgazer/shared 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/normalize.test.d.ts +2 -0
- package/dist/__tests__/normalize.test.d.ts.map +1 -0
- package/dist/__tests__/normalize.test.js +159 -0
- package/dist/__tests__/normalize.test.js.map +1 -0
- package/dist/__tests__/pricing.test.js +24 -37
- package/dist/__tests__/pricing.test.js.map +1 -1
- package/dist/__tests__/providers.test.js +59 -32
- package/dist/__tests__/providers.test.js.map +1 -1
- package/dist/__tests__/simhash.test.d.ts +2 -0
- package/dist/__tests__/simhash.test.d.ts.map +1 -0
- package/dist/__tests__/simhash.test.js +107 -0
- package/dist/__tests__/simhash.test.js.map +1 -0
- package/dist/index.d.ts +6 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +20 -1
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +3 -2
- package/dist/logger.js.map +1 -1
- package/dist/models.d.ts +4 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +22 -34
- package/dist/models.js.map +1 -1
- package/dist/normalize.d.ts +35 -0
- package/dist/normalize.d.ts.map +1 -0
- package/dist/normalize.js +119 -0
- package/dist/normalize.js.map +1 -0
- package/dist/parsers.d.ts.map +1 -1
- package/dist/parsers.js +0 -1
- package/dist/parsers.js.map +1 -1
- package/dist/pricing.d.ts +6 -0
- package/dist/pricing.d.ts.map +1 -1
- package/dist/pricing.js +41 -11
- package/dist/pricing.js.map +1 -1
- package/dist/provider-validator.d.ts +19 -0
- package/dist/provider-validator.d.ts.map +1 -0
- package/dist/provider-validator.js +314 -0
- package/dist/provider-validator.js.map +1 -0
- package/dist/providers.d.ts +32 -2
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +117 -17
- package/dist/providers.js.map +1 -1
- package/dist/simhash.d.ts +28 -0
- package/dist/simhash.d.ts.map +1 -0
- package/dist/simhash.js +98 -0
- package/dist/simhash.js.map +1 -0
- package/package.json +1 -1
package/dist/providers.js
CHANGED
|
@@ -1,12 +1,29 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.KNOWN_PROVIDER_NAMES = void 0;
|
|
3
|
+
exports.SELECTABLE_PROVIDER_NAMES = exports.KNOWN_PROVIDER_NAMES = exports.PROVIDER_DISPLAY_NAMES = void 0;
|
|
4
4
|
exports.detectProvider = detectProvider;
|
|
5
5
|
exports.detectProviderByHostname = detectProviderByHostname;
|
|
6
6
|
exports.getProviderBaseUrl = getProviderBaseUrl;
|
|
7
|
+
exports.getProviderRootUrl = getProviderRootUrl;
|
|
8
|
+
exports.providerUsesPathRouting = providerUsesPathRouting;
|
|
9
|
+
exports.getProviderChatEndpoint = getProviderChatEndpoint;
|
|
7
10
|
exports.getProviderAuthHeader = getProviderAuthHeader;
|
|
8
11
|
exports.parsePathPrefix = parsePathPrefix;
|
|
12
|
+
exports.rewriteProviderPath = rewriteProviderPath;
|
|
9
13
|
exports.parseAgentPath = parseAgentPath;
|
|
14
|
+
/** Maps each provider name to its human-readable display label (brand, plus a representative model family where one is listed). */
|
|
15
|
+
exports.PROVIDER_DISPLAY_NAMES = {
|
|
16
|
+
openai: "OpenAI (GPT-4)",
|
|
17
|
+
anthropic: "Anthropic (Claude)",
|
|
18
|
+
google: "Google (Gemini)",
|
|
19
|
+
mistral: "Mistral",
|
|
20
|
+
cohere: "Cohere (Command)",
|
|
21
|
+
deepseek: "DeepSeek",
|
|
22
|
+
moonshot: "Moonshot (Kimi)",
|
|
23
|
+
zhipu: "Zhipu (GLM-4)",
|
|
24
|
+
minimax: "MiniMax (abab)",
|
|
25
|
+
baichuan: "Baichuan",
|
|
26
|
+
};
|
|
10
27
|
/** All known provider names (excludes "unknown"). Single source of truth. */
|
|
11
28
|
exports.KNOWN_PROVIDER_NAMES = [
|
|
12
29
|
"openai",
|
|
@@ -19,7 +36,22 @@ exports.KNOWN_PROVIDER_NAMES = [
|
|
|
19
36
|
"zhipu",
|
|
20
37
|
"minimax",
|
|
21
38
|
"baichuan",
|
|
22
|
-
|
|
39
|
+
];
|
|
40
|
+
/**
|
|
41
|
+
* Provider names available for user selection in UI/CLI.
|
|
42
|
+
* Excludes providers without active API access.
|
|
43
|
+
*/
|
|
44
|
+
exports.SELECTABLE_PROVIDER_NAMES = [
|
|
45
|
+
"openai",
|
|
46
|
+
"anthropic",
|
|
47
|
+
"google",
|
|
48
|
+
"mistral",
|
|
49
|
+
"cohere",
|
|
50
|
+
"deepseek",
|
|
51
|
+
"moonshot",
|
|
52
|
+
"zhipu",
|
|
53
|
+
"minimax",
|
|
54
|
+
"baichuan",
|
|
23
55
|
];
|
|
24
56
|
const PROVIDER_PATTERNS = [
|
|
25
57
|
{
|
|
@@ -50,7 +82,7 @@ const PROVIDER_PATTERNS = [
|
|
|
50
82
|
},
|
|
51
83
|
{
|
|
52
84
|
name: "moonshot",
|
|
53
|
-
hostPatterns: [/^api\.moonshot\.cn$/],
|
|
85
|
+
hostPatterns: [/^api\.moonshot\.ai$/, /^api\.moonshot\.cn$/],
|
|
54
86
|
},
|
|
55
87
|
{
|
|
56
88
|
name: "zhipu",
|
|
@@ -58,16 +90,12 @@ const PROVIDER_PATTERNS = [
|
|
|
58
90
|
},
|
|
59
91
|
{
|
|
60
92
|
name: "minimax",
|
|
61
|
-
hostPatterns: [/^api\.minimax\.chat$/],
|
|
93
|
+
hostPatterns: [/^api\.minimax\.io$/, /^api\.minimax\.chat$/],
|
|
62
94
|
},
|
|
63
95
|
{
|
|
64
96
|
name: "baichuan",
|
|
65
97
|
hostPatterns: [/^api\.baichuan-ai\.com$/],
|
|
66
98
|
},
|
|
67
|
-
{
|
|
68
|
-
name: "yi",
|
|
69
|
-
hostPatterns: [/^api\.lingyiwanwu\.com$/],
|
|
70
|
-
},
|
|
71
99
|
];
|
|
72
100
|
function detectProvider(url) {
|
|
73
101
|
let hostname = "";
|
|
@@ -123,26 +151,76 @@ function detectProviderByHostname(url) {
|
|
|
123
151
|
return "unknown";
|
|
124
152
|
}
|
|
125
153
|
/**
 * Returns the versioned base URL for a provider.
 *
 * @param provider - Provider name (e.g. "openai", "anthropic").
 * @returns The base URL including its version path, or null when the
 *          provider is not in the table.
 */
function getProviderBaseUrl(provider) {
    // Base URLs include version path so users can set OPENAI_BASE_URL=http://localhost:4000/openai
    // and the SDK will send /openai/chat/completions which becomes /v1/chat/completions
    const urls = {
        openai: "https://api.openai.com/v1",
        anthropic: "https://api.anthropic.com/v1",
        google: "https://generativelanguage.googleapis.com/v1beta/openai",
        mistral: "https://api.mistral.ai/v1",
        cohere: "https://api.cohere.com/v2",
        deepseek: "https://api.deepseek.com/v1",
        moonshot: "https://api.moonshot.ai/v1",
        zhipu: "https://api.z.ai/api/paas/v4",
        minimax: "https://api.minimax.io/v1",
        baichuan: "https://api.baichuan-ai.com/v1",
    };
    // Only own keys count: a bare `urls[provider]` lookup would also resolve
    // inherited members such as "constructor" or "toString" and return them
    // instead of null.
    return Object.prototype.hasOwnProperty.call(urls, provider) ? urls[provider] : null;
}
|
|
170
|
+
/**
 * Returns the root API URL for a provider for path-based routing.
 * Includes version prefix where needed (e.g., /v1beta for Google).
 * Used for providers that need path-based routing (e.g., Google's native API).
 *
 * @param provider - Provider name (e.g. "openai", "google").
 * @returns The root URL, or null when the provider is not in the table.
 */
function getProviderRootUrl(provider) {
    const urls = {
        openai: "https://api.openai.com",
        anthropic: "https://api.anthropic.com",
        google: "https://generativelanguage.googleapis.com/v1beta", // Include version prefix
        mistral: "https://api.mistral.ai",
        cohere: "https://api.cohere.com",
        deepseek: "https://api.deepseek.com",
        moonshot: "https://api.moonshot.ai",
        zhipu: "https://api.z.ai",
        minimax: "https://api.minimax.io",
        baichuan: "https://api.baichuan-ai.com",
    };
    // Only own keys count: a bare `urls[provider]` lookup would also resolve
    // inherited members such as "constructor" or "toString" and return them
    // instead of null.
    return Object.prototype.hasOwnProperty.call(urls, provider) ? urls[provider] : null;
}
|
|
190
|
+
/**
 * Tells whether a provider is routed by path, i.e. the client supplies the
 * full trailing path and it must be forwarded as-is rather than replaced
 * with a fixed endpoint.
 *
 * @param provider - Provider name.
 * @returns true only for "google".
 */
function providerUsesPathRouting(provider) {
    // Google's native API uses paths like /v1beta/models/{model}:generateContent
    switch (provider) {
        case "google":
            return true;
        default:
            return false;
    }
}
|
|
198
|
+
/**
 * Returns the complete chat endpoint URL for a provider.
 * This is the full URL including path - no additional path construction needed.
 *
 * @param provider - Provider name (e.g. "openai", "anthropic").
 * @returns The full chat endpoint URL, or null for unknown providers.
 */
function getProviderChatEndpoint(provider) {
    const endpoints = {
        openai: "https://api.openai.com/v1/chat/completions",
        anthropic: "https://api.anthropic.com/v1/messages",
        google: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
        mistral: "https://api.mistral.ai/v1/chat/completions",
        cohere: "https://api.cohere.com/v2/chat",
        deepseek: "https://api.deepseek.com/v1/chat/completions",
        moonshot: "https://api.moonshot.ai/v1/chat/completions",
        zhipu: "https://api.z.ai/api/paas/v4/chat/completions",
        minimax: "https://api.minimax.io/v1/text/chatcompletion_v2",
        baichuan: "https://api.baichuan-ai.com/v1/chat/completions",
    };
    // Only own keys count: a bare `endpoints[provider]` lookup would also
    // resolve inherited members such as "constructor" or "toString" and
    // return them instead of null.
    return Object.prototype.hasOwnProperty.call(endpoints, provider) ? endpoints[provider] : null;
}
|
|
141
218
|
/**
|
|
142
219
|
* Returns the auth header name and value for a given provider.
|
|
143
220
|
* Different providers use different header conventions.
|
|
221
|
+
* @param useNativeApi - For Google, whether to use native API auth (x-goog-api-key) vs OpenAI-compatible (Bearer)
|
|
144
222
|
*/
|
|
145
|
-
function getProviderAuthHeader(provider, apiKey) {
|
|
223
|
+
function getProviderAuthHeader(provider, apiKey, useNativeApi = false) {
|
|
146
224
|
switch (provider) {
|
|
147
225
|
case "openai":
|
|
148
226
|
case "mistral":
|
|
@@ -152,12 +230,15 @@ function getProviderAuthHeader(provider, apiKey) {
|
|
|
152
230
|
case "zhipu":
|
|
153
231
|
case "minimax":
|
|
154
232
|
case "baichuan":
|
|
155
|
-
case "yi":
|
|
156
233
|
return { name: "authorization", value: `Bearer ${apiKey}` };
|
|
157
234
|
case "anthropic":
|
|
158
235
|
return { name: "x-api-key", value: apiKey };
|
|
159
236
|
case "google":
|
|
160
|
-
|
|
237
|
+
// Google native API uses x-goog-api-key, OpenAI-compatible uses Bearer
|
|
238
|
+
if (useNativeApi) {
|
|
239
|
+
return { name: "x-goog-api-key", value: apiKey };
|
|
240
|
+
}
|
|
241
|
+
return { name: "authorization", value: `Bearer ${apiKey}` };
|
|
161
242
|
default:
|
|
162
243
|
return null;
|
|
163
244
|
}
|
|
@@ -174,10 +255,29 @@ function parsePathPrefix(path) {
|
|
|
174
255
|
const segment = match[1].toLowerCase();
|
|
175
256
|
const rest = match[2] ?? "/";
|
|
176
257
|
if (exports.KNOWN_PROVIDER_NAMES.includes(segment)) {
|
|
177
|
-
|
|
258
|
+
// Apply path rewriting for non-OpenAI-compatible providers
|
|
259
|
+
const rewrittenPath = rewriteProviderPath(segment, rest);
|
|
260
|
+
return { provider: segment, remainingPath: rewrittenPath };
|
|
178
261
|
}
|
|
179
262
|
return null;
|
|
180
263
|
}
|
|
264
|
+
/**
 * Translate an OpenAI-compatible request path into the path the given
 * provider actually serves. Paths that need no translation are returned
 * unchanged.
 *
 * @param provider - Provider name.
 * @param path - Request path (may carry a query string).
 * @returns The provider-specific path.
 */
function rewriteProviderPath(provider, path) {
    // Zhipu base URL already includes /api/paas, so /v4/chat/completions works directly
    // Moonshot and Baichuan use standard OpenAI-compatible paths
    if (provider !== "minimax") {
        return path;
    }
    // MiniMax uses /v1/text/chatcompletion_v2 instead of /v1/chat/completions
    const isChatPath = path === "/v1/chat/completions";
    const isChatPathWithQuery = !isChatPath && path.startsWith("/v1/chat/completions?");
    if (!isChatPath && !isChatPathWithQuery) {
        return path;
    }
    return path.replace("/v1/chat/completions", "/v1/text/chatcompletion_v2");
}
|
|
181
281
|
/**
|
|
182
282
|
* Parse a URL path to extract an agent ID from /agents/{id}/... format.
|
|
183
283
|
* Given "/agents/my-bot/openai/v1/chat/completions" returns:
|
package/dist/providers.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":";;;AA6GA,wCA4BC;AAMD,4DAmBC;AAED,gDAgBC;AAOD,gDAcC;AAMD,0DAGC;AAOD,0DAcC;AAOD,sDA0BC;AAOD,0CAaC;AAMD,kDAYC;AAQD,wCAQC;AA3SD,0EAA0E;AAC7D,QAAA,sBAAsB,GAA2B;IAC5D,MAAM,EAAE,gBAAgB;IACxB,SAAS,EAAE,oBAAoB;IAC/B,MAAM,EAAE,iBAAiB;IACzB,OAAO,EAAE,SAAS;IAClB,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,UAAU;IACpB,QAAQ,EAAE,iBAAiB;IAC3B,KAAK,EAAE,eAAe;IACtB,OAAO,EAAE,gBAAgB;IACzB,QAAQ,EAAE,UAAU;CACrB,CAAC;AAEF,6EAA6E;AAChE,QAAA,oBAAoB,GAAmB;IAClD,QAAQ;IACR,WAAW;IACX,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,UAAU;IACV,UAAU;IACV,OAAO;IACP,SAAS;IACT,UAAU;CACX,CAAC;AAEF;;;GAGG;AACU,QAAA,yBAAyB,GAAmB;IACvD,QAAQ;IACR,WAAW;IACX,QAAQ;IACR,SAAS;IACT,QAAQ;IACR,UAAU;IACV,UAAU;IACV,OAAO;IACP,SAAS;IACT,UAAU;CACX,CAAC;AAEF,MAAM,iBAAiB,GAAsB;IAC3C;QACE,IAAI,EAAE,QAAQ;QACd,YAAY,EAAE,CAAC,oBAAoB,CAAC;QACpC,YAAY,EAAE,CAAC,yBAAyB,EAAE,mBAAmB,CAAC;KAC/D;IACD;QACE,IAAI,EAAE,WAAW;QACjB,YAAY,EAAE,CAAC,uBAAuB,CAAC;QACvC,YAAY,EAAE,CAAC,gBAAgB,CAAC;KACjC;IACD;QACE,IAAI,EAAE,QAAQ;QACd,YAAY,EAAE,CAAC,uCAAuC,CAAC;KACxD;IACD;QACE,IAAI,EAAE,SAAS;QACf,YAAY,EAAE,CAAC,oBAAoB,CAAC;KACrC;IACD;QACE,IAAI,EAAE,QAAQ;QACd,YAAY,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,CAAC;KAC1D;IACD;QACE,IAAI,EAAE,UAAU;QAChB,YAAY,EAAE,CAAC,sBAAsB,CAAC;KACvC;IACD;QACE,IAAI,EAAE,UAAU;QAChB,YAAY,EAAE,CAAC,qBAAqB,EAAE,qBAAqB,CAAC;KAC7D;IACD;QACE,IAAI,EAAE,OAAO;QACb,YAAY,EAAE,CAAC,sBAAsB,EAAE,cAAc,CAAC;KACvD;IACD;QACE,IAAI,EAAE,SAAS;QACf,YAAY,EAAE,CAAC,oBAAoB,EAAE,sBAAsB,CAAC;KAC7D;IACD;QACE,IAAI,EAAE,UAAU;QAChB,YAAY,EAAE,CAAC,yBAAyB,CAAC;KAC1C;CACF,CAAC;AAEF,SAAgB,cAAc,CAAC,GAAW;IACxC,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,QAAQ,GAAG,GAAG,CAAC;IACnB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC3B,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,6DAA6D;IAC/D,CAAC;IAED,KAAK,MAAM,QAAQ,IAAI,iBAAiB,EAAE,CAAC;QACzC,IAAI,QAAQ,EAAE,CAAC;YACb,KAAK,MAAM,WAAW,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;gBAChD,IAAI,WAAW,CAAC,IAAI
,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC/B,OAAO,QAAQ,CAAC,IAAI,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;QACD,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;YAC1B,KAAK,MAAM,WAAW,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;gBAChD,IAAI,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC/B,OAAO,QAAQ,CAAC,IAAI,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;GAGG;AACH,SAAgB,wBAAwB,CAAC,GAAW;IAClD,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAC;IAEhC,KAAK,MAAM,QAAQ,IAAI,iBAAiB,EAAE,CAAC;QACzC,KAAK,MAAM,WAAW,IAAI,QAAQ,CAAC,YAAY,EAAE,CAAC;YAChD,IAAI,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC,IAAI,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,SAAgB,kBAAkB,CAAC,QAAsB;IACvD,+FAA+F;IAC/F,oFAAoF;IACpF,MAAM,IAAI,GAA2B;QACnC,MAAM,EAAE,2BAA2B;QACnC,SAAS,EAAE,8BAA8B;QACzC,MAAM,EAAE,yDAAyD;QACjE,OAAO,EAAE,2BAA2B;QACpC,MAAM,EAAE,2BAA2B;QACnC,QAAQ,EAAE,6BAA6B;QACvC,QAAQ,EAAE,4BAA4B;QACtC,KAAK,EAAE,8BAA8B;QACrC,OAAO,EAAE,2BAA2B;QACpC,QAAQ,EAAE,gCAAgC;KAC3C,CAAC;IACF,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;AAChC,CAAC;AAED;;;;GAIG;AACH,SAAgB,kBAAkB,CAAC,QAAsB;IACvD,MAAM,IAAI,GAA2B;QACnC,MAAM,EAAE,wBAAwB;QAChC,SAAS,EAAE,2BAA2B;QACtC,MAAM,EAAE,kDAAkD,EAAG,yBAAyB;QACtF,OAAO,EAAE,wBAAwB;QACjC,MAAM,EAAE,wBAAwB;QAChC,QAAQ,EAAE,0BAA0B;QACpC,QAAQ,EAAE,yBAAyB;QACnC,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,wBAAwB;QACjC,QAAQ,EAAE,6BAA6B;KACxC,CAAC;IACF,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,SAAgB,uBAAuB,CAAC,QAAsB;IAC5D,6EAA6E;IAC7E,OAAO,QAAQ,KAAK,QAAQ,CAAC;AAC/B,CAAC;AAED;;;;GAIG;AACH,SAAgB,uBAAuB,CAAC,QAAsB;IAC5D,MAAM,SAAS,GAA2B;QACxC,MAAM,EAAE,4CAA4C;QACpD,SAAS,EAAE,uCAAuC;QAClD,MAAM,EAAE,0EAA0E;QAClF,OAAO,EAAE,4CAA4C;QACrD,MAAM,EAAE,gCAAgC;QACxC,QAAQ,EAAE,8CAA8C;QACxD,QAAQ,EAAE,6CAA6C;QACvD,KAAK,EAAE,+CAA+C;QACtD,OAAO,EAAE,kDAAkD;QAC3D,QAAQ,EAAE,iDAAiD;KAC5D,CAAC;IACF,OAAO,
SAAS,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC;AACrC,CAAC;AAED;;;;GAIG;AACH,SAAgB,qBAAqB,CACnC,QAAsB,EACtB,MAAc,EACd,eAAwB,KAAK;IAE7B,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ,CAAC;QACd,KAAK,SAAS,CAAC;QACf,KAAK,QAAQ,CAAC;QACd,KAAK,UAAU,CAAC;QAChB,KAAK,UAAU,CAAC;QAChB,KAAK,OAAO,CAAC;QACb,KAAK,SAAS,CAAC;QACf,KAAK,UAAU;YACb,OAAO,EAAE,IAAI,EAAE,eAAe,EAAE,KAAK,EAAE,UAAU,MAAM,EAAE,EAAE,CAAC;QAC9D,KAAK,WAAW;YACd,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QAC9C,KAAK,QAAQ;YACX,uEAAuE;YACvE,IAAI,YAAY,EAAE,CAAC;gBACjB,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;YACnD,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,eAAe,EAAE,KAAK,EAAE,UAAU,MAAM,EAAE,EAAE,CAAC;QAC9D;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,SAAgB,eAAe,CAC7B,IAAY;IAEZ,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;IAC/C,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACvC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAC7B,IAAI,4BAAoB,CAAC,QAAQ,CAAC,OAAuB,CAAC,EAAE,CAAC;QAC3D,2DAA2D;QAC3D,MAAM,aAAa,GAAG,mBAAmB,CAAC,OAAuB,EAAE,IAAI,CAAC,CAAC;QACzE,OAAO,EAAE,QAAQ,EAAE,OAAuB,EAAE,aAAa,EAAE,aAAa,EAAE,CAAC;IAC7E,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAgB,mBAAmB,CAAC,QAAsB,EAAE,IAAY;IACtE,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,SAAS;YACZ,0EAA0E;YAC1E,IAAI,IAAI,KAAK,sBAAsB,IAAI,IAAI,CAAC,UAAU,CAAC,uBAAuB,CAAC,EAAE,CAAC;gBAChF,OAAO,IAAI,CAAC,OAAO,CAAC,sBAAsB,EAAE,4BAA4B,CAAC,CAAC;YAC5E,CAAC;YACD,MAAM;QACR,oFAAoF;QACpF,6DAA6D;IAC/D,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAC5B,IAAY;IAEZ,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;IACvD,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAC7B,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;AAC1C,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SimHash - Locality Sensitive Hashing for text similarity detection.
|
|
3
|
+
* Used by Google for web page deduplication.
|
|
4
|
+
*
|
|
5
|
+
* Similar texts produce hashes with small Hamming distance.
|
|
6
|
+
* Different texts produce hashes with large Hamming distance.
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Compute 64-bit SimHash for text.
|
|
10
|
+
*
|
|
11
|
+
* Algorithm:
|
|
12
|
+
* 1. Tokenize text into n-grams
|
|
13
|
+
* 2. Hash each token to 64-bit value
|
|
14
|
+
* 3. For each bit position, sum +1 if bit is 1, -1 if bit is 0
|
|
15
|
+
* 4. Final hash: bit is 1 if sum > 0, else 0
|
|
16
|
+
*/
|
|
17
|
+
export declare function computeSimHash(text: string): bigint;
|
|
18
|
+
/**
|
|
19
|
+
* Compute Hamming distance between two SimHash values.
|
|
20
|
+
* Returns the number of differing bits.
|
|
21
|
+
*/
|
|
22
|
+
export declare function hammingDistance(a: bigint, b: bigint): number;
|
|
23
|
+
/**
|
|
24
|
+
* Check if two SimHash values are similar.
|
|
25
|
+
* Default threshold of 3 bits difference is commonly used.
|
|
26
|
+
*/
|
|
27
|
+
export declare function isSimilar(a: bigint, b: bigint, threshold?: number): boolean;
|
|
28
|
+
//# sourceMappingURL=simhash.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"simhash.d.ts","sourceRoot":"","sources":["../src/simhash.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAiCH;;;;;;;;GAQG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA8BnD;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAW5D;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,SAAI,GAAG,OAAO,CAEtE"}
|
package/dist/simhash.js
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* SimHash - Locality Sensitive Hashing for text similarity detection.
|
|
4
|
+
* Used by Google for web page deduplication.
|
|
5
|
+
*
|
|
6
|
+
* Similar texts produce hashes with small Hamming distance.
|
|
7
|
+
* Different texts produce hashes with large Hamming distance.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.computeSimHash = computeSimHash;
|
|
11
|
+
exports.hammingDistance = hammingDistance;
|
|
12
|
+
exports.isSimilar = isSimilar;
|
|
13
|
+
// 64-bit SimHash using bigint
|
|
14
|
+
const HASH_BITS = 64n;
|
|
15
|
+
/**
 * Hash a token to an unsigned 64-bit value with an FNV-1a style loop.
 * Each UTF-16 code unit of the token is XOR-ed into the running digest,
 * which is then multiplied by the FNV prime and truncated to 64 bits.
 *
 * @param token - String to hash.
 * @returns The digest as a non-negative bigint below 2^64.
 */
function hashToken(token) {
    const offsetBasis = 0xcbf29ce484222325n; // FNV offset basis
    const prime = 0x100000001b3n; // FNV prime
    let digest = offsetBasis;
    let index = 0;
    while (index < token.length) {
        const unit = BigInt(token.charCodeAt(index));
        digest = BigInt.asUintN(64, (digest ^ unit) * prime);
        index += 1;
    }
    return digest;
}
|
|
26
|
+
/**
 * Tokenize text into character n-grams (default: 3-grams).
 *
 * The text is lower-cased, runs of whitespace are collapsed to a single
 * space, and leading/trailing whitespace is removed before slicing.
 *
 * @param text - Input text.
 * @param n - N-gram length in UTF-16 code units (default 3).
 * @returns An empty array when the normalized text is empty; a single token
 *          holding the whole text when it is shorter than n; otherwise every
 *          overlapping n-gram in order.
 */
function tokenize(text, n = 3) {
    const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
    // Empty or whitespace-only input has no tokens. Returning [""] here would
    // make callers hash the empty string as if it were real content.
    if (normalized.length === 0) {
        return [];
    }
    if (normalized.length < n) {
        return [normalized];
    }
    const tokens = [];
    for (let i = 0; i <= normalized.length - n; i++) {
        tokens.push(normalized.slice(i, i + n));
    }
    return tokens;
}
|
|
40
|
+
/**
 * Compute a 64-bit SimHash fingerprint for text.
 *
 * Steps:
 * 1. Split the text into n-gram tokens.
 * 2. Hash every token to a 64-bit value.
 * 3. Keep one tally per bit position: +1 when the token hash has that bit
 *    set, -1 when it does not.
 * 4. The fingerprint has a bit set wherever the tally is strictly positive.
 *
 * @param text - Input text.
 * @returns The fingerprint as a bigint; 0n when the tokenizer yields no tokens.
 */
function computeSimHash(text) {
    const grams = tokenize(text);
    if (grams.length === 0) {
        return 0n;
    }
    // One signed tally per bit position, index 0 = least significant bit.
    const tallies = Array.from({ length: 64 }, () => 0);
    for (const gram of grams) {
        let remaining = hashToken(gram);
        for (let bit = 0; bit < 64; bit += 1) {
            // Inspect the lowest bit, then shift the next one into place.
            tallies[bit] += (remaining & 1n) === 1n ? 1 : -1;
            remaining >>= 1n;
        }
    }
    // Assemble the fingerprint from the positive tallies.
    return tallies.reduce((fingerprint, tally, bit) => (tally > 0 ? fingerprint | (1n << BigInt(bit)) : fingerprint), 0n);
}
|
|
77
|
+
/**
 * Compute Hamming distance between two SimHash values.
 *
 * @param a - First hash.
 * @param b - Second hash.
 * @returns The number of bit positions in which a and b differ. When the
 *          operands have opposite signs the comparison is made over their
 *          64-bit two's-complement representation.
 */
function hammingDistance(a, b) {
    const raw = a ^ b;
    // A negative XOR (operands of opposite sign) carries an unbounded run of
    // sign bits, and a `> 0n` loop guard would report zero differences for it.
    // Reinterpret it as an unsigned 64-bit pattern so the set bits can be counted.
    let diff = raw < 0n ? BigInt.asUintN(64, raw) : raw;
    let count = 0;
    // Count set bits (Brian Kernighan's algorithm): each step clears the lowest set bit.
    while (diff !== 0n) {
        diff &= diff - 1n;
        count++;
    }
    return count;
}
|
|
91
|
+
/**
 * Decide whether two SimHash values are close enough to count as similar.
 *
 * @param a - First hash.
 * @param b - Second hash.
 * @param threshold - Largest Hamming distance still considered similar
 *                    (default 3, a commonly used value).
 * @returns true when the hashes differ in at most `threshold` bits.
 */
function isSimilar(a, b, threshold = 3) {
    const differingBits = hammingDistance(a, b);
    return differingBits <= threshold;
}
|
|
98
|
+
//# sourceMappingURL=simhash.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"simhash.js","sourceRoot":"","sources":["../src/simhash.ts"],"names":[],"mappings":";AAAA;;;;;;GAMG;;AA0CH,wCA8BC;AAMD,0CAWC;AAMD,8BAEC;AA/FD,8BAA8B;AAC9B,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB;;GAEG;AACH,SAAS,SAAS,CAAC,KAAa;IAC9B,IAAI,IAAI,GAAG,mBAAmB,CAAC,CAAC,mBAAmB;IACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,YAAY;IAChE,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,IAAY,EAAE,CAAC,GAAG,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IAClE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,UAAU,CAAC,CAAC;IACtB,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;GAQG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC9B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,+CAA+C;IAC/C,MAAM,MAAM,GAAa,IAAI,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE/C,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,wBAAwB;YACxB,IAAI,CAAC,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC;gBAC7B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;YACd,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,+BAA+B;IAC/B,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAgB,eAAe,CAAC,CAAS,EAAE,CAAS;IACl
D,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;IAChB,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,+CAA+C;IAC/C,OAAO,GAAG,GAAG,EAAE,EAAE,CAAC;QAChB,GAAG,IAAI,GAAG,GAAG,EAAE,CAAC;QAChB,KAAK,EAAE,CAAC;IACV,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;GAGG;AACH,SAAgB,SAAS,CAAC,CAAS,EAAE,CAAS,EAAE,SAAS,GAAG,CAAC;IAC3D,OAAO,eAAe,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,SAAS,CAAC;AAC5C,CAAC"}
|