@memograph/cli 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +402 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +97 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/detect.d.ts +30 -0
- package/dist/core/detect.d.ts.map +1 -0
- package/dist/core/detect.js +212 -0
- package/dist/core/detect.js.map +1 -0
- package/dist/core/extract.d.ts +6 -0
- package/dist/core/extract.d.ts.map +1 -0
- package/dist/core/extract.js +104 -0
- package/dist/core/extract.js.map +1 -0
- package/dist/core/inspect.d.ts +7 -0
- package/dist/core/inspect.d.ts.map +1 -0
- package/dist/core/inspect.js +98 -0
- package/dist/core/inspect.js.map +1 -0
- package/dist/core/llm/client.d.ts +55 -0
- package/dist/core/llm/client.d.ts.map +1 -0
- package/dist/core/llm/client.js +199 -0
- package/dist/core/llm/client.js.map +1 -0
- package/dist/core/llm/detect-llm.d.ts +28 -0
- package/dist/core/llm/detect-llm.d.ts.map +1 -0
- package/dist/core/llm/detect-llm.js +212 -0
- package/dist/core/llm/detect-llm.js.map +1 -0
- package/dist/core/llm/extract-llm.d.ts +27 -0
- package/dist/core/llm/extract-llm.d.ts.map +1 -0
- package/dist/core/llm/extract-llm.js +151 -0
- package/dist/core/llm/extract-llm.js.map +1 -0
- package/dist/core/llm/prompts.d.ts +28 -0
- package/dist/core/llm/prompts.d.ts.map +1 -0
- package/dist/core/llm/prompts.js +172 -0
- package/dist/core/llm/prompts.js.map +1 -0
- package/dist/core/llm/providers.d.ts +34 -0
- package/dist/core/llm/providers.d.ts.map +1 -0
- package/dist/core/llm/providers.js +169 -0
- package/dist/core/llm/providers.js.map +1 -0
- package/dist/core/load.d.ts +10 -0
- package/dist/core/load.d.ts.map +1 -0
- package/dist/core/load.js +106 -0
- package/dist/core/load.js.map +1 -0
- package/dist/core/normalize.d.ts +30 -0
- package/dist/core/normalize.d.ts.map +1 -0
- package/dist/core/normalize.js +63 -0
- package/dist/core/normalize.js.map +1 -0
- package/dist/core/render.d.ts +10 -0
- package/dist/core/render.d.ts.map +1 -0
- package/dist/core/render.js +60 -0
- package/dist/core/render.js.map +1 -0
- package/dist/core/score.d.ts +27 -0
- package/dist/core/score.d.ts.map +1 -0
- package/dist/core/score.js +59 -0
- package/dist/core/score.js.map +1 -0
- package/dist/core/types.d.ts +162 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +6 -0
- package/dist/core/types.js.map +1 -0
- package/dist/interactive/index.d.ts +67 -0
- package/dist/interactive/index.d.ts.map +1 -0
- package/dist/interactive/index.js +794 -0
- package/dist/interactive/index.js.map +1 -0
- package/dist/interactive/settings.d.ts +36 -0
- package/dist/interactive/settings.d.ts.map +1 -0
- package/dist/interactive/settings.js +174 -0
- package/dist/interactive/settings.js.map +1 -0
- package/dist/interactive/wizard.d.ts +10 -0
- package/dist/interactive/wizard.d.ts.map +1 -0
- package/dist/interactive/wizard.js +249 -0
- package/dist/interactive/wizard.js.map +1 -0
- package/package.json +49 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt templates for LLM-based analysis
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* System prompt for fact extraction
|
|
6
|
+
*/
|
|
7
|
+
export declare const FACT_EXTRACTION_SYSTEM = "You are an expert at extracting important facts and preferences from conversation transcripts.\nYour task is to identify and extract:\n1. Identity information (name, role, etc.)\n2. User preferences (language, tone, format, etc.)\n3. Context information (goals, constraints, background)\n4. Any other facts that should be remembered\n\nFor each fact, provide:\n- A key (category:attribute format, e.g., \"identity:name\", \"pref:language\")\n- The value\n- Confidence score (0.0-1.0)\n- A brief justification\n\nRespond ONLY with valid JSON in the following format:\n{\n \"facts\": [\n {\n \"fact_key\": \"category:attribute\",\n \"fact_value\": \"value\",\n \"confidence\": 0.9,\n \"justification\": \"brief explanation\"\n }\n ]\n}";
|
|
8
|
+
/**
 * Builds the user prompt for fact extraction from a list of messages.
 *
 * @param messages - Messages already rendered as strings; the implementation
 *   joins them with a `---` separator line to form the transcript section.
 * @returns The prompt text: extraction instructions, the joined transcript,
 *   and a closing request for JSON containing the facts array.
 */
export declare function createFactExtractionPrompt(messages: string[]): string;
|
|
12
|
+
/**
 * Builds the user prompt for fact extraction from unstructured text.
 *
 * Used when the input is not split into messages; the prompt tells the model
 * the content may not be structured and asks it to infer user facts.
 *
 * @param rawText - Transcript text, embedded verbatim in the prompt.
 * @returns The prompt text, ending with a request for JSON containing the facts array.
 */
export declare function createFactExtractionPromptFromRaw(rawText: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* System prompt for drift detection
|
|
18
|
+
*/
|
|
19
|
+
export declare const DRIFT_DETECTION_SYSTEM = "You are an expert at analyzing conversation transcripts for memory drift issues.\nMemory drift occurs when an AI assistant forgets or loses track of important information.\n\nYour task is to detect:\n1. Repetition clusters: User repeating similar requests\n2. Session reset: Assistant indicating starting over or forgetting\n3. Preference forgotten: User restating preferences that should have been remembered\n4. Contradictions: Conflicting information about the same fact\n5. Context loss: Assistant losing important contextual information\n6. Inconsistent behavior: Assistant acting inconsistently with stated preferences\n\nFor each detected event, provide:\n- Event type\n- Severity (1-5, higher is worse)\n- Confidence (0.0-1.0)\n- Message indices involved\n- Evidence/snippets\n- Human-readable summary\n\nAdditionally, calculate an overall drift_score (0-100):\n- 0-20: Excellent memory retention, no issues\n- 21-40: Minor drift, occasional repetition\n- 41-60: Moderate drift, noticeable issues\n- 61-80: Significant drift, frequent problems\n- 81-100: Severe drift, major memory failures\n\nRespond ONLY with valid JSON in the following format:\n{\n \"drift_score\": 0-100,\n \"events\": [\n {\n \"type\": \"repetition_cluster|session_reset|preference_forgotten|contradiction|context_loss|inconsistent_behavior\",\n \"severity\": 1-5,\n \"confidence\": 0.0-1.0,\n \"msg_idxs\": [0, 1, 2],\n \"snippets\": [\"text snippets\"],\n \"summary\": \"human-readable summary\",\n \"details\": {} // additional details specific to event type\n }\n ]\n}";
|
|
20
|
+
/**
 * Builds the user prompt for drift detection from a list of messages.
 *
 * @param messages - Messages already rendered as strings; the implementation
 *   joins them with a `---` separator line. The prompt text describes them as
 *   numbered from 0, but this function does not add numbers itself.
 * @param facts - Previously extracted facts as text; when falsy (e.g. an empty
 *   string) the prompt shows "No facts extracted" instead.
 * @returns The prompt text listing the six drift event types to look for.
 */
export declare function createDriftDetectionPrompt(messages: string[], facts: string): string;
|
|
24
|
+
/**
 * Builds the user prompt for drift detection from unstructured text.
 *
 * @param rawText - Transcript text, embedded verbatim in the prompt.
 * @param facts - Previously extracted facts as text; when falsy (e.g. an empty
 *   string) the prompt shows "No facts extracted" instead.
 * @returns The prompt text; it tells the model to use an empty `msg_idxs`
 *   array when message indices are unknown.
 */
export declare function createDriftDetectionPromptFromRaw(rawText: string, facts: string): string;
|
|
28
|
+
//# sourceMappingURL=prompts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../src/core/llm/prompts.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,eAAO,MAAM,sBAAsB,8vBAuBjC,CAAC;AAEH;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM,CAgBrE;AAED;;GAEG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAezE;AAED;;GAEG;AACH,eAAO,MAAM,sBAAsB,kkDAwCjC,CAAC;AAEH;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAqBpF;AAED;;GAEG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAsBxF"}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Prompt templates for LLM-based analysis
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.DRIFT_DETECTION_SYSTEM = exports.FACT_EXTRACTION_SYSTEM = void 0;
|
|
7
|
+
exports.createFactExtractionPrompt = createFactExtractionPrompt;
|
|
8
|
+
exports.createFactExtractionPromptFromRaw = createFactExtractionPromptFromRaw;
|
|
9
|
+
exports.createDriftDetectionPrompt = createDriftDetectionPrompt;
|
|
10
|
+
exports.createDriftDetectionPromptFromRaw = createDriftDetectionPromptFromRaw;
|
|
11
|
+
/**
 * System prompt for fact extraction.
 *
 * Tells the model which kinds of facts to extract (identity, preferences,
 * context, anything else worth remembering), which fields to report per fact,
 * and to answer only with JSON shaped as
 * `{ "facts": [{ fact_key, fact_value, confidence, justification }] }`.
 * The text is sent to the model verbatim, so whitespace inside the template
 * is part of the prompt.
 */
exports.FACT_EXTRACTION_SYSTEM = `You are an expert at extracting important facts and preferences from conversation transcripts.
Your task is to identify and extract:
1. Identity information (name, role, etc.)
2. User preferences (language, tone, format, etc.)
3. Context information (goals, constraints, background)
4. Any other facts that should be remembered

For each fact, provide:
- A key (category:attribute format, e.g., "identity:name", "pref:language")
- The value
- Confidence score (0.0-1.0)
- A brief justification

Respond ONLY with valid JSON in the following format:
{
  "facts": [
    {
      "fact_key": "category:attribute",
      "fact_value": "value",
      "confidence": 0.9,
      "justification": "brief explanation"
    }
  ]
}`;
|
|
38
|
+
/**
 * Build the user prompt that asks the model to extract facts from a
 * message-structured transcript.
 *
 * @param messages Rendered message strings; joined with a `---` separator line.
 * @returns The complete prompt text.
 */
function createFactExtractionPrompt(messages) {
    const messageSeparator = '\n---\n';
    const instructions = [
        'Extract all important facts and preferences from the following conversation transcript.',
        '',
        'Only extract from USER messages. Ignore assistant messages for fact extraction.',
        '',
        'Focus on:',
        '- Identity (name, role, etc.)',
        '- Preferences (language, tone, format, style)',
        '- Context (goals, constraints, requirements)',
        '- Important facts that should be remembered',
        '',
        'Transcript:',
    ].join('\n');
    const closing = 'Respond with JSON containing the facts array.';
    // String.prototype.concat applies the same ToString coercion a template
    // literal would, so the joined transcript is embedded unchanged.
    return instructions.concat('\n', messages.join(messageSeparator), '\n\n', closing);
}
|
|
58
|
+
/**
 * Build the user prompt that asks the model to extract facts from
 * unstructured transcript text.
 *
 * @param rawText Transcript text, embedded verbatim.
 * @returns The complete prompt text.
 */
function createFactExtractionPromptFromRaw(rawText) {
    const instructions = [
        'Extract all important facts and preferences from the following transcript text.',
        '',
        'The content may not be structured as messages. Infer user facts and preferences from the text.',
        '',
        'Focus on:',
        '- Identity (name, role, etc.)',
        '- Preferences (language, tone, format, style)',
        '- Context (goals, constraints, requirements)',
        '- Important facts that should be remembered',
        '',
        'Transcript text:',
    ].join('\n');
    const closing = 'Respond with JSON containing the facts array.';
    // concat() coerces rawText with ToString, exactly like `${rawText}` would.
    return instructions.concat('\n', rawText, '\n\n', closing);
}
|
|
77
|
+
/**
 * System prompt for drift detection.
 *
 * Defines "memory drift", lists the six event types the model should detect,
 * the fields to report per event, the 0-100 `drift_score` bands, and the
 * response shape `{ "drift_score", "events": [...] }`.
 * The text is sent to the model verbatim, so whitespace inside the template
 * is part of the prompt.
 *
 * NOTE(review): the prompt demands "valid JSON", but the example below is a
 * schema sketch rather than parseable JSON — it uses range placeholders
 * (`0-100`, `1-5`, `0.0-1.0`) and a trailing `//` comment on the "details"
 * line. Confirm the response parser copes if a model echoes those literally.
 */
exports.DRIFT_DETECTION_SYSTEM = `You are an expert at analyzing conversation transcripts for memory drift issues.
Memory drift occurs when an AI assistant forgets or loses track of important information.

Your task is to detect:
1. Repetition clusters: User repeating similar requests
2. Session reset: Assistant indicating starting over or forgetting
3. Preference forgotten: User restating preferences that should have been remembered
4. Contradictions: Conflicting information about the same fact
5. Context loss: Assistant losing important contextual information
6. Inconsistent behavior: Assistant acting inconsistently with stated preferences

For each detected event, provide:
- Event type
- Severity (1-5, higher is worse)
- Confidence (0.0-1.0)
- Message indices involved
- Evidence/snippets
- Human-readable summary

Additionally, calculate an overall drift_score (0-100):
- 0-20: Excellent memory retention, no issues
- 21-40: Minor drift, occasional repetition
- 41-60: Moderate drift, noticeable issues
- 61-80: Significant drift, frequent problems
- 81-100: Severe drift, major memory failures

Respond ONLY with valid JSON in the following format:
{
  "drift_score": 0-100,
  "events": [
    {
      "type": "repetition_cluster|session_reset|preference_forgotten|contradiction|context_loss|inconsistent_behavior",
      "severity": 1-5,
      "confidence": 0.0-1.0,
      "msg_idxs": [0, 1, 2],
      "snippets": ["text snippets"],
      "summary": "human-readable summary",
      "details": {} // additional details specific to event type
    }
  ]
}`;
|
|
121
|
+
/**
 * Build the user prompt for drift detection over a message-structured
 * transcript.
 *
 * The closing instruction asks for both `drift_score` and `events`, matching
 * the response format the drift-detection system prompt requires (it
 * previously mentioned only the events array).
 *
 * @param messages Rendered message strings; joined with a `---` separator line.
 * @param facts Previously extracted facts as text; a falsy value (e.g. an
 *   empty string) is replaced by "No facts extracted".
 * @returns The complete prompt text.
 */
function createDriftDetectionPrompt(messages, facts) {
    // NOTE(review): the prompt says messages are numbered from 0, but no
    // numbering is added here — presumably callers pre-format each message
    // with its index; confirm against the call site.
    const transcript = messages.join('\n---\n');
    return `Analyze the following conversation transcript for memory drift events.

Transcript (each message is numbered starting from 0):
${transcript}

Important facts extracted from the conversation:
${facts || "No facts extracted"}

Look for:
1. User repeating similar requests (repetition_cluster)
2. Assistant indicating starting over or forgetting (session_reset)
3. User restating preferences that were previously stated (preference_forgotten)
4. Contradictions in information (contradiction)
5. Assistant losing important context (context_loss)
6. Assistant acting inconsistently with stated preferences (inconsistent_behavior)

Provide message indices (0-based) for each event.

Respond with JSON containing the drift_score and the events array.`;
}
|
|
146
|
+
/**
 * Build the user prompt for drift detection over unstructured transcript
 * text.
 *
 * The closing instruction asks for both `drift_score` and `events`, matching
 * the response format the drift-detection system prompt requires (it
 * previously mentioned only the events array).
 *
 * @param rawText Transcript text, embedded verbatim.
 * @param facts Previously extracted facts as text; a falsy value (e.g. an
 *   empty string) is replaced by "No facts extracted".
 * @returns The complete prompt text.
 */
function createDriftDetectionPromptFromRaw(rawText, facts) {
    return `Analyze the following transcript text for memory drift events.

The content may not be structured as messages. Use best judgment to identify drift patterns.

Transcript text:
${rawText}

Important facts extracted from the transcript:
${facts || "No facts extracted"}

Look for:
1. User repeating similar requests (repetition_cluster)
2. Assistant indicating starting over or forgetting (session_reset)
3. User restating preferences that were previously stated (preference_forgotten)
4. Contradictions in information (contradiction)
5. Assistant losing important context (context_loss)
6. Assistant acting inconsistently with stated preferences (inconsistent_behavior)

If message indices are unknown, set msg_idxs to an empty array.

Respond with JSON containing the drift_score and the events array.`;
}
|
|
172
|
+
//# sourceMappingURL=prompts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../../src/core/llm/prompts.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAiCH,gEAgBC;AAKD,8EAeC;AAkDD,gEAqBC;AAKD,8EAsBC;AArKD;;GAEG;AACU,QAAA,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;EAuBpC,CAAC;AAEH;;GAEG;AACH,SAAgB,0BAA0B,CAAC,QAAkB;IAC3D,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC5C,OAAO;;;;;;;;;;;EAWP,UAAU;;8CAEkC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,SAAgB,iCAAiC,CAAC,OAAe;IAC/D,OAAO;;;;;;;;;;;EAWP,OAAO;;8CAEqC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACU,QAAA,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwCpC,CAAC;AAEH;;GAEG;AACH,SAAgB,0BAA0B,CAAC,QAAkB,EAAE,KAAa;IAC1E,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC5C,OAAO;;;EAGP,UAAU;;;EAGV,KAAK,IAAI,oBAAoB;;;;;;;;;;;;+CAYgB,CAAC;AAChD,CAAC;AAED;;GAEG;AACH,SAAgB,iCAAiC,CAAC,OAAe,EAAE,KAAa;IAC9E,OAAO;;;;;EAKP,OAAO;;;EAGP,KAAK,IAAI,oBAAoB;;;;;;;;;;;;+CAYgB,CAAC;AAChD,CAAC"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Provider Registry
|
|
3
|
+
* Maps all supported providers with their configurations
|
|
4
|
+
*/
|
|
5
|
+
/** Keys of every provider in the {@link PROVIDERS} registry. */
export type LLMProvider = "openai" | "anthropic" | "gemini" | "mistral" | "cohere" | "xai" | "perplexity" | "openrouter" | "together" | "groq" | "fireworks" | "deepseek" | "ollama" | "lmstudio" | "vllm" | "localai" | "openai_compatible";
|
|
6
|
+
/** Static configuration describing one LLM provider. */
export interface ProviderInfo {
    /** Human-readable name; `getAllProviderLabels` returns these for display. */
    label: string;
    /** Grouping used by `getProvidersByCategory`. */
    category: "cloud" | "aggregator" | "local";
    /** Default endpoint URL; the custom `openai_compatible` entry has none. */
    defaultBaseUrl?: string;
    /** Presumably whether an API key must be supplied — false for the local/self-hosted entries. */
    needsApiKey: boolean;
    /** Model names listed for the provider; only some cloud entries define them. */
    modelPresets?: string[];
    /** Flag read by `isOpenAICompatible`; treated as false when absent. */
    openAICompatible?: boolean;
}
|
|
14
|
+
/**
 * Provider catalog: one {@link ProviderInfo} entry per {@link LLMProvider} key,
 * covering direct cloud providers, aggregators/routers, local or self-hosted
 * servers, and a custom OpenAI-compatible endpoint.
 */
export declare const PROVIDERS: Record<LLMProvider, ProviderInfo>;
|
|
18
|
+
/**
 * Looks up a provider's catalog entry by key.
 *
 * @param provider - Provider key.
 * @returns The matching entry from {@link PROVIDERS}. The implementation is a
 *   plain property lookup, so a key outside {@link LLMProvider} yields
 *   `undefined` at runtime despite the declared return type.
 */
export declare function getProviderInfo(provider: LLMProvider): ProviderInfo;
|
|
22
|
+
/**
 * Reports whether a provider is flagged as OpenAI-compatible.
 *
 * @param provider - Provider key.
 * @returns The entry's `openAICompatible` flag, or `false` when the flag or
 *   the entry itself is missing.
 */
export declare function isOpenAICompatible(provider: LLMProvider): boolean;
|
|
26
|
+
/**
 * Lists the provider keys whose entry has the given category.
 *
 * @param category - Category to match exactly.
 * @returns Matching keys in catalog order; empty when none match.
 */
export declare function getProvidersByCategory(category: "cloud" | "aggregator" | "local"): LLMProvider[];
|
|
30
|
+
/**
 * Builds a map from every provider key to its display label.
 *
 * @returns A new object with one `key -> label` pair per catalog entry.
 */
export declare function getAllProviderLabels(): Record<LLMProvider, string>;
|
|
34
|
+
//# sourceMappingURL=providers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/core/llm/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,WAAW,GACnB,QAAQ,GACR,WAAW,GACX,QAAQ,GACR,SAAS,GACT,QAAQ,GACR,KAAK,GACL,YAAY,GACZ,YAAY,GACZ,UAAU,GACV,MAAM,GACN,WAAW,GACX,UAAU,GACV,QAAQ,GACR,UAAU,GACV,MAAM,GACN,SAAS,GACT,mBAAmB,CAAC;AAExB,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,GAAG,YAAY,GAAG,OAAO,CAAC;IAC3C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,OAAO,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,eAAO,MAAM,SAAS,EAAE,MAAM,CAAC,WAAW,EAAE,YAAY,CA+IvD,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,WAAW,GAAG,YAAY,CAEnE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAEjE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,OAAO,GAAG,YAAY,GAAG,OAAO,GAAG,WAAW,EAAE,CAIhG;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAIlE"}
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* LLM Provider Registry
|
|
4
|
+
* Maps all supported providers with their configurations
|
|
5
|
+
*/
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.PROVIDERS = void 0;
|
|
8
|
+
exports.getProviderInfo = getProviderInfo;
|
|
9
|
+
exports.isOpenAICompatible = isOpenAICompatible;
|
|
10
|
+
exports.getProvidersByCategory = getProvidersByCategory;
|
|
11
|
+
exports.getAllProviderLabels = getAllProviderLabels;
|
|
12
|
+
/**
 * Provider catalog with all supported LLM providers.
 *
 * Each entry carries a display label, a category, whether an API key is
 * needed, and optionally a default base URL, model presets, and an
 * `openAICompatible` flag. Entries without the flag (anthropic, gemini,
 * cohere) are reported as not OpenAI-compatible by `isOpenAICompatible`.
 */
exports.PROVIDERS = {
    // Cloud Providers (Direct)
    openai: {
        label: "OpenAI",
        category: "cloud",
        defaultBaseUrl: "https://api.openai.com/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["gpt-4o-mini", "gpt-4o", "gpt-4.1-mini"],
    },
    anthropic: {
        label: "Anthropic (Claude)",
        category: "cloud",
        defaultBaseUrl: "https://api.anthropic.com",
        needsApiKey: true,
        // NOTE(review): Anthropic model IDs are usually dated or "-latest"
        // aliases — confirm these presets resolve as written.
        modelPresets: ["claude-3-5-sonnet", "claude-3-5-haiku"],
    },
    gemini: {
        label: "Google Gemini",
        category: "cloud",
        defaultBaseUrl: "https://generativelanguage.googleapis.com",
        needsApiKey: true,
        modelPresets: ["gemini-1.5-flash", "gemini-1.5-pro"],
    },
    mistral: {
        label: "Mistral",
        category: "cloud",
        defaultBaseUrl: "https://api.mistral.ai/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["mistral-small", "mistral-large"],
    },
    cohere: {
        label: "Cohere",
        category: "cloud",
        defaultBaseUrl: "https://api.cohere.com",
        needsApiKey: true,
        modelPresets: ["command-r", "command-r-plus"],
    },
    xai: {
        label: "xAI (Grok)",
        category: "cloud",
        defaultBaseUrl: "https://api.x.ai/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["grok-2", "grok-2-mini"],
    },
    perplexity: {
        label: "Perplexity",
        category: "cloud",
        defaultBaseUrl: "https://api.perplexity.ai",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["sonar", "sonar-pro"],
    },
    // Aggregators / Routers (OpenAI-compatible)
    // None of these entries define modelPresets.
    openrouter: {
        label: "OpenRouter",
        category: "aggregator",
        defaultBaseUrl: "https://openrouter.ai/api/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    together: {
        label: "Together.ai",
        category: "aggregator",
        defaultBaseUrl: "https://api.together.xyz/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    groq: {
        label: "Groq",
        category: "aggregator",
        defaultBaseUrl: "https://api.groq.com/openai/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    fireworks: {
        label: "Fireworks.ai",
        category: "aggregator",
        defaultBaseUrl: "https://api.fireworks.ai/inference/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    deepseek: {
        label: "DeepSeek",
        category: "aggregator",
        defaultBaseUrl: "https://api.deepseek.com",
        needsApiKey: true,
        openAICompatible: true,
    },
    // Local / Self-hosted
    // All default to a localhost URL and are marked as not needing an API key.
    ollama: {
        label: "Ollama (Local)",
        category: "local",
        defaultBaseUrl: "http://localhost:11434/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    lmstudio: {
        label: "LM Studio (Local)",
        category: "local",
        defaultBaseUrl: "http://localhost:1234/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    vllm: {
        label: "vLLM (Self-hosted)",
        category: "local",
        defaultBaseUrl: "http://localhost:8000/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    localai: {
        label: "LocalAI (Self-hosted)",
        category: "local",
        defaultBaseUrl: "http://localhost:8080/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    // Custom OpenAI-compatible endpoint
    // No defaultBaseUrl here: this is the only entry without one.
    openai_compatible: {
        label: "Custom (OpenAI-compatible)",
        category: "aggregator",
        needsApiKey: true,
        openAICompatible: true,
    },
};
|
|
143
|
+
/**
 * Look up the catalog entry for a provider key.
 *
 * @param provider Provider key.
 * @returns The entry stored under that key, or `undefined` if there is none.
 */
function getProviderInfo(provider) {
    const { PROVIDERS: catalog } = exports;
    return catalog[provider];
}
|
|
149
|
+
/**
 * Tell whether a provider is flagged as OpenAI-compatible.
 *
 * @param provider Provider key.
 * @returns The entry's `openAICompatible` value when truthy, otherwise `false`
 *   (also when the provider has no catalog entry).
 */
function isOpenAICompatible(provider) {
    const entry = exports.PROVIDERS[provider];
    if (entry === null || entry === undefined) {
        return false;
    }
    const flag = entry.openAICompatible;
    return flag ? flag : false;
}
|
|
155
|
+
/**
 * Collect the keys of all providers belonging to one category.
 *
 * @param category Category to match exactly.
 * @returns Matching provider keys in catalog order.
 */
function getProvidersByCategory(category) {
    const matchingKeys = [];
    for (const [providerKey, entry] of Object.entries(exports.PROVIDERS)) {
        if (entry.category === category) {
            matchingKeys.push(providerKey);
        }
    }
    return matchingKeys;
}
|
|
163
|
+
/**
 * Produce a key -> label map covering every provider in the catalog.
 *
 * @returns A fresh object with one label per provider key.
 */
function getAllProviderLabels() {
    const labelPairs = [];
    for (const [providerKey, entry] of Object.entries(exports.PROVIDERS)) {
        labelPairs.push([providerKey, entry.label]);
    }
    return Object.fromEntries(labelPairs);
}
|
|
169
|
+
//# sourceMappingURL=providers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../../../src/core/llm/providers.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAqLH,0CAEC;AAKD,gDAEC;AAKD,wDAIC;AAKD,oDAIC;AAlLD;;GAEG;AACU,QAAA,SAAS,GAAsC;IAC1D,2BAA2B;IAC3B,MAAM,EAAE;QACN,KAAK,EAAE,QAAQ;QACf,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC;KACxD;IAED,SAAS,EAAE;QACT,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,mBAAmB,EAAE,kBAAkB,CAAC;KACxD;IAED,MAAM,EAAE;QACN,KAAK,EAAE,eAAe;QACtB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2CAA2C;QAC3D,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;KACrD;IAED,OAAO,EAAE;QACP,KAAK,EAAE,SAAS;QAChB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,eAAe,EAAE,eAAe,CAAC;KACjD;IAED,MAAM,EAAE;QACN,KAAK,EAAE,QAAQ;QACf,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,wBAAwB;QACxC,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,WAAW,EAAE,gBAAgB,CAAC;KAC9C;IAED,GAAG,EAAE;QACH,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC;KACxC;IAED,UAAU,EAAE;QACV,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACrC;IAED,4CAA4C;IAC5C,UAAU,EAAE;QACV,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,8BAA8B;QAC9C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,aAAa;QACpB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,6BAA6B;QAC7C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,IAAI,EAAE;QACJ,KAAK,EAAE,MAAM;QACb,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,gCAAgC;QAChD,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,SAAS,EAAE;QACT,KAAK,EAAE,cAAc;QACrB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,uCAAuC;QACvD,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,UAAU;QACjB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,IAAI;Q
ACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,sBAAsB;IACtB,MAAM,EAAE;QACN,KAAK,EAAE,gBAAgB;QACvB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,mBAAmB;QAC1B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,IAAI,EAAE;QACJ,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,OAAO,EAAE;QACP,KAAK,EAAE,uBAAuB;QAC9B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,oCAAoC;IACpC,iBAAiB,EAAE;QACjB,KAAK,EAAE,4BAA4B;QACnC,QAAQ,EAAE,YAAY;QACtB,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;CACF,CAAC;AAEF;;GAEG;AACH,SAAgB,eAAe,CAAC,QAAqB;IACnD,OAAO,iBAAS,CAAC,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,QAAqB;IACtD,OAAO,iBAAS,CAAC,QAAQ,CAAC,EAAE,gBAAgB,IAAI,KAAK,CAAC;AACxD,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB,CAAC,QAA0C;IAC/E,OAAO,MAAM,CAAC,OAAO,CAAC,iBAAS,CAAC;SAC7B,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC;SACjD,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAkB,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB;IAClC,OAAO,MAAM,CAAC,WAAW,CACvB,MAAM,CAAC,OAAO,CAAC,iBAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,GAAkB,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAClD,CAAC;AACnC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { Transcript } from './types.js';
|
|
2
|
+
/**
 * Loads a transcript file and normalizes it.
 *
 * The file is read as UTF-8 and parsed as JSON. If it is not valid JSON, or
 * the JSON is not a messages array / `{ messages: [...] }` object, the
 * implementation falls back to a "raw" transcript: `schema_version` is
 * `'raw'`, `messages` is empty, and the file text is returned in `raw_text`.
 *
 * @param path - Path of the transcript file.
 * @param maxMessages - When positive, keep only the first `maxMessages` messages.
 * @throws Error with message `Transcript file not found: <path>` when the
 *   file does not exist; other read errors are rethrown unchanged.
 */
export declare function loadTranscript(path: string, maxMessages?: number): Promise<Transcript>;
|
|
6
|
+
/**
 * Normalizes already-parsed input into the canonical Transcript format.
 *
 * Accepts either a bare array of messages (schema version `'1.0'`) or an
 * object with a `messages` array (its `schema_version`, defaulting to `'1.0'`).
 * Logs a warning for an empty message list.
 *
 * @param raw - Parsed transcript data.
 * @param maxMessages - When positive, keep only the first `maxMessages` messages.
 * @throws Error whose message starts with `Invalid transcript` when `raw` has
 *   neither shape.
 */
export declare function normalizeTranscript(raw: any, maxMessages?: number): Transcript;
|
|
10
|
+
//# sourceMappingURL=load.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load.d.ts","sourceRoot":"","sources":["../../src/core/load.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAqB,MAAM,YAAY,CAAC;AAE3D;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CA2B5F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,UAAU,CAiC9E"}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.loadTranscript = loadTranscript;
|
|
4
|
+
exports.normalizeTranscript = normalizeTranscript;
|
|
5
|
+
const promises_1 = require("fs/promises");
|
|
6
|
+
/**
 * Load and normalize a transcript from a JSON file.
 *
 * The file is read exactly once. If its content is not valid JSON, or the
 * JSON does not have a recognised transcript shape, the same content is
 * returned as a "raw" transcript (`schema_version: 'raw'`, no messages,
 * text in `raw_text`) instead of being read from disk a second time.
 *
 * @param path Path of the transcript file.
 * @param maxMessages When positive, keep only the first `maxMessages` messages.
 * @returns The normalized transcript, or the raw-text fallback.
 * @throws Error `Transcript file not found: <path>` when the file does not
 *   exist; any other read or normalization error is rethrown unchanged.
 */
async function loadTranscript(path, maxMessages) {
    let content;
    try {
        content = await (0, promises_1.readFile)(path, 'utf-8');
    }
    catch (error) {
        // Optional chaining: a thrown null/undefined must not mask the real failure.
        if (error?.code === 'ENOENT') {
            throw new Error(`Transcript file not found: ${path}`);
        }
        throw error;
    }
    try {
        // readFile keeps a leading UTF-8 BOM and JSON.parse rejects it, which
        // would push a perfectly valid JSON transcript into raw mode.
        const jsonText = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;
        return normalizeTranscript(JSON.parse(jsonText), maxMessages);
    }
    catch (error) {
        const notJson = error instanceof SyntaxError;
        const wrongShape = error instanceof Error && error.message.includes('Invalid transcript');
        if (notJson || wrongShape) {
            // Fall back to raw-text mode using the content already in memory.
            return {
                schema_version: 'raw',
                messages: [],
                raw_text: content,
            };
        }
        throw error;
    }
}
|
|
38
|
+
/**
 * Convert parsed input into the canonical Transcript shape.
 *
 * Accepts a bare array of messages or an object carrying a `messages` array.
 *
 * @param raw Parsed transcript data.
 * @param maxMessages When positive, keep only the first `maxMessages` messages.
 * @returns `{ schema_version, messages }` with every message normalized.
 * @throws Error when `raw` is neither an array nor `{ messages: [...] }`.
 */
function normalizeTranscript(raw, maxMessages) {
    const isBareArray = Array.isArray(raw);
    if (!isBareArray && !(raw && Array.isArray(raw.messages))) {
        throw new Error('Invalid transcript: expected messages array or { messages: [...] }');
    }
    // A bare array carries no version information, so it is always '1.0'.
    let entries = isBareArray ? raw : raw.messages;
    const schema_version = isBareArray ? '1.0' : (raw.schema_version || '1.0');
    if (entries.length === 0) {
        console.warn('Warning: Empty transcript (no messages)');
    }
    // Truncate before normalizing so skipped messages cost nothing.
    const hasLimit = Boolean(maxMessages && maxMessages > 0);
    if (hasLimit) {
        entries = entries.slice(0, maxMessages);
    }
    return {
        schema_version,
        messages: entries.map((entry, position) => normalizeMessage(entry, position)),
    };
}
|
|
72
|
+
/**
 * Normalize a single message.
 *
 * - A missing or non-finite `idx` falls back to the array position.
 * - `role` is coerced by `normalizeRole`.
 * - `content` is turned into text without ever producing "[object Object]".
 * - A missing, non-finite or negative `tokens` is estimated as chars / 4.
 * - `ts`, `session_id` and `metadata` are copied only when truthy.
 *
 * Entries that are not objects are tolerated: a primitive (e.g. a bare
 * string) is treated as the message content, and null/undefined becomes an
 * empty message, instead of throwing or silently dropping the text.
 *
 * @param msg Raw message entry.
 * @param arrayIdx Position of the entry in the transcript array.
 * @returns The normalized message object.
 */
function normalizeMessage(msg, arrayIdx) {
    const source = msg !== null && typeof msg === 'object' ? msg : { content: msg };
    // Number.isFinite also rejects NaN/Infinity, which `typeof === 'number'` lets through.
    const idx = Number.isFinite(source.idx) ? source.idx : arrayIdx;
    const role = normalizeRole(source.role);
    const content = stringifyMessageContent(source.content);
    // Rough approximation when no usable count is supplied: chars / 4.
    const hasTokenCount = Number.isFinite(source.tokens) && source.tokens >= 0;
    const tokens = hasTokenCount ? source.tokens : Math.ceil(content.length / 4);
    return {
        idx,
        role,
        content,
        tokens,
        ...(source.ts && { ts: source.ts }),
        ...(source.session_id && { session_id: source.session_id }),
        ...(source.metadata && { metadata: source.metadata }),
    };
}
/**
 * Turn message content into text. Strings pass through, falsy values become
 * '' (as before), and an array of content parts is joined line by line.
 */
function stringifyMessageContent(value) {
    if (typeof value === 'string') {
        return value;
    }
    if (!value) {
        return '';
    }
    if (Array.isArray(value)) {
        return value.map(contentPartToText).join('\n');
    }
    return contentPartToText(value);
}
/**
 * Render one content part: its `text` field when it has one, JSON for any
 * other object, and String() for primitives.
 */
function contentPartToText(part) {
    if (typeof part === 'string') {
        return part;
    }
    if (part === null || part === undefined) {
        return '';
    }
    if (typeof part !== 'object') {
        return String(part);
    }
    if (typeof part.text === 'string') {
        return part.text;
    }
    try {
        const json = JSON.stringify(part);
        if (typeof json === 'string') {
            return json;
        }
    }
    catch {
        // Circular structures or BigInt values cannot be serialized; fall through.
    }
    return String(part);
}
|
|
94
|
+
/**
 * Coerce an arbitrary role value to one of the supported roles.
 *
 * The value is stringified, stripped of surrounding whitespace, and
 * lowercased before matching, so inputs such as "Assistant" or " system\n"
 * resolve to their canonical role instead of being misclassified.
 *
 * @param role - Role value from the raw message; may be missing or non-string.
 * @returns 'system', 'user', 'assistant', or 'tool'. Missing, falsy, or
 *   unrecognized roles resolve to 'user'.
 */
function normalizeRole(role) {
    const validRoles = ['system', 'user', 'assistant', 'tool'];
    const candidate = String(role || 'user').trim().toLowerCase();
    // Default to 'user' for unknown roles
    return validRoles.includes(candidate) ? candidate : 'user';
}
|
|
106
|
+
//# sourceMappingURL=load.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"load.js","sourceRoot":"","sources":["../../src/core/load.ts"],"names":[],"mappings":";;AAMA,wCA2BC;AAKD,kDAiCC;AAvED,0CAAuC;AAGvC;;GAEG;AACI,KAAK,UAAU,cAAc,CAAC,IAAY,EAAE,WAAoB;IACrE,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAChC,OAAO,mBAAmB,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC9C,OAAO;gBACL,cAAc,EAAE,KAAK;gBACrB,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,OAAO;aAClB,CAAC;QACJ,CAAC;QACD,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC3E,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC9C,OAAO;gBACL,cAAc,EAAE,KAAK;gBACrB,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,OAAO;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,GAAQ,EAAE,WAAoB;IAChE,wDAAwD;IACxD,IAAI,QAAe,CAAC;IACpB,IAAI,cAAsB,CAAC;IAE3B,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,QAAQ,GAAG,GAAG,CAAC;QACf,cAAc,GAAG,KAAK,CAAC;IACzB,CAAC;SAAM,IAAI,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9C,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;QACxB,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,KAAK,CAAC;IAC/C,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,oEAAoE,CAAC,CAAC;IACxF,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAC1D,CAAC;IAED,2BAA2B;IAC3B,IAAI,WAAW,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;QACnC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAC5C,CAAC;IAED,yBAAyB;IACzB,MAAM,UAAU,GAAwB,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;QACrE,OAAO,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,cAAc;QACd,QAAQ,EAAE,UAAU;KACrB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAQ,EAAE,QAAgB;IAClD,6BAA6B;IAC7B,MAAM,GAAG,GAAG,OAAO,GAAG,CAAC,G
AAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE7D,6BAA6B;IAC7B,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAErC,oCAAoC;IACpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IAE1F,8DAA8D;IAC9D,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE3F,OAAO;QACL,GAAG;QACH,IAAI;QACJ,OAAO;QACP,MAAM;QACN,GAAG,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC;QAC7B,GAAG,CAAC,GAAG,CAAC,UAAU,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC;QACrD,GAAG,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC;KAChD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAS;IAC9B,MAAM,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAC3D,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;IAExD,IAAI,UAAU,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QACpC,OAAO,UAAsD,CAAC;IAChE,CAAC;IAED,sCAAsC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text normalization and similarity utilities
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Normalize text for comparison.
 *
 * The result is lowercased, stripped of leading/trailing whitespace, has
 * runs of whitespace collapsed to a single space, and has characters that
 * are neither word characters nor whitespace removed.
 *
 * @param text - Text to normalize.
 * @returns The normalized text.
 */
|
|
11
|
+
export declare function normalizeText(text: string): string;
|
|
12
|
+
/**
 * Tokenize text into words by splitting on runs of whitespace.
 * Empty tokens are dropped, so blank or whitespace-only input yields [].
 *
 * @param text - Text to split.
 * @returns The non-empty tokens in their original order.
 */
|
|
15
|
+
export declare function tokenize(text: string): string[];
|
|
16
|
+
/**
 * Create a signature from the first N tokens, joined by single spaces.
 * Used for bucketing to enable O(n) similarity detection.
 *
 * @param tokens - Token list to take the prefix from.
 * @param n - Number of leading tokens to use (the implementation defaults to 8).
 * @returns The space-joined prefix; '' for an empty token list.
 */
|
|
20
|
+
export declare function makeSignature(tokens: string[], n?: number): string;
|
|
21
|
+
/**
 * Calculate Jaccard similarity between two sets: the size of their
 * intersection divided by the size of their union.
 *
 * @param a - First set.
 * @param b - Second set.
 * @returns A value between 0 (no overlap) and 1 (identical). Two empty
 *   sets yield 1; exactly one empty set yields 0.
 */
|
|
25
|
+
export declare function jaccardSimilarity(a: Set<string>, b: Set<string>): number;
|
|
26
|
+
/**
 * Check whether two token lists are similar at or above a threshold.
 * Both lists are converted to sets (duplicates ignored) and compared by
 * Jaccard similarity.
 *
 * @param tokens1 - First token list.
 * @param tokens2 - Second token list.
 * @param threshold - Minimum similarity, inclusive (the implementation
 *   defaults to 0.65).
 * @returns true when the similarity is greater than or equal to `threshold`.
 */
|
|
29
|
+
export declare function areSimilar(tokens1: string[], tokens2: string[], threshold?: number): boolean;
|
|
30
|
+
//# sourceMappingURL=normalize.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.d.ts","sourceRoot":"","sources":["../../src/core/normalize.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAI/C;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,SAAI,GAAG,MAAM,CAE7D;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,MAAM,CAYxE;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,SAAO,GACf,OAAO,CAIT"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Text normalization and similarity utilities
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.normalizeText = normalizeText;
|
|
7
|
+
exports.tokenize = tokenize;
|
|
8
|
+
exports.makeSignature = makeSignature;
|
|
9
|
+
exports.jaccardSimilarity = jaccardSimilarity;
|
|
10
|
+
exports.areSimilar = areSimilar;
|
|
11
|
+
/**
 * Normalize text for comparison.
 *
 * Steps, in order:
 *  1. Lowercase.
 *  2. Remove special characters: anything that is not a letter, combining
 *     mark, digit, underscore, or whitespace. Unicode classes are used so
 *     accented and non-Latin letters survive instead of being deleted.
 *  3. Collapse runs of whitespace to a single space.
 *  4. Trim leading/trailing whitespace.
 *
 * Stripping happens before collapsing and trimming so that removed
 * punctuation cannot leave double spaces or edge spaces behind
 * (e.g. "a - b" becomes "a b", not "a  b").
 *
 * @param text - Text to normalize.
 * @returns The normalized text.
 */
function normalizeText(text) {
    return text
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
25
|
+
/**
 * Break text into word tokens.
 *
 * A token is a maximal run of non-whitespace characters; blank or
 * whitespace-only input produces an empty list.
 *
 * @param text - Text to split.
 * @returns The tokens in their original order.
 */
function tokenize(text) {
    const words = text.match(/\S+/g);
    // match() reports "no matches" as null rather than an empty array.
    return words === null ? [] : words;
}
|
|
33
|
+
/**
 * Build a signature string from the leading tokens of a list.
 * Used for bucketing to enable O(n) similarity detection.
 *
 * @param tokens - Token list to take the prefix from.
 * @param n - How many leading tokens to include (default 8).
 * @returns The selected tokens joined by single spaces.
 */
function makeSignature(tokens, n = 8) {
    const prefix = tokens.slice(0, n);
    return prefix.join(' ');
}
|
|
40
|
+
/**
 * Calculate Jaccard similarity between two sets: |a ∩ b| / |a ∪ b|.
 *
 * The overlap is counted by walking the smaller set and probing the larger
 * one, and the union size is derived as |a| + |b| - |a ∩ b|, so no
 * intermediate arrays or sets are allocated.
 *
 * @param a - First set.
 * @param b - Second set.
 * @returns A value between 0 (no overlap) and 1 (identical). Two empty
 *   sets are treated as identical (1); exactly one empty set yields 0.
 */
function jaccardSimilarity(a, b) {
    if (a.size === 0 && b.size === 0) {
        return 1.0;
    }
    if (a.size === 0 || b.size === 0) {
        return 0.0;
    }
    const smaller = a.size <= b.size ? a : b;
    const larger = smaller === a ? b : a;
    let shared = 0;
    for (const item of smaller) {
        if (larger.has(item)) {
            shared += 1;
        }
    }
    return shared / (a.size + b.size - shared);
}
|
|
55
|
+
/**
 * Decide whether two token lists are similar enough.
 *
 * Both lists are reduced to sets (duplicates ignored) and scored with
 * jaccardSimilarity.
 *
 * @param tokens1 - First token list.
 * @param tokens2 - Second token list.
 * @param threshold - Minimum score, inclusive (default 0.65).
 * @returns true when the score is greater than or equal to `threshold`.
 */
function areSimilar(tokens1, tokens2, threshold = 0.65) {
    const score = jaccardSimilarity(new Set(tokens1), new Set(tokens2));
    return score >= threshold;
}
|
|
63
|
+
//# sourceMappingURL=normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalize.js","sourceRoot":"","sources":["../../src/core/normalize.ts"],"names":[],"mappings":";AAAA;;GAEG;;AASH,sCAMC;AAKD,4BAIC;AAMD,sCAEC;AAMD,8CAYC;AAKD,gCAQC;AA7DD;;;;;;GAMG;AACH,SAAgB,aAAa,CAAC,IAAY;IACxC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,IAAI,EAAE;SACN,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAgB,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;GAGG;AACH,SAAgB,aAAa,CAAC,MAAgB,EAAE,CAAC,GAAG,CAAC;IACnD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,SAAgB,iBAAiB,CAAC,CAAc,EAAE,CAAc;IAC9D,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAEpC,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAgB,UAAU,CACxB,OAAiB,EACjB,OAAiB,EACjB,SAAS,GAAG,IAAI;IAEhB,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,OAAO,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,SAAS,CAAC;AACpD,CAAC"}
|