@memograph/cli 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +402 -0
  3. package/dist/cli.d.ts +3 -0
  4. package/dist/cli.d.ts.map +1 -0
  5. package/dist/cli.js +97 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/core/detect.d.ts +30 -0
  8. package/dist/core/detect.d.ts.map +1 -0
  9. package/dist/core/detect.js +212 -0
  10. package/dist/core/detect.js.map +1 -0
  11. package/dist/core/extract.d.ts +6 -0
  12. package/dist/core/extract.d.ts.map +1 -0
  13. package/dist/core/extract.js +104 -0
  14. package/dist/core/extract.js.map +1 -0
  15. package/dist/core/inspect.d.ts +7 -0
  16. package/dist/core/inspect.d.ts.map +1 -0
  17. package/dist/core/inspect.js +98 -0
  18. package/dist/core/inspect.js.map +1 -0
  19. package/dist/core/llm/client.d.ts +55 -0
  20. package/dist/core/llm/client.d.ts.map +1 -0
  21. package/dist/core/llm/client.js +199 -0
  22. package/dist/core/llm/client.js.map +1 -0
  23. package/dist/core/llm/detect-llm.d.ts +28 -0
  24. package/dist/core/llm/detect-llm.d.ts.map +1 -0
  25. package/dist/core/llm/detect-llm.js +212 -0
  26. package/dist/core/llm/detect-llm.js.map +1 -0
  27. package/dist/core/llm/extract-llm.d.ts +27 -0
  28. package/dist/core/llm/extract-llm.d.ts.map +1 -0
  29. package/dist/core/llm/extract-llm.js +151 -0
  30. package/dist/core/llm/extract-llm.js.map +1 -0
  31. package/dist/core/llm/prompts.d.ts +28 -0
  32. package/dist/core/llm/prompts.d.ts.map +1 -0
  33. package/dist/core/llm/prompts.js +172 -0
  34. package/dist/core/llm/prompts.js.map +1 -0
  35. package/dist/core/llm/providers.d.ts +34 -0
  36. package/dist/core/llm/providers.d.ts.map +1 -0
  37. package/dist/core/llm/providers.js +169 -0
  38. package/dist/core/llm/providers.js.map +1 -0
  39. package/dist/core/load.d.ts +10 -0
  40. package/dist/core/load.d.ts.map +1 -0
  41. package/dist/core/load.js +106 -0
  42. package/dist/core/load.js.map +1 -0
  43. package/dist/core/normalize.d.ts +30 -0
  44. package/dist/core/normalize.d.ts.map +1 -0
  45. package/dist/core/normalize.js +63 -0
  46. package/dist/core/normalize.js.map +1 -0
  47. package/dist/core/render.d.ts +10 -0
  48. package/dist/core/render.d.ts.map +1 -0
  49. package/dist/core/render.js +60 -0
  50. package/dist/core/render.js.map +1 -0
  51. package/dist/core/score.d.ts +27 -0
  52. package/dist/core/score.d.ts.map +1 -0
  53. package/dist/core/score.js +59 -0
  54. package/dist/core/score.js.map +1 -0
  55. package/dist/core/types.d.ts +162 -0
  56. package/dist/core/types.d.ts.map +1 -0
  57. package/dist/core/types.js +6 -0
  58. package/dist/core/types.js.map +1 -0
  59. package/dist/interactive/index.d.ts +67 -0
  60. package/dist/interactive/index.d.ts.map +1 -0
  61. package/dist/interactive/index.js +794 -0
  62. package/dist/interactive/index.js.map +1 -0
  63. package/dist/interactive/settings.d.ts +36 -0
  64. package/dist/interactive/settings.d.ts.map +1 -0
  65. package/dist/interactive/settings.js +174 -0
  66. package/dist/interactive/settings.js.map +1 -0
  67. package/dist/interactive/wizard.d.ts +10 -0
  68. package/dist/interactive/wizard.d.ts.map +1 -0
  69. package/dist/interactive/wizard.js +249 -0
  70. package/dist/interactive/wizard.js.map +1 -0
  71. package/package.json +49 -0
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Prompt templates for LLM-based analysis
3
+ */
4
+ /**
5
+ * System prompt for fact extraction
6
+ */
7
+ export declare const FACT_EXTRACTION_SYSTEM = "You are an expert at extracting important facts and preferences from conversation transcripts.\nYour task is to identify and extract:\n1. Identity information (name, role, etc.)\n2. User preferences (language, tone, format, etc.)\n3. Context information (goals, constraints, background)\n4. Any other facts that should be remembered\n\nFor each fact, provide:\n- A key (category:attribute format, e.g., \"identity:name\", \"pref:language\")\n- The value\n- Confidence score (0.0-1.0)\n- A brief justification\n\nRespond ONLY with valid JSON in the following format:\n{\n \"facts\": [\n {\n \"fact_key\": \"category:attribute\",\n \"fact_value\": \"value\",\n \"confidence\": 0.9,\n \"justification\": \"brief explanation\"\n }\n ]\n}";
8
+ /**
9
+ * User prompt for fact extraction
10
+ */
11
+ export declare function createFactExtractionPrompt(messages: string[]): string;
12
+ /**
13
+ * User prompt for fact extraction from raw text
14
+ */
15
+ export declare function createFactExtractionPromptFromRaw(rawText: string): string;
16
+ /**
17
+ * System prompt for drift detection
18
+ */
19
+ export declare const DRIFT_DETECTION_SYSTEM = "You are an expert at analyzing conversation transcripts for memory drift issues.\nMemory drift occurs when an AI assistant forgets or loses track of important information.\n\nYour task is to detect:\n1. Repetition clusters: User repeating similar requests\n2. Session reset: Assistant indicating starting over or forgetting\n3. Preference forgotten: User restating preferences that should have been remembered\n4. Contradictions: Conflicting information about the same fact\n5. Context loss: Assistant losing important contextual information\n6. Inconsistent behavior: Assistant acting inconsistently with stated preferences\n\nFor each detected event, provide:\n- Event type\n- Severity (1-5, higher is worse)\n- Confidence (0.0-1.0)\n- Message indices involved\n- Evidence/snippets\n- Human-readable summary\n\nAdditionally, calculate an overall drift_score (0-100):\n- 0-20: Excellent memory retention, no issues\n- 21-40: Minor drift, occasional repetition\n- 41-60: Moderate drift, noticeable issues\n- 61-80: Significant drift, frequent problems\n- 81-100: Severe drift, major memory failures\n\nRespond ONLY with valid JSON in the following format:\n{\n \"drift_score\": 0-100,\n \"events\": [\n {\n \"type\": \"repetition_cluster|session_reset|preference_forgotten|contradiction|context_loss|inconsistent_behavior\",\n \"severity\": 1-5,\n \"confidence\": 0.0-1.0,\n \"msg_idxs\": [0, 1, 2],\n \"snippets\": [\"text snippets\"],\n \"summary\": \"human-readable summary\",\n \"details\": {} // additional details specific to event type\n }\n ]\n}";
20
+ /**
21
+ * User prompt for drift detection
22
+ */
23
+ export declare function createDriftDetectionPrompt(messages: string[], facts: string): string;
24
+ /**
25
+ * User prompt for drift detection from raw text
26
+ */
27
+ export declare function createDriftDetectionPromptFromRaw(rawText: string, facts: string): string;
28
+ //# sourceMappingURL=prompts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompts.d.ts","sourceRoot":"","sources":["../../../src/core/llm/prompts.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;GAEG;AACH,eAAO,MAAM,sBAAsB,8vBAuBjC,CAAC;AAEH;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM,CAgBrE;AAED;;GAEG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAezE;AAED;;GAEG;AACH,eAAO,MAAM,sBAAsB,kkDAwCjC,CAAC;AAEH;;GAEG;AACH,wBAAgB,0BAA0B,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAqBpF;AAED;;GAEG;AACH,wBAAgB,iCAAiC,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,MAAM,CAsBxF"}
@@ -0,0 +1,172 @@
1
+ "use strict";
2
+ /**
3
+ * Prompt templates for LLM-based analysis
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.DRIFT_DETECTION_SYSTEM = exports.FACT_EXTRACTION_SYSTEM = void 0;
7
+ exports.createFactExtractionPrompt = createFactExtractionPrompt;
8
+ exports.createFactExtractionPromptFromRaw = createFactExtractionPromptFromRaw;
9
+ exports.createDriftDetectionPrompt = createDriftDetectionPrompt;
10
+ exports.createDriftDetectionPromptFromRaw = createDriftDetectionPromptFromRaw;
11
/**
 * System prompt for fact extraction.
 *
 * Tells the model which kinds of facts to pull out of a transcript (identity,
 * preferences, context, anything else worth remembering) and pins the reply
 * to a single JSON object whose `facts` array holds entries with `fact_key`,
 * `fact_value`, `confidence` and `justification`.
 */
exports.FACT_EXTRACTION_SYSTEM = `You are an expert at extracting important facts and preferences from conversation transcripts.
Your task is to identify and extract:
1. Identity information (name, role, etc.)
2. User preferences (language, tone, format, etc.)
3. Context information (goals, constraints, background)
4. Any other facts that should be remembered

For each fact, provide:
- A key (category:attribute format, e.g., "identity:name", "pref:language")
- The value
- Confidence score (0.0-1.0)
- A brief justification

Respond ONLY with valid JSON in the following format:
{
"facts": [
{
"fact_key": "category:attribute",
"fact_value": "value",
"confidence": 0.9,
"justification": "brief explanation"
}
]
}`;
38
/**
 * Build the user prompt asking the model to extract facts from a
 * message-structured transcript.
 *
 * @param messages Transcript messages, one string per message. They are
 *   joined with a `---` separator line before being embedded in the prompt.
 * @returns The complete prompt text.
 */
function createFactExtractionPrompt(messages) {
    const promptLines = [
        'Extract all important facts and preferences from the following conversation transcript.',
        '',
        'Only extract from USER messages. Ignore assistant messages for fact extraction.',
        '',
        'Focus on:',
        '- Identity (name, role, etc.)',
        '- Preferences (language, tone, format, style)',
        '- Context (goals, constraints, requirements)',
        '- Important facts that should be remembered',
        '',
        'Transcript:',
        // Always a string, so joining it below cannot alter it.
        messages.join('\n---\n'),
        '',
        'Respond with JSON containing the facts array.',
    ];
    return promptLines.join('\n');
}
58
/**
 * Build the user prompt asking the model to extract facts from unstructured
 * transcript text (input that could not be split into messages).
 *
 * @param rawText The transcript text, embedded verbatim.
 * @returns The complete prompt text.
 */
function createFactExtractionPromptFromRaw(rawText) {
    const preamble = [
        'Extract all important facts and preferences from the following transcript text.',
        '',
        'The content may not be structured as messages. Infer user facts and preferences from the text.',
        '',
        'Focus on:',
        '- Identity (name, role, etc.)',
        '- Preferences (language, tone, format, style)',
        '- Context (goals, constraints, requirements)',
        '- Important facts that should be remembered',
        '',
        'Transcript text:',
    ].join('\n');
    const closing = 'Respond with JSON containing the facts array.';
    // rawText is interpolated (not array-joined) so its string conversion
    // matches a plain template literal.
    return `${preamble}\n${rawText}\n\n${closing}`;
}
77
+ /**
78
+ * System prompt for drift detection
79
+ */
80
+ exports.DRIFT_DETECTION_SYSTEM = `You are an expert at analyzing conversation transcripts for memory drift issues.
81
+ Memory drift occurs when an AI assistant forgets or loses track of important information.
82
+
83
+ Your task is to detect:
84
+ 1. Repetition clusters: User repeating similar requests
85
+ 2. Session reset: Assistant indicating starting over or forgetting
86
+ 3. Preference forgotten: User restating preferences that should have been remembered
87
+ 4. Contradictions: Conflicting information about the same fact
88
+ 5. Context loss: Assistant losing important contextual information
89
+ 6. Inconsistent behavior: Assistant acting inconsistently with stated preferences
90
+
91
+ For each detected event, provide:
92
+ - Event type
93
+ - Severity (1-5, higher is worse)
94
+ - Confidence (0.0-1.0)
95
+ - Message indices involved
96
+ - Evidence/snippets
97
+ - Human-readable summary
98
+
99
+ Additionally, calculate an overall drift_score (0-100):
100
+ - 0-20: Excellent memory retention, no issues
101
+ - 21-40: Minor drift, occasional repetition
102
+ - 41-60: Moderate drift, noticeable issues
103
+ - 61-80: Significant drift, frequent problems
104
+ - 81-100: Severe drift, major memory failures
105
+
106
+ Respond ONLY with valid JSON in the following format:
107
+ {
108
+ "drift_score": 0-100,
109
+ "events": [
110
+ {
111
+ "type": "repetition_cluster|session_reset|preference_forgotten|contradiction|context_loss|inconsistent_behavior",
112
+ "severity": 1-5,
113
+ "confidence": 0.0-1.0,
114
+ "msg_idxs": [0, 1, 2],
115
+ "snippets": ["text snippets"],
116
+ "summary": "human-readable summary",
117
+ "details": {} // additional details specific to event type
118
+ }
119
+ ]
120
+ }`;
121
/**
 * Build the user prompt asking the model to find memory-drift events in a
 * message-structured transcript.
 *
 * The closing instruction names both `drift_score` and the `events` array so
 * that it agrees with the reply format required by DRIFT_DETECTION_SYSTEM.
 *
 * NOTE(review): the prompt tells the model that messages are numbered from 0,
 * but this function only joins the strings it is given — presumably callers
 * prefix each message with its index; confirm against the call site.
 *
 * @param messages Transcript messages, one string per message; joined with a
 *   `---` separator line.
 * @param facts Previously extracted facts as text; an empty value is replaced
 *   by "No facts extracted".
 * @returns The complete prompt text.
 */
function createDriftDetectionPrompt(messages, facts) {
    const transcript = messages.join('\n---\n');
    return `Analyze the following conversation transcript for memory drift events.

Transcript (each message is numbered starting from 0):
${transcript}

Important facts extracted from the conversation:
${facts || "No facts extracted"}

Look for:
1. User repeating similar requests (repetition_cluster)
2. Assistant indicating starting over or forgetting (session_reset)
3. User restating preferences that were previously stated (preference_forgotten)
4. Contradictions in information (contradiction)
5. Assistant losing important context (context_loss)
6. Assistant acting inconsistently with stated preferences (inconsistent_behavior)

Provide message indices (0-based) for each event.

Respond with JSON containing the drift_score and the events array.`;
}
146
/**
 * Build the user prompt asking the model to find memory-drift events in
 * unstructured transcript text.
 *
 * The closing instruction names both `drift_score` and the `events` array so
 * that it agrees with the reply format required by DRIFT_DETECTION_SYSTEM.
 *
 * @param rawText The transcript text, embedded verbatim.
 * @param facts Previously extracted facts as text; an empty value is replaced
 *   by "No facts extracted".
 * @returns The complete prompt text.
 */
function createDriftDetectionPromptFromRaw(rawText, facts) {
    return `Analyze the following transcript text for memory drift events.

The content may not be structured as messages. Use best judgment to identify drift patterns.

Transcript text:
${rawText}

Important facts extracted from the transcript:
${facts || "No facts extracted"}

Look for:
1. User repeating similar requests (repetition_cluster)
2. Assistant indicating starting over or forgetting (session_reset)
3. User restating preferences that were previously stated (preference_forgotten)
4. Contradictions in information (contradiction)
5. Assistant losing important context (context_loss)
6. Assistant acting inconsistently with stated preferences (inconsistent_behavior)

If message indices are unknown, set msg_idxs to an empty array.

Respond with JSON containing the drift_score and the events array.`;
}
172
+ //# sourceMappingURL=prompts.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../../src/core/llm/prompts.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAiCH,gEAgBC;AAKD,8EAeC;AAkDD,gEAqBC;AAKD,8EAsBC;AArKD;;GAEG;AACU,QAAA,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;EAuBpC,CAAC;AAEH;;GAEG;AACH,SAAgB,0BAA0B,CAAC,QAAkB;IAC3D,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC5C,OAAO;;;;;;;;;;;EAWP,UAAU;;8CAEkC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,SAAgB,iCAAiC,CAAC,OAAe;IAC/D,OAAO;;;;;;;;;;;EAWP,OAAO;;8CAEqC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACU,QAAA,sBAAsB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAwCpC,CAAC;AAEH;;GAEG;AACH,SAAgB,0BAA0B,CAAC,QAAkB,EAAE,KAAa;IAC1E,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC5C,OAAO;;;EAGP,UAAU;;;EAGV,KAAK,IAAI,oBAAoB;;;;;;;;;;;;+CAYgB,CAAC;AAChD,CAAC;AAED;;GAEG;AACH,SAAgB,iCAAiC,CAAC,OAAe,EAAE,KAAa;IAC9E,OAAO;;;;;EAKP,OAAO;;;EAGP,KAAK,IAAI,oBAAoB;;;;;;;;;;;;+CAYgB,CAAC;AAChD,CAAC"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * LLM Provider Registry
3
+ * Maps all supported providers with their configurations
4
+ */
5
/**
 * Key identifying one supported LLM provider. These are exactly the keys of
 * the PROVIDERS catalog, which is typed `Record<LLMProvider, ProviderInfo>`.
 */
export type LLMProvider = "openai" | "anthropic" | "gemini" | "mistral" | "cohere" | "xai" | "perplexity" | "openrouter" | "together" | "groq" | "fireworks" | "deepseek" | "ollama" | "lmstudio" | "vllm" | "localai" | "openai_compatible";
6
/**
 * Static configuration for one entry of the provider catalog.
 */
export interface ProviderInfo {
    /** Display label for the provider (what getAllProviderLabels returns). */
    label: string;
    /** Catalog grouping; getProvidersByCategory filters on this value. */
    category: "cloud" | "aggregator" | "local";
    /** Default API base URL; optional because not every entry has one. */
    defaultBaseUrl?: string;
    /** Presumably whether an API key must be supplied for this provider — confirm against the LLM client. */
    needsApiKey: boolean;
    /** Optional list of model names; presumably offered as suggestions — confirm against the UI code. */
    modelPresets?: string[];
    /** Read by isOpenAICompatible, which treats an absent value as false. */
    openAICompatible?: boolean;
}
14
+ /**
15
+ * Provider catalog with all supported LLM providers
16
+ */
17
+ export declare const PROVIDERS: Record<LLMProvider, ProviderInfo>;
18
+ /**
19
+ * Get provider info by provider key
20
+ */
21
+ export declare function getProviderInfo(provider: LLMProvider): ProviderInfo;
22
+ /**
23
+ * Check if provider is OpenAI-compatible
24
+ */
25
+ export declare function isOpenAICompatible(provider: LLMProvider): boolean;
26
+ /**
27
+ * Get list of providers by category
28
+ */
29
+ export declare function getProvidersByCategory(category: "cloud" | "aggregator" | "local"): LLMProvider[];
30
+ /**
31
+ * Get all provider labels for display
32
+ */
33
+ export declare function getAllProviderLabels(): Record<LLMProvider, string>;
34
+ //# sourceMappingURL=providers.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../../../src/core/llm/providers.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,WAAW,GACnB,QAAQ,GACR,WAAW,GACX,QAAQ,GACR,SAAS,GACT,QAAQ,GACR,KAAK,GACL,YAAY,GACZ,YAAY,GACZ,UAAU,GACV,MAAM,GACN,WAAW,GACX,UAAU,GACV,QAAQ,GACR,UAAU,GACV,MAAM,GACN,SAAS,GACT,mBAAmB,CAAC;AAExB,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,OAAO,GAAG,YAAY,GAAG,OAAO,CAAC;IAC3C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,WAAW,EAAE,OAAO,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;IACxB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B;AAED;;GAEG;AACH,eAAO,MAAM,SAAS,EAAE,MAAM,CAAC,WAAW,EAAE,YAAY,CA+IvD,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,WAAW,GAAG,YAAY,CAEnE;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAEjE;AAED;;GAEG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,OAAO,GAAG,YAAY,GAAG,OAAO,GAAG,WAAW,EAAE,CAIhG;AAED;;GAEG;AACH,wBAAgB,oBAAoB,IAAI,MAAM,CAAC,WAAW,EAAE,MAAM,CAAC,CAIlE"}
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+ /**
3
+ * LLM Provider Registry
4
+ * Maps all supported providers with their configurations
5
+ */
6
+ Object.defineProperty(exports, "__esModule", { value: true });
7
+ exports.PROVIDERS = void 0;
8
+ exports.getProviderInfo = getProviderInfo;
9
+ exports.isOpenAICompatible = isOpenAICompatible;
10
+ exports.getProvidersByCategory = getProvidersByCategory;
11
+ exports.getAllProviderLabels = getAllProviderLabels;
12
/**
 * Provider catalog with all supported LLM providers.
 *
 * Keyed by provider id. Each entry carries a display label, a category
 * ("cloud", "aggregator" or "local"), a needsApiKey flag, and optionally a
 * default base URL, a list of model presets and an openAICompatible flag.
 * Entries that omit openAICompatible (anthropic, gemini, cohere) are reported
 * as not compatible by isOpenAICompatible.
 *
 * Insertion order is observable: getProvidersByCategory and
 * getAllProviderLabels iterate the catalog with Object.entries.
 */
exports.PROVIDERS = {
    // Cloud Providers (Direct)
    openai: {
        label: "OpenAI",
        category: "cloud",
        defaultBaseUrl: "https://api.openai.com/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["gpt-4o-mini", "gpt-4o", "gpt-4.1-mini"],
    },
    anthropic: {
        label: "Anthropic (Claude)",
        category: "cloud",
        defaultBaseUrl: "https://api.anthropic.com",
        needsApiKey: true,
        modelPresets: ["claude-3-5-sonnet", "claude-3-5-haiku"],
    },
    gemini: {
        label: "Google Gemini",
        category: "cloud",
        defaultBaseUrl: "https://generativelanguage.googleapis.com",
        needsApiKey: true,
        modelPresets: ["gemini-1.5-flash", "gemini-1.5-pro"],
    },
    mistral: {
        label: "Mistral",
        category: "cloud",
        defaultBaseUrl: "https://api.mistral.ai/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["mistral-small", "mistral-large"],
    },
    cohere: {
        label: "Cohere",
        category: "cloud",
        defaultBaseUrl: "https://api.cohere.com",
        needsApiKey: true,
        modelPresets: ["command-r", "command-r-plus"],
    },
    xai: {
        label: "xAI (Grok)",
        category: "cloud",
        defaultBaseUrl: "https://api.x.ai/v1",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["grok-2", "grok-2-mini"],
    },
    perplexity: {
        label: "Perplexity",
        category: "cloud",
        defaultBaseUrl: "https://api.perplexity.ai",
        needsApiKey: true,
        openAICompatible: true,
        modelPresets: ["sonar", "sonar-pro"],
    },
    // Aggregators / Routers (OpenAI-compatible)
    openrouter: {
        label: "OpenRouter",
        category: "aggregator",
        defaultBaseUrl: "https://openrouter.ai/api/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    together: {
        label: "Together.ai",
        category: "aggregator",
        defaultBaseUrl: "https://api.together.xyz/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    groq: {
        label: "Groq",
        category: "aggregator",
        defaultBaseUrl: "https://api.groq.com/openai/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    fireworks: {
        label: "Fireworks.ai",
        category: "aggregator",
        defaultBaseUrl: "https://api.fireworks.ai/inference/v1",
        needsApiKey: true,
        openAICompatible: true,
    },
    deepseek: {
        label: "DeepSeek",
        category: "aggregator",
        defaultBaseUrl: "https://api.deepseek.com",
        needsApiKey: true,
        openAICompatible: true,
    },
    // Local / Self-hosted
    ollama: {
        label: "Ollama (Local)",
        category: "local",
        defaultBaseUrl: "http://localhost:11434/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    lmstudio: {
        label: "LM Studio (Local)",
        category: "local",
        defaultBaseUrl: "http://localhost:1234/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    vllm: {
        label: "vLLM (Self-hosted)",
        category: "local",
        defaultBaseUrl: "http://localhost:8000/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    localai: {
        label: "LocalAI (Self-hosted)",
        category: "local",
        defaultBaseUrl: "http://localhost:8080/v1",
        needsApiKey: false,
        openAICompatible: true,
    },
    // Custom OpenAI-compatible endpoint
    // (the only entry without a defaultBaseUrl)
    openai_compatible: {
        label: "Custom (OpenAI-compatible)",
        category: "aggregator",
        needsApiKey: true,
        openAICompatible: true,
    },
};
143
/**
 * Look up the catalog entry for a provider key.
 *
 * @param provider Key into the PROVIDERS catalog.
 * @returns The entry stored under that key (undefined at runtime when the
 *   key is not in the catalog).
 */
function getProviderInfo(provider) {
    const catalog = exports.PROVIDERS;
    return catalog[provider];
}
149
/**
 * Report whether a provider is flagged as OpenAI-compatible in the catalog.
 *
 * @param provider Key into the PROVIDERS catalog.
 * @returns The entry's `openAICompatible` flag; false when the entry is
 *   missing or the flag is unset.
 */
function isOpenAICompatible(provider) {
    const entry = exports.PROVIDERS[provider];
    if (entry === null || entry === undefined) {
        return false;
    }
    return entry.openAICompatible || false;
}
155
/**
 * List the provider keys whose catalog entry belongs to the given category.
 *
 * @param category One of "cloud", "aggregator" or "local".
 * @returns Matching provider keys, in catalog order.
 */
function getProvidersByCategory(category) {
    const matches = [];
    for (const [providerKey, entry] of Object.entries(exports.PROVIDERS)) {
        if (entry.category === category) {
            matches.push(providerKey);
        }
    }
    return matches;
}
163
/**
 * Build a map from every provider key to its display label.
 *
 * @returns A new object with one `key -> label` property per catalog entry.
 */
function getAllProviderLabels() {
    const labelPairs = [];
    for (const [providerKey, entry] of Object.entries(exports.PROVIDERS)) {
        labelPairs.push([providerKey, entry.label]);
    }
    return Object.fromEntries(labelPairs);
}
169
+ //# sourceMappingURL=providers.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"providers.js","sourceRoot":"","sources":["../../../src/core/llm/providers.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAqLH,0CAEC;AAKD,gDAEC;AAKD,wDAIC;AAKD,oDAIC;AAlLD;;GAEG;AACU,QAAA,SAAS,GAAsC;IAC1D,2BAA2B;IAC3B,MAAM,EAAE;QACN,KAAK,EAAE,QAAQ;QACf,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC;KACxD;IAED,SAAS,EAAE;QACT,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,mBAAmB,EAAE,kBAAkB,CAAC;KACxD;IAED,MAAM,EAAE;QACN,KAAK,EAAE,eAAe;QACtB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2CAA2C;QAC3D,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;KACrD;IAED,OAAO,EAAE;QACP,KAAK,EAAE,SAAS;QAChB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,eAAe,EAAE,eAAe,CAAC;KACjD;IAED,MAAM,EAAE;QACN,KAAK,EAAE,QAAQ;QACf,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,wBAAwB;QACxC,WAAW,EAAE,IAAI;QACjB,YAAY,EAAE,CAAC,WAAW,EAAE,gBAAgB,CAAC;KAC9C;IAED,GAAG,EAAE;QACH,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC;KACxC;IAED,UAAU,EAAE;QACV,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;QACtB,YAAY,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACrC;IAED,4CAA4C;IAC5C,UAAU,EAAE;QACV,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,8BAA8B;QAC9C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,aAAa;QACpB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,6BAA6B;QAC7C,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,IAAI,EAAE;QACJ,KAAK,EAAE,MAAM;QACb,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,gCAAgC;QAChD,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,SAAS,EAAE;QACT,KAAK,EAAE,cAAc;QACrB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,uCAAuC;QACvD,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,UAAU;QACjB,QAAQ,EAAE,YAAY;QACtB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,IAAI
;QACjB,gBAAgB,EAAE,IAAI;KACvB;IAED,sBAAsB;IACtB,MAAM,EAAE;QACN,KAAK,EAAE,gBAAgB;QACvB,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,QAAQ,EAAE;QACR,KAAK,EAAE,mBAAmB;QAC1B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,IAAI,EAAE;QACJ,KAAK,EAAE,oBAAoB;QAC3B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,OAAO,EAAE;QACP,KAAK,EAAE,uBAAuB;QAC9B,QAAQ,EAAE,OAAO;QACjB,cAAc,EAAE,0BAA0B;QAC1C,WAAW,EAAE,KAAK;QAClB,gBAAgB,EAAE,IAAI;KACvB;IAED,oCAAoC;IACpC,iBAAiB,EAAE;QACjB,KAAK,EAAE,4BAA4B;QACnC,QAAQ,EAAE,YAAY;QACtB,WAAW,EAAE,IAAI;QACjB,gBAAgB,EAAE,IAAI;KACvB;CACF,CAAC;AAEF;;GAEG;AACH,SAAgB,eAAe,CAAC,QAAqB;IACnD,OAAO,iBAAS,CAAC,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,QAAqB;IACtD,OAAO,iBAAS,CAAC,QAAQ,CAAC,EAAE,gBAAgB,IAAI,KAAK,CAAC;AACxD,CAAC;AAED;;GAEG;AACH,SAAgB,sBAAsB,CAAC,QAA0C;IAC/E,OAAO,MAAM,CAAC,OAAO,CAAC,iBAAS,CAAC;SAC7B,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,KAAK,QAAQ,CAAC;SACjD,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAkB,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB;IAClC,OAAO,MAAM,CAAC,WAAW,CACvB,MAAM,CAAC,OAAO,CAAC,iBAAS,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,GAAkB,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAClD,CAAC;AACnC,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { Transcript } from './types.js';
2
+ /**
3
+ * Load and normalize a transcript from a JSON file
4
+ */
5
+ export declare function loadTranscript(path: string, maxMessages?: number): Promise<Transcript>;
6
+ /**
7
+ * Normalize raw input to canonical Transcript format
8
+ */
9
+ export declare function normalizeTranscript(raw: any, maxMessages?: number): Transcript;
10
+ //# sourceMappingURL=load.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"load.d.ts","sourceRoot":"","sources":["../../src/core/load.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAqB,MAAM,YAAY,CAAC;AAE3D;;GAEG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CA2B5F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,UAAU,CAiC9E"}
@@ -0,0 +1,106 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.loadTranscript = loadTranscript;
4
+ exports.normalizeTranscript = normalizeTranscript;
5
+ const promises_1 = require("fs/promises");
6
/**
 * Load a transcript file and normalize it.
 *
 * The file is read exactly once. If its content is not valid JSON, or is JSON
 * that normalizeTranscript rejects as an "Invalid transcript", the same
 * content is returned as a raw transcript (`schema_version: 'raw'`, no
 * messages, `raw_text` set to the file content) instead of failing.
 *
 * @param path Path of the transcript file.
 * @param maxMessages Optional cap on the number of messages kept.
 * @returns The normalized transcript, or the raw-text fallback.
 * @throws Error "Transcript file not found: <path>" when the file does not
 *   exist; any other read or normalization error is rethrown unchanged.
 */
async function loadTranscript(path, maxMessages) {
    let content;
    try {
        content = await (0, promises_1.readFile)(path, 'utf-8');
    }
    catch (error) {
        // Only a failed read can mean "file not found"; guard the property
        // access because a thrown value is not guaranteed to be an object.
        if (error && error.code === 'ENOENT') {
            throw new Error(`Transcript file not found: ${path}`);
        }
        throw error;
    }
    // Reuse the content already in memory rather than re-reading the file,
    // so the fallback always reflects what was actually parsed.
    const asRawTranscript = () => ({
        schema_version: 'raw',
        messages: [],
        raw_text: content,
    });
    try {
        const raw = JSON.parse(content);
        return normalizeTranscript(raw, maxMessages);
    }
    catch (error) {
        if (error instanceof SyntaxError) {
            return asRawTranscript();
        }
        if (error instanceof Error && error.message.includes('Invalid transcript')) {
            return asRawTranscript();
        }
        throw error;
    }
}
38
/**
 * Convert parsed transcript input into the canonical Transcript shape.
 *
 * Accepts either a bare array of messages (schema version defaults to '1.0')
 * or an object with a `messages` array and an optional `schema_version`.
 *
 * @param raw Parsed input.
 * @param maxMessages Optional cap; when positive, only the first
 *   `maxMessages` messages are kept.
 * @returns `{ schema_version, messages }` with every message normalized.
 * @throws Error when `raw` is neither an array nor an object holding a
 *   `messages` array.
 */
function normalizeTranscript(raw, maxMessages) {
    const isBareArray = Array.isArray(raw);
    const isWrapped = !isBareArray && Boolean(raw) && Array.isArray(raw.messages);
    if (!isBareArray && !isWrapped) {
        throw new Error('Invalid transcript: expected messages array or { messages: [...] }');
    }
    let entries = isBareArray ? raw : raw.messages;
    const schema_version = isBareArray ? '1.0' : raw.schema_version || '1.0';
    // The emptiness warning looks at the input before any truncation.
    if (entries.length === 0) {
        console.warn('Warning: Empty transcript (no messages)');
    }
    const hasLimit = Boolean(maxMessages) && maxMessages > 0;
    if (hasLimit) {
        entries = entries.slice(0, maxMessages);
    }
    return {
        schema_version,
        messages: entries.map((entry, position) => normalizeMessage(entry, position)),
    };
}
72
/**
 * Turn message content of any type into text.
 *
 * Strings pass through and falsy values become ''. Objects and arrays (for
 * example structured content parts) are serialized as JSON, because String()
 * would collapse them to "[object Object]". Anything else uses String().
 */
function contentToString(content) {
    if (typeof content === 'string') {
        return content;
    }
    if (!content) {
        return '';
    }
    if (typeof content === 'object') {
        try {
            const json = JSON.stringify(content);
            if (typeof json === 'string') {
                return json;
            }
        }
        catch {
            // Not serializable (e.g. circular reference): fall back to String() below.
        }
    }
    return String(content);
}
/**
 * Normalize a single message.
 *
 * Fills in `idx` from the array position when it is missing, coerces `role`
 * through normalizeRole, converts `content` to text, and estimates `tokens`
 * as ceil(content length / 4) when no numeric count is present. `ts`,
 * `session_id` and `metadata` are copied only when truthy.
 *
 * @param msg Raw message entry; a null/undefined entry is treated as an
 *   empty message instead of raising a TypeError.
 * @param arrayIdx Position of the entry in the messages array.
 * @returns The normalized message object.
 */
function normalizeMessage(msg, arrayIdx) {
    const source = msg ?? {};
    // Auto-assign idx if missing
    const idx = typeof source.idx === 'number' ? source.idx : arrayIdx;
    // Coerce role to valid value
    const role = normalizeRole(source.role);
    const content = contentToString(source.content);
    // Estimate tokens if missing (rough approximation: chars / 4)
    const tokens = typeof source.tokens === 'number' ? source.tokens : Math.ceil(content.length / 4);
    return {
        idx,
        role,
        content,
        tokens,
        ...(source.ts && { ts: source.ts }),
        ...(source.session_id && { session_id: source.session_id }),
        ...(source.metadata && { metadata: source.metadata }),
    };
}
94
/**
 * Coerce a role value to one of 'system', 'user', 'assistant' or 'tool'.
 *
 * The value is stringified and lower-cased before matching; a missing or
 * unrecognized role falls back to 'user'.
 *
 * @param role Raw role value of any type.
 * @returns The normalized role.
 */
function normalizeRole(role) {
    const candidate = String(role || 'user').toLowerCase();
    switch (candidate) {
        case 'system':
        case 'user':
        case 'assistant':
        case 'tool':
            return candidate;
        default:
            // Unknown roles are treated as user messages.
            return 'user';
    }
}
106
+ //# sourceMappingURL=load.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"load.js","sourceRoot":"","sources":["../../src/core/load.ts"],"names":[],"mappings":";;AAMA,wCA2BC;AAKD,kDAiCC;AAvED,0CAAuC;AAGvC;;GAEG;AACI,KAAK,UAAU,cAAc,CAAC,IAAY,EAAE,WAAoB;IACrE,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAChC,OAAO,mBAAmB,CAAC,GAAG,EAAE,WAAW,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAK,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,8BAA8B,IAAI,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,IAAI,KAAK,YAAY,WAAW,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC9C,OAAO;gBACL,cAAc,EAAE,KAAK;gBACrB,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,OAAO;aAClB,CAAC;QACJ,CAAC;QACD,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC3E,MAAM,OAAO,GAAG,MAAM,IAAA,mBAAQ,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAC9C,OAAO;gBACL,cAAc,EAAE,KAAK;gBACrB,QAAQ,EAAE,EAAE;gBACZ,QAAQ,EAAE,OAAO;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,GAAQ,EAAE,WAAoB;IAChE,wDAAwD;IACxD,IAAI,QAAe,CAAC;IACpB,IAAI,cAAsB,CAAC;IAE3B,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvB,QAAQ,GAAG,GAAG,CAAC;QACf,cAAc,GAAG,KAAK,CAAC;IACzB,CAAC;SAAM,IAAI,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC9C,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;QACxB,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,KAAK,CAAC;IAC/C,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,KAAK,CAAC,oEAAoE,CAAC,CAAC;IACxF,CAAC;IAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAC1D,CAAC;IAED,2BAA2B;IAC3B,IAAI,WAAW,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;QACnC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC;IAC5C,CAAC;IAED,yBAAyB;IACzB,MAAM,UAAU,GAAwB,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;QACrE,OAAO,gBAAgB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,OAAO;QACL,cAAc;QACd,QAAQ,EAAE,UAAU;KACrB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,GAAQ,EAAE,QAAgB;IAClD,6BAA6B;IAC7B,MAAM,GAAG,GAAG,OAAO,GAAG,CAAC
,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE7D,6BAA6B;IAC7B,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAErC,oCAAoC;IACpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IAE1F,8DAA8D;IAC9D,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE3F,OAAO;QACL,GAAG;QACH,IAAI;QACJ,OAAO;QACP,MAAM;QACN,GAAG,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC;QAC7B,GAAG,CAAC,GAAG,CAAC,UAAU,IAAI,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC;QACrD,GAAG,CAAC,GAAG,CAAC,QAAQ,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,QAAQ,EAAE,CAAC;KAChD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAS;IAC9B,MAAM,UAAU,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAC3D,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;IAExD,IAAI,UAAU,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;QACpC,OAAO,UAAsD,CAAC;IAChE,CAAC;IAED,sCAAsC;IACtC,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Text normalization and similarity utilities
3
+ */
4
+ /**
5
+ * Normalize text for comparison
6
+ * - Lowercase
7
+ * - Trim whitespace
8
+ * - Collapse multiple spaces
9
+ * - Remove special characters
10
+ */
11
+ export declare function normalizeText(text: string): string;
12
+ /**
13
+ * Tokenize text into words
14
+ */
15
+ export declare function tokenize(text: string): string[];
16
+ /**
17
+ * Create a signature from first N tokens
18
+ * Used for bucketing to enable O(n) similarity detection
19
+ */
20
+ export declare function makeSignature(tokens: string[], n?: number): string;
21
+ /**
22
+ * Calculate Jaccard similarity between two sets
23
+ * Returns value between 0 (no overlap) and 1 (identical)
24
+ */
25
+ export declare function jaccardSimilarity(a: Set<string>, b: Set<string>): number;
26
+ /**
27
+ * Check if two token sets are similar above threshold
28
+ */
29
+ export declare function areSimilar(tokens1: string[], tokens2: string[], threshold?: number): boolean;
30
+ //# sourceMappingURL=normalize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"normalize.d.ts","sourceRoot":"","sources":["../../src/core/normalize.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;GAMG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAED;;GAEG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAI/C;AAED;;;GAGG;AACH,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,SAAI,GAAG,MAAM,CAE7D;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,MAAM,CAYxE;AAED;;GAEG;AACH,wBAAgB,UAAU,CACxB,OAAO,EAAE,MAAM,EAAE,EACjB,OAAO,EAAE,MAAM,EAAE,EACjB,SAAS,SAAO,GACf,OAAO,CAIT"}
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+ /**
3
+ * Text normalization and similarity utilities
4
+ */
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.normalizeText = normalizeText;
7
+ exports.tokenize = tokenize;
8
+ exports.makeSignature = makeSignature;
9
+ exports.jaccardSimilarity = jaccardSimilarity;
10
+ exports.areSimilar = areSimilar;
11
/**
 * Normalize text for comparison.
 *
 * Steps, in order:
 *  1. Lowercase.
 *  2. Remove special characters (anything that is not a letter, combining
 *     mark, digit, underscore, or whitespace).
 *  3. Collapse every run of whitespace into a single space.
 *  4. Trim leading/trailing whitespace.
 *
 * Special characters are removed BEFORE collapsing and trimming; otherwise
 * stripping a free-standing symbol ("a - b", "- hi") would leave a double
 * space or a leading/trailing space in the result.
 *
 * @param {string} text Raw text.
 * @returns {string} Normalized text with single-space-separated words.
 */
function normalizeText(text) {
    return text
        .toLowerCase()
        // Unicode-aware classes so non-ASCII letters (e.g. "é", "ß", CJK) are
        // kept; the ASCII-only \w would silently delete them.
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, '')
        .replace(/\s+/g, ' ')
        .trim();
}
25
/**
 * Split text into word tokens.
 *
 * The text is split on runs of whitespace; empty fragments (produced by
 * leading/trailing whitespace or an empty input) are discarded.
 *
 * @param {string} text Text to split.
 * @returns {string[]} Non-empty tokens in their original order.
 */
function tokenize(text) {
    const words = [];
    for (const piece of text.split(/\s+/)) {
        if (piece.length > 0) {
            words.push(piece);
        }
    }
    return words;
}
33
/**
 * Build a signature string from the leading tokens of a token list.
 *
 * Intended as a bucketing key so that similarity detection can stay O(n).
 *
 * @param {string[]} tokens Token list.
 * @param {number} [n=8] Number of leading tokens to include.
 * @returns {string} The first `n` tokens joined by single spaces.
 */
function makeSignature(tokens, n = 8) {
    const leading = tokens.slice(0, n);
    return leading.join(' ');
}
40
/**
 * Jaccard similarity of two sets: |A ∩ B| / |A ∪ B|.
 *
 * @param {Set<string>} a First set.
 * @param {Set<string>} b Second set.
 * @returns {number} A value from 0 (no overlap) to 1 (identical). Two empty
 *   sets yield 1; exactly one empty set yields 0.
 */
function jaccardSimilarity(a, b) {
    const sizeA = a.size;
    const sizeB = b.size;
    // Handle empty inputs up front: both empty -> 1, only one empty -> 0.
    if (sizeA === 0 || sizeB === 0) {
        return sizeA === sizeB ? 1.0 : 0.0;
    }
    // Count the members of `a` that also appear in `b`.
    let shared = 0;
    for (const item of a) {
        if (b.has(item)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    return shared / (sizeA + sizeB - shared);
}
55
/**
 * Decide whether two token lists are similar enough.
 *
 * Each list is converted to a set (so duplicate tokens count once) and the
 * Jaccard similarity of the two sets is compared against the threshold.
 *
 * @param {string[]} tokens1 First token list.
 * @param {string[]} tokens2 Second token list.
 * @param {number} [threshold=0.65] Minimum similarity (inclusive).
 * @returns {boolean} True when the similarity is at least `threshold`.
 */
function areSimilar(tokens1, tokens2, threshold = 0.65) {
    const score = jaccardSimilarity(new Set(tokens1), new Set(tokens2));
    return score >= threshold;
}
63
+ //# sourceMappingURL=normalize.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"normalize.js","sourceRoot":"","sources":["../../src/core/normalize.ts"],"names":[],"mappings":";AAAA;;GAEG;;AASH,sCAMC;AAKD,4BAIC;AAMD,sCAEC;AAMD,8CAYC;AAKD,gCAQC;AA7DD;;;;;;GAMG;AACH,SAAgB,aAAa,CAAC,IAAY;IACxC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,IAAI,EAAE;SACN,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;AAC7B,CAAC;AAED;;GAEG;AACH,SAAgB,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,KAAK,CAAC,KAAK,CAAC;SACZ,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;GAGG;AACH,SAAgB,aAAa,CAAC,MAAgB,EAAE,CAAC,GAAG,CAAC;IACnD,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,SAAgB,iBAAiB,CAAC,CAAc,EAAE,CAAc;IAC9D,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IACD,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACjC,OAAO,GAAG,CAAC;IACb,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IAEpC,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAgB,UAAU,CACxB,OAAiB,EACjB,OAAiB,EACjB,SAAS,GAAG,IAAI;IAEhB,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,OAAO,iBAAiB,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,SAAS,CAAC;AACpD,CAAC"}