@memberjunction/query-gen 0.0.1 → 2.126.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/CHANGELOG.md +34 -0
- package/COORDINATOR.md +768 -0
- package/IMPLEMENTATION_PLAN.md +1753 -0
- package/LLM_ENTITY_GROUPING_PLAN.md +977 -0
- package/README.md +675 -29
- package/dist/cli/commands/export.d.ts +15 -0
- package/dist/cli/commands/export.d.ts.map +1 -0
- package/dist/cli/commands/export.js +178 -0
- package/dist/cli/commands/export.js.map +1 -0
- package/dist/cli/commands/generate.d.ts +19 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/generate.js +282 -0
- package/dist/cli/commands/generate.js.map +1 -0
- package/dist/cli/commands/validate.d.ts +17 -0
- package/dist/cli/commands/validate.d.ts.map +1 -0
- package/dist/cli/commands/validate.js +193 -0
- package/dist/cli/commands/validate.js.map +1 -0
- package/dist/cli/config.d.ts +51 -0
- package/dist/cli/config.d.ts.map +1 -0
- package/dist/cli/config.js +142 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/index.d.ts +13 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +57 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/EntityGrouper.d.ts +74 -0
- package/dist/core/EntityGrouper.d.ts.map +1 -0
- package/dist/core/EntityGrouper.js +246 -0
- package/dist/core/EntityGrouper.js.map +1 -0
- package/dist/core/MetadataExporter.d.ts +59 -0
- package/dist/core/MetadataExporter.d.ts.map +1 -0
- package/dist/core/MetadataExporter.js +151 -0
- package/dist/core/MetadataExporter.js.map +1 -0
- package/dist/core/QueryDatabaseWriter.d.ts +50 -0
- package/dist/core/QueryDatabaseWriter.d.ts.map +1 -0
- package/dist/core/QueryDatabaseWriter.js +152 -0
- package/dist/core/QueryDatabaseWriter.js.map +1 -0
- package/dist/core/QueryFixer.d.ts +48 -0
- package/dist/core/QueryFixer.d.ts.map +1 -0
- package/dist/core/QueryFixer.js +115 -0
- package/dist/core/QueryFixer.js.map +1 -0
- package/dist/core/QueryRefiner.d.ts +94 -0
- package/dist/core/QueryRefiner.d.ts.map +1 -0
- package/dist/core/QueryRefiner.js +267 -0
- package/dist/core/QueryRefiner.js.map +1 -0
- package/dist/core/QueryTester.d.ts +70 -0
- package/dist/core/QueryTester.d.ts.map +1 -0
- package/dist/core/QueryTester.js +243 -0
- package/dist/core/QueryTester.js.map +1 -0
- package/dist/core/QueryWriter.d.ts +57 -0
- package/dist/core/QueryWriter.d.ts.map +1 -0
- package/dist/core/QueryWriter.js +184 -0
- package/dist/core/QueryWriter.js.map +1 -0
- package/dist/core/QuestionGenerator.d.ts +58 -0
- package/dist/core/QuestionGenerator.d.ts.map +1 -0
- package/dist/core/QuestionGenerator.js +145 -0
- package/dist/core/QuestionGenerator.js.map +1 -0
- package/dist/data/schema.d.ts +230 -0
- package/dist/data/schema.d.ts.map +1 -0
- package/dist/data/schema.js +6 -0
- package/dist/data/schema.js.map +1 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +77 -0
- package/dist/index.js.map +1 -0
- package/dist/prompts/PromptNames.d.ts +32 -0
- package/dist/prompts/PromptNames.d.ts.map +1 -0
- package/dist/prompts/PromptNames.js +35 -0
- package/dist/prompts/PromptNames.js.map +1 -0
- package/dist/utils/category-builder.d.ts +28 -0
- package/dist/utils/category-builder.d.ts.map +1 -0
- package/dist/utils/category-builder.js +90 -0
- package/dist/utils/category-builder.js.map +1 -0
- package/dist/utils/entity-helpers.d.ts +49 -0
- package/dist/utils/entity-helpers.d.ts.map +1 -0
- package/dist/utils/entity-helpers.js +189 -0
- package/dist/utils/entity-helpers.js.map +1 -0
- package/dist/utils/error-handlers.d.ts +19 -0
- package/dist/utils/error-handlers.d.ts.map +1 -0
- package/dist/utils/error-handlers.js +41 -0
- package/dist/utils/error-handlers.js.map +1 -0
- package/dist/utils/graph-helpers.d.ts +51 -0
- package/dist/utils/graph-helpers.d.ts.map +1 -0
- package/dist/utils/graph-helpers.js +82 -0
- package/dist/utils/graph-helpers.js.map +1 -0
- package/dist/utils/prompt-helpers.d.ts +25 -0
- package/dist/utils/prompt-helpers.d.ts.map +1 -0
- package/dist/utils/prompt-helpers.js +66 -0
- package/dist/utils/prompt-helpers.js.map +1 -0
- package/dist/utils/query-helpers.d.ts +23 -0
- package/dist/utils/query-helpers.d.ts.map +1 -0
- package/dist/utils/query-helpers.js +34 -0
- package/dist/utils/query-helpers.js.map +1 -0
- package/dist/utils/user-helpers.d.ts +15 -0
- package/dist/utils/user-helpers.d.ts.map +1 -0
- package/dist/utils/user-helpers.js +32 -0
- package/dist/utils/user-helpers.js.map +1 -0
- package/dist/vectors/EmbeddingService.d.ts +58 -0
- package/dist/vectors/EmbeddingService.d.ts.map +1 -0
- package/dist/vectors/EmbeddingService.js +90 -0
- package/dist/vectors/EmbeddingService.js.map +1 -0
- package/dist/vectors/SimilaritySearch.d.ts +51 -0
- package/dist/vectors/SimilaritySearch.d.ts.map +1 -0
- package/dist/vectors/SimilaritySearch.js +85 -0
- package/dist/vectors/SimilaritySearch.js.map +1 -0
- package/docs/API.md +1040 -0
- package/docs/ARCHITECTURE.md +1120 -0
- package/examples/advanced-usage.ts +401 -0
- package/examples/basic-usage.ts +285 -0
- package/package.json +48 -6
- package/src/cli/commands/export.ts +173 -0
- package/src/cli/commands/generate.ts +330 -0
- package/src/cli/commands/validate.ts +185 -0
- package/src/cli/config.ts +203 -0
- package/src/cli/index.ts +63 -0
- package/src/core/EntityGrouper.ts +318 -0
- package/src/core/MetadataExporter.ts +148 -0
- package/src/core/QueryDatabaseWriter.ts +187 -0
- package/src/core/QueryFixer.ts +153 -0
- package/src/core/QueryRefiner.ts +382 -0
- package/src/core/QueryTester.ts +264 -0
- package/src/core/QueryWriter.ts +239 -0
- package/src/core/QuestionGenerator.ts +199 -0
- package/src/data/golden-queries.json +1371 -0
- package/src/data/schema.ts +252 -0
- package/src/index.ts +49 -0
- package/src/prompts/PromptNames.ts +36 -0
- package/src/utils/category-builder.ts +97 -0
- package/src/utils/entity-helpers.ts +203 -0
- package/src/utils/error-handlers.ts +41 -0
- package/src/utils/graph-helpers.ts +99 -0
- package/src/utils/prompt-helpers.ts +79 -0
- package/src/utils/query-helpers.ts +32 -0
- package/src/utils/user-helpers.ts +39 -0
- package/src/vectors/EmbeddingService.ts +109 -0
- package/src/vectors/SimilaritySearch.ts +108 -0
- package/tsconfig.json +39 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Graph visualization and entity metadata formatting utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { EntityInfo } from '@memberjunction/core';
|
|
6
|
+
|
|
7
|
+
/**
 * Compact summary of one entity, shaped for inclusion in an LLM prompt.
 *
 * This is the element type returned by `formatEntitiesForPrompt`.
 */
export interface EntityMetadataForPrompt {
  /** Entity name. */
  Name: string;
  /** Entity description text. */
  Description: string;
  /** Name of the schema the entity belongs to. */
  SchemaName: string;
  /** Number of fields defined on the entity. */
  FieldCount: number;
  /** Related entities, each reduced to its name and a relationship type label. */
  RelatedEntities: { name: string; type: string }[];
}
|
|
17
|
+
|
|
18
|
+
/**
 * Builds a plain-text relationship overview, one line per entity, for use in
 * LLM prompts.
 *
 * Every entity yields `<Name>: → <Related>, → <Related>, ...`; an entity with
 * no related entities yields `<Name>: (no relationships)`. Lines are joined
 * with a newline, and an empty input produces an empty string.
 *
 * Example output:
 * ```
 * Customers: → Orders, → Addresses
 * Orders: → OrderDetails, → Customers
 * Archive: (no relationships)
 * ```
 *
 * @param entities - Entities whose `RelatedEntities` should be listed
 * @returns The newline-separated relationship listing
 */
export function generateRelationshipGraph(entities: EntityInfo[]): string {
  return entities
    .map(entity => {
      const related = entity.RelatedEntities;
      const summary =
        related.length === 0
          ? '(no relationships)'
          : related.map(link => `→ ${link.RelatedEntity}`).join(', ');

      return `${entity.Name}: ${summary}`;
    })
    .join('\n');
}
|
|
46
|
+
|
|
47
|
+
/**
 * Generates a Mermaid flowchart (`graph LR`) of entity relationships.
 *
 * Each relationship becomes one edge line `Id[Label] --> Id[Label]`. A pair of
 * entities is emitted only once regardless of direction: the first direction
 * encountered wins and the reverse edge is skipped.
 *
 * Node ids are derived from the entity name by replacing every character that
 * is not a letter, digit or underscore with `_`, so names containing brackets,
 * parentheses, quotes or punctuation cannot break the diagram syntax. Labels
 * made only of letters, digits, underscores and spaces are emitted bare; any
 * other label is wrapped in double quotes, with embedded `"` written as the
 * Mermaid entity code `#quot;`.
 *
 * Note: two distinct names that differ only in replaced characters
 * (e.g. `A B` and `A_B`) map to the same node id.
 *
 * Example output:
 * ```mermaid
 * graph LR
 *  Customers[Customers] --> Orders[Orders]
 *  Order_Details[Order Details] --> Products__v2_["Products (v2)"]
 * ```
 *
 * @param entities - Entities whose `RelatedEntities` should be drawn as edges
 * @returns The Mermaid source, lines joined with a newline
 */
export function generateMermaidDiagram(entities: EntityInfo[]): string {
  // Hoisted so the patterns are built once rather than per relationship.
  const unsafeIdChars = /[^\p{L}\p{N}_]/gu;
  const plainLabel = /^[\p{L}\p{N}_ ]*$/u;

  const toNodeId = (name: string): string => name.replace(unsafeIdChars, '_');
  const toLabel = (name: string): string =>
    plainLabel.test(name) ? name : `"${name.replace(/"/g, '#quot;')}"`;

  const lines = ['graph LR'];
  const processedPairs = new Set<string>();

  for (const entity of entities) {
    const sourceId = toNodeId(entity.Name);

    for (const rel of entity.RelatedEntities) {
      const targetId = toNodeId(rel.RelatedEntity);
      // Sorting makes the key direction-independent, so A→B and B→A share one edge.
      const pairKey = [sourceId, targetId].sort().join('|');

      if (processedPairs.has(pairKey)) {
        continue;
      }

      lines.push(` ${sourceId}[${toLabel(entity.Name)}] --> ${targetId}[${toLabel(rel.RelatedEntity)}]`);
      processedPairs.add(pairKey);
    }
  }

  return lines.join('\n');
}
|
|
78
|
+
|
|
79
|
+
/**
 * Reduces full entity metadata to the concise shape used in LLM prompts.
 *
 * For every entity the result carries:
 * - `Name` as-is
 * - `Description`, or `'No description available'` when it is empty/missing
 * - `SchemaName`, or `'dbo'` when it is empty/missing
 * - `FieldCount`, the length of the entity's `Fields` array
 * - `RelatedEntities`, each relationship reduced to `{ name, type }`
 *
 * @param entities - Entities to summarise
 * @returns One summary per input entity, in the same order
 */
export function formatEntitiesForPrompt(entities: EntityInfo[]): EntityMetadataForPrompt[] {
  const toPromptMetadata = (entity: EntityInfo): EntityMetadataForPrompt => {
    const name = entity.Name;
    const description = entity.Description || 'No description available';
    const schemaName = entity.SchemaName || 'dbo';
    const fieldCount = entity.Fields.length;
    const relatedEntities = entity.RelatedEntities.map(related => {
      return { name: related.RelatedEntity, type: related.Type };
    });

    return {
      Name: name,
      Description: description,
      SchemaName: schemaName,
      FieldCount: fieldCount,
      RelatedEntities: relatedEntities,
    };
  };

  return entities.map(entity => toPromptMetadata(entity));
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt execution helpers with model/vendor override support
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { AIPromptParams } from '@memberjunction/ai-core-plus';
|
|
6
|
+
import { AIPromptRunner } from '@memberjunction/ai-prompts';
|
|
7
|
+
import { UserInfo } from '@memberjunction/core';
|
|
8
|
+
import { AIPromptEntityExtended } from '@memberjunction/core-entities';
|
|
9
|
+
import { AIEngine } from '@memberjunction/aiengine';
|
|
10
|
+
import { QueryGenConfig } from '../cli/config';
|
|
11
|
+
|
|
12
|
+
/**
 * Runs an AI prompt, applying any model/vendor override named in the config.
 *
 * A fresh `AIPromptParams` is populated with the prompt, template data and
 * user context (validation is left enabled via `skipValidation = false`).
 * When the config names a model and/or vendor override, the names are resolved
 * to IDs by `resolveModelVendorOverrides`; the resolved object is attached as
 * `AIPromptParams.override` only if at least one ID was found. The prompt is
 * then executed with a new `AIPromptRunner`.
 *
 * @param prompt - The AI prompt to execute
 * @param data - Data passed to the prompt template
 * @param contextUser - User context for the execution
 * @param config - QueryGen configuration supplying `modelOverride` / `vendorOverride`
 * @returns Promise resolving to the runner's result for the prompt
 */
export async function executePromptWithOverrides<T>(
  prompt: AIPromptEntityExtended,
  data: Record<string, unknown>,
  contextUser: UserInfo,
  config: QueryGenConfig
): Promise<{ success: boolean; result?: T; errorMessage?: string }> {
  const params = new AIPromptParams();
  params.prompt = prompt;
  params.data = data;
  params.contextUser = contextUser;
  params.skipValidation = false;

  // Only resolve names when the config actually asks for an override.
  const overrideRequested = Boolean(config.modelOverride || config.vendorOverride);
  const resolved = overrideRequested ? resolveModelVendorOverrides(config) : undefined;

  // Names that could not be resolved leave the params without an override.
  if (resolved && (resolved.modelId || resolved.vendorId)) {
    params.override = resolved;
  }

  const outcome = await new AIPromptRunner().ExecutePrompt<T>(params);
  return outcome;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Translates the configured model/vendor override names into IDs.
 *
 * Each configured name is matched by exact `Name` equality against
 * `AIEngine.Instance.Models` / `AIEngine.Instance.Vendors`. A name that has no
 * match, or whose match has a falsy `ID`, contributes nothing to the result.
 *
 * @param config - QueryGen configuration carrying `modelOverride` / `vendorOverride` names
 * @returns Object holding `modelId` and/or `vendorId` for the names that resolved;
 *          empty when nothing resolved
 */
function resolveModelVendorOverrides(
  config: QueryGenConfig
): { modelId?: string; vendorId?: string } {
  const overrides: { modelId?: string; vendorId?: string } = {};
  const { modelOverride, vendorOverride } = config;

  if (modelOverride) {
    const matchedModel = AIEngine.Instance.Models.find(candidate => candidate.Name === modelOverride);
    if (matchedModel?.ID) {
      overrides.modelId = matchedModel.ID;
    }
  }

  if (vendorOverride) {
    const matchedVendor = AIEngine.Instance.Vendors.find(candidate => candidate.Name === vendorOverride);
    if (matchedVendor?.ID) {
      overrides.vendorId = matchedVendor.ID;
    }
  }

  return overrides;
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Query generation utility functions
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { BusinessQuestion } from '../data/schema';
|
|
6
|
+
|
|
7
|
+
/**
 * Generate a query name from a business question
 *
 * Removes question marks, splits on any run of whitespace (spaces, tabs,
 * newlines), drops words of two characters or fewer, keeps the first five
 * remaining words, and capitalizes each one (first letter upper-case, rest
 * lower-case).
 *
 * Note that three-letter words such as "the" or "are" are kept; only words
 * with one or two characters are filtered out.
 *
 * @param question - The business question to convert
 * @returns A formatted query name (empty string if no word survives the filter)
 *
 * @example
 * generateQueryName({ userQuestion: "What are the top customers by revenue?" })
 * // Returns: "What Are The Top Customers"
 *
 * @example
 * generateQueryName({ userQuestion: "Show me all active users" })
 * // Returns: "Show All Active Users"
 */
export function generateQueryName(question: BusinessQuestion): string {
  return question.userQuestion
    .replace(/\?/g, '')
    .split(/\s+/)
    .filter(word => word.length > 2)
    // Truncate before capitalizing so only the kept words are transformed.
    .slice(0, 5)
    .map(word => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
    .join(' ');
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* User helper utilities for QueryGen CLI operations
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { UserInfo } from '@memberjunction/core';
|
|
6
|
+
import { UserCache } from '@memberjunction/sqlserver-dataprovider';
|
|
7
|
+
|
|
8
|
+
/**
 * Get the system user from UserCache
 *
 * Looks up the user named "System" via `UserCache.Instance.UserByName` and
 * verifies that one of its roles is named "developer" (compared after trimming
 * and lower-casing). Role entries without a `Role` value are treated as
 * non-matching instead of raising a TypeError, so a missing role always
 * surfaces as the descriptive error below.
 *
 * This user is used for CLI operations where no specific user context exists.
 *
 * @returns The System UserInfo object from the cache
 * @throws Error if System user is not found in cache or doesn't have Developer role
 */
export function getSystemUser(): UserInfo {
  const sysUser = UserCache.Instance.UserByName("System", false);
  if (!sysUser) {
    throw new Error(
      "System user not found in cache. Ensure the database provider is initialized " +
      "before running QueryGen commands (e.g., via 'mj querygen' which initializes the provider)."
    );
  }

  // Null-safe on both the role list and each entry's Role name.
  const hasDeveloperRole =
    sysUser.UserRoles?.some(
      userRole => (userRole.Role ?? '').trim().toLowerCase() === 'developer'
    ) ?? false;

  if (!hasDeveloperRole) {
    throw new Error(
      "System user does not have the 'Developer' role. " +
      "The System user must have the Developer role to perform QueryGen operations."
    );
  }

  return sysUser;
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* EmbeddingService - Generates embeddings for queries and golden queries
|
|
3
|
+
*
|
|
4
|
+
* Wraps AIEngine embedding functionality for QueryGen use cases.
|
|
5
|
+
* Generates embeddings for multiple fields (userQuestion, description, technicalDescription).
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { AIEngine } from '@memberjunction/aiengine';
|
|
9
|
+
import { QueryEmbeddings, GoldenQuery, EmbeddedGoldenQuery } from '../data/schema';
|
|
10
|
+
|
|
11
|
+
/**
 * Service for generating embeddings using AI Engine
 *
 * Produces one embedding vector per text field (userQuestion, description,
 * technicalDescription) by calling `AIEngine.Instance.EmbedTextLocal`.
 */
export class EmbeddingService {
  // NOTE(review): modelName is stored but not referenced by any method in this
  // class; EmbedTextLocal is called with the text only. Confirm whether the
  // model selection is meant to be passed through.
  private readonly modelName: string;

  /**
   * Create an EmbeddingService with the specified embedding model
   *
   * @param modelName - Name of the embedding model (default: 'text-embedding-3-small')
   */
  constructor(modelName: string = 'text-embedding-3-small') {
    this.modelName = modelName;
  }

  /**
   * Embed a single query by generating embeddings for all its fields
   *
   * Generates separate embeddings, in parallel, for:
   * - userQuestion: Natural language question
   * - description: High-level description
   * - technicalDescription: Technical implementation details
   *
   * @param query - Query to embed (partial structure with text fields)
   * @returns Embeddings for all fields
   * @throws Error if any field's embedding call yields no vector
   */
  async embedQuery(query: {
    userQuestion: string;
    description: string;
    technicalDescription: string;
  }): Promise<QueryEmbeddings> {
    const aiEngine = AIEngine.Instance;

    // Generate embeddings for each field in parallel
    const [userQuestionResult, descResult, techDescResult] = await Promise.all([
      aiEngine.EmbedTextLocal(query.userQuestion),
      aiEngine.EmbedTextLocal(query.description),
      aiEngine.EmbedTextLocal(query.technicalDescription),
    ]);

    return {
      userQuestion: EmbeddingService.requireVector(userQuestionResult, 'userQuestion'),
      description: EmbeddingService.requireVector(descResult, 'description'),
      technicalDescription: EmbeddingService.requireVector(techDescResult, 'technicalDescription'),
    };
  }

  /**
   * Embed all golden queries for few-shot learning
   *
   * Embeds the userQuestion, description and technicalDescription of each
   * golden query, one query at a time, and pairs every query with its
   * embeddings. The name field is not embedded.
   *
   * @param goldenQueries - Array of golden queries to embed
   * @returns Array of golden queries with their embeddings, in input order
   */
  async embedGoldenQueries(goldenQueries: GoldenQuery[]): Promise<EmbeddedGoldenQuery[]> {
    const embedded: EmbeddedGoldenQuery[] = [];

    for (const query of goldenQueries) {
      const embeddings = await this.embedQuery({
        userQuestion: query.userQuestion,
        description: query.description,
        technicalDescription: query.technicalDescription,
      });

      embedded.push({
        query,
        embeddings,
      });
    }

    return embedded;
  }

  /**
   * Embed multiple queries in batch
   *
   * Queries are embedded one after another; the result order matches the input.
   *
   * @param queries - Array of queries to embed
   * @returns Array of embeddings corresponding to input queries
   */
  async embedQueries(
    queries: Array<{
      userQuestion: string;
      description: string;
      technicalDescription: string;
    }>
  ): Promise<QueryEmbeddings[]> {
    const embeddings: QueryEmbeddings[] = [];

    for (const query of queries) {
      const embedding = await this.embedQuery(query);
      embeddings.push(embedding);
    }

    return embeddings;
  }

  /**
   * Extract the vector from an embedding call result, failing loudly when the
   * call produced nothing usable instead of letting an undefined vector (or an
   * opaque TypeError) propagate into similarity calculations.
   */
  private static requireVector<V>(
    outcome: { result?: { vector?: V } | null } | null | undefined,
    field: string
  ): V {
    const vector = outcome?.result?.vector;
    if (vector == null) {
      throw new Error(`Failed to generate embedding for field '${field}': no vector was returned.`);
    }
    return vector;
  }
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SimilaritySearch - Finds similar golden queries using weighted cosine similarity
|
|
3
|
+
*
|
|
4
|
+
* Implements weighted similarity search across multiple fields (userQuestion,
|
|
5
|
+
* description, technicalDescription) to find the most relevant few-shot examples.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { QueryEmbeddings, EmbeddedGoldenQuery, SimilarQuery } from '../data/schema';
|
|
9
|
+
import { SimpleVectorService } from '@memberjunction/ai-vectors-memory';
|
|
10
|
+
|
|
11
|
+
/**
 * Per-field multipliers applied when combining field similarities into a
 * single weighted score.
 *
 * There is no weight for the query name: it is not part of the comparison.
 */
interface FieldWeights {
  /** Weight applied to the userQuestion similarity. */
  userQuestion: number;
  /** Weight applied to the description similarity. */
  description: number;
  /** Weight applied to the technicalDescription similarity. */
  technicalDescription: number;
}
|
|
20
|
+
|
|
21
|
+
/**
 * SimilaritySearch class
 * Finds most similar golden queries using weighted cosine similarity across
 * the userQuestion, description and technicalDescription embeddings.
 */
export class SimilaritySearch extends SimpleVectorService {
  /**
   * Weights for each field in similarity calculation
   * Total weights sum to 1.0
   *
   * Weight distribution:
   * - userQuestion: 0.20 (less important - natural language is variable)
   * - description: 0.40 (high-level business logic matching)
   * - technicalDescription: 0.40 (technical implementation details)
   */
  private readonly weights: FieldWeights = {
    userQuestion: 0.20,
    description: 0.40,
    technicalDescription: 0.40
  };

  /**
   * Find similar golden queries using weighted cosine similarity
   *
   * Calculates similarity for each field separately, then combines with weights.
   * No minimum-score filtering is applied: the best `topK` candidates are
   * returned however low their scores are (fewer when fewer golden queries
   * are supplied).
   *
   * @param queryEmbeddings - Embeddings for the user's query (one per field)
   * @param goldenEmbeddings - Array of golden queries with their embeddings
   * @param topK - Maximum number of most similar queries to return (default: 5);
   *               zero or a negative value yields an empty result
   * @returns Up to `topK` golden queries with scores, highest similarity first
   */
  async findSimilarQueries(
    queryEmbeddings: QueryEmbeddings,
    goldenEmbeddings: EmbeddedGoldenQuery[],
    topK: number = 5
  ): Promise<SimilarQuery[]> {
    // Calculate weighted similarity for each golden query
    const similarities = goldenEmbeddings.map(golden => {
      const fieldScores = this.calculateFieldSimilarities(queryEmbeddings, golden.embeddings);
      const weightedScore = this.calculateWeightedScore(fieldScores);

      return {
        query: golden.query,
        similarity: weightedScore,
        fieldScores
      };
    });

    // Sort by similarity (highest first) and return top K
    return this.selectTopK(similarities, topK);
  }

  /**
   * Calculate cosine similarity for each field separately
   *
   * Uses `this.CosineSimilarity` (presumably inherited from
   * SimpleVectorService) on each pair of corresponding field embeddings.
   * The name field is not compared.
   */
  private calculateFieldSimilarities(
    queryEmbeddings: QueryEmbeddings,
    goldenEmbeddings: QueryEmbeddings
  ): SimilarQuery['fieldScores'] {
    return {
      userQuestionSim: this.CosineSimilarity(queryEmbeddings.userQuestion, goldenEmbeddings.userQuestion),
      descSim: this.CosineSimilarity(queryEmbeddings.description, goldenEmbeddings.description),
      techDescSim: this.CosineSimilarity(queryEmbeddings.technicalDescription, goldenEmbeddings.technicalDescription)
    };
  }

  /**
   * Calculate weighted sum of field similarities using `this.weights`
   */
  private calculateWeightedScore(fieldScores: SimilarQuery['fieldScores']): number {
    return (
      fieldScores.userQuestionSim * this.weights.userQuestion +
      fieldScores.descSim * this.weights.description +
      fieldScores.techDescSim * this.weights.technicalDescription
    );
  }

  /**
   * Select top K results sorted by similarity (highest first)
   *
   * Sorts a copy so the caller's array is left untouched, and clamps `topK`
   * at zero so a negative value cannot turn `slice` into "all but the last N".
   */
  private selectTopK(similarities: SimilarQuery[], topK: number): SimilarQuery[] {
    const limit = Math.max(0, topK);

    return [...similarities]
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, limit);
  }
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"lib": ["ES2022"],
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"declaration": true,
|
|
9
|
+
"declarationMap": true,
|
|
10
|
+
"sourceMap": true,
|
|
11
|
+
"strict": true,
|
|
12
|
+
"esModuleInterop": true,
|
|
13
|
+
"skipLibCheck": true,
|
|
14
|
+
"forceConsistentCasingInFileNames": true,
|
|
15
|
+
"resolveJsonModule": true,
|
|
16
|
+
"moduleResolution": "node",
|
|
17
|
+
"noImplicitAny": true,
|
|
18
|
+
"strictNullChecks": true,
|
|
19
|
+
"strictFunctionTypes": true,
|
|
20
|
+
"strictBindCallApply": true,
|
|
21
|
+
"strictPropertyInitialization": true,
|
|
22
|
+
"noImplicitThis": true,
|
|
23
|
+
"alwaysStrict": true,
|
|
24
|
+
"noUnusedLocals": false,
|
|
25
|
+
"noUnusedParameters": false,
|
|
26
|
+
"noImplicitReturns": true,
|
|
27
|
+
"noFallthroughCasesInSwitch": true,
|
|
28
|
+
"allowSyntheticDefaultImports": true
|
|
29
|
+
},
|
|
30
|
+
"include": [
|
|
31
|
+
"src/**/*"
|
|
32
|
+
],
|
|
33
|
+
"exclude": [
|
|
34
|
+
"node_modules",
|
|
35
|
+
"dist",
|
|
36
|
+
"**/*.spec.ts",
|
|
37
|
+
"**/*.test.ts"
|
|
38
|
+
]
|
|
39
|
+
}
|