@memberjunction/query-gen 3.4.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/.turbo/turbo-build.log +2 -2
  2. package/CHANGELOG.md +54 -0
  3. package/README.md +30 -2
  4. package/dist/cli/commands/export.js +35 -65
  5. package/dist/cli/commands/export.js.map +1 -1
  6. package/dist/cli/commands/generate.d.ts.map +1 -1
  7. package/dist/cli/commands/generate.js +114 -115
  8. package/dist/cli/commands/generate.js.map +1 -1
  9. package/dist/cli/commands/validate.js +40 -70
  10. package/dist/cli/commands/validate.js.map +1 -1
  11. package/dist/cli/config.d.ts +1 -0
  12. package/dist/cli/config.d.ts.map +1 -1
  13. package/dist/cli/config.js +8 -7
  14. package/dist/cli/config.js.map +1 -1
  15. package/dist/cli/index.js +11 -10
  16. package/dist/cli/index.js.map +1 -1
  17. package/dist/core/EntityGrouper.d.ts +5 -2
  18. package/dist/core/EntityGrouper.d.ts.map +1 -1
  19. package/dist/core/EntityGrouper.js +94 -39
  20. package/dist/core/EntityGrouper.js.map +1 -1
  21. package/dist/core/MetadataExporter.d.ts +2 -2
  22. package/dist/core/MetadataExporter.js +9 -36
  23. package/dist/core/MetadataExporter.js.map +1 -1
  24. package/dist/core/QueryDatabaseWriter.d.ts +1 -1
  25. package/dist/core/QueryDatabaseWriter.js +8 -12
  26. package/dist/core/QueryDatabaseWriter.js.map +1 -1
  27. package/dist/core/QueryFixer.d.ts +2 -2
  28. package/dist/core/QueryFixer.js +11 -17
  29. package/dist/core/QueryFixer.js.map +1 -1
  30. package/dist/core/QueryRefiner.d.ts +3 -3
  31. package/dist/core/QueryRefiner.js +22 -29
  32. package/dist/core/QueryRefiner.js.map +1 -1
  33. package/dist/core/QueryTester.d.ts +2 -2
  34. package/dist/core/QueryTester.js +11 -44
  35. package/dist/core/QueryTester.js.map +1 -1
  36. package/dist/core/QueryWriter.d.ts +2 -2
  37. package/dist/core/QueryWriter.js +12 -18
  38. package/dist/core/QueryWriter.js.map +1 -1
  39. package/dist/core/QuestionGenerator.d.ts +2 -2
  40. package/dist/core/QuestionGenerator.js +16 -22
  41. package/dist/core/QuestionGenerator.js.map +1 -1
  42. package/dist/data/schema.js +1 -2
  43. package/dist/data/schema.js.map +1 -1
  44. package/dist/index.d.ts +18 -18
  45. package/dist/index.js +18 -59
  46. package/dist/index.js.map +1 -1
  47. package/dist/prompts/PromptNames.js +5 -8
  48. package/dist/prompts/PromptNames.js.map +1 -1
  49. package/dist/utils/category-builder.d.ts +2 -2
  50. package/dist/utils/category-builder.js +2 -7
  51. package/dist/utils/category-builder.js.map +1 -1
  52. package/dist/utils/entity-helpers.d.ts +1 -1
  53. package/dist/utils/entity-helpers.js +7 -17
  54. package/dist/utils/entity-helpers.js.map +1 -1
  55. package/dist/utils/error-handlers.js +3 -9
  56. package/dist/utils/error-handlers.js.map +1 -1
  57. package/dist/utils/graph-helpers.js +3 -9
  58. package/dist/utils/graph-helpers.js.map +1 -1
  59. package/dist/utils/prompt-helpers.d.ts +1 -1
  60. package/dist/utils/prompt-helpers.js +8 -12
  61. package/dist/utils/prompt-helpers.js.map +1 -1
  62. package/dist/utils/user-helpers.js +3 -7
  63. package/dist/utils/user-helpers.js.map +1 -1
  64. package/dist/vectors/EmbeddingService.d.ts +1 -1
  65. package/dist/vectors/EmbeddingService.js +3 -8
  66. package/dist/vectors/EmbeddingService.js.map +1 -1
  67. package/dist/vectors/SimilaritySearch.d.ts +1 -1
  68. package/dist/vectors/SimilaritySearch.js +19 -20
  69. package/dist/vectors/SimilaritySearch.js.map +1 -1
  70. package/package.json +24 -17
  71. package/src/__tests__/golden-queries-loader.test.js +181 -0
  72. package/src/cli/commands/generate.ts +52 -21
  73. package/src/cli/config.ts +10 -2
  74. package/src/cli/index.ts +4 -1
  75. package/src/core/EntityGrouper.ts +75 -11
  76. package/src/core/QueryTester.ts +1 -1
  77. package/tsconfig.json +5 -35
package/src/cli/commands/generate.ts CHANGED
@@ -12,8 +12,6 @@
12
12
 
13
13
  import ora from 'ora';
14
14
  import chalk from 'chalk';
15
- import * as fs from 'fs';
16
- import * as path from 'path';
17
15
  import { Metadata, DatabaseProviderBase, EntityInfo, LogStatus } from '@memberjunction/core';
18
16
  import { loadConfig } from '../config';
19
17
  import { getSystemUser } from '../../utils/user-helpers';
@@ -30,6 +28,9 @@ import { formatEntityMetadataForPrompt } from '../../utils/entity-helpers';
30
28
  import { extractErrorMessage } from '../../utils/error-handlers';
31
29
  import { buildQueryCategory, extractUniqueCategories } from '../../utils/category-builder';
32
30
  import { ValidatedQuery, GoldenQuery, EntityGroup, QueryCategoryInfo } from '../../data/schema';
31
+ import { readFileSync } from 'fs';
32
+ import { dirname, join } from 'path';
33
+ import { fileURLToPath } from 'url';
33
34
 
34
35
  /**
35
36
  * Execute the generate command
@@ -73,11 +74,42 @@ export async function generateCommand(options: Record<string, unknown>): Promise
73
74
  spinner.start('Filtering entities...');
74
75
  const md = new Metadata();
75
76
 
77
+ // DIAGNOSTIC: Log entity filtering context
78
+ if (config.verbose) {
79
+ LogStatus(`\n=== Entity Filtering Diagnostics ===`);
80
+ LogStatus(`Total entities in metadata: ${md.Entities.length}`);
81
+ LogStatus(`Excluded schemas: ${config.excludeSchemas.join(', ')}`);
82
+
83
+ // Show schema distribution BEFORE filtering
84
+ const schemasBefore = new Map<string, number>();
85
+ md.Entities.forEach(e => {
86
+ const schema = e.SchemaName || 'null';
87
+ schemasBefore.set(schema, (schemasBefore.get(schema) || 0) + 1);
88
+ });
89
+ LogStatus(`Schema distribution (before filtering):`);
90
+ Array.from(schemasBefore.entries())
91
+ .sort((a, b) => b[1] - a[1])
92
+ .slice(0, 10)
93
+ .forEach(([schema, count]) => {
94
+ const excluded = config.excludeSchemas.includes(schema) ? ' [EXCLUDED]' : '';
95
+ LogStatus(` ${schema}: ${count} entities${excluded}`);
96
+ });
97
+ }
98
+
76
99
  // Apply entity filtering (includeEntities takes precedence over excludeEntities)
77
100
  let filteredEntities = md.Entities.filter(
78
101
  e => !config.excludeSchemas.includes(e.SchemaName || '')
79
102
  );
80
103
 
104
+ if (config.verbose) {
105
+ LogStatus(`\nAfter schema filtering: ${filteredEntities.length} entities remaining`);
106
+ if (filteredEntities.length > 0) {
107
+ LogStatus(`Sample entity names: ${filteredEntities.slice(0, 5).map(e => e.Name).join(', ')}`);
108
+ const schemasAfter = new Set(filteredEntities.map(e => e.SchemaName || 'null'));
109
+ LogStatus(`Remaining schemas: ${Array.from(schemasAfter).join(', ')}`);
110
+ }
111
+ }
112
+
81
113
  if (config.includeEntities.length > 0) {
82
114
  // Allowlist: only include specified entities
83
115
  filteredEntities = filteredEntities.filter(e => config.includeEntities.includes(e.Name));
@@ -88,6 +120,11 @@ export async function generateCommand(options: Record<string, unknown>): Promise
88
120
  spinner.info(chalk.dim(`Excluded ${config.excludeEntities.length} entities`));
89
121
  }
90
122
 
123
+ if (config.verbose) {
124
+ LogStatus(`Final filtered entities: ${filteredEntities.length}`);
125
+ LogStatus('====================================\n');
126
+ }
127
+
91
128
  // 4. Group entities by schema and generate entity groups
92
129
  spinner.text = 'Analyzing entity relationships...';
93
130
 
@@ -282,36 +319,30 @@ export async function generateCommand(options: Record<string, unknown>): Promise
282
319
  }
283
320
 
284
321
  /**
285
- * Load golden queries from JSON file
322
+ * Load golden queries from compile-time imported JSON data
286
323
  *
287
324
  * Golden queries are example queries used for few-shot learning.
288
- * They are stored in the data/golden-queries.json file.
325
+ * They are imported at compile time from data/golden-queries.json.
326
+ *
327
+ * This approach is more efficient than runtime file I/O and provides
328
+ * type safety through TypeScript's resolveJsonModule feature.
289
329
  *
290
330
  * @param config - QueryGen configuration with verbose flag
291
- * @returns Array of golden queries, or empty array if file not found/invalid
331
+ * @returns Array of golden queries
292
332
  */
293
333
  async function loadGoldenQueries(config: { verbose: boolean }): Promise<GoldenQuery[]> {
294
334
  try {
295
- // Resolve path to golden-queries.json in the data directory
296
- // __dirname points to dist/cli/commands, so we go up to dist, then to data
297
- const goldenQueriesPath = path.join(__dirname, '../../data/golden-queries.json');
298
-
299
- // Check if file exists
300
- if (!fs.existsSync(goldenQueriesPath)) {
301
- if (config.verbose) {
302
- LogStatus(`[Warning] Golden queries file not found at: ${goldenQueriesPath}`);
303
- }
304
- return [];
305
- }
306
-
307
- // Read and parse JSON file
308
- const fileContent = fs.readFileSync(goldenQueriesPath, 'utf-8');
309
- const goldenQueries = JSON.parse(fileContent) as GoldenQuery[];
335
+ // Load golden queries from JSON file using ES module compatible path resolution
336
+ const __filename = fileURLToPath(import.meta.url);
337
+ const __dirname = dirname(__filename);
338
+ const goldenQueriesPath = join(__dirname, '../../data/golden-queries.json');
339
+ const goldenQueriesData = JSON.parse(readFileSync(goldenQueriesPath, 'utf-8'));
340
+ const goldenQueries = goldenQueriesData as GoldenQuery[];
310
341
 
311
342
  // Validate that it's an array
312
343
  if (!Array.isArray(goldenQueries)) {
313
344
  if (config.verbose) {
314
- LogStatus('[Warning] Golden queries file does not contain an array');
345
+ LogStatus('[Warning] Golden queries data is not an array');
315
346
  }
316
347
  return [];
317
348
  }
package/src/cli/config.ts CHANGED
@@ -4,6 +4,13 @@
4
4
  * Loads configuration from mj.config.cjs and merges with CLI options
5
5
  */
6
6
 
7
+ import fs from 'fs';
8
+ import path from 'path';
9
+ import { createRequire } from 'node:module';
10
+
11
+ // Use createRequire to load CommonJS config files
12
+ const require = createRequire(import.meta.url);
13
+
7
14
  /**
8
15
  * QueryGen configuration options
9
16
  */
@@ -21,6 +28,7 @@ export interface QueryGenConfig {
21
28
  questionsPerGroup: number;
22
29
  minGroupSize: number; // Minimum entities per group
23
30
  maxGroupSize: number; // Maximum entities per group
31
+ requireConnectivity: boolean; // Require entities in groups to be connected via relationships
24
32
 
25
33
  // AI Configuration
26
34
  modelOverride?: string; // Override model for all prompts (e.g., "GPT-OSS-120B")
@@ -74,6 +82,7 @@ const DEFAULT_CONFIG: QueryGenConfig = {
74
82
  questionsPerGroup: 2,
75
83
  minGroupSize: 2, // Multi-entity groups have at least 2 entities
76
84
  maxGroupSize: 3, // Keep groups small for focused questions
85
+ requireConnectivity: true, // Require entities to be connected (disable for sparse relationship graphs)
77
86
  embeddingModel: 'text-embedding-3-small',
78
87
  maxRefinementIterations: 3,
79
88
  maxFixingIterations: 5,
@@ -170,8 +179,7 @@ export function loadConfig(cliOptions: Record<string, unknown>): QueryGenConfig
170
179
  */
171
180
  function loadMjConfig(): { queryGen?: Partial<QueryGenConfig> } | null {
172
181
  try {
173
- const configPath = require('path').join(process.cwd(), 'mj.config.cjs');
174
- const fs = require('fs');
182
+ const configPath = path.join(process.cwd(), 'mj.config.cjs');
175
183
  if (fs.existsSync(configPath)) {
176
184
  return require(configPath);
177
185
  }
package/src/cli/index.ts CHANGED
@@ -11,11 +11,14 @@
11
11
  */
12
12
 
13
13
  import { Command } from 'commander';
14
+ import { createRequire } from 'node:module';
14
15
  import { generateCommand } from './commands/generate';
15
16
  import { validateCommand } from './commands/validate';
16
17
  import { exportCommand } from './commands/export';
17
18
 
18
- const packageJson = require('../../package.json');
19
+ // Use createRequire to import JSON (compatible with ESM)
20
+ const require = createRequire(import.meta.url);
21
+ const packageJson = require('../../package.json') as { version: string };
19
22
 
20
23
  const program = new Command();
21
24
 
package/src/core/EntityGrouper.ts CHANGED
@@ -74,6 +74,14 @@ export class EntityGrouper {
74
74
  LogStatus(`[${schemaIndex}/${totalSchemas}] Processing schema: ${schemaName} (${schemaEntities.length} entities)`);
75
75
  }
76
76
 
77
+ // Skip schemas with too few entities for grouping
78
+ if (schemaEntities.length < this.config.minGroupSize) {
79
+ if (this.config.verbose) {
80
+ LogStatus(`[${schemaIndex}/${totalSchemas}] Skipping schema ${schemaName}: only ${schemaEntities.length} entities (minimum ${this.config.minGroupSize} required)`);
81
+ }
82
+ continue;
83
+ }
84
+
77
85
  // 2a. Prepare schema data for LLM
78
86
  const schemaData = this.prepareSchemaData(schemaEntities, schemaName);
79
87
 
@@ -81,7 +89,8 @@ export class EntityGrouper {
81
89
  const llmResponse = await this.callLLMForGrouping(schemaData, contextUser);
82
90
 
83
91
  // 2c. Validate and convert to EntityGroup objects
84
- const validatedGroups = this.validateAndConvertGroups(llmResponse, schemaEntities);
92
+ // Note: This may return an empty array if no groups pass validation
93
+ const validatedGroups = this.validateAndConvertGroups(llmResponse, schemaEntities, schemaName);
85
94
 
86
95
  if (this.config.verbose && totalSchemas > 1) {
87
96
  LogStatus(`[${schemaIndex}/${totalSchemas}] Generated ${validatedGroups.length} groups for schema: ${schemaName}`);
@@ -90,7 +99,12 @@ export class EntityGrouper {
90
99
  allGroups.push(...validatedGroups);
91
100
  }
92
101
 
93
- // 3. Deduplicate any similar groups across all schemas
102
+ // 3. Check if we generated any groups across all schemas
103
+ if (allGroups.length === 0) {
104
+ throw new Error('No valid entity groups generated by LLM across any schema');
105
+ }
106
+
107
+ // 4. Deduplicate any similar groups across all schemas
94
108
  const deduplicatedGroups = this.deduplicateGroups(allGroups);
95
109
 
96
110
  return deduplicatedGroups;
@@ -223,14 +237,36 @@ export class EntityGrouper {
223
237
 
224
238
  /**
225
239
  * Validate LLM output and convert to EntityGroup objects
240
+ *
241
+ * @param schemaName - Optional schema name for better error messages
242
+ * @returns Array of validated groups (may be empty if no groups pass validation)
226
243
  */
227
244
  private validateAndConvertGroups(
228
245
  llmResponse: LLMEntityGroupResponse,
229
- entities: EntityInfo[]
246
+ entities: EntityInfo[],
247
+ schemaName?: string
230
248
  ): EntityGroup[] {
231
249
  const entityMap = new Map(entities.map(e => [e.Name, e]));
232
250
  const validGroups: EntityGroup[] = [];
233
251
 
252
+ // DIAGNOSTIC: Log entity validation context
253
+ if (this.config.verbose) {
254
+ LogStatus('\n=== Entity Group Validation Diagnostics ===');
255
+ LogStatus(`Entities available for validation: ${entities.length}`);
256
+ if (entities.length > 0) {
257
+ LogStatus(`Entity names in map: ${Array.from(entityMap.keys()).slice(0, 10).join(', ')}${entityMap.size > 10 ? '...' : ''}`);
258
+ LogStatus(`Sample entity SchemaName values: ${entities.slice(0, 5).map(e => `"${e.SchemaName || 'null'}"`).join(', ')}`);
259
+ }
260
+ LogStatus(`Groups returned by LLM: ${llmResponse.groups.length}`);
261
+ if (llmResponse.groups.length > 0) {
262
+ LogStatus(`Sample LLM group entities: ${llmResponse.groups[0].entities.join(', ')}`);
263
+ }
264
+ LogStatus(`Connectivity validation: ${this.config.requireConnectivity ? 'ENABLED' : 'DISABLED'}`);
265
+ if (!this.config.requireConnectivity) {
266
+ LogStatus('⚠️ WARNING: Connectivity validation is disabled. Groups may include unrelated entities.');
267
+ }
268
+ }
269
+
234
270
  for (const llmGroup of llmResponse.groups) {
235
271
  try {
236
272
  // Validate all entity names exist
@@ -239,14 +275,21 @@ export class EntityGrouper {
239
275
  .filter((e): e is EntityInfo => e !== undefined);
240
276
 
241
277
  if (groupEntities.length !== llmGroup.entities.length) {
242
- // Skip logging - verbose debug info that clutters output
278
+ if (this.config.verbose) {
279
+ const missingEntities = llmGroup.entities.filter(name => !entityMap.has(name));
280
+ LogStatus(` ✗ Group rejected - Entity name mismatch:`);
281
+ LogStatus(` LLM requested: ${llmGroup.entities.join(', ')}`);
282
+ LogStatus(` Not found in metadata: ${missingEntities.join(', ')}`);
283
+ }
243
284
  continue;
244
285
  }
245
286
 
246
287
  // Validate primary entity exists
247
288
  const primaryEntity = entityMap.get(llmGroup.primaryEntity);
248
289
  if (!primaryEntity) {
249
- // Skip logging - verbose debug info that clutters output
290
+ if (this.config.verbose) {
291
+ LogStatus(` ✗ Group rejected - Primary entity not found: ${llmGroup.primaryEntity}`);
292
+ }
250
293
  continue;
251
294
  }
252
295
 
@@ -254,9 +297,20 @@ export class EntityGrouper {
254
297
  const relationships = this.extractRelationships(groupEntities);
255
298
 
256
299
  // Validate connectivity (all entities must be reachable from primary)
257
- if (groupEntities.length > 1 && !this.isConnected(groupEntities, relationships)) {
258
- // Skip logging - verbose debug info that clutters output
259
- continue;
300
+ // Skip this check if requireConnectivity is disabled
301
+ if (this.config.requireConnectivity) {
302
+ if (groupEntities.length > 1 && !this.isConnected(groupEntities, relationships)) {
303
+ if (this.config.verbose) {
304
+ LogStatus(` ✗ Group rejected - Entities not connected: ${groupEntities.map(e => e.Name).join(', ')}`);
305
+ }
306
+ continue;
307
+ }
308
+ } else if (this.config.verbose && groupEntities.length > 1) {
309
+ // Log warning when connectivity check is skipped
310
+ const hasRelationships = relationships.length > 0;
311
+ if (!hasRelationships) {
312
+ LogStatus(` ⚠️ Group has no relationships (connectivity check disabled): ${groupEntities.map(e => e.Name).join(', ')}`);
313
+ }
260
314
  }
261
315
 
262
316
  // Create EntityGroup with LLM metadata
@@ -269,15 +323,25 @@ export class EntityGrouper {
269
323
  businessRationale: llmGroup.businessRationale,
270
324
  expectedQuestionTypes: llmGroup.expectedQuestionTypes
271
325
  });
326
+
327
+ if (this.config.verbose) {
328
+ LogStatus(` ✓ Group accepted: ${groupEntities.map(e => e.Name).join(', ')}`);
329
+ }
272
330
  } catch (error: unknown) {
273
- // Skip logging - verbose debug info that clutters output
331
+ if (this.config.verbose) {
332
+ LogStatus(` ✗ Group validation error: ${extractErrorMessage(error, 'Group Validation')}`);
333
+ }
274
334
  }
275
335
  }
276
336
 
277
- if (validGroups.length === 0) {
278
- throw new Error('No valid entity groups generated by LLM');
337
+ if (this.config.verbose) {
338
+ const schemaInfo = schemaName ? ` (schema: ${schemaName})` : '';
339
+ LogStatus(`\nValidation Summary${schemaInfo}: ${validGroups.length}/${llmResponse.groups.length} groups accepted`);
340
+ LogStatus('==========================================\n');
279
341
  }
280
342
 
343
+ // Return empty array if no groups passed validation
344
+ // The caller will aggregate results from all schemas and check if total is 0
281
345
  return validGroups;
282
346
  }
283
347
 
package/src/core/QueryTester.ts CHANGED
@@ -5,7 +5,7 @@
5
5
  * queries against the database. Handles error fixing with retry loop.
6
6
  */
7
7
 
8
- import * as nunjucks from 'nunjucks';
8
+ import nunjucks from 'nunjucks';
9
9
  import {
10
10
  DatabaseProviderBase,
11
11
  RunQuerySQLFilterManager,
package/tsconfig.json CHANGED
@@ -1,39 +1,9 @@
1
1
  {
2
+ "extends": "../../tsconfig.server.json",
2
3
  "compilerOptions": {
3
- "target": "ES2022",
4
- "module": "commonjs",
5
- "lib": ["ES2022"],
6
- "outDir": "./dist",
7
- "rootDir": "./src",
8
- "declaration": true,
9
- "declarationMap": true,
10
- "sourceMap": true,
11
- "strict": true,
12
- "esModuleInterop": true,
13
- "skipLibCheck": true,
14
- "forceConsistentCasingInFileNames": true,
15
- "resolveJsonModule": true,
16
- "moduleResolution": "node",
17
- "noImplicitAny": true,
18
- "strictNullChecks": true,
19
- "strictFunctionTypes": true,
20
- "strictBindCallApply": true,
21
- "strictPropertyInitialization": true,
22
- "noImplicitThis": true,
23
- "alwaysStrict": true,
24
- "noUnusedLocals": false,
25
- "noUnusedParameters": false,
26
- "noImplicitReturns": true,
27
- "noFallthroughCasesInSwitch": true,
28
- "allowSyntheticDefaultImports": true
4
+ "outDir": "dist",
5
+ "rootDir": "src"
29
6
  },
30
- "include": [
31
- "src/**/*"
32
- ],
33
- "exclude": [
34
- "node_modules",
35
- "dist",
36
- "**/*.spec.ts",
37
- "**/*.test.ts"
38
- ]
7
+ "include": ["src/**/*"],
8
+ "exclude": ["node_modules"]
39
9
  }