@memberjunction/query-gen 0.0.1 → 2.126.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.turbo/turbo-build.log +4 -0
  2. package/CHANGELOG.md +34 -0
  3. package/COORDINATOR.md +768 -0
  4. package/IMPLEMENTATION_PLAN.md +1753 -0
  5. package/LLM_ENTITY_GROUPING_PLAN.md +977 -0
  6. package/README.md +675 -29
  7. package/dist/cli/commands/export.d.ts +15 -0
  8. package/dist/cli/commands/export.d.ts.map +1 -0
  9. package/dist/cli/commands/export.js +178 -0
  10. package/dist/cli/commands/export.js.map +1 -0
  11. package/dist/cli/commands/generate.d.ts +19 -0
  12. package/dist/cli/commands/generate.d.ts.map +1 -0
  13. package/dist/cli/commands/generate.js +282 -0
  14. package/dist/cli/commands/generate.js.map +1 -0
  15. package/dist/cli/commands/validate.d.ts +17 -0
  16. package/dist/cli/commands/validate.d.ts.map +1 -0
  17. package/dist/cli/commands/validate.js +193 -0
  18. package/dist/cli/commands/validate.js.map +1 -0
  19. package/dist/cli/config.d.ts +51 -0
  20. package/dist/cli/config.d.ts.map +1 -0
  21. package/dist/cli/config.js +142 -0
  22. package/dist/cli/config.js.map +1 -0
  23. package/dist/cli/index.d.ts +13 -0
  24. package/dist/cli/index.d.ts.map +1 -0
  25. package/dist/cli/index.js +57 -0
  26. package/dist/cli/index.js.map +1 -0
  27. package/dist/core/EntityGrouper.d.ts +74 -0
  28. package/dist/core/EntityGrouper.d.ts.map +1 -0
  29. package/dist/core/EntityGrouper.js +246 -0
  30. package/dist/core/EntityGrouper.js.map +1 -0
  31. package/dist/core/MetadataExporter.d.ts +59 -0
  32. package/dist/core/MetadataExporter.d.ts.map +1 -0
  33. package/dist/core/MetadataExporter.js +151 -0
  34. package/dist/core/MetadataExporter.js.map +1 -0
  35. package/dist/core/QueryDatabaseWriter.d.ts +50 -0
  36. package/dist/core/QueryDatabaseWriter.d.ts.map +1 -0
  37. package/dist/core/QueryDatabaseWriter.js +152 -0
  38. package/dist/core/QueryDatabaseWriter.js.map +1 -0
  39. package/dist/core/QueryFixer.d.ts +48 -0
  40. package/dist/core/QueryFixer.d.ts.map +1 -0
  41. package/dist/core/QueryFixer.js +115 -0
  42. package/dist/core/QueryFixer.js.map +1 -0
  43. package/dist/core/QueryRefiner.d.ts +94 -0
  44. package/dist/core/QueryRefiner.d.ts.map +1 -0
  45. package/dist/core/QueryRefiner.js +267 -0
  46. package/dist/core/QueryRefiner.js.map +1 -0
  47. package/dist/core/QueryTester.d.ts +70 -0
  48. package/dist/core/QueryTester.d.ts.map +1 -0
  49. package/dist/core/QueryTester.js +243 -0
  50. package/dist/core/QueryTester.js.map +1 -0
  51. package/dist/core/QueryWriter.d.ts +57 -0
  52. package/dist/core/QueryWriter.d.ts.map +1 -0
  53. package/dist/core/QueryWriter.js +184 -0
  54. package/dist/core/QueryWriter.js.map +1 -0
  55. package/dist/core/QuestionGenerator.d.ts +58 -0
  56. package/dist/core/QuestionGenerator.d.ts.map +1 -0
  57. package/dist/core/QuestionGenerator.js +145 -0
  58. package/dist/core/QuestionGenerator.js.map +1 -0
  59. package/dist/data/schema.d.ts +230 -0
  60. package/dist/data/schema.d.ts.map +1 -0
  61. package/dist/data/schema.js +6 -0
  62. package/dist/data/schema.js.map +1 -0
  63. package/dist/index.d.ts +28 -0
  64. package/dist/index.d.ts.map +1 -0
  65. package/dist/index.js +77 -0
  66. package/dist/index.js.map +1 -0
  67. package/dist/prompts/PromptNames.d.ts +32 -0
  68. package/dist/prompts/PromptNames.d.ts.map +1 -0
  69. package/dist/prompts/PromptNames.js +35 -0
  70. package/dist/prompts/PromptNames.js.map +1 -0
  71. package/dist/utils/category-builder.d.ts +28 -0
  72. package/dist/utils/category-builder.d.ts.map +1 -0
  73. package/dist/utils/category-builder.js +90 -0
  74. package/dist/utils/category-builder.js.map +1 -0
  75. package/dist/utils/entity-helpers.d.ts +49 -0
  76. package/dist/utils/entity-helpers.d.ts.map +1 -0
  77. package/dist/utils/entity-helpers.js +189 -0
  78. package/dist/utils/entity-helpers.js.map +1 -0
  79. package/dist/utils/error-handlers.d.ts +19 -0
  80. package/dist/utils/error-handlers.d.ts.map +1 -0
  81. package/dist/utils/error-handlers.js +41 -0
  82. package/dist/utils/error-handlers.js.map +1 -0
  83. package/dist/utils/graph-helpers.d.ts +51 -0
  84. package/dist/utils/graph-helpers.d.ts.map +1 -0
  85. package/dist/utils/graph-helpers.js +82 -0
  86. package/dist/utils/graph-helpers.js.map +1 -0
  87. package/dist/utils/prompt-helpers.d.ts +25 -0
  88. package/dist/utils/prompt-helpers.d.ts.map +1 -0
  89. package/dist/utils/prompt-helpers.js +66 -0
  90. package/dist/utils/prompt-helpers.js.map +1 -0
  91. package/dist/utils/query-helpers.d.ts +23 -0
  92. package/dist/utils/query-helpers.d.ts.map +1 -0
  93. package/dist/utils/query-helpers.js +34 -0
  94. package/dist/utils/query-helpers.js.map +1 -0
  95. package/dist/utils/user-helpers.d.ts +15 -0
  96. package/dist/utils/user-helpers.d.ts.map +1 -0
  97. package/dist/utils/user-helpers.js +32 -0
  98. package/dist/utils/user-helpers.js.map +1 -0
  99. package/dist/vectors/EmbeddingService.d.ts +58 -0
  100. package/dist/vectors/EmbeddingService.d.ts.map +1 -0
  101. package/dist/vectors/EmbeddingService.js +90 -0
  102. package/dist/vectors/EmbeddingService.js.map +1 -0
  103. package/dist/vectors/SimilaritySearch.d.ts +51 -0
  104. package/dist/vectors/SimilaritySearch.d.ts.map +1 -0
  105. package/dist/vectors/SimilaritySearch.js +85 -0
  106. package/dist/vectors/SimilaritySearch.js.map +1 -0
  107. package/docs/API.md +1040 -0
  108. package/docs/ARCHITECTURE.md +1120 -0
  109. package/examples/advanced-usage.ts +401 -0
  110. package/examples/basic-usage.ts +285 -0
  111. package/package.json +48 -6
  112. package/src/cli/commands/export.ts +173 -0
  113. package/src/cli/commands/generate.ts +330 -0
  114. package/src/cli/commands/validate.ts +185 -0
  115. package/src/cli/config.ts +203 -0
  116. package/src/cli/index.ts +63 -0
  117. package/src/core/EntityGrouper.ts +318 -0
  118. package/src/core/MetadataExporter.ts +148 -0
  119. package/src/core/QueryDatabaseWriter.ts +187 -0
  120. package/src/core/QueryFixer.ts +153 -0
  121. package/src/core/QueryRefiner.ts +382 -0
  122. package/src/core/QueryTester.ts +264 -0
  123. package/src/core/QueryWriter.ts +239 -0
  124. package/src/core/QuestionGenerator.ts +199 -0
  125. package/src/data/golden-queries.json +1371 -0
  126. package/src/data/schema.ts +252 -0
  127. package/src/index.ts +49 -0
  128. package/src/prompts/PromptNames.ts +36 -0
  129. package/src/utils/category-builder.ts +97 -0
  130. package/src/utils/entity-helpers.ts +203 -0
  131. package/src/utils/error-handlers.ts +41 -0
  132. package/src/utils/graph-helpers.ts +99 -0
  133. package/src/utils/prompt-helpers.ts +79 -0
  134. package/src/utils/query-helpers.ts +32 -0
  135. package/src/utils/user-helpers.ts +39 -0
  136. package/src/vectors/EmbeddingService.ts +109 -0
  137. package/src/vectors/SimilaritySearch.ts +108 -0
  138. package/tsconfig.json +39 -0
package/README.md CHANGED
@@ -1,45 +1,691 @@
1
- # @memberjunction/query-gen
1
+ # QueryGen - AI-Powered SQL Query Template Generator
2
2
 
3
- ## ⚠️ IMPORTANT NOTICE ⚠️
3
+ **@memberjunction/query-gen** is a comprehensive tool for generating domain-specific SQL query templates using artificial intelligence. It analyzes your database schema, generates meaningful business questions, creates SQL queries, tests them, refines them through iterative feedback, and exports them to MemberJunction metadata format.
4
4
 
5
- **This package is created solely for the purpose of setting up OIDC (OpenID Connect) trusted publishing with npm.**
5
+ ## Overview
6
6
 
7
- This is **NOT** a functional package and contains **NO** code or functionality beyond the OIDC setup configuration.
7
+ QueryGen automates the creation of SQL query templates through an 11-phase AI-powered pipeline:
8
8
 
9
- ## Purpose
9
+ 1. **Schema Analysis** - Loads entities and relationship graphs from MemberJunction metadata
10
+ 2. **Entity Grouping** - Uses AI to generate semantically meaningful entity combinations (2-3 related entities)
11
+ 3. **Business Question Generation** - AI creates domain-specific questions for each entity group
12
+ 4. **Vector Similarity Search** - Finds similar golden queries using weighted cosine similarity for few-shot learning
13
+ 5. **SQL Template Generation** - AI generates Nunjucks-parameterized SQL with proper syntax
14
+ 6. **Query Testing** - Executes SQL against database to validate functionality
15
+ 7. **Error Fixing** - AI automatically corrects SQL syntax and logic errors (up to 5 attempts)
16
+ 8. **Query Evaluation** - AI assesses if query answers the business question correctly
17
+ 9. **Query Refinement** - AI improves queries based on evaluation feedback (up to 3 iterations)
18
+ 10. **Comprehensive Validation** - Validates all generated queries against schema and execution
19
+ 11. **Export** - Outputs to MJ metadata format (JSON files) or directly to database
10
20
 
11
- This package exists to:
12
- 1. Configure OIDC trusted publishing for the package name `@memberjunction/query-gen`
13
- 2. Enable secure, token-less publishing from CI/CD workflows
14
- 3. Establish provenance for packages published under this name
21
+ **Status**: Production-ready with complete implementation of all 11 phases.
15
22
 
16
- ## What is OIDC Trusted Publishing?
23
+ ## Installation
17
24
 
18
- OIDC trusted publishing allows package maintainers to publish packages directly from their CI/CD workflows without needing to manage npm access tokens. Instead, it uses OpenID Connect to establish trust between the CI/CD provider (like GitHub Actions) and npm.
25
+ ```bash
26
+ # From MJ repository root
27
+ cd packages/QueryGen
28
+ npm install
29
+ npm run build
19
30
 
20
- ## Setup Instructions
31
+ # Link for global CLI usage (optional)
32
+ npm link
33
+ ```
21
34
 
22
- To properly configure OIDC trusted publishing for this package:
35
+ ## Quick Start
23
36
 
24
- 1. Go to [npmjs.com](https://www.npmjs.com/) and navigate to your package settings
25
- 2. Configure the trusted publisher (e.g., GitHub Actions)
26
- 3. Specify the repository and workflow that should be allowed to publish
27
- 4. Use the configured workflow to publish your actual package
37
+ QueryGen is integrated into the MemberJunction CLI (`mj`), so it fits into existing MemberJunction workflows. You can also use the standalone CLI (`mj-querygen`) for direct access.
28
38
 
29
- ## DO NOT USE THIS PACKAGE
39
+ ### Using MemberJunction CLI (Recommended)
30
40
 
31
- This package is a placeholder for OIDC configuration only. It:
32
- - Contains no executable code
33
- - Provides no functionality
34
- - Should not be installed as a dependency
35
- - Exists only for administrative purposes
41
+ ```bash
42
+ # Generate queries for all entities
43
+ mj querygen generate
36
44
 
37
- ## More Information
45
+ # Generate with verbose output
46
+ mj querygen generate -v
38
47
 
39
- For more details about npm's trusted publishing feature, see:
40
- - [npm Trusted Publishing Documentation](https://docs.npmjs.com/generating-provenance-statements)
41
- - [GitHub Actions OIDC Documentation](https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/about-security-hardening-with-openid-connect)
48
+ # Generate for specific entities
49
+ mj querygen generate --entities "Customers,Orders,Products"
42
50
 
43
- ---
51
+ # Exclude specific schemas
52
+ mj querygen generate --exclude-schemas "sys,INFORMATION_SCHEMA"
44
53
 
45
- **Maintained for OIDC setup purposes only**
54
+ # Validate existing queries
55
+ mj querygen validate
56
+
57
+ # Export queries from database
58
+ mj querygen export
59
+ ```
60
+
61
+ ### Using Standalone CLI
62
+
63
+ ```bash
64
+ # Generate queries for all entities
65
+ mj-querygen generate
66
+
67
+ # Generate with verbose output
68
+ mj-querygen generate -v
69
+
70
+ # Generate for specific entities
71
+ mj-querygen generate -e Customers Orders Products
72
+
73
+ # Exclude specific schemas
74
+ mj-querygen generate -s sys INFORMATION_SCHEMA
75
+ ```
76
+
77
+ ### Additional Examples
78
+
79
+ ```bash
80
+ # Validate queries with MJ CLI
81
+ mj querygen validate --path ./metadata/queries
82
+
83
+ # Export from database with MJ CLI
84
+ mj querygen export --output ./exported-queries
85
+
86
+ # Or use standalone CLI
87
+ mj-querygen validate -p ./metadata/queries
88
+ mj-querygen export -o ./exported-queries
89
+ ```
90
+
91
+ ## Configuration
92
+
93
+ ### CLI Options
94
+
95
+ #### Generate Command
96
+
97
+ ```bash
98
+ mj-querygen generate [options]
99
+
100
+ Options:
101
+ -e, --entities <names...> Specific entities to generate queries for
102
+ -x, --exclude-entities <names...> Entities to exclude from generation
103
+ -s, --exclude-schemas <names...> Schemas to exclude (default: sys, INFORMATION_SCHEMA)
104
+ -m, --max-entities <number> Max entities per group (default: 3)
105
+ -r, --max-refinements <number> Max refinement iterations (default: 3)
106
+ -f, --max-fixes <number> Max error-fixing attempts (default: 5)
107
+ --model <name> Preferred AI model (overrides config)
108
+ --vendor <name> Preferred AI vendor (overrides config)
109
+ -o, --output <path> Output directory (default: ./metadata/queries)
110
+ --mode <mode> Output mode: metadata|database|both (default: metadata)
111
+ -v, --verbose Enable verbose output
112
+ -h, --help Display help information
113
+ ```
114
+
115
+ #### Validate Command
116
+
117
+ ```bash
118
+ mj-querygen validate [options]
119
+
120
+ Options:
121
+ -p, --path <path> Path to queries metadata directory (default: ./metadata/queries)
122
+ -v, --verbose Enable verbose output
123
+ -h, --help Display help information
124
+ ```
125
+
126
+ #### Export Command
127
+
128
+ ```bash
129
+ mj-querygen export [options]
130
+
131
+ Options:
132
+ -o, --output <path> Output directory (default: ./metadata/queries)
133
+ -v, --verbose Enable verbose output
134
+ -h, --help Display help information
135
+ ```
136
+
137
+ ### Configuration File (mj.config.cjs)
138
+
139
+ Add a `queryGen` section to your `mj.config.cjs` file:
140
+
141
+ ```javascript
142
+ module.exports = {
143
+ // ... other MJ configuration
144
+
145
+ queryGen: {
146
+ // Entity Filtering
147
+ includeEntities: [], // Allowlist (if provided, ONLY these entities are processed)
148
+ excludeEntities: [], // Denylist (ignored if includeEntities is set)
149
+ excludeSchemas: ['sys', 'INFORMATION_SCHEMA', '__mj'], // Exclude system schemas
150
+
151
+ // Entity Grouping
152
+ questionsPerGroup: 2, // Questions to generate per entity group
153
+ minGroupSize: 2, // Minimum entities per group (multi-entity groups)
154
+ maxGroupSize: 3, // Maximum entities per group (keep focused)
155
+
156
+ // AI Configuration
157
+ modelOverride: undefined, // Optional: override AI model (e.g., "GPT-OSS-120B")
158
+ vendorOverride: undefined, // Optional: override AI vendor (e.g., "Groq")
159
+ embeddingModel: 'text-embedding-3-small', // Embedding model for vector similarity
160
+
161
+ // Iteration Limits
162
+ maxRefinementIterations: 3, // Max query refinement cycles
163
+ maxFixingIterations: 5, // Max error-fixing attempts
164
+
165
+ // Few-Shot Learning
166
+ topSimilarQueries: 5, // Number of golden queries to use as examples
167
+
168
+ // Similarity Weighting (for vector search)
169
+ similarityWeights: {
170
+ userQuestion: 0.2, // 20% weight for question similarity
171
+ description: 0.4, // 40% weight for description similarity
172
+ technicalDescription: 0.4 // 40% weight for technical description similarity
173
+ },
174
+
175
+ // Output Configuration
176
+ outputMode: 'metadata', // 'metadata', 'database', or 'both'
177
+ outputDirectory: './metadata/queries',
178
+ outputCategoryDirectory: undefined, // Optional: separate directory for categories
179
+ rootQueryCategory: 'Auto-Generated', // Root category for generated queries
180
+ autoCreateEntityQueryCategories: false, // Create per-entity subcategories
181
+
182
+ // Performance
183
+ parallelGenerations: 1, // Parallel query generation (future enhancement)
184
+ enableCaching: true, // Cache AI prompt results
185
+
186
+ // Validation
187
+ testWithSampleData: true, // Test queries before export
188
+ requireMinRows: 0, // Minimum rows required (0 = allow empty results)
189
+ maxRefinementRows: 10, // Max rows used for refinement evaluation (cost optimization)
190
+
191
+ // Logging
192
+ verbose: false // Enable verbose logging (all logs gated by this flag)
193
+ }
194
+ };
195
+ ```
196
+
197
+ ### Configuration Priority
198
+
199
+ Configuration is merged in this order (highest to lowest priority):
200
+
201
+ 1. **CLI options** - Command line flags
202
+ 2. **mj.config.cjs** - queryGen section
203
+ 3. **Default values** - Built-in defaults
204
+
205
+ ## Architecture
206
+
207
+ ### 11-Phase Pipeline
208
+
209
+ QueryGen orchestrates an 11-phase workflow:
210
+
211
+ ```
212
+ ┌─────────────────────────────────────────────────────────────┐
213
+ │ Phase 1: Entity Analysis │
214
+ │ - Load entities from Metadata │
215
+ │ - Filter by include/exclude lists │
216
+ │ - Build foreign key relationship graph │
217
+ └─────────────────────────────────────────────────────────────┘
218
+
219
+ ┌─────────────────────────────────────────────────────────────┐
220
+ │ Phase 2: Entity Grouping (LLM-Based Semantic Analysis) │
221
+ │ - AI analyzes relationship graph for meaningful groupings │
222
+ │ - Generates groups of 2-3 related entities │
223
+ │ - Includes business context: domain, rationale, questions │
224
+ │ - Validates connectivity and deduplicates groups │
225
+ └─────────────────────────────────────────────────────────────┘
226
+
227
+ ┌─────────────────────────────────────────────────────────────┐
228
+ │ Phase 3: Business Question Generation │
229
+ │ - Use AI to generate domain-specific questions │
230
+ │ - 1-2 questions per entity group │
231
+ │ - Vary complexity (simple aggregations → complex joins) │
232
+ └─────────────────────────────────────────────────────────────┘
233
+
234
+ ┌─────────────────────────────────────────────────────────────┐
235
+ │ Phase 4: Vector Similarity Search │
236
+ │ - Embed business question using local embeddings │
237
+ │ - Find top-K similar golden queries │
238
+ │ - Weighted cosine similarity across multiple fields │
239
+ └─────────────────────────────────────────────────────────────┘
240
+
241
+ ┌─────────────────────────────────────────────────────────────┐
242
+ │ Phase 5: SQL Query Generation │
243
+ │ - Use AI with few-shot examples │
244
+ │ - Generate Nunjucks SQL templates │
245
+ │ - Define parameters and output fields │
246
+ └─────────────────────────────────────────────────────────────┘
247
+
248
+ ┌─────────────────────────────────────────────────────────────┐
249
+ │ Phase 6: Query Testing │
250
+ │ - Render template with sample parameter values │
251
+ │ - Execute SQL against database │
252
+ │ - Validate results (row count, schema) │
253
+ └─────────────────────────────────────────────────────────────┘
254
+
255
+ ┌─────────────────────────────────────────────────────────────┐
256
+ │ Phase 7: Error Fixing (if needed) │
257
+ │ - Pass error message to AI │
258
+ │ - AI fixes SQL syntax/logic errors │
259
+ │ - Retry up to maxFixingIterations │
260
+ └─────────────────────────────────────────────────────────────┘
261
+
262
+ ┌─────────────────────────────────────────────────────────────┐
263
+ │ Phase 8: Query Evaluation │
264
+ │ - AI evaluates if query answers business question │
265
+ │ - Checks result relevance, completeness, correctness │
266
+ │ - Generates improvement suggestions │
267
+ └─────────────────────────────────────────────────────────────┘
268
+
269
+ ┌─────────────────────────────────────────────────────────────┐
270
+ │ Phase 9: Query Refinement (if needed) │
271
+ │ - AI refines query based on evaluation feedback │
272
+ │ - Iterative loop up to maxRefinementIterations │
273
+ │ - Returns best refined query │
274
+ └─────────────────────────────────────────────────────────────┘
275
+
276
+ ┌─────────────────────────────────────────────────────────────┐
277
+ │ Phase 10: Validation │
278
+ │ - Comprehensive validation of all generated queries │
279
+ │ - Type checking, parameter validation, execution tests │
280
+ └─────────────────────────────────────────────────────────────┘
281
+
282
+ ┌─────────────────────────────────────────────────────────────┐
283
+ │ Phase 11: Metadata Export │
284
+ │ - Export to JSON metadata files (metadata mode) │
285
+ │ - Insert into database tables (database mode) │
286
+ │ - Create Queries, Query Fields, Query Params records │
287
+ └─────────────────────────────────────────────────────────────┘
288
+ ```
289
+
290
+ See [docs/ARCHITECTURE.md](./docs/ARCHITECTURE.md) for detailed technical architecture.
291
+
292
+ ## Core Components
293
+
294
+ ### EntityGrouper
295
+
296
+ Uses AI to generate semantically meaningful entity groups with business context:
297
+
298
+ ```typescript
299
+ const grouper = new EntityGrouper(config);
300
+ const groups = await grouper.generateEntityGroups(entities, contextUser);
301
+ // Returns groups with businessDomain, businessRationale, expectedQuestionTypes
302
+ ```
303
+
304
+ ### QuestionGenerator
305
+
306
+ Generates business questions using AI with validation:
307
+
308
+ ```typescript
309
+ const generator = new QuestionGenerator(contextUser, config);
310
+ const questions = await generator.generateQuestions(entityGroup);
311
+ // Returns validated questions with complexity, aggregation, and join flags
312
+ ```
313
+
314
+ ### QueryWriter
315
+
316
+ Generates SQL templates using AI with few-shot learning and validation:
317
+
318
+ ```typescript
319
+ const writer = new QueryWriter(contextUser, config);
320
+ const query = await writer.generateQuery(
321
+ businessQuestion,
322
+ entityMetadata,
323
+ fewShotExamples
324
+ );
325
+ // Returns Nunjucks SQL template with parameters
326
+ // Includes retry logic with validation feedback (up to 3 attempts)
327
+ ```
328
+
329
+ ### QueryTester
330
+
331
+ Tests queries by rendering templates and executing against database:
332
+
333
+ ```typescript
334
+ const tester = new QueryTester(entityMetadata, question, contextUser, config);
335
+ const result = await tester.testQuery(query, 5); // max 5 error-fixing attempts
336
+ // Returns success, sample rows, or error details
337
+ // Integrates with QueryFixer for automatic error correction
338
+ ```
339
+
340
+ ### QueryRefiner
341
+
342
+ Evaluates and refines queries iteratively based on AI feedback:
343
+
344
+ ```typescript
345
+ const refiner = new QueryRefiner(tester, contextUser, config);
346
+ const refined = await refiner.refineQuery(
347
+ query,
348
+ businessQuestion,
349
+ entityMetadata
350
+ );
351
+ // Evaluation → Feedback → Refinement cycle (up to 3 iterations)
352
+ // Uses first 10 sample rows for cost optimization
353
+ ```
354
+
355
+ ### MetadataExporter
356
+
357
+ Exports validated queries to MemberJunction metadata JSON format:
358
+
359
+ ```typescript
360
+ const exporter = new MetadataExporter();
361
+ const result = await exporter.exportQueries(
362
+ validatedQueries,
363
+ uniqueCategories,
364
+ config.outputDirectory,
365
+ config.outputCategoryDirectory
366
+ );
367
+ // Creates .queries-{timestamp}.json and .query-categories-{timestamp}.json
368
+ // Compatible with mj-sync push for database synchronization
369
+ ```
370
+
371
+ ### QueryDatabaseWriter
372
+
373
+ Writes queries directly to database as Query entities:
374
+
375
+ ```typescript
376
+ const writer = new QueryDatabaseWriter();
377
+ await writer.writeQueriesToDatabase(validatedQueries, contextUser);
378
+ // Creates QueryEntity records using GetEntityObject pattern
379
+ // QueryFields and QueryParams are automatically extracted by QueryEntity.server.ts
380
+ ```
381
+
382
+ ## Workflow Examples
383
+
384
+ ### Example 1: Generate Queries for Specific Entities
385
+
386
+ ```bash
387
+ # Generate queries for customer-related entities
388
+ mj-querygen generate \
389
+ -e Customers Orders "Order Details" Products \
390
+ -m 2 \
391
+ -v
392
+
393
+ # Output:
394
+ # ✓ Metadata loaded
395
+ # ✓ Found 6 entity groups
396
+ # ✓ Embedded 20 golden queries
397
+ # [1/6] Processing Customers...
398
+ # [1/6] ✓ Customers complete (2 queries)
399
+ # [2/6] Processing Orders...
400
+ # [2/6] ✓ Orders complete (2 queries)
401
+ # ...
402
+ # ✓ Exported to ./metadata/queries/queries-1234567890.json
403
+ #
404
+ # ✓ Query generation complete!
405
+ #
406
+ # Summary:
407
+ # Entity Groups Processed: 6
408
+ # Queries Generated: 12
409
+ # Output Location: ./metadata/queries
410
+ ```
411
+
412
+ ### Example 2: Validate Existing Queries
413
+
414
+ ```bash
415
+ # Validate all queries in metadata directory
416
+ mj-querygen validate -p ./metadata/queries -v
417
+
418
+ # Output:
419
+ # ✓ Metadata loaded
420
+ # ✓ Found 3 query files
421
+ # [1/3] Validating queries-1234567890.json...
422
+ # [1/3] ✓ Top Customers By Revenue
423
+ # [1/3] ✓ Recent Orders By Status
424
+ # [1/3] ✗ Product Sales Analysis: Column 'ProductName' not found
425
+ # ...
426
+ # ⚠ Validation completed with errors
427
+ #
428
+ # Summary:
429
+ # Total Queries: 15
430
+ # Passed: 12
431
+ # Failed: 3
432
+ ```
433
+
434
+ ### Example 3: Export Database Queries to Metadata
435
+
436
+ ```bash
437
+ # Export all queries from database
438
+ mj-querygen export -o ./exported-queries -v
439
+
440
+ # Output:
441
+ # ✓ Metadata loaded
442
+ # ✓ Found 25 queries
443
+ # [1/25] Exporting Customer Summary...
444
+ # [1/25] ✓ Exported Customer Summary
445
+ # ...
446
+ # ✓ All 25 queries exported successfully!
447
+ ```
448
+
449
+ ## Troubleshooting
450
+
451
+ ### Common Errors
452
+
453
+ #### Database Connection Errors
454
+
455
+ **Error**: `Metadata provider not configured`
456
+
457
+ **Solution**: Ensure database connection is configured in `mj.config.cjs`
458
+
459
+ #### AI Prompt Failures
460
+
461
+ **Error**: `Prompt 'Business Question Generator' not found`
462
+
463
+ **Solution**: Sync AI prompts to database with `npx mj-sync push`
464
+
465
+ #### Template Rendering Errors
466
+
467
+ **Error**: `Template rendering failed: Unknown filter 'sqlString'`
468
+
469
+ **Solution**: Verify QueryParameterProcessor is imported and SQL filters are registered
470
+
471
+ #### Validation Failures
472
+
473
+ **Error**: `Query returned no results`
474
+
475
+ **Solution**: Ensure database has sample data or set `requireMinRows: 0`
476
+
477
+ ### Performance Issues
478
+
479
+ #### Slow Generation
480
+
481
+ **Solutions**:
482
+ - Reduce `maxGroupSize` (3 → 2), or pass `--max-entities 2` on the CLI
483
+ - Reduce `questionsPerGroup` (2 → 1)
484
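+ - Increase `parallelGenerations` (1 → 3) — note that the configuration reference marks this setting as a future enhancement, so it may not yet have an effect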
485
+ - Set `enableCaching: true` to cache AI prompt results
486
+
487
+ #### High Token Costs
488
+
489
+ **Solutions**:
490
+ - Use cheaper models (Gemini 2.5 Flash, GPT 5-nano)
491
+ - Reduce `topSimilarQueries` (5 → 3)
492
+ - Reduce `maxRefinementIterations` (3 → 2)
493
+ - Reduce `maxFixingIterations` (5 → 3)
494
+
495
+ ## Programmatic Usage
496
+
497
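+ QueryGen can be used as a library in your applications. The example below is abbreviated: `entityMetadata`, `fewShotExamples`, and `dataProvider` are assumed to be prepared by the caller, and the imports for `Metadata` and `UserInfo` are omitted.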
498
+
499
+ ```typescript
500
+ import {
501
+ EntityGrouper,
502
+ QuestionGenerator,
503
+ QueryWriter,
504
+ QueryTester,
505
+ QueryRefiner,
506
+ MetadataExporter
507
+ } from '@memberjunction/query-gen';
508
+
509
+ async function generateQueriesForEntity(entityName: string, contextUser: UserInfo) {
510
+ // 1. Load entity metadata
511
+ const md = new Metadata();
512
+ const entity = md.Entities.find(e => e.Name === entityName);
513
+
514
+ // 2. Create entity group
515
+ const grouper = new EntityGrouper();
516
+ const groups = await grouper.generateEntityGroups([entity], 1, 1);
517
+
518
+ // 3. Generate business questions
519
+ const questionGen = new QuestionGenerator(contextUser);
520
+ const questions = await questionGen.generateQuestions(groups[0]);
521
+
522
+ // 4. Generate the SQL query template
523
+ const queryWriter = new QueryWriter(contextUser);
524
+ const query = await queryWriter.generateQuery(
525
+ questions[0],
526
+ entityMetadata,
527
+ fewShotExamples
528
+ );
529
+
530
+ // 5. Test and refine
531
+ const tester = new QueryTester(dataProvider, entityMetadata, questions[0], contextUser);
532
+ const testResult = await tester.testQuery(query, 5);
533
+
534
+ if (testResult.success) {
535
+ const refiner = new QueryRefiner(tester, contextUser);
536
+ const refined = await refiner.refineQuery(query, questions[0], entityMetadata, 3);
537
+ return refined.query;
538
+ }
539
+ }
540
+ ```
541
+
542
+ See [docs/API.md](./docs/API.md) for detailed API documentation.
543
+
544
+ ## Key Features & Design Decisions
545
+
546
+ ### LLM-Based Semantic Entity Grouping
547
+
548
+ QueryGen uses AI to analyze database schemas and generate meaningful entity groupings based on business context, replacing traditional deterministic algorithms. Each group includes:
549
+
550
+ - **Business Domain** - Clear business area label (e.g., "Sales Pipeline", "Inventory Management")
551
+ - **Business Rationale** - Explanation of why this grouping matters
552
+ - **Expected Question Types** - Types of questions this group supports
553
+ - **Relationship Type** - Parent-child or many-to-many relationships
554
+
555
+ This approach generates more meaningful queries aligned with actual business use cases.
556
+
557
+ ### Weighted Vector Similarity Search
558
+
559
+ Few-shot learning uses multi-field weighted cosine similarity to find relevant golden query examples:
560
+
561
+ - **userQuestion**: 20% weight (natural language varies more)
562
+ - **description**: 40% weight (business logic matching)
563
+ - **technicalDescription**: 40% weight (implementation details)
564
+
565
+ Each field is embedded separately using local embeddings (`text-embedding-3-small`) for precise similarity matching.
566
+
567
+ ### Iterative Quality Improvement
568
+
569
+ QueryGen implements a sophisticated refinement loop:
570
+
571
+ 1. **Error Fixing Phase** - AI corrects SQL syntax/logic errors (up to 5 attempts)
572
+ 2. **Evaluation Phase** - AI assesses if query answers the business question
573
+ 3. **Refinement Phase** - AI improves query based on evaluation feedback (up to 3 iterations)
574
+
575
+ This multi-stage approach ensures high-quality queries that actually solve business problems.
576
+
577
+ ### Cost Optimization
578
+
579
+ - **Limited Sample Data** - Uses only first 10 rows for evaluation (reduces token costs)
580
+ - **Validation Feedback** - Failed generations provide feedback to AI (improves success rate)
581
+ - **Multi-Model Failover** - 6-model priority chain balances quality and cost
582
+
583
+ ### Automatic Field Extraction
584
+
585
+ QueryGen generates SQL templates with parameters, but **QueryFields** and **QueryParams** are automatically extracted by `QueryEntity.server.ts` during Save(). This eliminates duplication and ensures SQL is the single source of truth.
586
+
587
+ ## Environment Requirements
588
+
589
+ ### Database
590
+
591
+ - SQL Server 2016 or later
592
+ - MemberJunction metadata tables populated
593
+ - Sample data for query testing (recommended)
594
+
595
+ ### AI Models
596
+
597
+ QueryGen uses 6 AI prompts, each configured with 6-model failover:
598
+
599
+ 1. **Claude 4.5 Sonnet** (Anthropic) - Priority 1
600
+ 2. **Kimi K2** (Groq) - Priority 2
601
+ 3. **Kimi K2** (Cerebras) - Priority 3
602
+ 4. **Gemini 2.5 Flash** (Google) - Priority 4
603
+ 5. **GPT-OSS-120B** (Groq) - Priority 5
604
+ 6. **GPT 5-nano** (OpenAI) - Priority 6
605
+
606
+ Model/vendor can be overridden with `--model` and `--vendor` CLI flags.
607
+
608
+ ### Embeddings
609
+
610
+ - Default: `text-embedding-3-small`
611
+ - Runs via AIEngine's `EmbedTextLocal()` method
612
+ - No external API calls required
613
+ - 20 golden queries pre-embedded for few-shot learning
614
+
615
+ ## Best Practices
616
+
617
+ ### Query Generation
618
+
619
+ 1. **Start Small** - Begin with a few representative entities to validate the pipeline
620
+ 2. **Sample Data** - Ensure database has representative sample data for testing
621
+ 3. **Entity Filtering** - Use `--entities` or `--exclude-entities` to focus on specific domains
622
+ 4. **Verbose Mode** - Use `-v` flag for detailed logging during initial runs
623
+ 5. **Review Generated Queries** - Always review SQL templates before production use
624
+
625
+ ### Configuration Tuning
626
+
627
+ 1. **Group Size** - Keep `maxGroupSize: 3` for focused, manageable queries
628
+ 2. **Refinement Iterations** - Use 3 iterations for quality, 1-2 for speed
629
+ 3. **Model Selection** - Use `--model` and `--vendor` for cost/performance optimization
630
+ 4. **Output Mode** - Use `metadata` mode for review, `database` mode for direct import
631
+
632
+ ### Golden Query Library
633
+
634
+ QueryGen includes 20 golden queries in `src/data/golden-queries.json` (relative to the package root), covering common patterns:
635
+
636
+ - **Aggregations** - COUNT, SUM, AVG, MIN, MAX
637
+ - **Grouping** - GROUP BY with various aggregations
638
+ - **Joins** - Simple joins (parent-child), complex joins (many-to-many)
639
+ - **Filtering** - WHERE clauses with parameterization
640
+ - **Sorting** - ORDER BY with multiple columns
641
+ - **Window Functions** - ROW_NUMBER, RANK, DENSE_RANK
642
+ - **Date Operations** - Date filtering and grouping
643
+ - **TOP N Queries** - Top customers, products, etc.
644
+
645
+ **To add your own golden queries:**
646
+ 1. Follow the structure in `golden-queries.json`
647
+ 2. Include complete parameter definitions with `sampleValue`
648
+ 3. Use Nunjucks syntax with SQL-safe filters
649
+ 4. Rebuild package to update embeddings
650
+
651
+ ### Performance Optimization
652
+
653
+ 1. **Reduce Entity Count** - Filter to specific schemas or entities
654
+ 2. **Lower Question Count** - Set `questionsPerGroup: 1` for faster generation
655
+ 3. **Skip Refinement** - Set `maxRefinementIterations: 0` for draft queries
656
+ 4. **Use Faster Models** - Override with Groq or Cerebras models
657
+
658
+ ## Contributing
659
+
660
+ QueryGen is part of the MemberJunction project. Contributions are welcome!
661
+
662
+ ```bash
663
+ # Development setup
664
+ cd MJ/packages/QueryGen
665
+ npm install
666
+ npm run build
667
+
668
+ # Run in watch mode
669
+ npm run watch
670
+
671
+ # Lint and format
672
+ npm run lint
673
+ npm run format
674
+ ```
675
+
676
+ ## Support
677
+
678
+ - GitHub Issues: https://github.com/MemberJunction/MJ/issues
679
+ - Documentation: https://docs.memberjunction.com
680
+ - Community: https://community.memberjunction.com
681
+
682
+ ## Related Documentation
683
+
684
+ - [Architecture Deep Dive](./docs/ARCHITECTURE.md) - Technical details
685
+ - [API Documentation](./docs/API.md) - Programmatic API reference
686
+ - [Implementation Plan](./IMPLEMENTATION_PLAN.md) - Development roadmap
687
+ - [MemberJunction Docs](https://docs.memberjunction.com) - Platform documentation
688
+
689
+ ## License
690
+
691
+ MIT License - see LICENSE file for details