@kiyeonjeon21/datacontext 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.cursorrules +12 -0
  2. package/.env.example +8 -0
  3. package/.github/workflows/ci.yml +21 -1
  4. package/.github/workflows/publish.yml +21 -1
  5. package/CHANGELOG.md +41 -0
  6. package/README.md +247 -239
  7. package/datacontext.db +0 -0
  8. package/dist/api/server.d.ts.map +1 -1
  9. package/dist/api/server.js +145 -0
  10. package/dist/api/server.js.map +1 -1
  11. package/dist/api/start-server.d.ts +10 -0
  12. package/dist/api/start-server.d.ts.map +1 -0
  13. package/dist/api/start-server.js +73 -0
  14. package/dist/api/start-server.js.map +1 -0
  15. package/dist/cli/index.js +462 -0
  16. package/dist/cli/index.js.map +1 -1
  17. package/dist/core/context-service.d.ts +58 -0
  18. package/dist/core/context-service.d.ts.map +1 -1
  19. package/dist/core/context-service.js +121 -0
  20. package/dist/core/context-service.js.map +1 -1
  21. package/dist/core/index.d.ts +2 -0
  22. package/dist/core/index.d.ts.map +1 -1
  23. package/dist/core/index.js +5 -1
  24. package/dist/core/index.js.map +1 -1
  25. package/dist/core/llm-service.d.ts +141 -0
  26. package/dist/core/llm-service.d.ts.map +1 -0
  27. package/dist/core/llm-service.js +284 -0
  28. package/dist/core/llm-service.js.map +1 -0
  29. package/dist/knowledge/store.d.ts +56 -3
  30. package/dist/knowledge/store.d.ts.map +1 -1
  31. package/dist/knowledge/store.js +193 -7
  32. package/dist/knowledge/store.js.map +1 -1
  33. package/dist/knowledge/types.d.ts +43 -1
  34. package/dist/knowledge/types.d.ts.map +1 -1
  35. package/dist/knowledge/types.js.map +1 -1
  36. package/dist/mcp/tools.d.ts.map +1 -1
  37. package/dist/mcp/tools.js +365 -0
  38. package/dist/mcp/tools.js.map +1 -1
  39. package/docs/API.md +173 -0
  40. package/docs/DEMO_SCRIPT.md +210 -0
  41. package/docs/SYNC_GUIDE.md +242 -0
  42. package/package.json +4 -1
  43. package/src/api/server.ts +160 -0
  44. package/src/api/start-server.ts +78 -0
  45. package/src/cli/index.ts +534 -0
  46. package/src/core/context-service.ts +157 -0
  47. package/src/core/index.ts +7 -0
  48. package/src/core/llm-service.ts +359 -0
  49. package/src/knowledge/store.ts +232 -7
  50. package/src/knowledge/types.ts +45 -1
  51. package/src/mcp/tools.ts +415 -0
@@ -648,6 +648,163 @@ export class DataContextService {
648
648
  return this.metrics.getAggregatedMetrics();
649
649
  }
650
650
 
651
+ // ============================================================
652
+ // Glossary (Business Terms)
653
+ // ============================================================
654
+
655
/**
 * List the business terms currently held by the knowledge store.
 *
 * @returns The value of `this.knowledge.getBusinessTerms()`, passed through unchanged
 */
getBusinessTerms(): import('../knowledge/types.js').BusinessTerm[] {
  const glossary = this.knowledge.getBusinessTerms();
  return glossary;
}
661
+
662
/**
 * Look up glossary terms that match a query.
 *
 * The matching itself is delegated to the knowledge store.
 *
 * @param query - Text to match against the glossary
 * @returns The value of `this.knowledge.findMatchingTerms(query)`, passed through unchanged
 */
findMatchingTerms(query: string): import('../knowledge/types.js').BusinessTerm[] {
  const matches = this.knowledge.findMatchingTerms(query);
  return matches;
}
668
+
669
/**
 * Register a hand-written business term.
 *
 * All arguments are forwarded as-is to the knowledge store.
 *
 * @param term - Primary name of the term
 * @param definition - Explanation of what the term means
 * @param options - Optional SQL expression, synonyms, scope and category
 * @returns The BusinessTerm produced by the knowledge store
 */
async addBusinessTerm(
  term: string,
  definition: string,
  options: {
    sqlExpression?: string;
    synonyms?: string[];
    appliesTo?: { tables?: string[]; columns?: string[] };
    category?: import('../knowledge/types.js').TermCategory;
  } = {}
): Promise<import('../knowledge/types.js').BusinessTerm> {
  const created = await this.knowledge.addBusinessTerm(term, definition, options);
  return created;
}
684
+
685
/**
 * Remove a business term from the knowledge store.
 *
 * @param id - Identifier of the term to delete
 */
async deleteBusinessTerm(id: string): Promise<void> {
  const outcome = await this.knowledge.deleteBusinessTerm(id);
  return outcome;
}
691
+
692
/**
 * Resolve glossary terms mentioned in a natural language query.
 *
 * Resolution order:
 *  1. Local lookup through the knowledge store. Any hit is returned
 *     immediately with `method: 'local'`.
 *  2. Otherwise, if an LLM is available and there is at least one active
 *     term, the LLM service is asked and its answer is returned with
 *     `method: 'ai'`.
 *  3. Otherwise the query is returned untouched with `method: 'local'`.
 *
 * @param query - Natural language query
 * @returns The original query, the (possibly rewritten) query, the names of
 *          the terms that were used, and the SQL conditions they suggest
 */
async enhanceQuery(query: string): Promise<{
  query: string;
  enhancedQuery: string;
  usedTerms: string[];
  suggestedConditions: string[];
  method: 'local' | 'ai';
}> {
  // Result returned whenever nothing could be resolved.
  const untouched = () => ({
    query,
    enhancedQuery: query,
    usedTerms: [] as string[],
    suggestedConditions: [] as string[],
    method: 'local' as const,
  });

  // Step 1: local glossary lookup.
  const matched = this.knowledge.findMatchingTerms(query);
  if (matched.length > 0) {
    const names: string[] = [];
    const conditions: string[] = [];
    for (const entry of matched) {
      names.push(entry.term);
      // Terms without a SQL expression are reported but suggest nothing.
      if (entry.sqlExpression) {
        conditions.push(entry.sqlExpression);
      }
    }
    return {
      query,
      enhancedQuery: query,
      usedTerms: names,
      suggestedConditions: conditions,
      method: 'local',
    };
  }

  // Step 2: the LLM module is loaded lazily, only when local matching found nothing.
  const { isLLMAvailable, createLLMService } = await import('./llm-service.js');
  if (!isLLMAvailable()) {
    return untouched();
  }

  const activeTerms = this.knowledge.getActiveTerms();
  if (activeTerms.length === 0) {
    return untouched();
  }

  const service = createLLMService();
  const { enhancedQuery, usedTerms, suggestedConditions } =
    await service.enhanceQueryWithGlossary(query, activeTerms);

  return { query, enhancedQuery, usedTerms, suggestedConditions, method: 'ai' };
}
758
+
759
/**
 * Turn free-form term definitions into stored glossary entries using the LLM.
 *
 * The first 20 tables of the adapter's schema and the existing glossary are
 * sent along as context. The generated terms are then handed to the
 * knowledge store.
 *
 * @param rawTerms - Term definitions written in natural language
 * @returns The result of `this.knowledge.addBusinessTerms(...)` for the generated terms
 * @throws Error when ANTHROPIC_API_KEY is not configured
 *
 * @example
 * ```typescript
 * const terms = await service.generateGlossary(
 *   "active user = users whose status is 1\nrecent order = orders from the last 30 days"
 * );
 * console.log(`Generated ${terms.length} terms`);
 * ```
 */
async generateGlossary(rawTerms: string): Promise<import('../knowledge/types.js').BusinessTerm[]> {
  const { isLLMAvailable, createLLMService } = await import('./llm-service.js');
  if (!isLLMAvailable()) {
    throw new Error('ANTHROPIC_API_KEY not configured. Set the environment variable to use AI-powered glossary generation.');
  }

  // Build the schema context: only the first 20 tables are included.
  const schema = await this.adapter.getSchema();
  const firstTables = schema.tables.slice(0, 20);
  const tableSummaries = firstTables.map(({ name, columns }) => ({
    name,
    columns: columns.map(col => ({
      name: col.name,
      type: col.dataType,
      nullable: col.isNullable,
    })),
  }));
  const existingTerms = this.knowledge.getBusinessTerms();

  const service = createLLMService();
  const schemaHash = this.knowledge.getSchemaHash();
  const drafts = await service.generateGlossary(
    rawTerms,
    { tables: tableSummaries, existingTerms },
    schemaHash
  );

  // Persist the generated terms.
  return this.knowledge.addBusinessTerms(drafts);
}
807
+
651
808
  // ============================================================
652
809
  // Lifecycle
653
810
  // ============================================================
package/src/core/index.ts CHANGED
@@ -11,6 +11,7 @@ export { Harvester, createHarvester } from './harvester.js';
11
11
  export { FeedbackManager, createFeedbackManager } from './feedback.js';
12
12
  export { MetricsCollector, createMetricsCollector } from './metrics.js';
13
13
  export { CostEstimator, createCostEstimator } from './cost-estimator.js';
14
+ export { LLMService, createLLMService, isLLMAvailable } from './llm-service.js';
14
15
 
15
16
  // Types
16
17
  export type {
@@ -24,3 +25,9 @@ export type {
24
25
  DataContextConfig,
25
26
  } from './types.js';
26
27
 
28
+ export type {
29
+ LLMServiceConfig,
30
+ SchemaContext,
31
+ GeneratedTerm,
32
+ } from './llm-service.js';
33
+
@@ -0,0 +1,359 @@
1
+ /**
2
+ * LLM Service Module
3
+ *
4
+ * Provides AI-powered features using Claude API.
5
+ * Used for auto-generating glossary terms, descriptions, and query suggestions.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * const llm = createLLMService();
10
+ *
11
+ * // Generate glossary from user's raw terms
12
+ * const terms = await llm.generateGlossary(
13
+ * "활성 사용자, 최근 주문, VIP 고객",
14
+ * schemaContext
15
+ * );
16
+ *
17
+ * // Generate table descriptions
18
+ * const description = await llm.generateTableDescription(tableInfo);
19
+ * ```
20
+ */
21
+
22
+ import Anthropic from '@anthropic-ai/sdk';
23
+ import type { BusinessTerm, TermCategory, TableDescription } from '../knowledge/types.js';
24
+ import { generateId, createKnowledgeMeta } from '../knowledge/types.js';
25
+
26
/**
 * Options accepted by the LLMService constructor. Every field is optional.
 */
export interface LLMServiceConfig {
  /**
   * Anthropic API key.
   * When omitted, the constructor reads the ANTHROPIC_API_KEY environment variable.
   */
  apiKey?: string;
  /**
   * Model identifier.
   * When omitted, the constructor uses the ANTHROPIC_MODEL environment variable
   * and, failing that, 'claude-sonnet-4-20250514'.
   */
  model?: string;
  /**
   * Maximum number of tokens for a response.
   * When omitted, the constructor uses 4096.
   */
  maxTokens?: number;
}
35
+
36
/**
 * Database context that is rendered into LLM prompts.
 */
export interface SchemaContext {
  /** Tables to describe, each with its column list. */
  tables: {
    name: string;
    columns: {
      name: string;
      type: string;
      nullable: boolean;
    }[];
  }[];
  /** Terms already in the glossary; generateGlossary lists them so the model avoids duplicates. */
  existingTerms?: BusinessTerm[];
  /** Existing rules by name and description. Not read by the LLMService methods in this module. */
  existingRules?: { name: string; description: string }[];
}
49
+
50
/**
 * Shape of one glossary entry as requested from the model in the
 * generateGlossary system prompt.
 */
export interface GeneratedTerm {
  /** Primary term name. */
  term: string;
  /** Alternative names for the term. */
  synonyms: string[];
  /** Explanation of what the term means. */
  definition: string;
  /** SQL condition or expression, when one applies. */
  sqlExpression?: string;
  /** Tables and/or columns the term relates to. */
  appliesTo?: { tables?: string[]; columns?: string[] };
  /** Category of the term. */
  category?: TermCategory;
  /** Example natural language queries that use the term. */
  examples?: string[];
}
63
+
64
/**
 * Client for the AI-backed glossary features, built on the Anthropic Messages API.
 *
 * Every public method sends one system + user prompt pair, takes the first
 * text block of the reply and parses it as JSON. Model output is treated as
 * untrusted: it is shape-checked before being handed back to callers.
 */
export class LLMService {
  /** Token cap used by enhanceQueryWithGlossary, independent of the configured maxTokens. */
  private static readonly ENHANCE_MAX_TOKENS = 1024;

  private client: Anthropic;
  private model: string;
  private maxTokens: number;

  /**
   * @param config - Optional overrides; unset fields fall back to the
   *   ANTHROPIC_API_KEY / ANTHROPIC_MODEL environment variables and built-in defaults
   * @throws Error when neither `config.apiKey` nor ANTHROPIC_API_KEY is set
   */
  constructor(config: LLMServiceConfig = {}) {
    const apiKey = config.apiKey || process.env.ANTHROPIC_API_KEY;

    if (!apiKey) {
      throw new Error(
        'Anthropic API key not found. Set ANTHROPIC_API_KEY environment variable or pass apiKey in config.'
      );
    }

    this.client = new Anthropic({ apiKey });
    this.model = config.model || process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514';
    this.maxTokens = config.maxTokens || 4096;
  }

  /**
   * Generate glossary terms from user's raw input.
   *
   * Takes natural language terms and asks the model for structured entries,
   * which are converted to BusinessTerm objects with source 'auto'.
   * Entries in the reply that are not objects with a non-empty string `term`
   * and a string `definition` are skipped rather than stored.
   *
   * @param rawTerms - User's raw term input (comma-separated, YAML, or natural language)
   * @param context - Database schema context
   * @param schemaHash - Current schema hash for metadata
   * @returns Array of generated BusinessTerm entries; empty when `rawTerms` is blank
   *   (no API request is made in that case)
   * @throws Error prefixed with "Failed to generate glossary:" when the request
   *   fails, the reply has no text block, or the reply is not a JSON array
   *
   * @example
   * ```typescript
   * const terms = await llm.generateGlossary(
   *   "active user = users whose status is 1\nrecent order = orders from the last 30 days",
   *   { tables: [{ name: 'users', columns: [...] }] },
   *   "abc123"
   * );
   * ```
   */
  async generateGlossary(
    rawTerms: string,
    context: SchemaContext,
    schemaHash: string = ''
  ): Promise<BusinessTerm[]> {
    // Nothing to process: avoid a paid request that could only invent terms.
    if (rawTerms.trim() === '') {
      return [];
    }

    const systemPrompt = `You are a database context expert. Your job is to analyze user-provided business terms and generate structured glossary entries that can be used to translate natural language queries into accurate SQL.

IMPORTANT RULES:
1. Generate SQL expressions that are syntactically correct for the given schema
2. Match terms to actual table/column names in the schema
3. Be precise with data types (e.g., integer status codes, date intervals)
4. Include synonyms in multiple languages if the term suggests it
5. Categorize terms appropriately: status, time, money, entity, metric, filter, custom

OUTPUT FORMAT: Return a JSON array of term objects. Each object must have:
- term: The primary term name
- synonyms: Array of alternative names (include English, Korean if applicable)
- definition: Clear explanation of what this term means
- sqlExpression: SQL condition or expression (if applicable)
- appliesTo: { tables?: string[], columns?: string[] }
- category: One of: status, time, money, entity, metric, filter, custom
- examples: Array of example usage in natural language queries

Return ONLY the JSON array, no other text.`;

    const schemaInfo = this.formatSchemaContext(context);

    const userPrompt = `DATABASE SCHEMA:
${schemaInfo}

${context.existingTerms?.length ? `EXISTING TERMS (avoid duplicates):
${context.existingTerms.map(t => `- ${t.term}: ${t.definition}`).join('\n')}
` : ''}

USER'S RAW TERMS TO PROCESS:
${rawTerms}

Generate structured glossary entries for these terms. Match them to the actual schema above.`;

    try {
      const text = await this.requestText(systemPrompt, userPrompt, this.maxTokens);
      if (text === undefined) {
        throw new Error('Unexpected response type from Claude');
      }

      const drafts = this.toGeneratedTerms(this.parseJsonResponse<unknown>(text));

      // Convert to BusinessTerm with metadata
      return drafts.map(draft => ({
        ...createKnowledgeMeta('auto', schemaHash),
        type: 'business_term' as const,
        term: draft.term,
        synonyms: draft.synonyms,
        definition: draft.definition,
        sqlExpression: draft.sqlExpression,
        appliesTo: draft.appliesTo,
        category: draft.category,
        examples: draft.examples,
        isActive: true,
      }));
    } catch (error) {
      throw new Error(`Failed to generate glossary: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Generate table/column descriptions from schema.
   *
   * Sends the table name, its columns and up to three sample rows to the
   * model and returns the parsed JSON object. Only the top-level shape
   * (a JSON object) is verified; individual fields are returned as produced
   * by the model.
   *
   * @param tableInfo - Table schema information, optionally with sample rows
   * @param schemaHash - Current schema hash. Not used by this method; kept so
   *   the signature stays compatible with existing callers
   * @returns The generated description fields
   * @throws Error prefixed with "Failed to generate table description:" when the
   *   request fails, the reply has no text block, or the reply is not a JSON object
   */
  async generateTableDescription(
    tableInfo: {
      name: string;
      columns: Array<{ name: string; type: string; nullable: boolean }>;
      sampleData?: Record<string, unknown>[];
    },
    schemaHash: string = ''
  ): Promise<Omit<TableDescription, keyof import('../knowledge/types.js').KnowledgeMeta | 'type'>> {
    const systemPrompt = `You are a database documentation expert. Analyze the table structure and generate clear, useful descriptions.

OUTPUT FORMAT: Return a JSON object with:
- description: One sentence describing the table's purpose
- purpose: Detailed explanation of the table's role
- columns: Array of { name: string, description: string, valueMappings?: Record<string, string> }
- tags: Array of relevant tags

Return ONLY the JSON object, no other text.`;

    // An empty sample array carries no information, so the section is omitted.
    const sampleDataStr = tableInfo.sampleData?.length
      ? `\n\nSAMPLE DATA:\n${JSON.stringify(tableInfo.sampleData.slice(0, 3), null, 2)}`
      : '';

    const userPrompt = `TABLE: ${tableInfo.name}

COLUMNS:
${tableInfo.columns.map(c => `- ${c.name} (${c.type}${c.nullable ? ', nullable' : ''})`).join('\n')}
${sampleDataStr}

Generate descriptions for this table and its columns.`;

    try {
      const text = await this.requestText(systemPrompt, userPrompt, this.maxTokens);
      if (text === undefined) {
        throw new Error('Unexpected response type from Claude');
      }

      const parsed = this.parseJsonResponse<unknown>(text);
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new Error('Expected a JSON object describing the table');
      }
      return parsed as Omit<TableDescription, keyof import('../knowledge/types.js').KnowledgeMeta | 'type'>;
    } catch (error) {
      throw new Error(`Failed to generate table description: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Enhance a user query with glossary context.
   *
   * This is a best-effort call: on any failure (request error, missing text
   * block, unparseable reply) the original query is returned with empty
   * term and condition lists instead of throwing. Inactive terms are ignored,
   * and no request is made when no active term remains.
   *
   * @param query - User's natural language query
   * @param terms - Available business terms
   * @returns Enhanced query, names of the terms used and suggested SQL
   *   conditions; the two lists are always arrays of strings
   */
  async enhanceQueryWithGlossary(
    query: string,
    terms: BusinessTerm[]
  ): Promise<{
    enhancedQuery: string;
    usedTerms: string[];
    suggestedConditions: string[];
  }> {
    const unchanged = () => ({
      enhancedQuery: query,
      usedTerms: [] as string[],
      suggestedConditions: [] as string[],
    });

    // Decide on the ACTIVE terms: an all-inactive glossary has nothing to offer.
    const activeTerms = terms.filter(t => t.isActive);
    if (activeTerms.length === 0) {
      return unchanged();
    }

    const systemPrompt = `You are a query enhancement assistant. Your job is to identify business terms in user queries and suggest SQL conditions based on the glossary.

OUTPUT FORMAT: Return a JSON object with:
- enhancedQuery: The query with term definitions inline
- usedTerms: Array of term names that were found in the query
- suggestedConditions: Array of SQL conditions to apply

Return ONLY the JSON object, no other text.`;

    const glossaryStr = activeTerms
      .map(t => `- "${t.term}" (${t.synonyms.join(', ')}): ${t.definition}${t.sqlExpression ? ` → SQL: ${t.sqlExpression}` : ''}`)
      .join('\n');

    const userPrompt = `GLOSSARY:
${glossaryStr}

USER QUERY:
${query}

Identify any terms from the glossary used in this query and suggest SQL conditions.`;

    try {
      const text = await this.requestText(systemPrompt, userPrompt, LLMService.ENHANCE_MAX_TOKENS);
      if (text === undefined) {
        return unchanged();
      }

      return this.normalizeEnhancement(this.parseJsonResponse<unknown>(text), query);
    } catch {
      // Deliberate best-effort: enhancement must never break the caller's flow.
      return unchanged();
    }
  }

  /**
   * Send one system + user prompt pair and return the first text block of the reply.
   *
   * @returns The text, or undefined when the reply contains no text block
   */
  private async requestText(
    systemPrompt: string,
    userPrompt: string,
    maxTokens: number
  ): Promise<string | undefined> {
    const response = await this.client.messages.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: [
        { role: 'user', content: userPrompt }
      ],
      system: systemPrompt,
    });

    // Scan instead of reading content[0]: the list may be empty, and a
    // non-text block may precede the text.
    for (const block of response.content) {
      if (block.type === 'text') {
        return block.text;
      }
    }
    return undefined;
  }

  /**
   * Validate the parsed generateGlossary reply and keep only usable entries.
   *
   * @throws Error when the reply is not an array
   */
  private toGeneratedTerms(parsed: unknown): GeneratedTerm[] {
    if (!Array.isArray(parsed)) {
      throw new Error('Expected a JSON array of glossary terms');
    }

    const drafts: GeneratedTerm[] = [];
    for (const entry of parsed) {
      if (!LLMService.isGeneratedTerm(entry)) {
        continue;
      }
      const synonyms: unknown = entry.synonyms;
      drafts.push({ ...entry, synonyms: LLMService.stringList(synonyms) });
    }
    return drafts;
  }

  /** Coerce the parsed enhanceQueryWithGlossary reply into the declared result shape. */
  private normalizeEnhancement(
    parsed: unknown,
    query: string
  ): { enhancedQuery: string; usedTerms: string[]; suggestedConditions: string[] } {
    const record: Record<string, unknown> =
      typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)
        ? (parsed as Record<string, unknown>)
        : {};

    const rewritten = record['enhancedQuery'];
    return {
      enhancedQuery: typeof rewritten === 'string' && rewritten.trim() !== '' ? rewritten : query,
      usedTerms: LLMService.stringList(record['usedTerms']),
      suggestedConditions: LLMService.stringList(record['suggestedConditions']),
    };
  }

  /** True when `value` is an object with a non-empty string `term` and a string `definition`. */
  private static isGeneratedTerm(value: unknown): value is GeneratedTerm {
    if (typeof value !== 'object' || value === null) {
      return false;
    }
    const candidate = value as Record<string, unknown>;
    const term = candidate['term'];
    return typeof term === 'string' && term.trim() !== '' && typeof candidate['definition'] === 'string';
  }

  /**
   * Coerce a value into a list of non-blank strings.
   * A single string becomes a one-element list; anything else that is not an
   * array becomes an empty list; non-string array items are dropped.
   */
  private static stringList(value: unknown): string[] {
    if (typeof value === 'string') {
      return value.trim() === '' ? [] : [value];
    }
    if (!Array.isArray(value)) {
      return [];
    }
    return value.filter((item): item is string => typeof item === 'string' && item.trim() !== '');
  }

  /**
   * Parse JSON response from Claude.
   *
   * Candidates are tried in order and the first one that parses wins:
   *  1. the whole text with a leading/trailing markdown fence removed,
   *  2. the contents of the first fenced block anywhere in the text,
   *  3. the span from the first `{` or `[` to the last `}` or `]`.
   *
   * @throws Error "Failed to parse JSON response: ..." when no candidate parses
   */
  private parseJsonResponse<T>(text: string): T {
    for (const candidate of LLMService.jsonCandidates(text)) {
      try {
        return JSON.parse(candidate) as T;
      } catch {
        // Not valid JSON; fall through to the next candidate.
      }
    }
    throw new Error(`Failed to parse JSON response: ${text.slice(0, 200)}...`);
  }

  /** Build the ordered list of substrings that parseJsonResponse attempts to parse. */
  private static jsonCandidates(text: string): string[] {
    const candidates: string[] = [];

    // 1. Reply is exactly one (optionally fenced) JSON document.
    let stripped = text.trim();
    if (stripped.startsWith('```json')) {
      stripped = stripped.slice(7);
    } else if (stripped.startsWith('```')) {
      stripped = stripped.slice(3);
    }
    if (stripped.endsWith('```')) {
      stripped = stripped.slice(0, -3);
    }
    candidates.push(stripped.trim());

    // 2. Fenced block surrounded by other text.
    const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
    if (fenced?.[1] !== undefined) {
      candidates.push(fenced[1].trim());
    }

    // 3. Bare JSON surrounded by other text.
    const openers = [text.indexOf('{'), text.indexOf('[')].filter(index => index >= 0);
    if (openers.length > 0) {
      const start = Math.min(...openers);
      const end = Math.max(text.lastIndexOf('}'), text.lastIndexOf(']'));
      if (end > start) {
        candidates.push(text.slice(start, end + 1));
      }
    }

    return candidates;
  }

  /**
   * Format schema context for prompts: one "Table: <name>" heading per table
   * followed by one line per column, tables separated by a blank line.
   */
  private formatSchemaContext(context: SchemaContext): string {
    return context.tables
      .map(table => {
        const cols = table.columns
          .map(c => ` - ${c.name} (${c.type}${c.nullable ? ', nullable' : ''})`)
          .join('\n');
        return `Table: ${table.name}\n${cols}`;
      })
      .join('\n\n');
  }
}
342
+
343
/**
 * Factory for LLMService.
 *
 * @param config - Service configuration; defaults to an empty object
 * @returns A newly constructed LLMService
 */
export function createLLMService(config: LLMServiceConfig = {}): LLMService {
  const service = new LLMService(config);
  return service;
}
352
+
353
/**
 * Report whether the ANTHROPIC_API_KEY environment variable is set to a
 * non-empty value.
 *
 * @returns true when the variable is present and non-empty, false otherwise
 */
export function isLLMAvailable(): boolean {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  return Boolean(apiKey);
}
359
+