npm - @kiyeonjeon21/datacontext - Versions diffs - 0.2.0 → 0.3.0 - Mend

@kiyeonjeon21/datacontext 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/.cursorrules +12 -0
package/.env.example +8 -0
package/.github/workflows/ci.yml +21 -1
package/.github/workflows/publish.yml +21 -1
package/CHANGELOG.md +41 -0
package/README.md +247 -239
package/datacontext.db +0 -0
package/dist/api/server.d.ts.map +1 -1
package/dist/api/server.js +145 -0
package/dist/api/server.js.map +1 -1
package/dist/api/start-server.d.ts +10 -0
package/dist/api/start-server.d.ts.map +1 -0
package/dist/api/start-server.js +73 -0
package/dist/api/start-server.js.map +1 -0
package/dist/cli/index.js +462 -0
package/dist/cli/index.js.map +1 -1
package/dist/core/context-service.d.ts +58 -0
package/dist/core/context-service.d.ts.map +1 -1
package/dist/core/context-service.js +121 -0
package/dist/core/context-service.js.map +1 -1
package/dist/core/index.d.ts +2 -0
package/dist/core/index.d.ts.map +1 -1
package/dist/core/index.js +5 -1
package/dist/core/index.js.map +1 -1
package/dist/core/llm-service.d.ts +141 -0
package/dist/core/llm-service.d.ts.map +1 -0
package/dist/core/llm-service.js +284 -0
package/dist/core/llm-service.js.map +1 -0
package/dist/knowledge/store.d.ts +56 -3
package/dist/knowledge/store.d.ts.map +1 -1
package/dist/knowledge/store.js +193 -7
package/dist/knowledge/store.js.map +1 -1
package/dist/knowledge/types.d.ts +43 -1
package/dist/knowledge/types.d.ts.map +1 -1
package/dist/knowledge/types.js.map +1 -1
package/dist/mcp/tools.d.ts.map +1 -1
package/dist/mcp/tools.js +365 -0
package/dist/mcp/tools.js.map +1 -1
package/docs/API.md +173 -0
package/docs/DEMO_SCRIPT.md +210 -0
package/docs/SYNC_GUIDE.md +242 -0
package/package.json +4 -1
package/src/api/server.ts +160 -0
package/src/api/start-server.ts +78 -0
package/src/cli/index.ts +534 -0
package/src/core/context-service.ts +157 -0
package/src/core/index.ts +7 -0
package/src/core/llm-service.ts +359 -0
package/src/knowledge/store.ts +232 -7
package/src/knowledge/types.ts +45 -1
package/src/mcp/tools.ts +415 -0

package/src/core/context-service.ts CHANGED Viewed

@@ -648,6 +648,163 @@ export class DataContextService {
     return this.metrics.getAggregatedMetrics();
   }
+  // ============================================================
+  // Glossary (Business Terms)
+  // ============================================================
+  /**
+   * List the business terms (glossary entries) held by the knowledge store.
+   *
+   * @returns The array produced by `this.knowledge.getBusinessTerms()`, unmodified
+   */
+  getBusinessTerms(): import('../knowledge/types.js').BusinessTerm[] {
+    const glossary = this.knowledge.getBusinessTerms();
+    return glossary;
+  }
+  /**
+   * Look up the business terms that match a query string.
+   *
+   * The matching itself is delegated to the knowledge store.
+   *
+   * @param query - Text to match against the glossary
+   * @returns The store's matches, unmodified
+   */
+  findMatchingTerms(query: string): import('../knowledge/types.js').BusinessTerm[] {
+    const matches = this.knowledge.findMatchingTerms(query);
+    return matches;
+  }
+  /**
+   * Register a business term by hand.
+   *
+   * Pure pass-through: all three arguments are handed to the knowledge store's
+   * `addBusinessTerm` unchanged.
+   *
+   * @param term - The term being defined
+   * @param definition - Explanation of the term
+   * @param options - Optional extras (SQL expression, synonyms, table/column scope, category); defaults to `{}`
+   * @returns Promise for the `BusinessTerm` returned by the store
+   */
+  async addBusinessTerm(
+    term: string,
+    definition: string,
+    options: {
+      sqlExpression?: string;
+      synonyms?: string[];
+      appliesTo?: { tables?: string[]; columns?: string[] };
+      category?: import('../knowledge/types.js').TermCategory;
+    } = {}
+  ): Promise<import('../knowledge/types.js').BusinessTerm> {
+    const stored = this.knowledge.addBusinessTerm(term, definition, options);
+    return stored;
+  }
+  /**
+   * Remove a business term from the knowledge store.
+   *
+   * @param id - Identifier of the term to remove
+   * @returns Promise that settles with the store's `deleteBusinessTerm` call
+   */
+  async deleteBusinessTerm(id: string): Promise<void> {
+    const removal = this.knowledge.deleteBusinessTerm(id);
+    return removal;
+  }
+  /**
+   * Annotate a natural language query with glossary knowledge.
+   *
+   * Resolution order:
+   * 1. Local glossary matching. If it finds anything, the matched term names and
+   *    their SQL expressions (where present) are returned with `method: 'local'`.
+   * 2. Otherwise, if the LLM is available and the store has active terms, the
+   *    LLM service is asked and its answer is returned with `method: 'ai'`.
+   * 3. Otherwise the query is returned untouched with empty lists and
+   *    `method: 'local'`.
+   *
+   * `enhancedQuery` is only ever rewritten on the AI path.
+   *
+   * @param query - Natural language query
+   * @returns The original query plus the enhanced query, used terms, suggested
+   *          SQL conditions and which method produced them
+   */
+  async enhanceQuery(query: string): Promise<{
+    query: string;
+    enhancedQuery: string;
+    usedTerms: string[];
+    suggestedConditions: string[];
+    method: 'local' | 'ai';
+  }> {
+    // Step 1: local glossary lookup.
+    const matched = this.knowledge.findMatchingTerms(query);
+    if (matched.length > 0) {
+      const usedTerms: string[] = [];
+      const suggestedConditions: string[] = [];
+      for (const entry of matched) {
+        usedTerms.push(entry.term);
+        // Terms without an SQL expression contribute a name but no condition.
+        if (entry.sqlExpression) {
+          suggestedConditions.push(entry.sqlExpression);
+        }
+      }
+      return { query, enhancedQuery: query, usedTerms, suggestedConditions, method: 'local' };
+    }
+    // Step 2: decide whether the AI path is usable. The active terms are only
+    // fetched when the LLM is available, so both "no LLM" and "no active terms"
+    // collapse into the same empty-list case.
+    const { isLLMAvailable: llmReady, createLLMService: makeLLM } = await import('./llm-service.js');
+    const activeTerms = llmReady() ? this.knowledge.getActiveTerms() : [];
+    if (activeTerms.length === 0) {
+      return {
+        query,
+        enhancedQuery: query,
+        usedTerms: [],
+        suggestedConditions: [],
+        method: 'local',
+      };
+    }
+    // Step 3: ask the LLM service.
+    const aiResult = await makeLLM().enhanceQueryWithGlossary(query, activeTerms);
+    return {
+      query,
+      enhancedQuery: aiResult.enhancedQuery,
+      usedTerms: aiResult.usedTerms,
+      suggestedConditions: aiResult.suggestedConditions,
+      method: 'ai',
+    };
+  }
+  /**
+   * Turn free-form term definitions into stored glossary entries using the LLM.
+   *
+   * The database schema is read through the adapter, but only the first 20
+   * tables are passed to the model, together with the terms already in the
+   * knowledge store. The generated terms are then added to the knowledge store.
+   *
+   * @param rawTerms - Term definitions written in natural language
+   * @returns Result of adding the generated terms to the knowledge store
+   * @throws Error when the LLM is not available (no `ANTHROPIC_API_KEY`)
+   *
+   * @example
+   * ```typescript
+   * const terms = await service.generateGlossary(
+   *   "active user = user whose status is 1\nrecent order = order placed within 30 days"
+   * );
+   * console.log(`Generated ${terms.length} terms`);
+   * ```
+   */
+  async generateGlossary(rawTerms: string): Promise<import('../knowledge/types.js').BusinessTerm[]> {
+    const { isLLMAvailable: llmReady, createLLMService: makeLLM } = await import('./llm-service.js');
+    if (!llmReady()) {
+      throw new Error('ANTHROPIC_API_KEY not configured. Set the environment variable to use AI-powered glossary generation.');
+    }
+    // Describe the schema for the prompt; capped at the first 20 tables.
+    const schema = await this.adapter.getSchema();
+    const tables = schema.tables.slice(0, 20).map(({ name, columns }) => ({
+      name,
+      columns: columns.map(col => ({
+        name: col.name,
+        type: col.dataType,
+        nullable: col.isNullable,
+      })),
+    }));
+    const existingTerms = this.knowledge.getBusinessTerms();
+    const service = makeLLM();
+    const schemaHash = this.knowledge.getSchemaHash();
+    const drafts = await service.generateGlossary(rawTerms, { tables, existingTerms }, schemaHash);
+    // Persist what the model produced.
+    return this.knowledge.addBusinessTerms(drafts);
+  }
   // ============================================================
   // Lifecycle
   // ============================================================

package/src/core/index.ts CHANGED Viewed

@@ -11,6 +11,7 @@ export { Harvester, createHarvester } from './harvester.js';
 export { FeedbackManager, createFeedbackManager } from './feedback.js';
 export { MetricsCollector, createMetricsCollector } from './metrics.js';
 export { CostEstimator, createCostEstimator } from './cost-estimator.js';
+export { LLMService, createLLMService, isLLMAvailable } from './llm-service.js';
 // Types
 export type {
@@ -24,3 +25,9 @@ export type {
   DataContextConfig,
 } from './types.js';
+export type {
+  LLMServiceConfig,
+  SchemaContext,
+  GeneratedTerm,
+} from './llm-service.js';

package/src/core/llm-service.ts ADDED Viewed

@@ -0,0 +1,359 @@
+/**
+ * LLM Service Module
+ *
+ * Provides AI-powered features using Claude API.
+ * Used for auto-generating glossary terms, descriptions, and query suggestions.
+ *
+ * @example
+ * ```typescript
+ * const llm = createLLMService();
+ *
+ * // Generate glossary from user's raw terms
+ * const terms = await llm.generateGlossary(
+ *   "활성 사용자, 최근 주문, VIP 고객",
+ *   schemaContext
+ * );
+ *
+ * // Generate a description for a single table
+ * const description = await llm.generateTableDescription(tableInfo);
+ * ```
+ */
+import Anthropic from '@anthropic-ai/sdk';
+import type { BusinessTerm, TermCategory, TableDescription } from '../knowledge/types.js';
+import { generateId, createKnowledgeMeta } from '../knowledge/types.js';
+/**
+ * LLM service configuration.
+ *
+ * Every field is optional; the `LLMService` constructor supplies the fallbacks
+ * described on each field.
+ */
+export interface LLMServiceConfig {
+  /**
+   * Anthropic API key. Falls back to the `ANTHROPIC_API_KEY` environment
+   * variable; the constructor throws when neither is set.
+   */
+  apiKey?: string;
+  /**
+   * Model to use. Falls back to the `ANTHROPIC_MODEL` environment variable,
+   * then to `claude-sonnet-4-20250514`.
+   */
+  model?: string;
+  /** Maximum tokens for response (4096 when not provided) */
+  maxTokens?: number;
+}
+/**
+ * Schema context for LLM prompts.
+ *
+ * Describes the database to the model so that generated terms can refer to
+ * real tables and columns.
+ */
+export interface SchemaContext {
+  /** Tables to describe in the prompt, each with its column names, types and nullability */
+  tables: Array<{
+    name: string;
+    columns: Array<{
+      name: string;
+      type: string;
+      nullable: boolean;
+    }>;
+  }>;
+  /** Terms already in the glossary; listed in the glossary prompt under "avoid duplicates" */
+  existingTerms?: BusinessTerm[];
+  /** NOTE(review): not read anywhere in the visible llm-service code — confirm whether it is used */
+  existingRules?: Array<{ name: string; description: string }>;
+}
+/**
+ * Generated term from LLM.
+ *
+ * Mirrors the per-term JSON object that the glossary-generation prompt asks
+ * the model to return.
+ */
+export interface GeneratedTerm {
+  /** The primary term name */
+  term: string;
+  /** Alternative names for the term */
+  synonyms: string[];
+  /** Explanation of what the term means */
+  definition: string;
+  /** SQL condition or expression, when one applies */
+  sqlExpression?: string;
+  /** Tables and/or columns the term relates to */
+  appliesTo?: {
+    tables?: string[];
+    columns?: string[];
+  };
+  /** The prompt requests one of: status, time, money, entity, metric, filter, custom */
+  category?: TermCategory;
+  /** Example natural language queries using the term */
+  examples?: string[];
+}
+/**
+ * LLM Service class.
+ *
+ * Wraps the Anthropic client for three features: glossary generation, table
+ * description generation and glossary-aware query enhancement. Each public
+ * method sends a single `messages.create` request whose prompt asks the model
+ * to reply with JSON only, then parses that reply.
+ */
+export class LLMService {
+  private readonly client: Anthropic;
+  private readonly model: string;
+  private readonly maxTokens: number;
+  /**
+   * @param config - Optional overrides for API key, model and token limit
+   * @throws Error when neither `config.apiKey` nor `ANTHROPIC_API_KEY` is set
+   */
+  constructor(config: LLMServiceConfig = {}) {
+    const apiKey = config.apiKey || process.env.ANTHROPIC_API_KEY;
+    if (!apiKey) {
+      throw new Error(
+        'Anthropic API key not found. Set ANTHROPIC_API_KEY environment variable or pass apiKey in config.'
+      );
+    }
+    this.client = new Anthropic({ apiKey });
+    this.model = config.model || process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514';
+    this.maxTokens = config.maxTokens || 4096;
+  }
+  /**
+   * Generate glossary terms from user's raw input
+   *
+   * Takes natural language terms and generates structured BusinessTerm entries
+   * with SQL expressions and proper categorization.
+   *
+   * @param rawTerms - User's raw term input (comma-separated, YAML, or natural language)
+   * @param context - Database schema context
+   * @param schemaHash - Current schema hash for metadata
+   * @returns Array of generated BusinessTerm entries
+   * @throws Error prefixed with "Failed to generate glossary:" when the request
+   *         fails, the reply is not text, or the reply is not a JSON array
+   *
+   * @example
+   * ```typescript
+   * const terms = await llm.generateGlossary(
+   *   "active user = user whose status is 1\nrecent order = order placed within 30 days",
+   *   { tables: [{ name: 'users', columns: [...] }] },
+   *   "abc123"
+   * );
+   * ```
+   */
+  async generateGlossary(
+    rawTerms: string,
+    context: SchemaContext,
+    schemaHash: string = ''
+  ): Promise<BusinessTerm[]> {
+    const systemPrompt = `You are a database context expert. Your job is to analyze user-provided business terms and generate structured glossary entries that can be used to translate natural language queries into accurate SQL.
+IMPORTANT RULES:
+1. Generate SQL expressions that are syntactically correct for the given schema
+2. Match terms to actual table/column names in the schema
+3. Be precise with data types (e.g., integer status codes, date intervals)
+4. Include synonyms in multiple languages if the term suggests it
+5. Categorize terms appropriately: status, time, money, entity, metric, filter, custom
+OUTPUT FORMAT: Return a JSON array of term objects. Each object must have:
+- term: The primary term name
+- synonyms: Array of alternative names (include English, Korean if applicable)
+- definition: Clear explanation of what this term means
+- sqlExpression: SQL condition or expression (if applicable)
+- appliesTo: { tables?: string[], columns?: string[] }
+- category: One of: status, time, money, entity, metric, filter, custom
+- examples: Array of example usage in natural language queries
+Return ONLY the JSON array, no other text.`;
+    const schemaInfo = this.formatSchemaContext(context);
+    const userPrompt = `DATABASE SCHEMA:
+${schemaInfo}
+${context.existingTerms?.length ? `EXISTING TERMS (avoid duplicates):
+${context.existingTerms.map(t => `- ${t.term}: ${t.definition}`).join('\n')}
+` : ''}
+USER'S RAW TERMS TO PROCESS:
+${rawTerms}
+Generate structured glossary entries for these terms. Match them to the actual schema above.`;
+    try {
+      const text = await this.requestText(systemPrompt, userPrompt, this.maxTokens);
+      const generated = this.parseJsonResponse<GeneratedTerm[]>(text);
+      // Model output is untrusted: a single object or a scalar would otherwise
+      // surface as an opaque "map is not a function" error.
+      if (!Array.isArray(generated)) {
+        throw new Error('Expected a JSON array of glossary terms');
+      }
+      // Convert to BusinessTerm with metadata
+      return generated.map(term => ({
+        ...createKnowledgeMeta('auto', schemaHash),
+        type: 'business_term' as const,
+        term: term.term,
+        synonyms: term.synonyms || [],
+        definition: term.definition,
+        sqlExpression: term.sqlExpression,
+        appliesTo: term.appliesTo,
+        category: term.category,
+        examples: term.examples,
+        isActive: true,
+      }));
+    } catch (error) {
+      throw new Error(`Failed to generate glossary: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+  /**
+   * Generate a description for one table and its columns
+   *
+   * Sends the table name, its columns and up to three sample rows (when given)
+   * to the model and returns the parsed JSON reply. The reply is not validated
+   * against the declared return type.
+   *
+   * @param tableInfo - Table schema information, optionally with sample rows
+   * @param schemaHash - Current schema hash; accepted for signature parity but
+   *                     not read by this method
+   * @returns Generated table description fields
+   * @throws Error prefixed with "Failed to generate table description:" when
+   *         the request fails, the reply is not text, or the reply is not JSON
+   */
+  async generateTableDescription(
+    tableInfo: {
+      name: string;
+      columns: Array<{ name: string; type: string; nullable: boolean }>;
+      sampleData?: Record<string, unknown>[];
+    },
+    schemaHash: string = ''
+  ): Promise<Omit<TableDescription, keyof import('../knowledge/types.js').KnowledgeMeta | 'type'>> {
+    const systemPrompt = `You are a database documentation expert. Analyze the table structure and generate clear, useful descriptions.
+OUTPUT FORMAT: Return a JSON object with:
+- description: One sentence describing the table's purpose
+- purpose: Detailed explanation of the table's role
+- columns: Array of { name: string, description: string, valueMappings?: Record<string, string> }
+- tags: Array of relevant tags
+Return ONLY the JSON object, no other text.`;
+    // Only the first three sample rows are included in the prompt.
+    const sampleDataStr = tableInfo.sampleData
+      ? `\n\nSAMPLE DATA:\n${JSON.stringify(tableInfo.sampleData.slice(0, 3), null, 2)}`
+      : '';
+    const userPrompt = `TABLE: ${tableInfo.name}
+COLUMNS:
+${tableInfo.columns.map(c => `- ${c.name} (${c.type}${c.nullable ? ', nullable' : ''})`).join('\n')}
+${sampleDataStr}
+Generate descriptions for this table and its columns.`;
+    try {
+      const text = await this.requestText(systemPrompt, userPrompt, this.maxTokens);
+      return this.parseJsonResponse(text);
+    } catch (error) {
+      throw new Error(`Failed to generate table description: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+  /**
+   * Enhance a user query with glossary context
+   *
+   * Best-effort: this method never throws. When there are no active terms, or
+   * when the request fails, the reply is not text, or the reply cannot be
+   * parsed, the original query is returned with empty lists. Fields missing
+   * from (or mistyped in) the model's reply are replaced by those same
+   * fallbacks, so the result always has the declared shape.
+   *
+   * @param query - User's natural language query
+   * @param terms - Available business terms; inactive ones are ignored
+   * @returns Enhanced query with context
+   */
+  async enhanceQueryWithGlossary(
+    query: string,
+    terms: BusinessTerm[]
+  ): Promise<{
+    enhancedQuery: string;
+    usedTerms: string[];
+    suggestedConditions: string[];
+  }> {
+    const unchanged = { enhancedQuery: query, usedTerms: [], suggestedConditions: [] };
+    // Only active terms reach the prompt, so skip the API call when none remain.
+    const activeTerms = terms.filter(t => t.isActive);
+    if (activeTerms.length === 0) {
+      return unchanged;
+    }
+    const systemPrompt = `You are a query enhancement assistant. Your job is to identify business terms in user queries and suggest SQL conditions based on the glossary.
+OUTPUT FORMAT: Return a JSON object with:
+- enhancedQuery: The query with term definitions inline
+- usedTerms: Array of term names that were found in the query
+- suggestedConditions: Array of SQL conditions to apply
+Return ONLY the JSON object, no other text.`;
+    const glossaryStr = activeTerms
+      .map(t => `- "${t.term}" (${t.synonyms.join(', ')}): ${t.definition}${t.sqlExpression ? ` → SQL: ${t.sqlExpression}` : ''}`)
+      .join('\n');
+    const userPrompt = `GLOSSARY:
+${glossaryStr}
+USER QUERY:
+${query}
+Identify any terms from the glossary used in this query and suggest SQL conditions.`;
+    try {
+      // This call uses a fixed 1024-token limit rather than the configured one.
+      const text = await this.requestText(systemPrompt, userPrompt, 1024);
+      const parsed = this.parseJsonResponse<unknown>(text);
+      if (typeof parsed !== 'object' || parsed === null) {
+        return unchanged;
+      }
+      const fields = parsed as Record<string, unknown>;
+      // Keep only string entries; anything that is not an array becomes [].
+      const toStringList = (value: unknown): string[] =>
+        Array.isArray(value) ? value.filter((item): item is string => typeof item === 'string') : [];
+      return {
+        enhancedQuery: typeof fields.enhancedQuery === 'string' ? fields.enhancedQuery : query,
+        usedTerms: toStringList(fields.usedTerms),
+        suggestedConditions: toStringList(fields.suggestedConditions),
+      };
+    } catch {
+      // Deliberate best-effort: enhancement failures must not break the caller.
+      return unchanged;
+    }
+  }
+  /**
+   * Send one user message with a system prompt and return the reply's text.
+   *
+   * @param systemPrompt - System prompt for the request
+   * @param userPrompt - Content of the single user message
+   * @param maxTokens - Token limit for the reply
+   * @returns Text of the first content block of the reply
+   * @throws Error when the reply has no content or its first block is not text
+   */
+  private async requestText(systemPrompt: string, userPrompt: string, maxTokens: number): Promise<string> {
+    const response = await this.client.messages.create({
+      model: this.model,
+      max_tokens: maxTokens,
+      messages: [
+        { role: 'user', content: userPrompt }
+      ],
+      system: systemPrompt,
+    });
+    const content = response.content[0];
+    // `content` is undefined when the reply carries no blocks at all.
+    if (content?.type !== 'text') {
+      throw new Error('Unexpected response type from Claude');
+    }
+    return content.text;
+  }
+  /**
+   * Parse JSON response from Claude, handling markdown code blocks
+   *
+   * Two candidates are tried in order: the whole reply with a wrapping code
+   * fence removed, then the first fenced block found anywhere in the reply
+   * (covers replies that add prose around the fenced JSON).
+   *
+   * @param text - Raw reply text
+   * @returns The parsed value, cast to `T` without validation
+   * @throws Error quoting the first 200 characters when no candidate parses
+   */
+  private parseJsonResponse<T>(text: string): T {
+    const trimmed = text.trim();
+    // Candidate 1: the reply itself, minus a fence at its very start/end.
+    let unwrapped = trimmed;
+    if (unwrapped.startsWith('```json')) {
+      unwrapped = unwrapped.slice(7);
+    } else if (unwrapped.startsWith('```')) {
+      unwrapped = unwrapped.slice(3);
+    }
+    if (unwrapped.endsWith('```')) {
+      unwrapped = unwrapped.slice(0, -3);
+    }
+    const candidates = [unwrapped.trim()];
+    // Candidate 2: the first fenced block embedded in surrounding text.
+    const fencedBody = /```(?:json)?\s*([\s\S]*?)```/i.exec(trimmed)?.[1];
+    if (fencedBody !== undefined) {
+      candidates.push(fencedBody.trim());
+    }
+    for (const candidate of candidates) {
+      try {
+        return JSON.parse(candidate) as T;
+      } catch {
+        // Not valid JSON; fall through to the next candidate.
+      }
+    }
+    throw new Error(`Failed to parse JSON response: ${text.slice(0, 200)}...`);
+  }
+  /**
+   * Format schema context for prompts
+   *
+   * Each table becomes a `Table: <name>` line followed by one indented line per
+   * column; tables are separated by a blank line.
+   *
+   * @param context - Schema context whose `tables` are rendered
+   * @returns Plain-text schema listing
+   */
+  private formatSchemaContext(context: SchemaContext): string {
+    return context.tables
+      .map(table => {
+        const cols = table.columns
+          .map(c => `  - ${c.name} (${c.type}${c.nullable ? ', nullable' : ''})`)
+          .join('\n');
+        return `Table: ${table.name}\n${cols}`;
+      })
+      .join('\n\n');
+  }
+}
+/**
+ * Factory for {@link LLMService}.
+ *
+ * Equivalent to calling the constructor directly, so it throws under the same
+ * condition (no API key available).
+ *
+ * @param config - Service configuration; defaults to `{}`
+ * @returns A newly constructed LLMService
+ */
+export function createLLMService(config: LLMServiceConfig = {}): LLMService {
+  const service = new LLMService(config);
+  return service;
+}
+/**
+ * Report whether the LLM features can be used.
+ *
+ * Only the `ANTHROPIC_API_KEY` environment variable is inspected; an unset or
+ * empty value counts as unavailable.
+ *
+ * @returns `true` when `ANTHROPIC_API_KEY` is set to a non-empty string
+ */
+export function isLLMAvailable(): boolean {
+  return Boolean(process.env.ANTHROPIC_API_KEY);
+}