@kiyeonjeon21/datacontext 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.cursorrules +12 -0
  2. package/.env.example +8 -0
  3. package/.github/workflows/ci.yml +21 -1
  4. package/.github/workflows/publish.yml +21 -1
  5. package/CHANGELOG.md +41 -0
  6. package/README.md +247 -239
  7. package/datacontext.db +0 -0
  8. package/dist/api/server.d.ts.map +1 -1
  9. package/dist/api/server.js +145 -0
  10. package/dist/api/server.js.map +1 -1
  11. package/dist/api/start-server.d.ts +10 -0
  12. package/dist/api/start-server.d.ts.map +1 -0
  13. package/dist/api/start-server.js +73 -0
  14. package/dist/api/start-server.js.map +1 -0
  15. package/dist/cli/index.js +462 -0
  16. package/dist/cli/index.js.map +1 -1
  17. package/dist/core/context-service.d.ts +58 -0
  18. package/dist/core/context-service.d.ts.map +1 -1
  19. package/dist/core/context-service.js +121 -0
  20. package/dist/core/context-service.js.map +1 -1
  21. package/dist/core/index.d.ts +2 -0
  22. package/dist/core/index.d.ts.map +1 -1
  23. package/dist/core/index.js +5 -1
  24. package/dist/core/index.js.map +1 -1
  25. package/dist/core/llm-service.d.ts +141 -0
  26. package/dist/core/llm-service.d.ts.map +1 -0
  27. package/dist/core/llm-service.js +284 -0
  28. package/dist/core/llm-service.js.map +1 -0
  29. package/dist/knowledge/store.d.ts +56 -3
  30. package/dist/knowledge/store.d.ts.map +1 -1
  31. package/dist/knowledge/store.js +193 -7
  32. package/dist/knowledge/store.js.map +1 -1
  33. package/dist/knowledge/types.d.ts +43 -1
  34. package/dist/knowledge/types.d.ts.map +1 -1
  35. package/dist/knowledge/types.js.map +1 -1
  36. package/dist/mcp/tools.d.ts.map +1 -1
  37. package/dist/mcp/tools.js +365 -0
  38. package/dist/mcp/tools.js.map +1 -1
  39. package/docs/API.md +173 -0
  40. package/docs/DEMO_SCRIPT.md +210 -0
  41. package/docs/SYNC_GUIDE.md +242 -0
  42. package/package.json +4 -1
  43. package/src/api/server.ts +160 -0
  44. package/src/api/start-server.ts +78 -0
  45. package/src/cli/index.ts +534 -0
  46. package/src/core/context-service.ts +157 -0
  47. package/src/core/index.ts +7 -0
  48. package/src/core/llm-service.ts +359 -0
  49. package/src/knowledge/store.ts +232 -7
  50. package/src/knowledge/types.ts +45 -1
  51. package/src/mcp/tools.ts +415 -0
package/src/mcp/tools.ts CHANGED
@@ -263,6 +263,98 @@ export function getMcpTools(): Tool[] {
263
263
  required: ['name', 'description', 'tables'],
264
264
  },
265
265
  },
266
+ // === Glossary Tools ===
267
+ {
268
+ name: 'generate_glossary',
269
+ description: 'Generate business glossary/terms from user input using AI. Takes natural language term definitions and creates structured SQL-ready glossary entries. Requires ANTHROPIC_API_KEY to be set.',
270
+ inputSchema: {
271
+ type: 'object',
272
+ properties: {
273
+ terms: {
274
+ type: 'string',
275
+ description: 'Raw term definitions in natural language. Can be comma-separated, one per line, or structured (YAML-like). Example: "활성 사용자 = status가 1인 사용자\\n최근 주문 = 30일 이내 주문\\nVIP = 주문 10건 이상"',
276
+ },
277
+ },
278
+ required: ['terms'],
279
+ },
280
+ },
281
+ {
282
+ name: 'add_term',
283
+ description: 'Add a single business term to the glossary manually.',
284
+ inputSchema: {
285
+ type: 'object',
286
+ properties: {
287
+ term: {
288
+ type: 'string',
289
+ description: 'The business term (e.g., "활성 사용자", "active user").',
290
+ },
291
+ definition: {
292
+ type: 'string',
293
+ description: 'Human-readable definition of the term.',
294
+ },
295
+ sql: {
296
+ type: 'string',
297
+ description: 'SQL expression for this term (e.g., "status = 1").',
298
+ },
299
+ synonyms: {
300
+ type: 'array',
301
+ items: { type: 'string' },
302
+ description: 'Alternative names for this term.',
303
+ },
304
+ tables: {
305
+ type: 'array',
306
+ items: { type: 'string' },
307
+ description: 'Tables this term applies to.',
308
+ },
309
+ },
310
+ required: ['term', 'definition'],
311
+ },
312
+ },
313
+ {
314
+ name: 'list_terms',
315
+ description: 'List all business terms in the glossary.',
316
+ inputSchema: {
317
+ type: 'object',
318
+ properties: {
319
+ category: {
320
+ type: 'string',
321
+ description: 'Filter by category (status, time, money, entity, metric, filter, custom).',
322
+ },
323
+ table: {
324
+ type: 'string',
325
+ description: 'Filter by table name.',
326
+ },
327
+ },
328
+ },
329
+ },
330
+ {
331
+ name: 'search_terms',
332
+ description: 'Search for business terms that match a query. Useful for finding relevant terms before generating SQL.',
333
+ inputSchema: {
334
+ type: 'object',
335
+ properties: {
336
+ query: {
337
+ type: 'string',
338
+ description: 'Search query to match against term names and synonyms.',
339
+ },
340
+ },
341
+ required: ['query'],
342
+ },
343
+ },
344
+ {
345
+ name: 'enhance_query',
346
+ description: 'Enhance a natural language query by matching it against the business glossary. Returns suggested SQL conditions based on matched terms. Requires ANTHROPIC_API_KEY.',
347
+ inputSchema: {
348
+ type: 'object',
349
+ properties: {
350
+ query: {
351
+ type: 'string',
352
+ description: 'Natural language query to enhance (e.g., "활성 사용자 중 최근 주문한 VIP 고객")',
353
+ },
354
+ },
355
+ required: ['query'],
356
+ },
357
+ },
266
358
  ];
267
359
  }
268
360
 
@@ -298,6 +390,17 @@ export async function handleToolCall(
298
390
  return handleAddQueryExample(args, context);
299
391
  case 'add_business_rule':
300
392
  return handleAddBusinessRule(args, context);
393
+ // === Glossary Tools ===
394
+ case 'generate_glossary':
395
+ return handleGenerateGlossary(args, context);
396
+ case 'add_term':
397
+ return handleAddTerm(args, context);
398
+ case 'list_terms':
399
+ return handleListTerms(args, context);
400
+ case 'search_terms':
401
+ return handleSearchTerms(args, context);
402
+ case 'enhance_query':
403
+ return handleEnhanceQuery(args, context);
301
404
  default:
302
405
  throw new Error(`Unknown tool: ${name}`);
303
406
  }
@@ -820,6 +923,318 @@ async function handleAddBusinessRule(
820
923
  };
821
924
  }
822
925
 
926
+ // ============================================================
927
+ // Glossary Tool Handlers
928
+ // ============================================================
929
+
930
/**
 * Tool handler for `generate_glossary`.
 *
 * Passes the caller's free-form term definitions, a trimmed view of the
 * database schema and the terms already in the knowledge store to the LLM
 * service, then persists the terms the LLM returns.
 *
 * @param args - Tool arguments; `terms` carries the raw definitions.
 * @param context - Supplies the database adapter and the knowledge store.
 * @returns `success: true` with a summary of the stored terms, or
 *   `success: false` with an `error` message when the LLM is not available
 *   or any step of the generation pipeline throws.
 * @throws Error when `terms` is missing or empty.
 */
async function handleGenerateGlossary(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const rawDefinitions = args.terms as string;
  if (!rawDefinitions) {
    throw new Error('terms is required');
  }

  // Dynamic import: the LLM module is only loaded once this tool is invoked.
  const { isLLMAvailable: llmReady, createLLMService: makeLlm } = await import('../core/llm-service.js');
  if (!llmReady()) {
    return {
      success: false,
      error: 'ANTHROPIC_API_KEY not configured. Set the environment variable to use AI-powered glossary generation.',
      tip: 'You can still add terms manually using the add_term tool.',
    };
  }

  try {
    const { tables } = await context.adapter.getSchema();

    // Only the first 20 tables are forwarded (presumably to bound the prompt
    // size; confirm against the LLM service).
    const tableSummaries = tables.slice(0, 20).map(({ name, columns }) => ({
      name,
      columns: columns.map(col => ({
        name: col.name,
        type: col.dataType,
        nullable: col.isNullable,
      })),
    }));
    const schemaContext = {
      tables: tableSummaries,
      existingTerms: context.knowledge.getBusinessTerms(),
    };

    const llm = makeLlm();
    const generated = await llm.generateGlossary(
      rawDefinitions,
      schemaContext,
      context.knowledge.getSchemaHash()
    );

    // Persist the generated terms; the store's return value drives the reply.
    const stored = await context.knowledge.addBusinessTerms(generated);
    const storedCount = stored.length;

    return {
      success: true,
      generated: storedCount,
      terms: stored.map(entry => ({
        term: entry.term,
        definition: entry.definition,
        sql: entry.sqlExpression,
        category: entry.category,
        tables: entry.appliesTo?.tables,
      })),
      message: `Generated and added ${storedCount} business term(s) to glossary.`,
      tip: 'These terms will now be used to enhance query understanding.',
    };
  } catch (error) {
    // Failures are reported in-band instead of being rethrown.
    const reason = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      error: reason,
      tip: 'Check your API key and try again with simpler term definitions.',
    };
  }
}
1000
+
1001
/**
 * Tool handler for `add_term`: stores one manually defined business term.
 *
 * The arguments come from an external tool call, so they are validated at
 * runtime rather than merely cast. In particular `synonyms` and `tables`
 * must be real arrays of strings: a bare string stored as `tables` would
 * later be searched with `String.prototype.includes`, i.e. by substring.
 *
 * @param args - Tool arguments: `term` and `definition` (required, non-blank
 *   strings), `sql` (optional string), `synonyms` and `tables` (optional
 *   string arrays). `null` is treated the same as an omitted optional value.
 * @param context - Supplies the knowledge store the term is written to.
 * @returns A summary of the stored term plus a confirmation message.
 * @throws Error when a required argument is missing or any argument has the
 *   wrong type.
 */
async function handleAddTerm(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  // Required text argument: must be a string with non-whitespace content.
  const requireText = (key: string): string => {
    const value = args[key];
    if (value === undefined || value === null || value === '') {
      throw new Error(`${key} is required`);
    }
    if (typeof value !== 'string') {
      throw new Error(`${key} must be a string`);
    }
    if (value.trim() === '') {
      throw new Error(`${key} is required`);
    }
    return value;
  };

  // Optional list argument: absent, or an array made only of strings.
  const optionalStringList = (key: string): string[] | undefined => {
    const value = args[key];
    if (value === undefined || value === null) return undefined;
    if (!Array.isArray(value) || !value.every((item): item is string => typeof item === 'string')) {
      throw new Error(`${key} must be an array of strings`);
    }
    return value;
  };

  const term = requireText('term');
  const definition = requireText('definition');

  const rawSql = args.sql;
  if (rawSql !== undefined && rawSql !== null && typeof rawSql !== 'string') {
    throw new Error('sql must be a string');
  }
  const sql = rawSql ?? undefined;

  const synonyms = optionalStringList('synonyms');
  const tables = optionalStringList('tables');

  const added = await context.knowledge.addBusinessTerm(term, definition, {
    sqlExpression: sql,
    synonyms,
    appliesTo: tables ? { tables } : undefined,
  });

  return {
    success: true,
    term: added.term,
    definition: added.definition,
    sql: added.sqlExpression,
    synonyms: added.synonyms,
    tables: added.appliesTo?.tables,
    message: `Added term "${term}" to glossary.`,
    tip: 'This term will be used in query context when relevant.',
  };
}
1034
+
1035
/**
 * Tool handler for `list_terms`.
 *
 * Reads the active terms from the knowledge store and narrows them by the
 * optional `category` and `table` arguments.
 *
 * @param args - Optional filters: `category` (compared for equality with
 *   each term's category) and `table` (must appear in the term's
 *   `appliesTo.tables`).
 * @param context - Supplies the knowledge store.
 * @returns When nothing is left after filtering, an empty listing with a
 *   message and a tip; otherwise the count, the summarised terms and the
 *   distinct non-empty categories found among them.
 */
async function handleListTerms(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const { category, table } = args as { category?: string; table?: string };

  // A filter only takes part when its argument is truthy.
  const selected = context.knowledge.getActiveTerms().filter(entry => {
    if (category && entry.category !== category) return false;
    if (table && !entry.appliesTo?.tables?.includes(table)) return false;
    return true;
  });

  if (selected.length === 0) {
    return {
      count: 0,
      terms: [],
      message: 'No business terms found.',
      tip: 'Add terms using add_term or generate_glossary.',
    };
  }

  const summaries = selected.map(entry => ({
    id: entry.id,
    term: entry.term,
    synonyms: entry.synonyms,
    definition: entry.definition,
    sql: entry.sqlExpression,
    category: entry.category,
    tables: entry.appliesTo?.tables,
    isActive: entry.isActive,
  }));

  // Distinct categories in first-seen order; falsy categories are dropped.
  const distinctCategories = new Set(selected.map(entry => entry.category).filter(Boolean));

  return {
    count: selected.length,
    terms: summaries,
    categories: [...distinctCategories],
  };
}
1079
+
1080
/**
 * Tool handler for `search_terms`.
 *
 * Asks the knowledge store for the terms matching `query` and returns them
 * with a relevance score each, plus the SQL expressions of the matches that
 * have one.
 *
 * @param args - Tool arguments; `query` is the search text.
 * @param context - Supplies the knowledge store.
 * @returns An empty listing with a message and a tip when nothing matches;
 *   otherwise the query, the match count, the summarised terms, the
 *   suggested SQL conditions and a message.
 * @throws Error when `query` is missing or empty.
 */
async function handleSearchTerms(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const query = args.query as string;
  if (!query) {
    throw new Error('query is required');
  }

  const hits = context.knowledge.findMatchingTerms(query);
  const hitCount = hits.length;

  if (hitCount === 0) {
    return {
      count: 0,
      terms: [],
      message: `No terms found matching "${query}".`,
      tip: 'Try a broader search or check available terms with list_terms.',
    };
  }

  const scoredTerms = hits.map(hit => ({
    term: hit.term,
    synonyms: hit.synonyms,
    definition: hit.definition,
    sql: hit.sqlExpression,
    category: hit.category,
    relevance: calculateTermRelevance(query, hit),
  }));

  // Matches without a (truthy) SQL expression contribute no condition.
  const suggestedConditions = hits.flatMap(hit =>
    hit.sqlExpression ? [hit.sqlExpression as string] : []
  );

  return {
    query,
    count: hitCount,
    terms: scoredTerms,
    suggestedConditions,
    message: `Found ${hitCount} term(s) matching "${query}".`,
  };
}
1121
+
1122
/**
 * Tool handler for `enhance_query`.
 *
 * Resolution order:
 *   1. Local matching through the knowledge store; any hit is returned
 *      immediately and the LLM is never involved.
 *   2. If the glossary has no active terms there is nothing an LLM could
 *      match against, so the caller is told to add terms. This is checked
 *      before LLM availability so that an empty glossary is not misreported
 *      as a missing API key, and so the LLM module is not loaded needlessly.
 *   3. If the LLM is not available, an empty local result is returned with
 *      a hint to configure the API key.
 *   4. Otherwise the LLM service matches the query against the active terms.
 *
 * @param args - Tool arguments; `query` is the natural-language query.
 * @param context - Supplies the knowledge store.
 * @returns A result object with `method` set to `'local'` or `'ai'`, or
 *   `success: false` with an `error` message when reading the glossary or
 *   calling the LLM throws.
 * @throws Error when `query` is missing or empty.
 */
async function handleEnhanceQuery(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const query = args.query as string;

  if (!query) {
    throw new Error('query is required');
  }

  // Shared shape of every "nothing to suggest" reply.
  const emptyLocalResult = (message: string) => ({
    success: true,
    method: 'local',
    query,
    enhancedQuery: query,
    usedTerms: [],
    suggestedConditions: [],
    message,
  });

  // Shared shape of the in-band failure reply.
  const failure = (error: unknown) => ({
    success: false,
    error: error instanceof Error ? error.message : String(error),
    tip: 'Fallback to local term matching.',
  });

  // Step 1: local matching (no LLM).
  const localMatches = context.knowledge.findMatchingTerms(query);

  if (localMatches.length > 0) {
    const suggestedConditions = localMatches
      .filter(t => t.sqlExpression)
      .map(t => t.sqlExpression as string);

    return {
      success: true,
      method: 'local',
      query,
      enhancedQuery: query,
      usedTerms: localMatches.map(t => t.term),
      suggestedConditions,
      terms: localMatches.map(t => ({
        term: t.term,
        sql: t.sqlExpression,
        category: t.category,
      })),
      tip: 'Use these SQL conditions in your WHERE clause.',
    };
  }

  // Step 2: an empty glossary is the root cause regardless of LLM setup.
  let terms;
  try {
    terms = context.knowledge.getActiveTerms();
  } catch (error) {
    return failure(error);
  }

  if (terms.length === 0) {
    return emptyLocalResult('No terms in glossary. Add terms using add_term or generate_glossary.');
  }

  // Step 3: the LLM module is only loaded once there is something to match.
  const { isLLMAvailable, createLLMService } = await import('../core/llm-service.js');

  if (!isLLMAvailable()) {
    return emptyLocalResult(
      'No matching terms found locally. Configure ANTHROPIC_API_KEY for AI-powered enhancement.'
    );
  }

  // Step 4: LLM-based matching against the active terms.
  try {
    const llm = createLLMService();
    const result = await llm.enhanceQueryWithGlossary(query, terms);

    return {
      success: true,
      method: 'ai',
      query,
      enhancedQuery: result.enhancedQuery,
      usedTerms: result.usedTerms,
      suggestedConditions: result.suggestedConditions,
      message: result.usedTerms.length > 0
        ? `Found ${result.usedTerms.length} matching term(s): ${result.usedTerms.join(', ')}`
        : 'No matching terms found.',
      tip: 'Apply suggestedConditions to your SQL WHERE clause.',
    };
  } catch (error) {
    return failure(error);
  }
}
1212
+
1213
/**
 * Score how well a glossary term matches a search query (case-insensitive).
 *
 * The rules are evaluated from the highest score to the lowest, so the
 * result is always the best score any rule grants:
 *
 *   1.0  term equals the query
 *   0.9  some synonym equals the query
 *   0.8  term contains the query
 *   0.7  query contains the term
 *   0.6  some synonym contains the query
 *   0.5  none of the above
 *
 * Evaluating in this order prevents an exact synonym match from being
 * shadowed by a weaker partial match on the term itself, or by a partial
 * match on a synonym that merely appears earlier in the list.
 *
 * @param query - The search text.
 * @param term - The term to score; a missing or `null` `synonyms` list is
 *   treated as empty.
 * @returns A score between 0.5 and 1.0 inclusive.
 */
function calculateTermRelevance(
  query: string,
  term: { term: string; synonyms?: readonly string[] | null }
): number {
  const lowerQuery = query.toLowerCase();
  const lowerTerm = term.term.toLowerCase();

  if (lowerTerm === lowerQuery) return 1.0;

  const lowerSynonyms = (term.synonyms ?? []).map(syn => syn.toLowerCase());

  if (lowerSynonyms.includes(lowerQuery)) return 0.9;
  if (lowerTerm.includes(lowerQuery)) return 0.8;
  if (lowerQuery.includes(lowerTerm)) return 0.7;
  if (lowerSynonyms.some(syn => syn.includes(lowerQuery))) return 0.6;

  return 0.5;
}
1237
+
823
1238
  // ============================================================
824
1239
  // Utility Functions
825
1240
  // ============================================================