@kiyeonjeon21/datacontext 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.cursorrules +12 -0
  2. package/.env.example +8 -0
  3. package/.github/workflows/ci.yml +21 -1
  4. package/.github/workflows/publish.yml +21 -1
  5. package/CHANGELOG.md +41 -0
  6. package/README.md +247 -239
  7. package/cursor-mcp-config.json.example +29 -0
  8. package/datacontext.db +0 -0
  9. package/dist/api/server.d.ts.map +1 -1
  10. package/dist/api/server.js +145 -0
  11. package/dist/api/server.js.map +1 -1
  12. package/dist/api/start-server.d.ts +10 -0
  13. package/dist/api/start-server.d.ts.map +1 -0
  14. package/dist/api/start-server.js +73 -0
  15. package/dist/api/start-server.js.map +1 -0
  16. package/dist/cli/index.js +462 -0
  17. package/dist/cli/index.js.map +1 -1
  18. package/dist/core/context-service.d.ts +72 -0
  19. package/dist/core/context-service.d.ts.map +1 -1
  20. package/dist/core/context-service.js +132 -0
  21. package/dist/core/context-service.js.map +1 -1
  22. package/dist/core/index.d.ts +2 -0
  23. package/dist/core/index.d.ts.map +1 -1
  24. package/dist/core/index.js +5 -1
  25. package/dist/core/index.js.map +1 -1
  26. package/dist/core/llm-service.d.ts +141 -0
  27. package/dist/core/llm-service.d.ts.map +1 -0
  28. package/dist/core/llm-service.js +284 -0
  29. package/dist/core/llm-service.js.map +1 -0
  30. package/dist/knowledge/store.d.ts +56 -3
  31. package/dist/knowledge/store.d.ts.map +1 -1
  32. package/dist/knowledge/store.js +193 -7
  33. package/dist/knowledge/store.js.map +1 -1
  34. package/dist/knowledge/types.d.ts +43 -1
  35. package/dist/knowledge/types.d.ts.map +1 -1
  36. package/dist/knowledge/types.js.map +1 -1
  37. package/dist/mcp/tools.d.ts.map +1 -1
  38. package/dist/mcp/tools.js +365 -0
  39. package/dist/mcp/tools.js.map +1 -1
  40. package/docs/API.md +173 -0
  41. package/docs/DEMO_SCRIPT.md +210 -0
  42. package/docs/MCP_TEST_GUIDE.md +414 -0
  43. package/docs/SYNC_GUIDE.md +242 -0
  44. package/package.json +4 -1
  45. package/src/api/server.ts +160 -0
  46. package/src/api/start-server.ts +78 -0
  47. package/src/cli/index.ts +534 -0
  48. package/src/core/context-service.ts +182 -0
  49. package/src/core/index.ts +7 -0
  50. package/src/core/llm-service.ts +359 -0
  51. package/src/knowledge/store.ts +232 -7
  52. package/src/knowledge/types.ts +45 -1
  53. package/src/mcp/tools.ts +415 -0
  54. package/test-glossary.yaml +55 -0
  55. package/test-mcp.db +0 -0
package/src/mcp/tools.ts CHANGED
@@ -263,6 +263,98 @@ export function getMcpTools(): Tool[] {
263
263
  required: ['name', 'description', 'tables'],
264
264
  },
265
265
  },
266
+ // === Glossary Tools ===
267
+ {
268
+ name: 'generate_glossary',
269
+ description: 'Generate business glossary/terms from user input using AI. Takes natural language term definitions and creates structured SQL-ready glossary entries. Requires ANTHROPIC_API_KEY to be set.',
270
+ inputSchema: {
271
+ type: 'object',
272
+ properties: {
273
+ terms: {
274
+ type: 'string',
275
+ description: 'Raw term definitions in natural language. Can be comma-separated, one per line, or structured (YAML-like). Example: "활성 사용자 = status가 1인 사용자\\n최근 주문 = 30일 이내 주문\\nVIP = 주문 10건 이상"',
276
+ },
277
+ },
278
+ required: ['terms'],
279
+ },
280
+ },
281
+ {
282
+ name: 'add_term',
283
+ description: 'Add a single business term to the glossary manually.',
284
+ inputSchema: {
285
+ type: 'object',
286
+ properties: {
287
+ term: {
288
+ type: 'string',
289
+ description: 'The business term (e.g., "활성 사용자", "active user").',
290
+ },
291
+ definition: {
292
+ type: 'string',
293
+ description: 'Human-readable definition of the term.',
294
+ },
295
+ sql: {
296
+ type: 'string',
297
+ description: 'SQL expression for this term (e.g., "status = 1").',
298
+ },
299
+ synonyms: {
300
+ type: 'array',
301
+ items: { type: 'string' },
302
+ description: 'Alternative names for this term.',
303
+ },
304
+ tables: {
305
+ type: 'array',
306
+ items: { type: 'string' },
307
+ description: 'Tables this term applies to.',
308
+ },
309
+ },
310
+ required: ['term', 'definition'],
311
+ },
312
+ },
313
+ {
314
+ name: 'list_terms',
315
+ description: 'List all business terms in the glossary.',
316
+ inputSchema: {
317
+ type: 'object',
318
+ properties: {
319
+ category: {
320
+ type: 'string',
321
+ description: 'Filter by category (status, time, money, entity, metric, filter, custom).',
322
+ },
323
+ table: {
324
+ type: 'string',
325
+ description: 'Filter by table name.',
326
+ },
327
+ },
328
+ },
329
+ },
330
+ {
331
+ name: 'search_terms',
332
+ description: 'Search for business terms that match a query. Useful for finding relevant terms before generating SQL.',
333
+ inputSchema: {
334
+ type: 'object',
335
+ properties: {
336
+ query: {
337
+ type: 'string',
338
+ description: 'Search query to match against term names and synonyms.',
339
+ },
340
+ },
341
+ required: ['query'],
342
+ },
343
+ },
344
+ {
345
+ name: 'enhance_query',
346
+ description: 'Enhance a natural language query by matching it against the business glossary. Returns suggested SQL conditions based on matched terms. Requires ANTHROPIC_API_KEY.',
347
+ inputSchema: {
348
+ type: 'object',
349
+ properties: {
350
+ query: {
351
+ type: 'string',
352
+ description: 'Natural language query to enhance (e.g., "활성 사용자 중 최근 주문한 VIP 고객")',
353
+ },
354
+ },
355
+ required: ['query'],
356
+ },
357
+ },
266
358
  ];
267
359
  }
268
360
 
@@ -298,6 +390,17 @@ export async function handleToolCall(
298
390
  return handleAddQueryExample(args, context);
299
391
  case 'add_business_rule':
300
392
  return handleAddBusinessRule(args, context);
393
+ // === Glossary Tools ===
394
+ case 'generate_glossary':
395
+ return handleGenerateGlossary(args, context);
396
+ case 'add_term':
397
+ return handleAddTerm(args, context);
398
+ case 'list_terms':
399
+ return handleListTerms(args, context);
400
+ case 'search_terms':
401
+ return handleSearchTerms(args, context);
402
+ case 'enhance_query':
403
+ return handleEnhanceQuery(args, context);
301
404
  default:
302
405
  throw new Error(`Unknown tool: ${name}`);
303
406
  }
@@ -820,6 +923,318 @@ async function handleAddBusinessRule(
820
923
  };
821
924
  }
822
925
 
926
+ // ============================================================
927
+ // Glossary Tool Handlers
928
+ // ============================================================
929
+
930
/**
 * Handle the generate_glossary tool: AI-powered glossary generation.
 *
 * Sends the caller's raw term definitions, a trimmed view of the database
 * schema and the existing business terms to the LLM service, then stores the
 * generated terms in the knowledge store.
 *
 * @param args - Tool arguments; `terms` must be a non-blank string.
 * @param context - Tool context providing `adapter` (schema access) and
 *   `knowledge` (glossary storage).
 * @returns A result object. `success: true` carries the stored terms;
 *   `success: false` carries an `error` and a `tip` when the LLM is not
 *   available or when any step of the generation fails.
 * @throws Error when `terms` is missing, not a string, or only whitespace.
 */
async function handleGenerateGlossary(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const { terms } = args;

  // Validate at runtime instead of trusting a cast: the argument values are
  // typed `unknown`, and a blank string would otherwise reach the LLM call
  // with nothing to generate from.
  if (typeof terms !== 'string' || terms.trim() === '') {
    throw new Error('terms is required');
  }

  // Dynamic import: the LLM module is only loaded when this tool is invoked.
  const { isLLMAvailable, createLLMService } = await import('../core/llm-service.js');

  if (!isLLMAvailable()) {
    return {
      success: false,
      error: 'ANTHROPIC_API_KEY not configured. Set the environment variable to use AI-powered glossary generation.',
      tip: 'You can still add terms manually using the add_term tool.',
    };
  }

  try {
    // Only the first 20 tables are described to the LLM
    // (presumably to bound the prompt size - confirm with llm-service).
    const schemaInfo = await context.adapter.getSchema();
    const schemaContext = {
      tables: schemaInfo.tables.slice(0, 20).map(table => ({
        name: table.name,
        columns: table.columns.map(c => ({
          name: c.name,
          type: c.dataType,
          nullable: c.isNullable,
        })),
      })),
      existingTerms: context.knowledge.getBusinessTerms(),
    };

    const llm = createLLMService();
    const generatedTerms = await llm.generateGlossary(
      terms,
      schemaContext,
      context.knowledge.getSchemaHash()
    );

    // Persist the generated terms; the response reports what the store returned.
    const added = await context.knowledge.addBusinessTerms(generatedTerms);

    return {
      success: true,
      generated: added.length,
      terms: added.map(t => ({
        term: t.term,
        definition: t.definition,
        sql: t.sqlExpression,
        category: t.category,
        tables: t.appliesTo?.tables,
      })),
      message: `Generated and added ${added.length} business term(s) to glossary.`,
      tip: 'These terms will now be used to enhance query understanding.',
    };
  } catch (error) {
    // Failures are reported in the tool result rather than thrown.
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
      tip: 'Check your API key and try again with simpler term definitions.',
    };
  }
}
1000
+
1001
/**
 * Handle the add_term tool: store one manually defined business term.
 *
 * @param args - Tool arguments. `term` and `definition` are required;
 *   `sql`, `synonyms` and `tables` are optional.
 * @param context - Tool context whose `knowledge` store receives the term.
 * @returns A success object echoing the stored term's fields.
 * @throws Error when `term` or `definition` is missing or empty.
 */
async function handleAddTerm(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const term = args.term as string;
  if (!term) {
    throw new Error('term is required');
  }

  const definition = args.definition as string;
  if (!definition) {
    throw new Error('definition is required');
  }

  const tableNames = args.tables as string[] | undefined;

  // The table scope is only attached when the caller supplied one.
  let appliesTo: { tables: string[] } | undefined;
  if (tableNames) {
    appliesTo = { tables: tableNames };
  }

  const stored = await context.knowledge.addBusinessTerm(term, definition, {
    sqlExpression: args.sql as string | undefined,
    synonyms: args.synonyms as string[] | undefined,
    appliesTo,
  });

  // The response reflects what the store returned; the message quotes the
  // term exactly as the caller passed it.
  return {
    success: true,
    term: stored.term,
    definition: stored.definition,
    sql: stored.sqlExpression,
    synonyms: stored.synonyms,
    tables: stored.appliesTo?.tables,
    message: `Added term "${term}" to glossary.`,
    tip: 'This term will be used in query context when relevant.',
  };
}
1034
+
1035
/**
 * Handle the list_terms tool: list the active glossary terms.
 *
 * @param args - Optional filters: `category` (exact match on the term's
 *   category) and `table` (term must list that table in `appliesTo.tables`).
 * @param context - Tool context whose `knowledge` store supplies the terms.
 * @returns `{ count, terms, categories }` for a non-empty result, or
 *   `{ count: 0, terms: [], message, tip }` when nothing matches.
 */
async function handleListTerms(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const categoryFilter = args.category as string | undefined;
  const tableFilter = args.table as string | undefined;

  // A filter that was not supplied (or is empty) accepts every term.
  const visible = context.knowledge.getActiveTerms().filter(entry => {
    if (categoryFilter && entry.category !== categoryFilter) {
      return false;
    }
    if (tableFilter && !entry.appliesTo?.tables?.includes(tableFilter)) {
      return false;
    }
    return true;
  });

  if (visible.length === 0) {
    return {
      count: 0,
      terms: [],
      message: 'No business terms found.',
      tip: 'Add terms using add_term or generate_glossary.',
    };
  }

  const listing = visible.map(entry => ({
    id: entry.id,
    term: entry.term,
    synonyms: entry.synonyms,
    definition: entry.definition,
    sql: entry.sqlExpression,
    category: entry.category,
    tables: entry.appliesTo?.tables,
    isActive: entry.isActive,
  }));

  // Distinct categories in first-seen order; terms without a category are skipped.
  const categories = Array.from(
    new Set(visible.flatMap(entry => (entry.category ? [entry.category] : [])))
  );

  return {
    count: visible.length,
    terms: listing,
    categories,
  };
}
1079
+
1080
/**
 * Handle the search_terms tool: find glossary terms matching a query.
 *
 * @param args - Tool arguments; `query` is required.
 * @param context - Tool context whose `knowledge` store performs the match.
 * @returns For matches: the query, the match count, each term with a
 *   relevance score, and the SQL expressions of the matched terms. For no
 *   matches: `{ count: 0, terms: [], message, tip }`.
 * @throws Error when `query` is missing or empty.
 */
async function handleSearchTerms(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const query = args.query as string;
  if (!query) {
    throw new Error('query is required');
  }

  const matches = context.knowledge.findMatchingTerms(query);
  const total = matches.length;

  if (total === 0) {
    return {
      count: 0,
      terms: [],
      message: `No terms found matching "${query}".`,
      tip: 'Try a broader search or check available terms with list_terms.',
    };
  }

  const scored = matches.map(entry => ({
    term: entry.term,
    synonyms: entry.synonyms,
    definition: entry.definition,
    sql: entry.sqlExpression,
    category: entry.category,
    relevance: calculateTermRelevance(query, entry),
  }));

  // Only terms that carry a SQL expression contribute a suggested condition.
  const suggestedConditions: string[] = [];
  for (const entry of matches) {
    if (entry.sqlExpression) {
      suggestedConditions.push(entry.sqlExpression as string);
    }
  }

  return {
    query,
    count: total,
    terms: scored,
    suggestedConditions,
    message: `Found ${total} term(s) matching "${query}".`,
  };
}
1121
+
1122
/**
 * Handle the enhance_query tool: map a natural language query onto glossary terms.
 *
 * Local matching through the knowledge store is tried first and returned
 * directly when it finds anything. Only when it finds nothing is the LLM
 * service consulted, provided it is available and the glossary is not empty.
 *
 * @param args - Tool arguments; `query` is required.
 * @param context - Tool context whose `knowledge` store supplies the terms.
 * @returns A result object with `method` set to `'local'` or `'ai'`, or
 *   `success: false` with an `error` when the AI step fails.
 * @throws Error when `query` is missing or empty.
 */
async function handleEnhanceQuery(
  args: Record<string, unknown>,
  context: ToolContext
): Promise<unknown> {
  const query = args.query as string;
  if (!query) {
    throw new Error('query is required');
  }

  // Shared shape for the outcomes where nothing matched and no AI ran.
  const unmatchedResult = (message: string) => ({
    success: true,
    method: 'local',
    query,
    enhancedQuery: query,
    usedTerms: [],
    suggestedConditions: [],
    message,
  });

  // Step 1: local matching, no LLM involved.
  const localHits = context.knowledge.findMatchingTerms(query);
  if (localHits.length > 0) {
    const suggestedConditions: string[] = [];
    for (const hit of localHits) {
      if (hit.sqlExpression) {
        suggestedConditions.push(hit.sqlExpression as string);
      }
    }

    return {
      success: true,
      method: 'local',
      query,
      enhancedQuery: query,
      usedTerms: localHits.map(hit => hit.term),
      suggestedConditions,
      terms: localHits.map(hit => ({
        term: hit.term,
        sql: hit.sqlExpression,
        category: hit.category,
      })),
      tip: 'Use these SQL conditions in your WHERE clause.',
    };
  }

  // Step 2: AI enhancement, only when the LLM module reports it is usable.
  const { isLLMAvailable, createLLMService } = await import('../core/llm-service.js');
  if (!isLLMAvailable()) {
    return unmatchedResult(
      'No matching terms found locally. Configure ANTHROPIC_API_KEY for AI-powered enhancement.'
    );
  }

  try {
    const glossary = context.knowledge.getActiveTerms();
    if (glossary.length === 0) {
      return unmatchedResult('No terms in glossary. Add terms using add_term or generate_glossary.');
    }

    const llm = createLLMService();
    const outcome = await llm.enhanceQueryWithGlossary(query, glossary);

    let message: string;
    if (outcome.usedTerms.length > 0) {
      message = `Found ${outcome.usedTerms.length} matching term(s): ${outcome.usedTerms.join(', ')}`;
    } else {
      message = 'No matching terms found.';
    }

    return {
      success: true,
      method: 'ai',
      query,
      enhancedQuery: outcome.enhancedQuery,
      usedTerms: outcome.usedTerms,
      suggestedConditions: outcome.suggestedConditions,
      message,
      tip: 'Apply suggestedConditions to your SQL WHERE clause.',
    };
  } catch (error) {
    // Failures in the AI step are reported in the tool result rather than thrown.
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
      tip: 'Fallback to local term matching.',
    };
  }
}
1212
+
1213
/**
 * Calculate a relevance score for a glossary term against a search query.
 *
 * Comparison is case-insensitive. Rules are checked from the highest score to
 * the lowest so that the best applicable score always wins:
 *
 *   1.0  term equals the query
 *   0.9  any synonym equals the query
 *   0.8  term contains the query
 *   0.7  query contains the term
 *   0.6  any synonym contains the query
 *   0.5  none of the above (the caller already considers the term a match)
 *
 * Every synonym is checked for an exact match before any partial synonym
 * match is considered, so the order of the synonym list does not matter.
 *
 * @param query - The search query.
 * @param term - The term name and its synonyms; a missing `synonyms` list is
 *   treated as empty.
 * @returns A score between 0.5 and 1.0.
 */
function calculateTermRelevance(query: string, term: { term: string; synonyms: string[] }): number {
  const lowerQuery = query.toLowerCase();
  const lowerTerm = term.term.toLowerCase();
  // Tolerate records without a synonym list instead of throwing.
  const lowerSynonyms = (term.synonyms ?? []).map(synonym => synonym.toLowerCase());

  if (lowerTerm === lowerQuery) return 1.0;
  if (lowerSynonyms.includes(lowerQuery)) return 0.9;
  if (lowerTerm.includes(lowerQuery)) return 0.8;
  if (lowerQuery.includes(lowerTerm)) return 0.7;
  if (lowerSynonyms.some(synonym => synonym.includes(lowerQuery))) return 0.6;

  return 0.5;
}
1237
+
823
1238
  // ============================================================
824
1239
  // Utility Functions
825
1240
  // ============================================================
@@ -0,0 +1,55 @@
1
+ version: 1.0.0
2
+ exportedAt: "2025-12-31T19:42:09.637Z"
3
+ terms:
4
+ -
5
+ term: 최근 주문
6
+ definition: 30일 이내에 발생한 주문
7
+ sql: order_date >= CURRENT_DATE - INTERVAL '30 days'
8
+ synonyms: ["recent orders", "30일 이내 주문", "30일내 주문", "최근 30일 주문"]
9
+ tables: ["orders"]
10
+ columns: ["order_date"]
11
+ category: time
12
+ examples: ["최근 주문 목록을 보여줘", "30일 이내 주문한 고객들", "recent orders analysis"]
13
+ -
14
+ term: VIP 고객
15
+ definition: 총 주문 건수가 10건 이상인 고객
16
+ sql: COUNT(order_id) >= 10
17
+ synonyms: ["VIP customer", "VIP customers", "우수 고객", "10건 이상 주문 고객"]
18
+ tables: ["customers", "orders"]
19
+ columns: ["customer_id"]
20
+ category: custom
21
+ examples: ["VIP 고객 리스트", "주문 10건 이상인 고객들", "VIP customers with high order volume"]
22
+ -
23
+ term: 신규 가입자
24
+ definition: 7일 이내에 가입한 사용자
25
+ sql: created_at >= DATE_SUB(CURRENT_DATE, INTERVAL 7 DAY)
26
+ synonyms: ["새로운 사용자", "신규 회원", "new users", "recent signups"]
27
+ tables: ["users"]
28
+ columns: ["created_at"]
29
+ category: filter
30
+ examples: ["신규 가입자 목록을 보여주세요", "지난 주에 가입한 신규 가입자는 몇 명인가요?", "신규 가입자들의 평균 나이는?", "Show me new users this week"]
31
+ -
32
+ term: 활성 사용자
33
+ definition: status가 1인 사용자
34
+ sql: status = 1
35
+ synonyms: ["active user", "활성화된 사용자"]
36
+ tables: ["users"]
37
+ category: status
38
+ -
39
+ term: 프리미엄 회원
40
+ definition: 월 구독료가 10,000원 이상인 회원
41
+ sql: subscription_fee >= 10000
42
+ synonyms: ["premium member", "premium user", "프리미엄 사용자", "유료 회원"]
43
+ tables: ["users", "subscriptions"]
44
+ columns: ["subscription_fee", "monthly_fee"]
45
+ category: filter
46
+ examples: ["프리미엄 회원들의 평균 사용량은?", "프리미엄 회원 중에서 활성 사용자는 몇 명인가요?", "이번 달 프리미엄 회원 가입자 수는?"]
47
+ -
48
+ term: 휴면 계정
49
+ definition: 마지막 로그인으로부터 90일 이상 지난 계정
50
+ sql: last_login_date < DATE_SUB(NOW(), INTERVAL 90 DAY)
51
+ synonyms: ["dormant account", "inactive user", "비활성 계정", "장기 미접속자"]
52
+ tables: ["users"]
53
+ columns: ["last_login_date", "last_access_date"]
54
+ category: status
55
+ examples: ["휴면 계정은 총 몇 개인가요?", "휴면 계정 중에서 VIP 고객은?", "휴면 계정을 활성화시키려면?"]
package/test-mcp.db ADDED
Binary file