db-mcp 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +860 -0
  3. package/dist/adapters/DatabaseAdapter.d.ts +141 -0
  4. package/dist/adapters/DatabaseAdapter.d.ts.map +1 -0
  5. package/dist/adapters/DatabaseAdapter.js +131 -0
  6. package/dist/adapters/DatabaseAdapter.js.map +1 -0
  7. package/dist/adapters/sqlite/SchemaManager.d.ts +58 -0
  8. package/dist/adapters/sqlite/SchemaManager.d.ts.map +1 -0
  9. package/dist/adapters/sqlite/SchemaManager.js +187 -0
  10. package/dist/adapters/sqlite/SchemaManager.js.map +1 -0
  11. package/dist/adapters/sqlite/SqliteAdapter.d.ts +161 -0
  12. package/dist/adapters/sqlite/SqliteAdapter.d.ts.map +1 -0
  13. package/dist/adapters/sqlite/SqliteAdapter.js +741 -0
  14. package/dist/adapters/sqlite/SqliteAdapter.js.map +1 -0
  15. package/dist/adapters/sqlite/index.d.ts +9 -0
  16. package/dist/adapters/sqlite/index.d.ts.map +1 -0
  17. package/dist/adapters/sqlite/index.js +8 -0
  18. package/dist/adapters/sqlite/index.js.map +1 -0
  19. package/dist/adapters/sqlite/json-utils.d.ts +100 -0
  20. package/dist/adapters/sqlite/json-utils.d.ts.map +1 -0
  21. package/dist/adapters/sqlite/json-utils.js +274 -0
  22. package/dist/adapters/sqlite/json-utils.js.map +1 -0
  23. package/dist/adapters/sqlite/output-schemas.d.ts +1187 -0
  24. package/dist/adapters/sqlite/output-schemas.d.ts.map +1 -0
  25. package/dist/adapters/sqlite/output-schemas.js +1337 -0
  26. package/dist/adapters/sqlite/output-schemas.js.map +1 -0
  27. package/dist/adapters/sqlite/prompts.d.ts +13 -0
  28. package/dist/adapters/sqlite/prompts.d.ts.map +1 -0
  29. package/dist/adapters/sqlite/prompts.js +605 -0
  30. package/dist/adapters/sqlite/prompts.js.map +1 -0
  31. package/dist/adapters/sqlite/resources.d.ts +13 -0
  32. package/dist/adapters/sqlite/resources.d.ts.map +1 -0
  33. package/dist/adapters/sqlite/resources.js +251 -0
  34. package/dist/adapters/sqlite/resources.js.map +1 -0
  35. package/dist/adapters/sqlite/tools/admin.d.ts +14 -0
  36. package/dist/adapters/sqlite/tools/admin.d.ts.map +1 -0
  37. package/dist/adapters/sqlite/tools/admin.js +788 -0
  38. package/dist/adapters/sqlite/tools/admin.js.map +1 -0
  39. package/dist/adapters/sqlite/tools/core.d.ts +25 -0
  40. package/dist/adapters/sqlite/tools/core.d.ts.map +1 -0
  41. package/dist/adapters/sqlite/tools/core.js +359 -0
  42. package/dist/adapters/sqlite/tools/core.js.map +1 -0
  43. package/dist/adapters/sqlite/tools/fts.d.ts +13 -0
  44. package/dist/adapters/sqlite/tools/fts.d.ts.map +1 -0
  45. package/dist/adapters/sqlite/tools/fts.js +347 -0
  46. package/dist/adapters/sqlite/tools/fts.js.map +1 -0
  47. package/dist/adapters/sqlite/tools/geo.d.ts +14 -0
  48. package/dist/adapters/sqlite/tools/geo.d.ts.map +1 -0
  49. package/dist/adapters/sqlite/tools/geo.js +252 -0
  50. package/dist/adapters/sqlite/tools/geo.js.map +1 -0
  51. package/dist/adapters/sqlite/tools/index.d.ts +30 -0
  52. package/dist/adapters/sqlite/tools/index.d.ts.map +1 -0
  53. package/dist/adapters/sqlite/tools/index.js +61 -0
  54. package/dist/adapters/sqlite/tools/index.js.map +1 -0
  55. package/dist/adapters/sqlite/tools/json-helpers.d.ts +14 -0
  56. package/dist/adapters/sqlite/tools/json-helpers.d.ts.map +1 -0
  57. package/dist/adapters/sqlite/tools/json-helpers.js +477 -0
  58. package/dist/adapters/sqlite/tools/json-helpers.js.map +1 -0
  59. package/dist/adapters/sqlite/tools/json-operations.d.ts +14 -0
  60. package/dist/adapters/sqlite/tools/json-operations.d.ts.map +1 -0
  61. package/dist/adapters/sqlite/tools/json-operations.js +839 -0
  62. package/dist/adapters/sqlite/tools/json-operations.js.map +1 -0
  63. package/dist/adapters/sqlite/tools/stats.d.ts +15 -0
  64. package/dist/adapters/sqlite/tools/stats.d.ts.map +1 -0
  65. package/dist/adapters/sqlite/tools/stats.js +1219 -0
  66. package/dist/adapters/sqlite/tools/stats.js.map +1 -0
  67. package/dist/adapters/sqlite/tools/text.d.ts +14 -0
  68. package/dist/adapters/sqlite/tools/text.d.ts.map +1 -0
  69. package/dist/adapters/sqlite/tools/text.js +1141 -0
  70. package/dist/adapters/sqlite/tools/text.js.map +1 -0
  71. package/dist/adapters/sqlite/tools/vector.d.ts +14 -0
  72. package/dist/adapters/sqlite/tools/vector.d.ts.map +1 -0
  73. package/dist/adapters/sqlite/tools/vector.js +613 -0
  74. package/dist/adapters/sqlite/tools/vector.js.map +1 -0
  75. package/dist/adapters/sqlite/tools/virtual.d.ts +13 -0
  76. package/dist/adapters/sqlite/tools/virtual.d.ts.map +1 -0
  77. package/dist/adapters/sqlite/tools/virtual.js +930 -0
  78. package/dist/adapters/sqlite/tools/virtual.js.map +1 -0
  79. package/dist/adapters/sqlite/types.d.ts +207 -0
  80. package/dist/adapters/sqlite/types.d.ts.map +1 -0
  81. package/dist/adapters/sqlite/types.js +186 -0
  82. package/dist/adapters/sqlite/types.js.map +1 -0
  83. package/dist/adapters/sqlite-native/NativeSqliteAdapter.d.ts +163 -0
  84. package/dist/adapters/sqlite-native/NativeSqliteAdapter.d.ts.map +1 -0
  85. package/dist/adapters/sqlite-native/NativeSqliteAdapter.js +748 -0
  86. package/dist/adapters/sqlite-native/NativeSqliteAdapter.js.map +1 -0
  87. package/dist/adapters/sqlite-native/index.d.ts +11 -0
  88. package/dist/adapters/sqlite-native/index.d.ts.map +1 -0
  89. package/dist/adapters/sqlite-native/index.js +11 -0
  90. package/dist/adapters/sqlite-native/index.js.map +1 -0
  91. package/dist/adapters/sqlite-native/tools/spatialite.d.ts +19 -0
  92. package/dist/adapters/sqlite-native/tools/spatialite.d.ts.map +1 -0
  93. package/dist/adapters/sqlite-native/tools/spatialite.js +628 -0
  94. package/dist/adapters/sqlite-native/tools/spatialite.js.map +1 -0
  95. package/dist/adapters/sqlite-native/tools/transactions.d.ts +12 -0
  96. package/dist/adapters/sqlite-native/tools/transactions.d.ts.map +1 -0
  97. package/dist/adapters/sqlite-native/tools/transactions.js +255 -0
  98. package/dist/adapters/sqlite-native/tools/transactions.js.map +1 -0
  99. package/dist/adapters/sqlite-native/tools/window.d.ts +12 -0
  100. package/dist/adapters/sqlite-native/tools/window.d.ts.map +1 -0
  101. package/dist/adapters/sqlite-native/tools/window.js +370 -0
  102. package/dist/adapters/sqlite-native/tools/window.js.map +1 -0
  103. package/dist/auth/AuthorizationServerDiscovery.d.ts +90 -0
  104. package/dist/auth/AuthorizationServerDiscovery.d.ts.map +1 -0
  105. package/dist/auth/AuthorizationServerDiscovery.js +204 -0
  106. package/dist/auth/AuthorizationServerDiscovery.js.map +1 -0
  107. package/dist/auth/OAuthResourceServer.d.ts +65 -0
  108. package/dist/auth/OAuthResourceServer.d.ts.map +1 -0
  109. package/dist/auth/OAuthResourceServer.js +121 -0
  110. package/dist/auth/OAuthResourceServer.js.map +1 -0
  111. package/dist/auth/TokenValidator.d.ts +60 -0
  112. package/dist/auth/TokenValidator.d.ts.map +1 -0
  113. package/dist/auth/TokenValidator.js +235 -0
  114. package/dist/auth/TokenValidator.js.map +1 -0
  115. package/dist/auth/errors.d.ts +74 -0
  116. package/dist/auth/errors.d.ts.map +1 -0
  117. package/dist/auth/errors.js +133 -0
  118. package/dist/auth/errors.js.map +1 -0
  119. package/dist/auth/index.d.ts +13 -0
  120. package/dist/auth/index.d.ts.map +1 -0
  121. package/dist/auth/index.js +15 -0
  122. package/dist/auth/index.js.map +1 -0
  123. package/dist/auth/middleware.d.ts +81 -0
  124. package/dist/auth/middleware.d.ts.map +1 -0
  125. package/dist/auth/middleware.js +291 -0
  126. package/dist/auth/middleware.js.map +1 -0
  127. package/dist/auth/scopes.d.ts +136 -0
  128. package/dist/auth/scopes.d.ts.map +1 -0
  129. package/dist/auth/scopes.js +349 -0
  130. package/dist/auth/scopes.js.map +1 -0
  131. package/dist/auth/types.d.ts +257 -0
  132. package/dist/auth/types.d.ts.map +1 -0
  133. package/dist/auth/types.js +8 -0
  134. package/dist/auth/types.js.map +1 -0
  135. package/dist/cli.d.ts +8 -0
  136. package/dist/cli.d.ts.map +1 -0
  137. package/dist/cli.js +236 -0
  138. package/dist/cli.js.map +1 -0
  139. package/dist/constants/ServerInstructions.d.ts +45 -0
  140. package/dist/constants/ServerInstructions.d.ts.map +1 -0
  141. package/dist/constants/ServerInstructions.js +356 -0
  142. package/dist/constants/ServerInstructions.js.map +1 -0
  143. package/dist/filtering/ToolConstants.d.ts +34 -0
  144. package/dist/filtering/ToolConstants.d.ts.map +1 -0
  145. package/dist/filtering/ToolConstants.js +174 -0
  146. package/dist/filtering/ToolConstants.js.map +1 -0
  147. package/dist/filtering/ToolFilter.d.ts +82 -0
  148. package/dist/filtering/ToolFilter.d.ts.map +1 -0
  149. package/dist/filtering/ToolFilter.js +296 -0
  150. package/dist/filtering/ToolFilter.js.map +1 -0
  151. package/dist/index.d.ts +13 -0
  152. package/dist/index.d.ts.map +1 -0
  153. package/dist/index.js +17 -0
  154. package/dist/index.js.map +1 -0
  155. package/dist/server/McpServer.d.ts +61 -0
  156. package/dist/server/McpServer.d.ts.map +1 -0
  157. package/dist/server/McpServer.js +270 -0
  158. package/dist/server/McpServer.js.map +1 -0
  159. package/dist/transports/http.d.ts +134 -0
  160. package/dist/transports/http.d.ts.map +1 -0
  161. package/dist/transports/http.js +516 -0
  162. package/dist/transports/http.js.map +1 -0
  163. package/dist/transports/index.d.ts +5 -0
  164. package/dist/transports/index.d.ts.map +1 -0
  165. package/dist/transports/index.js +5 -0
  166. package/dist/transports/index.js.map +1 -0
  167. package/dist/types/index.d.ts +380 -0
  168. package/dist/types/index.d.ts.map +1 -0
  169. package/dist/types/index.js +68 -0
  170. package/dist/types/index.js.map +1 -0
  171. package/dist/utils/annotations.d.ts +44 -0
  172. package/dist/utils/annotations.d.ts.map +1 -0
  173. package/dist/utils/annotations.js +77 -0
  174. package/dist/utils/annotations.js.map +1 -0
  175. package/dist/utils/errors.d.ts +155 -0
  176. package/dist/utils/errors.d.ts.map +1 -0
  177. package/dist/utils/errors.js +329 -0
  178. package/dist/utils/errors.js.map +1 -0
  179. package/dist/utils/identifiers.d.ts +121 -0
  180. package/dist/utils/identifiers.d.ts.map +1 -0
  181. package/dist/utils/identifiers.js +319 -0
  182. package/dist/utils/identifiers.js.map +1 -0
  183. package/dist/utils/index.d.ts +7 -0
  184. package/dist/utils/index.d.ts.map +1 -0
  185. package/dist/utils/index.js +7 -0
  186. package/dist/utils/index.js.map +1 -0
  187. package/dist/utils/insightsManager.d.ts +39 -0
  188. package/dist/utils/insightsManager.d.ts.map +1 -0
  189. package/dist/utils/insightsManager.js +63 -0
  190. package/dist/utils/insightsManager.js.map +1 -0
  191. package/dist/utils/logger.d.ts +189 -0
  192. package/dist/utils/logger.d.ts.map +1 -0
  193. package/dist/utils/logger.js +394 -0
  194. package/dist/utils/logger.js.map +1 -0
  195. package/dist/utils/progress-utils.d.ts +54 -0
  196. package/dist/utils/progress-utils.d.ts.map +1 -0
  197. package/dist/utils/progress-utils.js +74 -0
  198. package/dist/utils/progress-utils.js.map +1 -0
  199. package/dist/utils/resourceAnnotations.d.ts +36 -0
  200. package/dist/utils/resourceAnnotations.d.ts.map +1 -0
  201. package/dist/utils/resourceAnnotations.js +57 -0
  202. package/dist/utils/resourceAnnotations.js.map +1 -0
  203. package/dist/utils/where-clause.d.ts +41 -0
  204. package/dist/utils/where-clause.d.ts.map +1 -0
  205. package/dist/utils/where-clause.js +116 -0
  206. package/dist/utils/where-clause.js.map +1 -0
  207. package/package.json +83 -0
  208. package/server.json +53 -0
@@ -0,0 +1,1219 @@
1
+ /**
2
+ * SQLite Statistics Tools
3
+ *
4
+ * Statistical analysis and aggregation functions:
5
+ * sum, avg, min, max, count, distinct, percentile, histogram, correlation,
6
+ * outlier detection, regression, hypothesis testing.
7
+ * 13 tools total.
8
+ */
9
+ import { z } from "zod";
10
+ import { readOnly } from "../../../utils/annotations.js";
11
+ import { validateWhereClause, sanitizeIdentifier, } from "../../../utils/index.js";
12
+ import { StatsBasicOutputSchema, StatsCountOutputSchema, StatsGroupByOutputSchema, StatsHistogramOutputSchema, StatsPercentileOutputSchema, StatsCorrelationOutputSchema, StatsTopNOutputSchema, StatsDistinctOutputSchema, StatsSummaryOutputSchema, StatsFrequencyOutputSchema, } from "../output-schemas.js";
// Stats schemas
//
// Count-like inputs that the handlers in this file splice into SQL
// (`LIMIT ${...}`) or use as a loop bound (`buckets`) are restricted to
// integers. SQLite rejects a LIMIT that is not an integer and treats a negative
// LIMIT as "no upper bound", and a zero or fractional bucket count produces an
// empty or lopsided CASE list in the histogram query.
const BasicStatsSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Numeric column for statistics"),
    whereClause: z.string().optional(),
});
const CountSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().optional().describe("Column to count (default: *)"),
    distinct: z.boolean().optional().default(false),
    whereClause: z.string().optional(),
});
const GroupByStatsSchema = z.object({
    table: z.string().describe("Table name"),
    valueColumn: z.string().describe("Column for statistical value"),
    groupByColumn: z.string().describe("Column to group by"),
    stat: z
        .enum(["sum", "avg", "min", "max", "count"])
        .describe("Statistic type"),
    whereClause: z.string().optional(),
    orderBy: z.enum(["value", "group"]).optional().default("group"),
    // Interpolated into `LIMIT ...`: must be a non-negative integer.
    limit: z.number().int().nonnegative().optional().default(100),
});
const HistogramSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Numeric column"),
    // Used as a divisor and loop bound: at least one whole bucket is required.
    buckets: z
        .number()
        .int()
        .min(1)
        .optional()
        .default(10)
        .describe("Number of buckets"),
    whereClause: z.string().optional(),
});
const PercentileSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Numeric column"),
    percentiles: z
        .array(z.number().min(0).max(100))
        .describe("Percentiles to compute"),
    whereClause: z.string().optional(),
});
const CorrelationSchema = z.object({
    table: z.string().describe("Table name"),
    column1: z.string().describe("First numeric column"),
    column2: z.string().describe("Second numeric column"),
    whereClause: z.string().optional(),
});
const TopNSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Column to rank"),
    // Interpolated into `LIMIT ...`: must be a non-negative integer.
    n: z
        .number()
        .int()
        .nonnegative()
        .optional()
        .default(10)
        .describe("Number of top values"),
    orderDirection: z.enum(["asc", "desc"]).optional().default("desc"),
    whereClause: z.string().optional(),
    selectColumns: z
        .array(z.string())
        .optional()
        .describe("Columns to include in result (default: all columns)"),
});
const DistinctValuesSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Column to get distinct values"),
    // Interpolated into `LIMIT ...`: must be a non-negative integer.
    limit: z.number().int().nonnegative().optional().default(100),
    whereClause: z.string().optional(),
});
const SummaryStatsSchema = z.object({
    table: z.string().describe("Table name"),
    columns: z
        .array(z.string())
        .optional()
        .describe("Columns to summarize (default: all numeric)"),
    whereClause: z.string().optional(),
});
const FrequencySchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Column to count frequency"),
    // NOTE(review): presumably also ends up in a LIMIT clause; confirm in the
    // frequency handler before tightening this to an integer as well.
    limit: z.number().optional().default(20),
    whereClause: z.string().optional(),
});
// New statistical schemas
const OutlierSchema = z.object({
    table: z.string().describe("Table name"),
    column: z.string().describe("Numeric column to analyze"),
    method: z.enum(["iqr", "zscore"]).optional().default("iqr"),
    threshold: z
        .number()
        .optional()
        .describe("IQR multiplier (default 1.5) or Z-score threshold (default 3)"),
    whereClause: z.string().optional(),
    // NOTE(review): presumably also ends up in a LIMIT clause; confirm in the
    // outlier handler before tightening this to an integer as well.
    limit: z.number().optional().default(100),
});
const RegressionSchema = z.object({
    table: z.string().describe("Table name"),
    xColumn: z.string().describe("Independent variable column"),
    yColumn: z.string().describe("Dependent variable column"),
    degree: z
        .number()
        .min(1)
        .max(3)
        .optional()
        .default(1)
        .describe("Polynomial degree (1=linear)"),
    whereClause: z.string().optional(),
});
const HypothesisSchema = z.object({
    table: z.string().describe("Table name"),
    testType: z.enum(["ttest_one", "ttest_two", "chi_square"]),
    column: z.string().describe("Primary column for analysis"),
    column2: z
        .string()
        .optional()
        .describe("Second column for two-sample t-test"),
    groupColumn: z.string().optional().describe("Group column for chi-square"),
    expectedMean: z
        .number()
        .optional()
        .describe("Expected mean for one-sample t-test"),
    whereClause: z.string().optional(),
});
/**
 * Get all statistics tools
 *
 * Calls every stats tool factory with the same adapter and collects the
 * resulting tool definitions in a fixed order.
 *
 * @param adapter - Adapter instance forwarded unchanged to each factory.
 * @returns Array with one tool definition per factory (13 entries).
 */
export function getStatsTools(adapter) {
    const toolFactories = [
        createBasicStatsTool,
        createCountTool,
        createGroupByStatsTool,
        createHistogramTool,
        createPercentileTool,
        createCorrelationTool,
        createTopNTool,
        createDistinctValuesTool,
        createSummaryStatsTool,
        createFrequencyTool,
        // New statistical tools
        createOutlierTool,
        createRegressionTool,
        createHypothesisTool,
    ];
    // Wrap in an arrow so each factory receives only the adapter
    // (a bare `.map(factory)` would also pass index and array).
    return toolFactories.map((factory) => factory(adapter));
}
/**
 * Basic statistics (count, sum, avg, min, max, range)
 *
 * Builds the `sqlite_stats_basic` tool. Its handler runs a single aggregate
 * query over one column and returns each aggregate as a number or null.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createBasicStatsTool(adapter) {
    // Numbers pass through untouched; null/undefined and anything that does
    // not convert to a number become null.
    const asNumberOrNull = (cell) => {
        if (cell === null || cell === undefined) {
            return null;
        }
        if (typeof cell === "number") {
            return cell;
        }
        const converted = Number(cell);
        return Number.isNaN(converted) ? null : converted;
    };
    const handler = async (params, _context) => {
        const { table, column, whereClause } = BasicStatsSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(table);
        const quotedColumn = sanitizeIdentifier(column);
        // The optional filter is checked before it is appended to the query.
        let whereSuffix = "";
        if (whereClause) {
            validateWhereClause(whereClause);
            whereSuffix = ` WHERE ${whereClause}`;
        }
        const sql = `SELECT
        COUNT(${quotedColumn}) as count,
        SUM(${quotedColumn}) as sum,
        AVG(${quotedColumn}) as avg,
        MIN(${quotedColumn}) as min,
        MAX(${quotedColumn}) as max,
        MAX(${quotedColumn}) - MIN(${quotedColumn}) as range
      FROM ${quotedTable}${whereSuffix}`;
        const result = await adapter.executeReadQuery(sql);
        // Read one aggregate from the first (only) result row, if present.
        const cell = (key) => result.rows?.[0]?.[key];
        return {
            success: true,
            column,
            stats: {
                count: Number(cell("count") ?? 0),
                sum: asNumberOrNull(cell("sum")),
                avg: asNumberOrNull(cell("avg")),
                min: asNumberOrNull(cell("min")),
                max: asNumberOrNull(cell("max")),
                range: asNumberOrNull(cell("range")),
            },
        };
    };
    return {
        name: "sqlite_stats_basic",
        description: "Get basic statistics (count, sum, avg, min, max) for a numeric column.",
        group: "stats",
        inputSchema: BasicStatsSchema,
        outputSchema: StatsBasicOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Basic Statistics"),
        handler,
    };
}
/**
 * Count rows
 *
 * Builds the `sqlite_stats_count` tool. The handler runs `COUNT(*)`, or
 * `COUNT(column)` / `COUNT(DISTINCT column)` when a column is given.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 *   The handler resolves to `{ success, count, distinct }`, where `count` is
 *   always a number and `distinct` states whether DISTINCT was actually
 *   applied to the query.
 */
function createCountTool(adapter) {
    return {
        name: "sqlite_stats_count",
        description: "Count rows, optionally distinct values in a column.",
        group: "stats",
        inputSchema: CountSchema,
        outputSchema: StatsCountOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Count Rows"),
        handler: async (params, _context) => {
            const input = CountSchema.parse(params);
            // Validate and quote table name
            const table = sanitizeIdentifier(input.table);
            const hasColumn = Boolean(input.column);
            // DISTINCT only takes effect together with a column; without one
            // the query is a plain COUNT(*), so the response must not claim
            // that a distinct count was computed.
            const distinctApplied = hasColumn && Boolean(input.distinct);
            let countExpr = "COUNT(*)";
            if (hasColumn) {
                const column = sanitizeIdentifier(input.column);
                countExpr = distinctApplied
                    ? `COUNT(DISTINCT ${column})`
                    : `COUNT(${column})`;
            }
            let sql = `SELECT ${countExpr} as count FROM ${table}`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                sql += ` WHERE ${input.whereClause}`;
            }
            const result = await adapter.executeReadQuery(sql);
            return {
                success: true,
                // Coerce so the result is a number however the adapter
                // represents the aggregate (same as sqlite_stats_basic).
                count: Number(result.rows?.[0]?.["count"] ?? 0),
                distinct: distinctApplied,
            };
        },
    };
}
/**
 * Group by with aggregation
 *
 * Builds the `sqlite_stats_group_by` tool. The handler applies one aggregate
 * (sum/avg/min/max/count) to a value column per distinct value of a grouping
 * column, ordered descending by either the aggregate or the group.
 *
 * @param adapter - Adapter providing `describeTable(name)` and
 *   `executeReadQuery(sql)`.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createGroupByStatsTool(adapter) {
    const handler = async (params, _context) => {
        const input = GroupByStatsSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(input.table);
        const quotedValue = sanitizeIdentifier(input.valueColumn);
        const quotedGroup = sanitizeIdentifier(input.groupByColumn);
        // Validate that columns exist to prevent SQLite from treating non-existent columns as string literals
        const tableInfo = await adapter.describeTable(input.table);
        const knownColumns = new Set();
        for (const col of tableInfo.columns ?? []) {
            knownColumns.add(col.name.toLowerCase());
        }
        for (const requested of [input.valueColumn, input.groupByColumn]) {
            if (!knownColumns.has(requested.toLowerCase())) {
                throw new Error(`Column "${requested}" not found in table "${input.table}"`);
            }
        }
        const aggregate = input.stat.toUpperCase();
        const sortKey = input.orderBy === "value" ? "stat_value" : quotedGroup;
        const selectPart = `SELECT ${quotedGroup}, ${aggregate}(${quotedValue}) as stat_value
      FROM ${quotedTable}`;
        let wherePart = "";
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            wherePart = ` WHERE ${input.whereClause}`;
        }
        const tailPart = ` GROUP BY ${quotedGroup} ORDER BY ${sortKey} DESC LIMIT ${input.limit}`;
        const result = await adapter.executeReadQuery(`${selectPart}${wherePart}${tailPart}`);
        return {
            success: true,
            statistic: input.stat,
            rowCount: result.rows?.length ?? 0,
            results: result.rows,
        };
    };
    return {
        name: "sqlite_stats_group_by",
        description: "Aggregate statistics grouped by a column.",
        group: "stats",
        inputSchema: GroupByStatsSchema,
        outputSchema: StatsGroupByOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Group By Stats"),
        handler,
    };
}
/**
 * Histogram
 *
 * Builds the `sqlite_stats_histogram` tool. The handler splits the
 * [MIN, MAX] range of a column into equal-width buckets and counts the rows
 * falling into each one.
 *
 * Handler behaviour:
 * - Throws when `buckets` is not a positive integer.
 * - Throws when MIN/MAX of the column cannot be read as finite numbers.
 * - No non-NULL values: resolves with an empty `buckets` array.
 * - All values identical: resolves with a single bucket holding the real
 *   number of non-NULL values.
 * - Otherwise: resolves with `{ success, column, range, bucketSize, buckets }`.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the queries.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createHistogramTool(adapter) {
    return {
        name: "sqlite_stats_histogram",
        description: "Create a histogram with specified number of buckets.",
        group: "stats",
        inputSchema: HistogramSchema,
        outputSchema: StatsHistogramOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Histogram"),
        handler: async (params, _context) => {
            const input = HistogramSchema.parse(params);
            const bucketCount = input.buckets;
            // A zero, negative or fractional bucket count would divide by zero
            // or build an empty / lopsided CASE list below.
            if (!Number.isInteger(bucketCount) || bucketCount < 1) {
                throw new Error("buckets must be a positive integer");
            }
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            // The same filter is applied to both queries; check it once.
            let whereSql = "";
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                whereSql = ` WHERE ${input.whereClause}`;
            }
            // First get min/max plus the number of non-NULL values, so the
            // degenerate cases can report a real count.
            const boundsSql = `SELECT MIN(${column}) as min_val, MAX(${column}) as max_val, COUNT(${column}) as value_count FROM ${table}${whereSql}`;
            const boundsResult = await adapter.executeReadQuery(boundsSql);
            const boundsRow = boundsResult.rows?.[0];
            const valueCount = Number(boundsRow?.["value_count"] ?? 0);
            if (valueCount === 0) {
                // Nothing to bucket: no rows, or every value is NULL.
                return {
                    success: true,
                    column: input.column,
                    buckets: [],
                };
            }
            // Coerce so numeric strings add instead of concatenating.
            const minVal = Number(boundsRow?.["min_val"]);
            const maxVal = Number(boundsRow?.["max_val"]);
            if (!Number.isFinite(minVal) || !Number.isFinite(maxVal)) {
                throw new Error(`Column "${input.column}" does not contain numeric values`);
            }
            const bucketSize = (maxVal - minVal) / bucketCount;
            if (bucketSize === 0) {
                // Every non-NULL value is identical: one bucket holds them all.
                return {
                    success: true,
                    column: input.column,
                    buckets: [{ min: minVal, max: maxVal, count: valueCount }],
                };
            }
            // Bucket boundaries are computed once and shared by the SQL and
            // the response. The last bucket ends exactly at MAX so float
            // rounding cannot push the maximum value outside of it.
            const lastIndex = bucketCount - 1;
            const bounds = Array.from({ length: bucketCount }, (_, i) => ({
                bucket: i,
                min: minVal + i * bucketSize,
                max: i === lastIndex ? maxVal : minVal + (i + 1) * bucketSize,
            }));
            // Build histogram using CASE expressions
            // Final bucket uses <= to include the max value
            const bucketCases = bounds.map(({ bucket, min, max }) => {
                const upperOp = bucket === lastIndex ? "<=" : "<";
                return `SUM(CASE WHEN ${column} >= ${min} AND ${column} ${upperOp} ${max} THEN 1 ELSE 0 END) as bucket_${bucket}`;
            });
            const sql = `SELECT ${bucketCases.join(", ")} FROM ${table}${whereSql}`;
            const result = await adapter.executeReadQuery(sql);
            const countsRow = result.rows?.[0];
            // Format buckets
            const buckets = bounds.map((bound) => ({
                ...bound,
                count: Number(countsRow?.[`bucket_${bound.bucket}`] ?? 0),
            }));
            return {
                success: true,
                column: input.column,
                range: { min: minVal, max: maxVal },
                bucketSize,
                buckets,
            };
        },
    };
}
/**
 * Percentiles
 *
 * Builds the `sqlite_stats_percentile` tool. The handler loads the non-NULL
 * values of a column in ascending order and, for each requested percentile p,
 * picks the value at position ceil(p / 100 * count) - 1, clamped to the valid
 * index range.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createPercentileTool(adapter) {
    const handler = async (params, _context) => {
        const input = PercentileSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(input.table);
        const quotedColumn = sanitizeIdentifier(input.column);
        let extraFilter = "";
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            extraFilter = ` AND ${input.whereClause}`;
        }
        const sql = `SELECT ${quotedColumn} as value FROM ${quotedTable} WHERE ${quotedColumn} IS NOT NULL${extraFilter} ORDER BY ${quotedColumn}`;
        const result = await adapter.executeReadQuery(sql);
        const sortedValues = (result.rows ?? []).map((row) => row["value"]);
        const total = sortedValues.length;
        // Without data every requested percentile is reported as null.
        if (total === 0) {
            return {
                success: true,
                percentiles: input.percentiles.map((p) => ({
                    percentile: p,
                    value: null,
                })),
            };
        }
        // Calculate percentiles
        const valueAt = (p) => {
            const rank = Math.ceil((p / 100) * total) - 1;
            // p = 0 yields rank -1; clamp into [0, total - 1].
            const clamped = Math.min(Math.max(rank, 0), total - 1);
            return sortedValues[clamped];
        };
        return {
            success: true,
            column: input.column,
            count: total,
            percentiles: input.percentiles.map((p) => ({
                percentile: p,
                value: valueAt(p),
            })),
        };
    };
    return {
        name: "sqlite_stats_percentile",
        description: "Calculate percentiles (median, quartiles, etc.) for a column.",
        group: "stats",
        inputSchema: PercentileSchema,
        outputSchema: StatsPercentileOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Percentile"),
        handler,
    };
}
/**
 * Correlation between two columns
 *
 * Builds the `sqlite_stats_correlation` tool. The handler loads every row in
 * which both columns are non-NULL and computes the Pearson correlation
 * coefficient in JavaScript, rounded to four decimals.
 *
 * Handler result:
 * - Fewer than two rows: `correlation` is null, with an explanatory `message`.
 * - Zero denominator (at least one column has no variance): `correlation` is 0.
 * - Non-numeric cells: the sums become NaN and `correlation` is null.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createCorrelationTool(adapter) {
    return {
        name: "sqlite_stats_correlation",
        description: "Calculate Pearson correlation coefficient between two numeric columns.",
        group: "stats",
        inputSchema: CorrelationSchema,
        outputSchema: StatsCorrelationOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Correlation"),
        handler: async (params, _context) => {
            const input = CorrelationSchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const col1 = sanitizeIdentifier(input.column1);
            const col2 = sanitizeIdentifier(input.column2);
            // Get paired values
            let sql = `SELECT ${col1} as x, ${col2} as y
      FROM ${table}
      WHERE ${col1} IS NOT NULL AND ${col2} IS NOT NULL`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                sql += ` AND ${input.whereClause}`;
            }
            const result = await adapter.executeReadQuery(sql);
            const rows = result.rows ?? [];
            const n = rows.length;
            if (n < 2) {
                return {
                    success: true,
                    correlation: null,
                    message: "Need at least 2 data points",
                };
            }
            // Accumulate all five sums in one pass. Cells are coerced with
            // Number() first: with raw values, `0 + "1"` would concatenate
            // into "01" and silently corrupt the result for numeric strings.
            let sumX = 0;
            let sumY = 0;
            let sumXY = 0;
            let sumX2 = 0;
            let sumY2 = 0;
            for (const row of rows) {
                const x = Number(row["x"]);
                const y = Number(row["y"]);
                sumX += x;
                sumY += y;
                sumXY += x * y;
                sumX2 += x * x;
                sumY2 += y * y;
            }
            const numerator = n * sumXY - sumX * sumY;
            const denominator = Math.sqrt((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY));
            const correlation = denominator === 0 ? 0 : numerator / denominator;
            // NaN (non-numeric data, or a negative radicand from rounding)
            // is reported as null instead of leaking into the response.
            const roundedCorrelation = Number.isNaN(correlation)
                ? null
                : Math.round(correlation * 10000) / 10000;
            return {
                success: true,
                column1: input.column1,
                column2: input.column2,
                n,
                correlation: roundedCorrelation,
            };
        },
    };
}
/**
 * Top N values
 *
 * Builds the `sqlite_stats_top_n` tool. The handler returns the first `n`
 * rows of a table ordered by one column, optionally restricted to a chosen
 * set of result columns.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 */
function createTopNTool(adapter) {
    const handler = async (params, _context) => {
        const input = TopNSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(input.table);
        const rankColumn = sanitizeIdentifier(input.column);
        const direction = input.orderDirection.toUpperCase();
        // Build column list - use specified columns or default to all
        const wantsProjection = Boolean(input.selectColumns) && input.selectColumns.length > 0;
        const projection = wantsProjection
            ? input.selectColumns.map((name) => sanitizeIdentifier(name)).join(", ")
            : "*";
        let filter = "";
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            filter = ` WHERE ${input.whereClause}`;
        }
        const sql = `SELECT ${projection} FROM ${quotedTable}${filter} ORDER BY ${rankColumn} ${direction} LIMIT ${input.n}`;
        const result = await adapter.executeReadQuery(sql);
        return {
            success: true,
            column: input.column,
            direction: input.orderDirection,
            count: result.rows?.length ?? 0,
            rows: result.rows,
        };
    };
    return {
        name: "sqlite_stats_top_n",
        description: "Get top N values from a column.",
        group: "stats",
        inputSchema: TopNSchema,
        outputSchema: StatsTopNOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Top N Values"),
        handler,
    };
}
/**
 * Distinct values
 *
 * Builds the `sqlite_stats_distinct` tool. The handler selects the distinct
 * values of one column, capped by `limit`.
 *
 * @param adapter - Adapter whose `executeReadQuery(sql)` runs the query.
 * @returns Tool definition (name, schemas, scopes, annotations, handler).
 *   The handler resolves to `{ success, column, distinctCount, values }`.
 *   `values` is always an array, and `distinctCount` is its length, so it
 *   never exceeds `limit`.
 */
function createDistinctValuesTool(adapter) {
    return {
        name: "sqlite_stats_distinct",
        description: "Get distinct values from a column.",
        group: "stats",
        inputSchema: DistinctValuesSchema,
        outputSchema: StatsDistinctOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Distinct Values"),
        handler: async (params, _context) => {
            const input = DistinctValuesSchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            let sql = `SELECT DISTINCT ${column} as value FROM ${table}`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                sql += ` WHERE ${input.whereClause}`;
            }
            sql += ` LIMIT ${input.limit}`;
            const result = await adapter.executeReadQuery(sql);
            // Normalise once so the count and the list always agree, even
            // when the adapter returns no `rows` property at all.
            const rows = result.rows ?? [];
            return {
                success: true,
                column: input.column,
                distinctCount: rows.length,
                values: rows.map((r) => r["value"]),
            };
        },
    };
}
549
/**
 * Tool factory for `sqlite_stats_summary`: COUNT / AVG / MIN / MAX for several
 * columns of one table.
 *
 * Columns come from `input.columns` when given; otherwise every column whose
 * declared type starts with a known numeric type name is used. One query is run
 * per column; a column whose query fails is reported as
 * `{ column, error: "Not numeric" }` instead of failing the whole call.
 *
 * @param adapter - object exposing `describeTable(name)` and `executeReadQuery(sql)`
 * @returns the tool definition (metadata plus async `handler`)
 */
function createSummaryStatsTool(adapter) {
  // Numeric SQLite column types (matched as lower-case prefixes of the declared type)
  const numericTypes = new Set([
    "integer",
    "int",
    "real",
    "float",
    "double",
    "numeric",
    "decimal",
    "number",
    "smallint",
    "bigint",
    "tinyint",
    "mediumint",
  ]);
  // True when the declared column type starts with one of the numeric type names
  const isNumericType = (declaredType) => {
    const typeLower = (declaredType ?? "").toLowerCase();
    for (const numericType of numericTypes) {
      if (typeLower.startsWith(numericType)) {
        return true;
      }
    }
    return false;
  };
  // Coerce an aggregate result to a number. Only NaN (unparseable) maps to null;
  // a legitimate 0 delivered as "0" or 0n must stay 0.
  const toNumberOrNull = (value) => {
    if (typeof value === "number") {
      return value;
    }
    if (value === null || value === undefined) {
      return null;
    }
    const converted = Number(value);
    return Number.isNaN(converted) ? null : converted;
  };
  return {
    name: "sqlite_stats_summary",
    description: "Get summary statistics for multiple columns at once.",
    group: "stats",
    inputSchema: SummaryStatsSchema,
    outputSchema: StatsSummaryOutputSchema,
    requiredScopes: ["read"],
    annotations: readOnly("Summary Stats"),
    handler: async (params, _context) => {
      const input = SummaryStatsSchema.parse(params);
      // Validate table name
      const table = sanitizeIdentifier(input.table);
      // Get table info to find columns
      const tableInfo = await adapter.describeTable(input.table);
      // Filter to requested columns or auto-detect numeric columns
      let columns;
      if (input.columns && input.columns.length > 0) {
        // User-specified columns - validate all of them before running any query
        for (const col of input.columns) {
          sanitizeIdentifier(col);
        }
        columns = [...input.columns];
      } else {
        // Auto-detect: only include numeric columns
        columns = (tableInfo.columns ?? [])
          .filter((c) => isNumericType(c.type))
          .map((c) => c.name);
      }
      if (columns.length === 0) {
        return {
          success: true,
          table: input.table,
          summaries: [],
        };
      }
      // The WHERE clause is the same for every column: validate and build it once
      let whereSql = "";
      if (input.whereClause) {
        validateWhereClause(input.whereClause);
        whereSql = ` WHERE ${input.whereClause}`;
      }
      // Build summary query for each column
      const summaries = [];
      for (const col of columns) {
        const quotedCol = sanitizeIdentifier(col);
        const sql = `SELECT
          COUNT(${quotedCol}) as count,
          AVG(${quotedCol}) as avg,
          MIN(${quotedCol}) as min,
          MAX(${quotedCol}) as max
          FROM ${table}${whereSql}`;
        try {
          const result = await adapter.executeReadQuery(sql);
          const row = result.rows?.[0];
          summaries.push({
            column: col,
            count: Number(row?.["count"] ?? 0),
            avg: toNumberOrNull(row?.["avg"]),
            min: toNumberOrNull(row?.["min"]),
            max: toNumberOrNull(row?.["max"]),
          });
        } catch {
          // Deliberate best-effort: a failing column is reported, not fatal
          summaries.push({ column: col, error: "Not numeric" });
        }
      }
      return {
        success: true,
        table: input.table,
        summaries,
      };
    },
  };
}
663
/**
 * Tool factory for `sqlite_stats_frequency`: how often each value of a column occurs.
 *
 * The handler groups by the (quoted) column, orders by descending frequency and
 * applies the requested LIMIT. An optional WHERE clause is checked with
 * `validateWhereClause` before being appended.
 *
 * @param adapter - object exposing `executeReadQuery(sql)`
 * @returns the tool definition (metadata plus async `handler`)
 */
function createFrequencyTool(adapter) {
  const handler = async (params, _context) => {
    const { table, column, whereClause, limit } = FrequencySchema.parse(params);
    // Identifiers are validated and quoted before they reach the SQL text
    const quotedTable = sanitizeIdentifier(table);
    const quotedColumn = sanitizeIdentifier(column);
    const selectPart = `SELECT ${quotedColumn} as value, COUNT(*) as frequency
        FROM ${quotedTable}`;
    let filterPart = "";
    if (whereClause) {
      validateWhereClause(whereClause);
      filterPart = ` WHERE ${whereClause}`;
    }
    const groupPart = ` GROUP BY ${quotedColumn} ORDER BY frequency DESC LIMIT ${limit}`;
    const { rows } = await adapter.executeReadQuery(selectPart + filterPart + groupPart);
    return {
      success: true,
      column,
      distinctValues: rows?.length ?? 0,
      distribution: rows,
    };
  };
  return {
    name: "sqlite_stats_frequency",
    description: "Get frequency distribution of values in a column.",
    group: "stats",
    inputSchema: FrequencySchema,
    outputSchema: StatsFrequencyOutputSchema,
    requiredScopes: ["read"],
    annotations: readOnly("Frequency"),
    handler,
  };
}
697
+ // =============================================================================
698
+ // New Statistical Tools
699
+ // =============================================================================
700
/**
 * Approximate standard normal CDF, used for p-value calculation.
 *
 * Evaluates a five-term polynomial in t = 1 / (1 + p * |x| / sqrt(2)) multiplied by
 * exp(-x^2 / 2), then mirrors the result for negative x.
 *
 * @param x - the z-score
 * @returns approximate P(Z <= x)
 */
function normalCDF(x) {
  // Polynomial coefficients, highest order first (a5 .. a1), for Horner evaluation
  const hornerCoefficients = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
  const p = 0.3275911;
  const sign = x < 0 ? -1 : 1;
  const scaled = Math.abs(x) / Math.sqrt(2);
  const t = 1.0 / (1.0 + p * scaled);
  let polynomial = 0;
  for (const coefficient of hornerCoefficients) {
    polynomial = (polynomial + coefficient) * t;
  }
  const y = 1.0 - polynomial * Math.exp(-scaled * scaled);
  return 0.5 * (1.0 + sign * y);
}
716
/**
 * Natural log of the gamma function for z > 0 (Lanczos approximation).
 */
function tDistLogGamma(z) {
  const coefficients = [
    76.18009172947146, -86.50532032941677, 24.01409824083091,
    -1.231739572450155, 0.001208650973866179, -0.000005395239384953,
  ];
  let denominator = z;
  let series = 1.000000000190015;
  for (const coefficient of coefficients) {
    denominator += 1;
    series += coefficient / denominator;
  }
  const shifted = z + 5.5;
  return (z + 0.5) * Math.log(shifted) - shifted + Math.log((2.5066282746310005 * series) / z);
}
/**
 * Regularized incomplete beta function I_x(a, b) for a, b > 0 and 0 <= x <= 1,
 * evaluated with a continued fraction (modified Lentz method).
 */
function tDistIncompleteBeta(x, a, b) {
  if (x <= 0) {
    return 0;
  }
  if (x >= 1) {
    return 1;
  }
  const TINY = 1e-300;
  const EPSILON = 1e-14;
  const MAX_ITERATIONS = 500;
  const clampTiny = (value) => (Math.abs(value) < TINY ? TINY : value);
  // Continued fraction for the incomplete beta function
  const continuedFraction = (p, q, u) => {
    const sumPQ = p + q;
    const pPlus = p + 1;
    const pMinus = p - 1;
    let c = 1;
    let d = 1 / clampTiny(1 - (sumPQ * u) / pPlus);
    let h = d;
    for (let m = 1; m <= MAX_ITERATIONS; m++) {
      const m2 = 2 * m;
      // Even step
      let term = (m * (q - m) * u) / ((pMinus + m2) * (p + m2));
      d = 1 / clampTiny(1 + term * d);
      c = clampTiny(1 + term / c);
      h *= d * c;
      // Odd step
      term = (-(p + m) * (sumPQ + m) * u) / ((p + m2) * (pPlus + m2));
      d = 1 / clampTiny(1 + term * d);
      c = clampTiny(1 + term / c);
      const delta = d * c;
      h *= delta;
      if (Math.abs(delta - 1) < EPSILON) {
        break;
      }
    }
    return h;
  };
  const front = Math.exp(
    tDistLogGamma(a + b) - tDistLogGamma(a) - tDistLogGamma(b) + a * Math.log(x) + b * Math.log(1 - x),
  );
  // The continued fraction converges quickly only on one side of the mean;
  // use the symmetry I_x(a, b) = 1 - I_(1-x)(b, a) on the other side.
  if (x < (a + 1) / (a + b + 2)) {
    return (front * continuedFraction(a, b, x)) / a;
  }
  return 1 - (front * continuedFraction(b, a, 1 - x)) / b;
}
/**
 * Two-tailed p-value of Student's t-distribution.
 *
 * For df > 30 the normal approximation is used (via `normalCDF`). For smaller df
 * the exact identity p = I_x(df/2, 1/2) with x = df / (df + t^2) is evaluated,
 * where I is the regularized incomplete beta function. Non-integer df (as
 * produced by Welch's test) is supported.
 *
 * @param t - t-statistic (sign is ignored)
 * @param df - degrees of freedom, must be > 0
 * @returns p-value in [0, 1], or NaN when `t` is NaN or `df` is not positive
 */
function tDistPValue(t, df) {
  if (Number.isNaN(t) || !(df > 0)) {
    return Number.NaN;
  }
  // Use normal approximation for large df
  if (df > 30) {
    return 2 * (1 - normalCDF(Math.abs(t)));
  }
  const x = df / (df + t * t);
  const p = tDistIncompleteBeta(x, df / 2, 0.5);
  // Clamp away rounding noise at the ends of the range
  return Math.min(1, Math.max(0, p));
}
730
/**
 * Tool factory for `sqlite_stats_outliers`: outlier detection using IQR or Z-score.
 *
 * - `zscore`: bounds are mean ± threshold * sample standard deviation
 *   (threshold defaults to 3).
 * - `iqr` (any other method value): bounds are Q1 - k * IQR and Q3 + k * IQR
 *   (k defaults to 1.5). Quartiles are read from the sorted values at indices
 *   floor(n * 0.25) and floor(n * 0.75).
 *
 * NULL values are always excluded. `outlierCount` is the number of rows returned,
 * which is capped by `input.limit`.
 *
 * @param adapter - object exposing `executeReadQuery(sql)`
 * @returns the tool definition (metadata plus async `handler`)
 * @throws from the handler when an identifier or WHERE clause is rejected, or when
 *   the computed bounds are not finite
 */
function createOutlierTool(adapter) {
  // Shape a raw result row as { value, rowid? }; rowid is kept only when numeric
  const toOutlier = (row) => {
    const rowid = row["rowid"];
    return {
      value: Number(row["value"]),
      ...(typeof rowid === "number" ? { rowid } : {}),
    };
  };
  return {
    name: "sqlite_stats_outliers",
    description: "Detect outliers using IQR (Interquartile Range) or Z-score method.",
    group: "stats",
    inputSchema: OutlierSchema,
    outputSchema: z.object({
      success: z.boolean(),
      method: z.string(),
      stats: z.object({
        mean: z.number().optional(),
        stdDev: z.number().optional(),
        q1: z.number().optional(),
        q3: z.number().optional(),
        iqr: z.number().optional(),
        lowerBound: z.number(),
        upperBound: z.number(),
      }),
      outlierCount: z.number(),
      totalRows: z.number(),
      outliers: z.array(z.object({
        value: z.number(),
        rowid: z.number().optional(),
      })),
    }),
    requiredScopes: ["read"],
    annotations: readOnly("Outlier Detection"),
    handler: async (params, _context) => {
      const input = OutlierSchema.parse(params);
      // Validate and quote identifiers
      const table = sanitizeIdentifier(input.table);
      const column = sanitizeIdentifier(input.column);
      // Security: Validate WHERE clause if provided
      if (input.whereClause) {
        validateWhereClause(input.whereClause);
      }
      // Parenthesised so that an OR inside the caller's filter cannot escape the
      // NOT NULL and bounds predicates it is AND-ed with.
      const extraFilter = input.whereClause ? ` AND (${input.whereClause})` : "";
      const baseFilter = `${column} IS NOT NULL${extraFilter}`;
      // Fetch the rows lying outside [lowerBound, upperBound] (shared by both methods)
      const findOutliers = async (lowerBound, upperBound) => {
        if (!Number.isFinite(lowerBound) || !Number.isFinite(upperBound)) {
          // NaN/Infinity would otherwise be interpolated into the SQL text
          throw new Error(`Cannot compute outlier bounds for column "${input.column}" ` +
            `(lower=${lowerBound}, upper=${upperBound}). The column may contain non-numeric data.`);
        }
        const outlierResult = await adapter.executeReadQuery(`SELECT rowid, ${column} as value FROM ${table}
          WHERE ${baseFilter}
          AND (${column} < ${lowerBound} OR ${column} > ${upperBound})
          LIMIT ${input.limit}`);
        return (outlierResult.rows ?? []).map(toOutlier);
      };
      if (input.method === "zscore") {
        const threshold = input.threshold ?? 3;
        // Get mean and sample variance (n - 1 denominator)
        const meanSubquery = `(SELECT AVG(${column}) FROM ${table} WHERE ${baseFilter})`;
        const statsResult = await adapter.executeReadQuery(`SELECT AVG(${column}) as mean,
          (SUM((${column} - ${meanSubquery}) *
          (${column} - ${meanSubquery})) /
          (COUNT(*) - 1)) as variance,
          COUNT(*) as total
          FROM ${table} WHERE ${baseFilter}`);
        const statsRow = statsResult.rows?.[0];
        const mean = Number(statsRow?.["mean"] ?? 0);
        const variance = Number(statsRow?.["variance"] ?? 0);
        const stdDev = Math.sqrt(variance);
        const total = Number(statsRow?.["total"] ?? 0);
        const lowerBound = mean - threshold * stdDev;
        const upperBound = mean + threshold * stdDev;
        const outliers = await findOutliers(lowerBound, upperBound);
        return {
          success: true,
          method: "zscore",
          stats: { mean, stdDev, lowerBound, upperBound },
          outlierCount: outliers.length,
          totalRows: total,
          outliers,
        };
      }
      // IQR method
      const multiplier = input.threshold ?? 1.5;
      // Get sorted values for percentile calculation
      const allResult = await adapter.executeReadQuery(`SELECT ${column} as value FROM ${table}
        WHERE ${baseFilter}
        ORDER BY ${column}`);
      const values = (allResult.rows ?? []).map((r) => Number(r["value"]));
      const n = values.length;
      if (n === 0) {
        return {
          success: true,
          method: "iqr",
          stats: { q1: 0, q3: 0, iqr: 0, lowerBound: 0, upperBound: 0 },
          outlierCount: 0,
          totalRows: 0,
          outliers: [],
        };
      }
      const q1 = values[Math.floor(n * 0.25)] ?? 0;
      const q3 = values[Math.floor(n * 0.75)] ?? 0;
      const iqr = q3 - q1;
      const lowerBound = q1 - multiplier * iqr;
      const upperBound = q3 + multiplier * iqr;
      const outliers = await findOutliers(lowerBound, upperBound);
      return {
        success: true,
        method: "iqr",
        stats: { q1, q3, iqr, lowerBound, upperBound },
        outlierCount: outliers.length,
        totalRows: n,
        outliers,
      };
    },
  };
}
856
// Matrix utility functions for polynomial regression
/**
 * Transpose a matrix given as an array of row arrays.
 *
 * The column count is taken from the first row; entries that are missing
 * (short rows, absent rows) are read as 0.
 *
 * @param A - matrix as an array of rows
 * @returns a new matrix whose rows are the columns of `A`
 */
function matrixTranspose(A) {
  const rowCount = A.length;
  const columnCount = A[0]?.length ?? 0;
  return Array.from({ length: columnCount }, (_unused, j) =>
    Array.from({ length: rowCount }, (_ignored, i) => A[i]?.[j] ?? 0));
}
873
/**
 * Multiply two matrices given as arrays of row arrays.
 *
 * The inner dimension is taken from the first row of `A` and the output width
 * from the first row of `B`; missing entries are read as 0.
 *
 * @param A - left matrix
 * @param B - right matrix
 * @returns the product `A * B` as a new matrix
 */
function matrixMultiply(A, B) {
  const innerSize = A[0]?.length ?? 0;
  const outputWidth = B[0]?.length ?? 0;
  // Dot product of row i of A with column j of B
  const cell = (i, j) => {
    let total = 0;
    for (let k = 0; k < innerSize; k++) {
      total += (A[i]?.[k] ?? 0) * (B[k]?.[j] ?? 0);
    }
    return total;
  };
  return Array.from({ length: A.length }, (_unused, i) =>
    Array.from({ length: outputWidth }, (_ignored, j) => cell(i, j)));
}
891
/**
 * Invert a square matrix with Gauss-Jordan elimination and partial pivoting.
 *
 * Works on an augmented copy [A | I]; the input matrix is not modified.
 *
 * @param A - square matrix as an array of row arrays
 * @returns the inverse of `A` as a new matrix
 * @throws Error when a pivot has absolute value below 1e-10 (matrix treated as singular)
 */
function matrixInverse(A) {
  const size = A.length;
  const width = 2 * size;
  // Augmented matrix [A | I]
  const work = A.map((row, i) => {
    const identityRow = Array.from({ length: size }, (_unused, j) => (i === j ? 1 : 0));
    return [...row, ...identityRow];
  });
  for (let col = 0; col < size; col++) {
    // Partial pivoting: pick the row at or below `col` with the largest entry in this column
    let pivotIndex = col;
    for (let candidate = col + 1; candidate < size; candidate++) {
      const candidateMagnitude = Math.abs(work[candidate]?.[col] ?? 0);
      const bestMagnitude = Math.abs(work[pivotIndex]?.[col] ?? 0);
      if (candidateMagnitude > bestMagnitude) {
        pivotIndex = candidate;
      }
    }
    const upperRow = work[col];
    const chosenRow = work[pivotIndex];
    if (upperRow && chosenRow) {
      work[col] = chosenRow;
      work[pivotIndex] = upperRow;
    }
    const pivotRow = work[col];
    if (!pivotRow) {
      continue;
    }
    const pivot = pivotRow[col] ?? 0;
    if (Math.abs(pivot) < 1e-10) {
      throw new Error("Matrix is singular, cannot compute inverse");
    }
    // Normalise the pivot row so the pivot becomes 1
    for (let j = 0; j < width; j++) {
      pivotRow[j] = (pivotRow[j] ?? 0) / pivot;
    }
    // Clear this column in every other row
    for (let r = 0; r < size; r++) {
      const target = work[r];
      if (r === col || !target) {
        continue;
      }
      const factor = target[col] ?? 0;
      for (let j = 0; j < width; j++) {
        target[j] = (target[j] ?? 0) - factor * (pivotRow[j] ?? 0);
      }
    }
  }
  // The right half of the augmented matrix now holds the inverse
  return work.map((row) => row.slice(size));
}
943
/**
 * Tool factory for `sqlite_stats_regression`: least-squares polynomial fit of
 * `yColumn` against `xColumn`.
 *
 * The handler loads all (x, y) pairs where both columns are non-NULL, discards
 * pairs that do not convert to finite numbers, solves the normal equation
 * β = (XᵀX)⁻¹Xᵀy with the file's matrix helpers, and reports the coefficients,
 * R² (rounded to 4 decimals) and a printable equation.
 *
 * @param adapter - object exposing `executeReadQuery(sql)`
 * @returns the tool definition (metadata plus async `handler`)
 * @throws from the handler when an identifier or WHERE clause is rejected, when
 *   fewer than degree + 1 usable points exist, or when XᵀX is singular
 */
function createRegressionTool(adapter) {
  return {
    name: "sqlite_stats_regression",
    description: "Perform linear or polynomial regression analysis between two columns.",
    group: "stats",
    inputSchema: RegressionSchema,
    outputSchema: z.object({
      success: z.boolean(),
      type: z.string(),
      sampleSize: z.number(),
      coefficients: z.object({
        intercept: z.number(),
        linear: z.number().optional(),
        quadratic: z.number().optional(),
        cubic: z.number().optional(),
      }),
      rSquared: z.number(),
      equation: z.string(),
    }),
    requiredScopes: ["read"],
    annotations: readOnly("Regression Analysis"),
    handler: async (params, _context) => {
      const input = RegressionSchema.parse(params);
      // Validate and quote identifiers
      const table = sanitizeIdentifier(input.table);
      const xColumn = sanitizeIdentifier(input.xColumn);
      const yColumn = sanitizeIdentifier(input.yColumn);
      // Security: the WHERE clause is spliced into SQL, so validate it like the other stats tools do
      if (input.whereClause) {
        validateWhereClause(input.whereClause);
      }
      // Parenthesised so that an OR inside the caller's filter cannot bypass the NOT NULL predicates
      const andClause = input.whereClause ? ` AND (${input.whereClause})` : "";
      const degree = input.degree ?? 1;
      // Fetch data points
      const sql = `
        SELECT ${xColumn} as x, ${yColumn} as y
        FROM ${table}
        WHERE ${xColumn} IS NOT NULL AND ${yColumn} IS NOT NULL${andClause}
      `;
      const result = await adapter.executeReadQuery(sql);
      // A single non-numeric value would turn every coefficient into NaN, so drop such pairs
      const pairs = (result.rows ?? [])
        .map((r) => ({ x: Number(r["x"]), y: Number(r["y"]) }))
        .filter((p) => Number.isFinite(p.x) && Number.isFinite(p.y));
      if (pairs.length < degree + 1) {
        throw new Error(`Insufficient data for degree ${degree} regression (need at least ${degree + 1} points, got ${pairs.length})`);
      }
      // Build design matrix X = [[1, x, x², ...], ...]
      const X = pairs.map((p) => Array.from({ length: degree + 1 }, (_, i) => Math.pow(p.x, i)));
      const y = pairs.map((p) => [p.y]);
      // Solve β = (XᵀX)⁻¹Xᵀy using normal equation
      const Xt = matrixTranspose(X);
      const XtX = matrixMultiply(Xt, X);
      const XtXInv = matrixInverse(XtX);
      const XtY = matrixMultiply(Xt, y);
      const beta = matrixMultiply(XtXInv, XtY).map((r) => r[0] ?? 0);
      // Calculate R² (coefficient of determination)
      const meanY = pairs.reduce((s, p) => s + p.y, 0) / pairs.length;
      let ssRes = 0; // Sum of squared residuals
      let ssTot = 0; // Total sum of squares
      for (const p of pairs) {
        // Predicted value: β₀ + β₁x + β₂x² + ...
        let predicted = 0;
        for (let i = 0; i <= degree; i++) {
          predicted += (beta[i] ?? 0) * Math.pow(p.x, i);
        }
        ssRes += Math.pow(p.y - predicted, 2);
        ssTot += Math.pow(p.y - meanY, 2);
      }
      // A constant y has no variance to explain; report a perfect fit
      const rSquared = ssTot === 0 ? 1 : 1 - ssRes / ssTot;
      // Build coefficients object
      const coefficients = {
        intercept: beta[0] ?? 0,
      };
      if (degree >= 1) {
        coefficients.linear = beta[1] ?? 0;
      }
      if (degree >= 2) {
        coefficients.quadratic = beta[2] ?? 0;
      }
      if (degree >= 3) {
        coefficients.cubic = beta[3] ?? 0;
      }
      // Build equation string, highest power first
      const terms = [];
      if (degree >= 3 && beta[3] !== undefined) {
        terms.push(`${beta[3].toFixed(4)}x³`);
      }
      if (degree >= 2 && beta[2] !== undefined) {
        const sign = terms.length > 0 && beta[2] >= 0 ? " + " : "";
        terms.push(`${sign}${beta[2].toFixed(4)}x²`);
      }
      if (degree >= 1 && beta[1] !== undefined) {
        const sign = terms.length > 0 && beta[1] >= 0 ? " + " : "";
        terms.push(`${sign}${beta[1].toFixed(4)}x`);
      }
      const interceptSign = terms.length > 0 && (beta[0] ?? 0) >= 0 ? " + " : "";
      terms.push(`${interceptSign}${(beta[0] ?? 0).toFixed(4)}`);
      const equation = `y = ${terms.join("").replace(/^\s*\+\s*/, "")}`;
      return {
        success: true,
        type: degree === 1 ? "linear" : `polynomial_${degree}`,
        sampleSize: pairs.length,
        coefficients,
        rSquared: Math.round(rSquared * 10000) / 10000,
        equation,
      };
    },
  };
}
1046
/**
 * Natural log of the gamma function for z > 0 (Lanczos approximation).
 */
function hypothesisLogGamma(z) {
  const coefficients = [
    76.18009172947146, -86.50532032941677, 24.01409824083091,
    -1.231739572450155, 0.001208650973866179, -0.000005395239384953,
  ];
  let denominator = z;
  let series = 1.000000000190015;
  for (const coefficient of coefficients) {
    denominator += 1;
    series += coefficient / denominator;
  }
  const shifted = z + 5.5;
  return (z + 0.5) * Math.log(shifted) - shifted + Math.log((2.5066282746310005 * series) / z);
}
/**
 * Upper-tail probability of the chi-square distribution: P(X >= chiSquare) for
 * `df` degrees of freedom, i.e. the regularized upper incomplete gamma function
 * Q(df / 2, chiSquare / 2).
 *
 * @returns p-value in [0, 1], or NaN when `chiSquare` is NaN or `df` is not positive
 */
function chiSquareSurvival(chiSquare, df) {
  if (Number.isNaN(chiSquare) || !(df > 0)) {
    return Number.NaN;
  }
  if (chiSquare <= 0) {
    return 1;
  }
  const EPSILON = 1e-14;
  const MAX_ITERATIONS = 500;
  const TINY = 1e-300;
  const a = df / 2;
  const x = chiSquare / 2;
  const front = Math.exp(-x + a * Math.log(x) - hypothesisLogGamma(a));
  const clamp01 = (value) => Math.min(1, Math.max(0, value));
  if (x < a + 1) {
    // Power series for the lower tail P(a, x); converges quickly when x < a + 1
    let index = a;
    let term = 1 / a;
    let sum = term;
    for (let i = 0; i < MAX_ITERATIONS; i++) {
      index += 1;
      term *= x / index;
      sum += term;
      if (Math.abs(term) < Math.abs(sum) * EPSILON) {
        break;
      }
    }
    return clamp01(1 - sum * front);
  }
  // Continued fraction for the upper tail Q(a, x) (modified Lentz method)
  let b = x + 1 - a;
  let c = 1 / TINY;
  let d = 1 / b;
  let h = d;
  for (let i = 1; i <= MAX_ITERATIONS; i++) {
    const an = -i * (i - a);
    b += 2;
    d = an * d + b;
    if (Math.abs(d) < TINY) {
      d = TINY;
    }
    c = b + an / c;
    if (Math.abs(c) < TINY) {
      c = TINY;
    }
    d = 1 / d;
    const delta = d * c;
    h *= delta;
    if (Math.abs(delta - 1) < EPSILON) {
      break;
    }
  }
  return clamp01(front * h);
}
/**
 * Tool factory for `sqlite_stats_hypothesis`: one-sample t-test, two-sample
 * (Welch) t-test between two columns, or chi-square test of independence
 * between `column` and `groupColumn`.
 *
 * All tests report `significant` at the 0.05 level. t-test p-values come from
 * `tDistPValue`; the chi-square p-value is the upper tail of the chi-square
 * distribution with (rows - 1) * (cols - 1) degrees of freedom.
 *
 * @param adapter - object exposing `executeReadQuery(sql)`
 * @returns the tool definition (metadata plus async `handler`)
 * @throws from the handler when an identifier or WHERE clause is rejected, when a
 *   required column is missing, when the sample is too small, when the
 *   t-statistic is not finite, or when either chi-square variable has fewer than
 *   2 categories
 */
function createHypothesisTool(adapter) {
  return {
    name: "sqlite_stats_hypothesis",
    description: "Perform statistical hypothesis tests: one-sample t-test, two-sample t-test, or chi-square test.",
    group: "stats",
    inputSchema: HypothesisSchema,
    outputSchema: z.object({
      success: z.boolean(),
      testType: z.string(),
      statistic: z.number(),
      pValue: z.number(),
      degreesOfFreedom: z.number(),
      significant: z.boolean(),
      details: z.record(z.string(), z.unknown()),
    }),
    requiredScopes: ["read"],
    annotations: readOnly("Hypothesis Testing"),
    handler: async (params, _context) => {
      const input = HypothesisSchema.parse(params);
      // Validate and quote identifiers
      const table = sanitizeIdentifier(input.table);
      const column = sanitizeIdentifier(input.column);
      // Security: the WHERE clause is spliced into SQL, so validate it like the other stats tools do
      if (input.whereClause) {
        validateWhereClause(input.whereClause);
      }
      // Parenthesised so that an OR inside the caller's filter cannot escape the predicates it is AND-ed with
      const whereClause = input.whereClause ? ` AND (${input.whereClause})` : "";
      // SQL for the sum of squared deviations of `col` from its own (filtered) mean
      const sumSquaredDeviations = (col) => {
        const meanSubquery = `(SELECT AVG(${col}) FROM ${table} WHERE ${col} IS NOT NULL${whereClause})`;
        return `SUM((${col} - ${meanSubquery}) *
          (${col} - ${meanSubquery}))`;
      };
      if (input.testType === "ttest_one") {
        const expectedMean = input.expectedMean ?? 0;
        const statsResult = await adapter.executeReadQuery(`SELECT COUNT(*) as n, AVG(${column}) as mean,
          ${sumSquaredDeviations(column)} /
          (COUNT(*) - 1) as variance
          FROM ${table} WHERE ${column} IS NOT NULL${whereClause}`);
        const stats = statsResult.rows?.[0];
        const n = Number(stats?.["n"] ?? 0);
        const mean = Number(stats?.["mean"] ?? 0);
        const variance = Number(stats?.["variance"] ?? 0);
        const stdDev = Math.sqrt(variance);
        const df = n - 1;
        if (n < 2) {
          throw new Error("Insufficient sample size for t-test");
        }
        const tStatistic = (mean - expectedMean) / (stdDev / Math.sqrt(n));
        // Validate result - Infinity or NaN indicates data issues (zero variance, non-numeric column, etc.)
        if (!Number.isFinite(tStatistic)) {
          throw new Error(`Cannot compute t-statistic: stdDev=${stdDev.toFixed(4)}, n=${n}. ` +
            `This may indicate zero variance, non-numeric data, or that column "${input.column}" does not exist.`);
        }
        const pValue = tDistPValue(tStatistic, df);
        return {
          success: true,
          testType: "ttest_one",
          statistic: tStatistic,
          pValue,
          degreesOfFreedom: df,
          significant: pValue < 0.05,
          details: {
            sampleMean: mean,
            sampleStdDev: stdDev,
            sampleSize: n,
            expectedMean,
          },
        };
      }
      if (input.testType === "ttest_two") {
        if (!input.column2) {
          throw new Error("column2 is required for two-sample t-test");
        }
        const column2 = sanitizeIdentifier(input.column2);
        // Get stats for both columns
        const statsResult = await adapter.executeReadQuery(`SELECT
          COUNT(${column}) as n1, AVG(${column}) as mean1,
          ${sumSquaredDeviations(column)} /
          (COUNT(${column}) - 1) as var1,
          COUNT(${column2}) as n2, AVG(${column2}) as mean2,
          ${sumSquaredDeviations(column2)} /
          (COUNT(${column2}) - 1) as var2
          FROM ${table} WHERE 1=1${whereClause}`);
        const stats = statsResult.rows?.[0];
        const n1 = Number(stats?.["n1"] ?? 0);
        const n2 = Number(stats?.["n2"] ?? 0);
        const mean1 = Number(stats?.["mean1"] ?? 0);
        const mean2 = Number(stats?.["mean2"] ?? 0);
        const var1 = Number(stats?.["var1"] ?? 0);
        const var2 = Number(stats?.["var2"] ?? 0);
        if (n1 < 2 || n2 < 2) {
          throw new Error("Insufficient sample size for t-test");
        }
        // Welch's t-test
        const tStatistic = (mean1 - mean2) / Math.sqrt(var1 / n1 + var2 / n2);
        // Validate result - Infinity or NaN indicates data issues
        if (!Number.isFinite(tStatistic)) {
          throw new Error(`Cannot compute t-statistic: var1=${var1.toFixed(4)}, var2=${var2.toFixed(4)}. ` +
            `This may indicate zero variance or non-numeric data.`);
        }
        // Welch-Satterthwaite degrees of freedom, falling back to the pooled value
        const dfNum = Math.pow(var1 / n1 + var2 / n2, 2);
        const dfDen = Math.pow(var1 / n1, 2) / (n1 - 1) + Math.pow(var2 / n2, 2) / (n2 - 1);
        const df = Number.isFinite(dfNum / dfDen) ? dfNum / dfDen : n1 + n2 - 2;
        const pValue = tDistPValue(tStatistic, df);
        return {
          success: true,
          testType: "ttest_two",
          statistic: tStatistic,
          pValue,
          degreesOfFreedom: df,
          significant: pValue < 0.05,
          details: {
            group1: { mean: mean1, variance: var1, n: n1 },
            group2: { mean: mean2, variance: var2, n: n2 },
          },
        };
      }
      // Chi-square test
      if (!input.groupColumn) {
        throw new Error("groupColumn is required for chi-square test");
      }
      if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(input.groupColumn)) {
        throw new Error("Invalid groupColumn name");
      }
      const groupColumn = `"${input.groupColumn}"`;
      // Get contingency table
      const freqResult = await adapter.executeReadQuery(`SELECT ${column} as col1, ${groupColumn} as col2, COUNT(*) as observed
        FROM ${table}
        WHERE ${column} IS NOT NULL AND ${groupColumn} IS NOT NULL${whereClause}
        GROUP BY ${column}, ${groupColumn}`);
      const cells = (freqResult.rows ?? []).map((row) => ({
        rowKey: String(row["col1"]),
        colKey: String(row["col2"]),
        observed: Number(row["observed"]),
      }));
      // Calculate totals
      const rowTotals = new Map();
      const colTotals = new Map();
      let grandTotal = 0;
      for (const { rowKey, colKey, observed } of cells) {
        rowTotals.set(rowKey, (rowTotals.get(rowKey) ?? 0) + observed);
        colTotals.set(colKey, (colTotals.get(colKey) ?? 0) + observed);
        grandTotal += observed;
      }
      // Validate sufficient categories for chi-square test. Checking sizes directly
      // also rejects an empty result, where (0 - 1) * (0 - 1) would give df = 1.
      if (rowTotals.size < 2 || colTotals.size < 2) {
        throw new Error(`Insufficient categories for chi-square test: "${input.column}" has ${rowTotals.size} category(s), "${input.groupColumn}" has ${colTotals.size} category(s). Both columns must have at least 2 distinct values.`);
      }
      // Calculate chi-square statistic over the observed (non-empty) cells
      let chiSquare = 0;
      for (const { rowKey, colKey, observed } of cells) {
        const expected = ((rowTotals.get(rowKey) ?? 0) * (colTotals.get(colKey) ?? 0)) / grandTotal;
        if (expected > 0) {
          chiSquare += Math.pow(observed - expected, 2) / expected;
        }
      }
      const df = (rowTotals.size - 1) * (colTotals.size - 1);
      // Upper tail of the chi-square distribution with `df` degrees of freedom
      const pValue = chiSquareSurvival(chiSquare, df);
      return {
        success: true,
        testType: "chi_square",
        statistic: chiSquare,
        pValue,
        degreesOfFreedom: df,
        significant: pValue < 0.05,
        details: {
          grandTotal,
          rowCategories: rowTotals.size,
          colCategories: colTotals.size,
        },
      };
    },
  };
}
1219
+ //# sourceMappingURL=stats.js.map