db-mcp 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +860 -0
- package/dist/adapters/DatabaseAdapter.d.ts +141 -0
- package/dist/adapters/DatabaseAdapter.d.ts.map +1 -0
- package/dist/adapters/DatabaseAdapter.js +131 -0
- package/dist/adapters/DatabaseAdapter.js.map +1 -0
- package/dist/adapters/sqlite/SchemaManager.d.ts +58 -0
- package/dist/adapters/sqlite/SchemaManager.d.ts.map +1 -0
- package/dist/adapters/sqlite/SchemaManager.js +187 -0
- package/dist/adapters/sqlite/SchemaManager.js.map +1 -0
- package/dist/adapters/sqlite/SqliteAdapter.d.ts +161 -0
- package/dist/adapters/sqlite/SqliteAdapter.d.ts.map +1 -0
- package/dist/adapters/sqlite/SqliteAdapter.js +741 -0
- package/dist/adapters/sqlite/SqliteAdapter.js.map +1 -0
- package/dist/adapters/sqlite/index.d.ts +9 -0
- package/dist/adapters/sqlite/index.d.ts.map +1 -0
- package/dist/adapters/sqlite/index.js +8 -0
- package/dist/adapters/sqlite/index.js.map +1 -0
- package/dist/adapters/sqlite/json-utils.d.ts +100 -0
- package/dist/adapters/sqlite/json-utils.d.ts.map +1 -0
- package/dist/adapters/sqlite/json-utils.js +274 -0
- package/dist/adapters/sqlite/json-utils.js.map +1 -0
- package/dist/adapters/sqlite/output-schemas.d.ts +1187 -0
- package/dist/adapters/sqlite/output-schemas.d.ts.map +1 -0
- package/dist/adapters/sqlite/output-schemas.js +1337 -0
- package/dist/adapters/sqlite/output-schemas.js.map +1 -0
- package/dist/adapters/sqlite/prompts.d.ts +13 -0
- package/dist/adapters/sqlite/prompts.d.ts.map +1 -0
- package/dist/adapters/sqlite/prompts.js +605 -0
- package/dist/adapters/sqlite/prompts.js.map +1 -0
- package/dist/adapters/sqlite/resources.d.ts +13 -0
- package/dist/adapters/sqlite/resources.d.ts.map +1 -0
- package/dist/adapters/sqlite/resources.js +251 -0
- package/dist/adapters/sqlite/resources.js.map +1 -0
- package/dist/adapters/sqlite/tools/admin.d.ts +14 -0
- package/dist/adapters/sqlite/tools/admin.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/admin.js +788 -0
- package/dist/adapters/sqlite/tools/admin.js.map +1 -0
- package/dist/adapters/sqlite/tools/core.d.ts +25 -0
- package/dist/adapters/sqlite/tools/core.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/core.js +359 -0
- package/dist/adapters/sqlite/tools/core.js.map +1 -0
- package/dist/adapters/sqlite/tools/fts.d.ts +13 -0
- package/dist/adapters/sqlite/tools/fts.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/fts.js +347 -0
- package/dist/adapters/sqlite/tools/fts.js.map +1 -0
- package/dist/adapters/sqlite/tools/geo.d.ts +14 -0
- package/dist/adapters/sqlite/tools/geo.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/geo.js +252 -0
- package/dist/adapters/sqlite/tools/geo.js.map +1 -0
- package/dist/adapters/sqlite/tools/index.d.ts +30 -0
- package/dist/adapters/sqlite/tools/index.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/index.js +61 -0
- package/dist/adapters/sqlite/tools/index.js.map +1 -0
- package/dist/adapters/sqlite/tools/json-helpers.d.ts +14 -0
- package/dist/adapters/sqlite/tools/json-helpers.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/json-helpers.js +477 -0
- package/dist/adapters/sqlite/tools/json-helpers.js.map +1 -0
- package/dist/adapters/sqlite/tools/json-operations.d.ts +14 -0
- package/dist/adapters/sqlite/tools/json-operations.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/json-operations.js +839 -0
- package/dist/adapters/sqlite/tools/json-operations.js.map +1 -0
- package/dist/adapters/sqlite/tools/stats.d.ts +15 -0
- package/dist/adapters/sqlite/tools/stats.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/stats.js +1219 -0
- package/dist/adapters/sqlite/tools/stats.js.map +1 -0
- package/dist/adapters/sqlite/tools/text.d.ts +14 -0
- package/dist/adapters/sqlite/tools/text.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/text.js +1141 -0
- package/dist/adapters/sqlite/tools/text.js.map +1 -0
- package/dist/adapters/sqlite/tools/vector.d.ts +14 -0
- package/dist/adapters/sqlite/tools/vector.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/vector.js +613 -0
- package/dist/adapters/sqlite/tools/vector.js.map +1 -0
- package/dist/adapters/sqlite/tools/virtual.d.ts +13 -0
- package/dist/adapters/sqlite/tools/virtual.d.ts.map +1 -0
- package/dist/adapters/sqlite/tools/virtual.js +930 -0
- package/dist/adapters/sqlite/tools/virtual.js.map +1 -0
- package/dist/adapters/sqlite/types.d.ts +207 -0
- package/dist/adapters/sqlite/types.d.ts.map +1 -0
- package/dist/adapters/sqlite/types.js +186 -0
- package/dist/adapters/sqlite/types.js.map +1 -0
- package/dist/adapters/sqlite-native/NativeSqliteAdapter.d.ts +163 -0
- package/dist/adapters/sqlite-native/NativeSqliteAdapter.d.ts.map +1 -0
- package/dist/adapters/sqlite-native/NativeSqliteAdapter.js +748 -0
- package/dist/adapters/sqlite-native/NativeSqliteAdapter.js.map +1 -0
- package/dist/adapters/sqlite-native/index.d.ts +11 -0
- package/dist/adapters/sqlite-native/index.d.ts.map +1 -0
- package/dist/adapters/sqlite-native/index.js +11 -0
- package/dist/adapters/sqlite-native/index.js.map +1 -0
- package/dist/adapters/sqlite-native/tools/spatialite.d.ts +19 -0
- package/dist/adapters/sqlite-native/tools/spatialite.d.ts.map +1 -0
- package/dist/adapters/sqlite-native/tools/spatialite.js +628 -0
- package/dist/adapters/sqlite-native/tools/spatialite.js.map +1 -0
- package/dist/adapters/sqlite-native/tools/transactions.d.ts +12 -0
- package/dist/adapters/sqlite-native/tools/transactions.d.ts.map +1 -0
- package/dist/adapters/sqlite-native/tools/transactions.js +255 -0
- package/dist/adapters/sqlite-native/tools/transactions.js.map +1 -0
- package/dist/adapters/sqlite-native/tools/window.d.ts +12 -0
- package/dist/adapters/sqlite-native/tools/window.d.ts.map +1 -0
- package/dist/adapters/sqlite-native/tools/window.js +370 -0
- package/dist/adapters/sqlite-native/tools/window.js.map +1 -0
- package/dist/auth/AuthorizationServerDiscovery.d.ts +90 -0
- package/dist/auth/AuthorizationServerDiscovery.d.ts.map +1 -0
- package/dist/auth/AuthorizationServerDiscovery.js +204 -0
- package/dist/auth/AuthorizationServerDiscovery.js.map +1 -0
- package/dist/auth/OAuthResourceServer.d.ts +65 -0
- package/dist/auth/OAuthResourceServer.d.ts.map +1 -0
- package/dist/auth/OAuthResourceServer.js +121 -0
- package/dist/auth/OAuthResourceServer.js.map +1 -0
- package/dist/auth/TokenValidator.d.ts +60 -0
- package/dist/auth/TokenValidator.d.ts.map +1 -0
- package/dist/auth/TokenValidator.js +235 -0
- package/dist/auth/TokenValidator.js.map +1 -0
- package/dist/auth/errors.d.ts +74 -0
- package/dist/auth/errors.d.ts.map +1 -0
- package/dist/auth/errors.js +133 -0
- package/dist/auth/errors.js.map +1 -0
- package/dist/auth/index.d.ts +13 -0
- package/dist/auth/index.d.ts.map +1 -0
- package/dist/auth/index.js +15 -0
- package/dist/auth/index.js.map +1 -0
- package/dist/auth/middleware.d.ts +81 -0
- package/dist/auth/middleware.d.ts.map +1 -0
- package/dist/auth/middleware.js +291 -0
- package/dist/auth/middleware.js.map +1 -0
- package/dist/auth/scopes.d.ts +136 -0
- package/dist/auth/scopes.d.ts.map +1 -0
- package/dist/auth/scopes.js +349 -0
- package/dist/auth/scopes.js.map +1 -0
- package/dist/auth/types.d.ts +257 -0
- package/dist/auth/types.d.ts.map +1 -0
- package/dist/auth/types.js +8 -0
- package/dist/auth/types.js.map +1 -0
- package/dist/cli.d.ts +8 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +236 -0
- package/dist/cli.js.map +1 -0
- package/dist/constants/ServerInstructions.d.ts +45 -0
- package/dist/constants/ServerInstructions.d.ts.map +1 -0
- package/dist/constants/ServerInstructions.js +356 -0
- package/dist/constants/ServerInstructions.js.map +1 -0
- package/dist/filtering/ToolConstants.d.ts +34 -0
- package/dist/filtering/ToolConstants.d.ts.map +1 -0
- package/dist/filtering/ToolConstants.js +174 -0
- package/dist/filtering/ToolConstants.js.map +1 -0
- package/dist/filtering/ToolFilter.d.ts +82 -0
- package/dist/filtering/ToolFilter.d.ts.map +1 -0
- package/dist/filtering/ToolFilter.js +296 -0
- package/dist/filtering/ToolFilter.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -0
- package/dist/server/McpServer.d.ts +61 -0
- package/dist/server/McpServer.d.ts.map +1 -0
- package/dist/server/McpServer.js +270 -0
- package/dist/server/McpServer.js.map +1 -0
- package/dist/transports/http.d.ts +134 -0
- package/dist/transports/http.d.ts.map +1 -0
- package/dist/transports/http.js +516 -0
- package/dist/transports/http.js.map +1 -0
- package/dist/transports/index.d.ts +5 -0
- package/dist/transports/index.d.ts.map +1 -0
- package/dist/transports/index.js +5 -0
- package/dist/transports/index.js.map +1 -0
- package/dist/types/index.d.ts +380 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +68 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/annotations.d.ts +44 -0
- package/dist/utils/annotations.d.ts.map +1 -0
- package/dist/utils/annotations.js +77 -0
- package/dist/utils/annotations.js.map +1 -0
- package/dist/utils/errors.d.ts +155 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/dist/utils/errors.js +329 -0
- package/dist/utils/errors.js.map +1 -0
- package/dist/utils/identifiers.d.ts +121 -0
- package/dist/utils/identifiers.d.ts.map +1 -0
- package/dist/utils/identifiers.js +319 -0
- package/dist/utils/identifiers.js.map +1 -0
- package/dist/utils/index.d.ts +7 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +7 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/insightsManager.d.ts +39 -0
- package/dist/utils/insightsManager.d.ts.map +1 -0
- package/dist/utils/insightsManager.js +63 -0
- package/dist/utils/insightsManager.js.map +1 -0
- package/dist/utils/logger.d.ts +189 -0
- package/dist/utils/logger.d.ts.map +1 -0
- package/dist/utils/logger.js +394 -0
- package/dist/utils/logger.js.map +1 -0
- package/dist/utils/progress-utils.d.ts +54 -0
- package/dist/utils/progress-utils.d.ts.map +1 -0
- package/dist/utils/progress-utils.js +74 -0
- package/dist/utils/progress-utils.js.map +1 -0
- package/dist/utils/resourceAnnotations.d.ts +36 -0
- package/dist/utils/resourceAnnotations.d.ts.map +1 -0
- package/dist/utils/resourceAnnotations.js +57 -0
- package/dist/utils/resourceAnnotations.js.map +1 -0
- package/dist/utils/where-clause.d.ts +41 -0
- package/dist/utils/where-clause.d.ts.map +1 -0
- package/dist/utils/where-clause.js +116 -0
- package/dist/utils/where-clause.js.map +1 -0
- package/package.json +83 -0
- package/server.json +53 -0
|
@@ -0,0 +1,1219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite Statistics Tools
|
|
3
|
+
*
|
|
4
|
+
* Statistical analysis and aggregation functions:
|
|
5
|
+
* sum, avg, min, max, count, distinct, percentile, histogram, correlation,
|
|
6
|
+
* outlier detection, regression, hypothesis testing.
|
|
7
|
+
* 13 tools total.
|
|
8
|
+
*/
|
|
9
|
+
import { z } from "zod";
|
|
10
|
+
import { readOnly } from "../../../utils/annotations.js";
|
|
11
|
+
import { validateWhereClause, sanitizeIdentifier, } from "../../../utils/index.js";
|
|
12
|
+
import { StatsBasicOutputSchema, StatsCountOutputSchema, StatsGroupByOutputSchema, StatsHistogramOutputSchema, StatsPercentileOutputSchema, StatsCorrelationOutputSchema, StatsTopNOutputSchema, StatsDistinctOutputSchema, StatsSummaryOutputSchema, StatsFrequencyOutputSchema, } from "../output-schemas.js";
|
|
13
|
+
// Stats schemas
//
// Every statistics tool takes a `table` name and an optional raw SQL
// `whereClause`; the two builders below create those fields so each schema
// only spells out what is specific to it. Each call returns a fresh zod
// schema instance.
const statsTableField = () => z.string().describe("Table name");
const statsWhereField = () => z.string().optional();

/** Input for `sqlite_stats_basic`: one column to aggregate. */
const BasicStatsSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Numeric column for statistics"),
    whereClause: statsWhereField(),
});

/** Input for `sqlite_stats_count`: optional column, optional DISTINCT flag (defaults to false). */
const CountSchema = z.object({
    table: statsTableField(),
    column: z.string().optional().describe("Column to count (default: *)"),
    distinct: z.boolean().optional().default(false),
    whereClause: statsWhereField(),
});

/** Input for `sqlite_stats_group_by`: aggregate `valueColumn` per `groupByColumn`. */
const GroupByStatsSchema = z.object({
    table: statsTableField(),
    valueColumn: z.string().describe("Column for statistical value"),
    groupByColumn: z.string().describe("Column to group by"),
    stat: z
        .enum(["sum", "avg", "min", "max", "count"])
        .describe("Statistic type"),
    whereClause: statsWhereField(),
    orderBy: z.enum(["value", "group"]).optional().default("group"),
    limit: z.number().optional().default(100),
});

/** Input for `sqlite_stats_histogram`: bucket count defaults to 10. */
const HistogramSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Numeric column"),
    buckets: z.number().optional().default(10).describe("Number of buckets"),
    whereClause: statsWhereField(),
});

/** Input for `sqlite_stats_percentile`: each requested percentile must lie in [0, 100]. */
const PercentileSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Numeric column"),
    percentiles: z
        .array(z.number().min(0).max(100))
        .describe("Percentiles to compute"),
    whereClause: statsWhereField(),
});

/** Input for `sqlite_stats_correlation`: the two columns to pair up. */
const CorrelationSchema = z.object({
    table: statsTableField(),
    column1: z.string().describe("First numeric column"),
    column2: z.string().describe("Second numeric column"),
    whereClause: statsWhereField(),
});

/** Input for `sqlite_stats_top_n`: ranks by `column`, descending and 10 rows by default. */
const TopNSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Column to rank"),
    n: z.number().optional().default(10).describe("Number of top values"),
    orderDirection: z.enum(["asc", "desc"]).optional().default("desc"),
    whereClause: statsWhereField(),
    selectColumns: z
        .array(z.string())
        .optional()
        .describe("Columns to include in result (default: all columns)"),
});

/** Input for `sqlite_stats_distinct`: at most `limit` values (default 100). */
const DistinctValuesSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Column to get distinct values"),
    limit: z.number().optional().default(100),
    whereClause: statsWhereField(),
});

/** Input for the summary statistics tool: optional explicit column list. */
const SummaryStatsSchema = z.object({
    table: statsTableField(),
    columns: z
        .array(z.string())
        .optional()
        .describe("Columns to summarize (default: all numeric)"),
    whereClause: statsWhereField(),
});

/** Input for the frequency tool: `limit` defaults to 20. */
const FrequencySchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Column to count frequency"),
    limit: z.number().optional().default(20),
    whereClause: statsWhereField(),
});

// New statistical schemas

/** Input for outlier detection: IQR (default) or Z-score method, optional threshold. */
const OutlierSchema = z.object({
    table: statsTableField(),
    column: z.string().describe("Numeric column to analyze"),
    method: z.enum(["iqr", "zscore"]).optional().default("iqr"),
    threshold: z
        .number()
        .optional()
        .describe("IQR multiplier (default 1.5) or Z-score threshold (default 3)"),
    whereClause: statsWhereField(),
    limit: z.number().optional().default(100),
});

/** Input for regression: polynomial degree is restricted to 1..3 and defaults to 1. */
const RegressionSchema = z.object({
    table: statsTableField(),
    xColumn: z.string().describe("Independent variable column"),
    yColumn: z.string().describe("Dependent variable column"),
    degree: z
        .number()
        .min(1)
        .max(3)
        .optional()
        .default(1)
        .describe("Polynomial degree (1=linear)"),
    whereClause: statsWhereField(),
});

/** Input for hypothesis testing: which extra fields are needed depends on `testType`. */
const HypothesisSchema = z.object({
    table: statsTableField(),
    testType: z.enum(["ttest_one", "ttest_two", "chi_square"]),
    column: z.string().describe("Primary column for analysis"),
    column2: z
        .string()
        .optional()
        .describe("Second column for two-sample t-test"),
    groupColumn: z.string().optional().describe("Group column for chi-square"),
    expectedMean: z
        .number()
        .optional()
        .describe("Expected mean for one-sample t-test"),
    whereClause: statsWhereField(),
});
|
|
127
|
+
/**
 * Get all statistics tools.
 *
 * Each factory receives the same adapter and contributes one tool
 * definition; the returned array keeps the factory order below.
 *
 * @param adapter - Database adapter passed through to every tool factory.
 * @returns Array with one tool definition per factory (13 in total).
 */
export function getStatsTools(adapter) {
    const toolFactories = [
        createBasicStatsTool,
        createCountTool,
        createGroupByStatsTool,
        createHistogramTool,
        createPercentileTool,
        createCorrelationTool,
        createTopNTool,
        createDistinctValuesTool,
        createSummaryStatsTool,
        createFrequencyTool,
        // New statistical tools
        createOutlierTool,
        createRegressionTool,
        createHypothesisTool,
    ];
    // Explicit arrow so each factory is called with the adapter only
    // (Array.prototype.map would otherwise also pass index and array).
    return toolFactories.map((createTool) => createTool(adapter));
}
|
|
148
|
+
/**
 * Basic statistics (count, sum, avg, min, max, range)
 *
 * Builds the `sqlite_stats_basic` tool. Its handler runs one aggregate
 * query over the requested column, optionally filtered by `whereClause`.
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition whose handler resolves to
 *          `{ success, column, stats: { count, sum, avg, min, max, range } }`.
 */
function createBasicStatsTool(adapter) {
    // Aggregates may come back as null (no rows) or as non-number values;
    // normalise to a number, or null when no numeric reading exists.
    const asNullableNumber = (raw) => {
        if (raw === null || raw === undefined) {
            return null;
        }
        if (typeof raw === "number") {
            return raw;
        }
        const parsed = Number(raw);
        return Number.isNaN(parsed) ? null : parsed;
    };
    const handler = async (params, _context) => {
        const input = BasicStatsSchema.parse(params);
        // Validate and quote identifiers before they reach the SQL text.
        const quotedTable = sanitizeIdentifier(input.table);
        const quotedColumn = sanitizeIdentifier(input.column);
        const aggregates = [
            `COUNT(${quotedColumn}) as count`,
            `SUM(${quotedColumn}) as sum`,
            `AVG(${quotedColumn}) as avg`,
            `MIN(${quotedColumn}) as min`,
            `MAX(${quotedColumn}) as max`,
            `MAX(${quotedColumn}) - MIN(${quotedColumn}) as range`,
        ];
        let sql = `SELECT\n${aggregates.join(",\n")}\nFROM ${quotedTable}`;
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            sql += ` WHERE ${input.whereClause}`;
        }
        const queryResult = await adapter.executeReadQuery(sql);
        const firstRow = queryResult.rows?.[0];
        return {
            success: true,
            column: input.column,
            stats: {
                // count is always reported as a number (0 when no row came back)
                count: Number(firstRow?.["count"] ?? 0),
                sum: asNullableNumber(firstRow?.["sum"]),
                avg: asNullableNumber(firstRow?.["avg"]),
                min: asNullableNumber(firstRow?.["min"]),
                max: asNullableNumber(firstRow?.["max"]),
                range: asNullableNumber(firstRow?.["range"]),
            },
        };
    };
    return {
        name: "sqlite_stats_basic",
        description: "Get basic statistics (count, sum, avg, min, max) for a numeric column.",
        group: "stats",
        inputSchema: BasicStatsSchema,
        outputSchema: StatsBasicOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Basic Statistics"),
        handler,
    };
}
|
|
203
|
+
/**
 * Count rows
 *
 * Builds the `sqlite_stats_count` tool. Without a column the handler counts
 * all rows (`COUNT(*)`); with a column it counts that column's values,
 * using `COUNT(DISTINCT ...)` when `distinct` is set.
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition whose handler resolves to `{ success, count, distinct }`.
 */
function createCountTool(adapter) {
    const handler = async (params, _context) => {
        const input = CountSchema.parse(params);
        // Validate and quote table name
        const quotedTable = sanitizeIdentifier(input.table);
        // COUNT(*) unless a specific column was requested; the distinct
        // flag only affects the SQL when a column is present.
        let countExpression = "COUNT(*)";
        if (input.column) {
            const quotedColumn = sanitizeIdentifier(input.column);
            const distinctPrefix = input.distinct ? "DISTINCT " : "";
            countExpression = `COUNT(${distinctPrefix}${quotedColumn})`;
        }
        const sqlParts = [`SELECT ${countExpression} as count FROM ${quotedTable}`];
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            sqlParts.push(`WHERE ${input.whereClause}`);
        }
        const queryResult = await adapter.executeReadQuery(sqlParts.join(" "));
        const firstRow = queryResult.rows?.[0];
        return {
            success: true,
            count: firstRow?.["count"] ?? 0,
            distinct: input.distinct,
        };
    };
    return {
        name: "sqlite_stats_count",
        description: "Count rows, optionally distinct values in a column.",
        group: "stats",
        inputSchema: CountSchema,
        outputSchema: StatsCountOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Count Rows"),
        handler,
    };
}
|
|
243
|
+
/**
 * Group by with aggregation
 *
 * Builds the `sqlite_stats_group_by` tool. The handler verifies that both
 * requested columns exist, then runs
 * `SELECT group, STAT(value) ... GROUP BY group ORDER BY ... DESC LIMIT n`.
 *
 * @param adapter - Database adapter; `describeTable(name)` and
 *                  `executeReadQuery(sql)` are used.
 * @returns Tool definition whose handler resolves to
 *          `{ success, statistic, rowCount, results }`.
 * @throws Error from the handler when `valueColumn` or `groupByColumn` is
 *         not a column of the table.
 */
function createGroupByStatsTool(adapter) {
    const handler = async (params, _context) => {
        const input = GroupByStatsSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(input.table);
        const quotedValue = sanitizeIdentifier(input.valueColumn);
        const quotedGroup = sanitizeIdentifier(input.groupByColumn);
        // Validate that columns exist to prevent SQLite from treating
        // non-existent columns as string literals. Comparison is
        // case-insensitive.
        const description = await adapter.describeTable(input.table);
        const knownColumns = new Set();
        for (const columnInfo of description.columns ?? []) {
            knownColumns.add(columnInfo.name.toLowerCase());
        }
        for (const requested of [input.valueColumn, input.groupByColumn]) {
            if (!knownColumns.has(requested.toLowerCase())) {
                throw new Error(`Column "${requested}" not found in table "${input.table}"`);
            }
        }
        const aggregateFn = input.stat.toUpperCase();
        // Order either by the aggregate alias or by the grouping column.
        const orderTarget = input.orderBy === "value" ? "stat_value" : quotedGroup;
        const sqlParts = [
            `SELECT ${quotedGroup}, ${aggregateFn}(${quotedValue}) as stat_value\nFROM ${quotedTable}`,
        ];
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            sqlParts.push(` WHERE ${input.whereClause}`);
        }
        sqlParts.push(` GROUP BY ${quotedGroup} ORDER BY ${orderTarget} DESC LIMIT ${input.limit}`);
        const queryResult = await adapter.executeReadQuery(sqlParts.join(""));
        return {
            success: true,
            statistic: input.stat,
            rowCount: queryResult.rows?.length ?? 0,
            results: queryResult.rows,
        };
    };
    return {
        name: "sqlite_stats_group_by",
        description: "Aggregate statistics grouped by a column.",
        group: "stats",
        inputSchema: GroupByStatsSchema,
        outputSchema: StatsGroupByOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Group By Stats"),
        handler,
    };
}
|
|
289
|
+
/**
 * Histogram
 *
 * Builds the `sqlite_stats_histogram` tool. The handler splits the
 * [MIN, MAX] range of a numeric column into `buckets` equal-width intervals
 * and counts the rows falling into each one. All intervals are half-open
 * (`>= lower AND < upper`) except the last, which is closed so the maximum
 * value is counted.
 *
 * Results:
 *  - no non-NULL values match:  `{ success, buckets: [] }`
 *  - all values are identical:  `{ success, buckets: [{ min, max, count }] }`
 *    where `count` is the real number of non-NULL values
 *  - otherwise: `{ success, column, range, bucketSize, buckets }` with one
 *    `{ bucket, min, max, count }` entry per interval.
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition for `sqlite_stats_histogram`.
 * @throws Error from the handler when `buckets` is not a positive integer
 *         or when the column's MIN/MAX cannot be read as finite numbers.
 */
function createHistogramTool(adapter) {
    return {
        name: "sqlite_stats_histogram",
        description: "Create a histogram with specified number of buckets.",
        group: "stats",
        inputSchema: HistogramSchema,
        outputSchema: StatsHistogramOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Histogram"),
        handler: async (params, _context) => {
            const input = HistogramSchema.parse(params);
            const bucketCount = input.buckets;
            // 0, negative or fractional bucket counts would otherwise yield
            // an empty SELECT list or a histogram without a closing bucket.
            if (!Number.isInteger(bucketCount) || bucketCount < 1) {
                throw new Error(`buckets must be a positive integer, got ${bucketCount}`);
            }
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            // Validate the filter once and reuse it for both queries.
            let whereSql = "";
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                whereSql = ` WHERE ${input.whereClause}`;
            }
            // First get min/max, plus the number of non-NULL values so the
            // degenerate cases below can report a real count.
            const minMaxSql = `SELECT MIN(${column}) as min_val, MAX(${column}) as max_val, COUNT(${column}) as value_count FROM ${table}${whereSql}`;
            const minMaxResult = await adapter.executeReadQuery(minMaxSql);
            const summary = minMaxResult.rows?.[0];
            const valueCount = Number(summary?.["value_count"] ?? 0);
            if (valueCount === 0) {
                // Nothing to bucket: empty table, all NULLs, or filter matched no rows.
                return {
                    success: true,
                    buckets: [],
                };
            }
            // Coerce so numeric strings / BigInt do not turn the bound
            // arithmetic below into string concatenation or a TypeError.
            const minVal = Number(summary["min_val"]);
            const maxVal = Number(summary["max_val"]);
            if (!Number.isFinite(minVal) || !Number.isFinite(maxVal)) {
                throw new Error(`Column "${input.column}" does not contain numeric values`);
            }
            const bucketSize = (maxVal - minVal) / bucketCount;
            if (bucketSize === 0) {
                // Every value is identical: a single bucket holds them all.
                return {
                    success: true,
                    buckets: [{ min: minVal, max: maxVal, count: valueCount }],
                };
            }
            // Compute the interval bounds once; they drive both the SQL and
            // the formatted result. The last bucket ends exactly at maxVal
            // so floating-point rounding cannot exclude the maximum.
            const lastIndex = bucketCount - 1;
            const bounds = Array.from({ length: bucketCount }, (_unused, i) => ({
                bucket: i,
                min: minVal + i * bucketSize,
                max: i === lastIndex ? maxVal : minVal + (i + 1) * bucketSize,
            }));
            // Build histogram using CASE expressions.
            // Final bucket uses <= to include the max value.
            const bucketCases = bounds.map(({ bucket, min, max }) => {
                const upperOp = bucket === lastIndex ? "<=" : "<";
                return `SUM(CASE WHEN ${column} >= ${min} AND ${column} ${upperOp} ${max} THEN 1 ELSE 0 END) as bucket_${bucket}`;
            });
            const sql = `SELECT ${bucketCases.join(", ")} FROM ${table}${whereSql}`;
            const result = await adapter.executeReadQuery(sql);
            const countsRow = result.rows?.[0];
            // Format buckets
            const buckets = bounds.map((bound) => ({
                ...bound,
                count: countsRow?.[`bucket_${bound.bucket}`] ?? 0,
            }));
            return {
                success: true,
                column: input.column,
                range: { min: minVal, max: maxVal },
                bucketSize,
                buckets,
            };
        },
    };
}
|
|
360
|
+
/**
 * Percentiles
 *
 * Builds the `sqlite_stats_percentile` tool. The handler loads every
 * non-NULL value of the column in ascending order and picks each requested
 * percentile with the nearest-rank rule: index `ceil(p / 100 * n) - 1`,
 * clamped to the valid range (so p = 0 yields the smallest value).
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition whose handler resolves to
 *          `{ success, column, count, percentiles: [{ percentile, value }] }`,
 *          or `{ success, percentiles }` with `value: null` entries when no
 *          rows match.
 */
function createPercentileTool(adapter) {
    return {
        name: "sqlite_stats_percentile",
        description: "Calculate percentiles (median, quartiles, etc.) for a column.",
        group: "stats",
        inputSchema: PercentileSchema,
        outputSchema: StatsPercentileOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Percentile"),
        handler: async (params, _context) => {
            const input = PercentileSchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            let sql = `SELECT ${column} as value FROM ${table} WHERE ${column} IS NOT NULL`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                // Parenthesise the caller's filter: AND binds tighter than
                // OR, so a bare `a OR b` would escape the IS NOT NULL guard
                // and let NULLs into the sorted sample.
                sql += ` AND (${input.whereClause})`;
            }
            sql += ` ORDER BY ${column}`;
            const result = await adapter.executeReadQuery(sql);
            const values = (result.rows ?? []).map((r) => r["value"]);
            if (values.length === 0) {
                return {
                    success: true,
                    percentiles: input.percentiles.map((p) => ({
                        percentile: p,
                        value: null,
                    })),
                };
            }
            // Calculate percentiles (nearest-rank on the sorted values)
            const lastIndex = values.length - 1;
            const percentiles = input.percentiles.map((p) => {
                const rankIndex = Math.ceil((p / 100) * values.length) - 1;
                const safeIndex = Math.max(0, Math.min(rankIndex, lastIndex));
                return {
                    percentile: p,
                    value: values[safeIndex],
                };
            });
            return {
                success: true,
                column: input.column,
                count: values.length,
                percentiles,
            };
        },
    };
}
|
|
412
|
+
/**
 * Correlation between two columns
 *
 * Builds the `sqlite_stats_correlation` tool. The handler fetches every row
 * where both columns are non-NULL and computes the Pearson correlation
 * coefficient in JavaScript, rounded to 4 decimal places.
 *
 * Special cases:
 *  - fewer than 2 pairs: `{ success, correlation: null, message }`
 *  - zero variance in either column (denominator 0): correlation is 0
 *  - non-numeric data (NaN in the computation): correlation is null
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition whose handler resolves to
 *          `{ success, column1, column2, n, correlation }`.
 */
function createCorrelationTool(adapter) {
    return {
        name: "sqlite_stats_correlation",
        description: "Calculate Pearson correlation coefficient between two numeric columns.",
        group: "stats",
        inputSchema: CorrelationSchema,
        outputSchema: StatsCorrelationOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Correlation"),
        handler: async (params, _context) => {
            const input = CorrelationSchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const col1 = sanitizeIdentifier(input.column1);
            const col2 = sanitizeIdentifier(input.column2);
            // Get paired values
            let sql = `SELECT ${col1} as x, ${col2} as y\nFROM ${table}\nWHERE ${col1} IS NOT NULL AND ${col2} IS NOT NULL`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                // Parenthesise the caller's filter: AND binds tighter than
                // OR, so a bare `a OR b` would bypass the IS NOT NULL guards.
                sql += ` AND (${input.whereClause})`;
            }
            const result = await adapter.executeReadQuery(sql);
            // Coerce to numbers up front: with raw values, `sum + "1.5"`
            // concatenates strings and `sum + 1n` throws.
            const pairs = (result.rows ?? []).map((r) => ({
                x: Number(r["x"]),
                y: Number(r["y"]),
            }));
            if (pairs.length < 2) {
                return {
                    success: true,
                    correlation: null,
                    message: "Need at least 2 data points",
                };
            }
            // Calculate correlation in JS from the usual running sums.
            const n = pairs.length;
            let sumX = 0;
            let sumY = 0;
            let sumXY = 0;
            let sumX2 = 0;
            let sumY2 = 0;
            for (const { x, y } of pairs) {
                sumX += x;
                sumY += y;
                sumXY += x * y;
                sumX2 += x * x;
                sumY2 += y * y;
            }
            const numerator = n * sumXY - sumX * sumY;
            const denominator = Math.sqrt((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY));
            const correlation = denominator === 0 ? 0 : numerator / denominator;
            // Handle NaN case (non-numeric values or other data issues)
            const roundedCorrelation = Number.isNaN(correlation)
                ? null
                : Math.round(correlation * 10000) / 10000;
            return {
                success: true,
                column1: input.column1,
                column2: input.column2,
                n: pairs.length,
                correlation: roundedCorrelation,
            };
        },
    };
}
|
|
474
|
+
/**
 * Top N values
 *
 * Builds the `sqlite_stats_top_n` tool. The handler returns the first `n`
 * rows ordered by `column` in the requested direction, projecting either
 * the listed `selectColumns` or every column (`*`).
 *
 * @param adapter - Database adapter; only `executeReadQuery(sql)` is used.
 * @returns Tool definition whose handler resolves to
 *          `{ success, column, direction, count, rows }`.
 */
function createTopNTool(adapter) {
    const handler = async (params, _context) => {
        const input = TopNSchema.parse(params);
        // Validate and quote identifiers
        const quotedTable = sanitizeIdentifier(input.table);
        const rankColumn = sanitizeIdentifier(input.column);
        const direction = input.orderDirection.toUpperCase();
        // Build column list - use specified columns or default to all
        const hasProjection = input.selectColumns?.length > 0;
        const projection = hasProjection
            ? input.selectColumns.map((name) => sanitizeIdentifier(name)).join(", ")
            : "*";
        const sqlParts = [`SELECT ${projection} FROM ${quotedTable}`];
        if (input.whereClause) {
            validateWhereClause(input.whereClause);
            sqlParts.push(`WHERE ${input.whereClause}`);
        }
        sqlParts.push(`ORDER BY ${rankColumn} ${direction} LIMIT ${input.n}`);
        const queryResult = await adapter.executeReadQuery(sqlParts.join(" "));
        return {
            success: true,
            column: input.column,
            direction: input.orderDirection,
            count: queryResult.rows?.length ?? 0,
            rows: queryResult.rows,
        };
    };
    return {
        name: "sqlite_stats_top_n",
        description: "Get top N values from a column.",
        group: "stats",
        inputSchema: TopNSchema,
        outputSchema: StatsTopNOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Top N Values"),
        handler,
    };
}
|
|
516
|
+
/**
 * Build the `sqlite_stats_distinct` tool definition.
 *
 * The handler runs `SELECT DISTINCT <column> ... LIMIT <limit>` and returns
 * the values found. `distinctCount` is the number of values returned, so it
 * can never exceed the requested limit.
 *
 * @param {object} adapter - Database adapter; only `executeReadQuery(sql)` is used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 */
function createDistinctValuesTool(adapter) {
    return {
        name: "sqlite_stats_distinct",
        description: "Get distinct values from a column.",
        group: "stats",
        inputSchema: DistinctValuesSchema,
        outputSchema: StatsDistinctOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Distinct Values"),
        handler: async (params, _context) => {
            const input = DistinctValuesSchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            let sql = `SELECT DISTINCT ${column} as value FROM ${table}`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                sql += ` WHERE ${input.whereClause}`;
            }
            sql += ` LIMIT ${input.limit}`;
            const result = await adapter.executeReadQuery(sql);
            // Normalize a missing row set so `values` is always an array
            const rows = result.rows ?? [];
            return {
                success: true,
                column: input.column,
                distinctCount: rows.length,
                values: rows.map((row) => row["value"]),
            };
        },
    };
}
|
|
549
|
+
/**
 * Build the `sqlite_stats_summary` tool definition.
 *
 * For each selected column the handler runs one aggregate query
 * (COUNT / AVG / MIN / MAX). Columns come from `input.columns` when given;
 * otherwise every column whose declared type starts with a known numeric
 * type name is used.
 *
 * @param {object} adapter - Database adapter; `describeTable(name)` and
 *   `executeReadQuery(sql)` are used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 */
function createSummaryStatsTool(adapter) {
    // Declared-type prefixes treated as numeric during auto-detection
    const numericTypePrefixes = [
        "integer",
        "int",
        "real",
        "float",
        "double",
        "numeric",
        "decimal",
        "number",
        "smallint",
        "bigint",
        "tinyint",
        "mediumint",
    ];
    const isNumericType = (declaredType) => {
        const typeLower = (declaredType ?? "").toLowerCase();
        return numericTypePrefixes.some((prefix) => typeLower.startsWith(prefix));
    };
    // Convert an aggregate result to a number, or null when it has no numeric
    // reading. A genuine zero (e.g. the string "0") must stay 0, not null.
    const toNullableNumber = (value) => {
        if (typeof value === "number") {
            return value;
        }
        if (value === null || value === undefined) {
            return null;
        }
        if (typeof value === "string" && value.trim() === "") {
            return null;
        }
        const converted = Number(value);
        return Number.isNaN(converted) ? null : converted;
    };
    return {
        name: "sqlite_stats_summary",
        description: "Get summary statistics for multiple columns at once.",
        group: "stats",
        inputSchema: SummaryStatsSchema,
        outputSchema: StatsSummaryOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Summary Stats"),
        handler: async (params, _context) => {
            const input = SummaryStatsSchema.parse(params);
            // Validate table name
            const table = sanitizeIdentifier(input.table);
            // Get table info to find columns
            const tableInfo = await adapter.describeTable(input.table);
            let columns;
            if (input.columns && input.columns.length > 0) {
                // User-specified columns: validate all of them before any query runs
                input.columns.forEach((col) => sanitizeIdentifier(col));
                columns = [...input.columns];
            }
            else {
                // Auto-detect: only include numeric columns
                columns = (tableInfo.columns ?? [])
                    .filter((c) => isNumericType(c.type))
                    .map((c) => c.name);
            }
            if (columns.length === 0) {
                return {
                    success: true,
                    table: input.table,
                    summaries: [],
                };
            }
            // The WHERE fragment is identical for every column: validate and build it once
            let whereSuffix = "";
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                whereSuffix = ` WHERE ${input.whereClause}`;
            }
            const summaries = [];
            for (const col of columns) {
                const quotedCol = sanitizeIdentifier(col);
                const sql = `SELECT
                    COUNT(${quotedCol}) as count,
                    AVG(${quotedCol}) as avg,
                    MIN(${quotedCol}) as min,
                    MAX(${quotedCol}) as max
                    FROM ${table}${whereSuffix}`;
                try {
                    const result = await adapter.executeReadQuery(sql);
                    const row = result.rows?.[0];
                    summaries.push({
                        column: col,
                        count: Number(row?.["count"] ?? 0),
                        avg: toNullableNumber(row?.["avg"]),
                        min: toNullableNumber(row?.["min"]),
                        max: toNullableNumber(row?.["max"]),
                    });
                }
                catch {
                    // Best effort: a failing query is reported per column instead of
                    // aborting the summaries of the remaining columns
                    summaries.push({ column: col, error: "Not numeric" });
                }
            }
            return {
                success: true,
                table: input.table,
                summaries,
            };
        },
    };
}
|
|
663
|
+
/**
 * Build the `sqlite_stats_frequency` tool definition.
 *
 * The handler groups a table by one column and returns each value with its
 * row count, most frequent first, limited to `limit` groups.
 * `distinctValues` is the number of groups returned, so it can never exceed
 * the requested limit.
 *
 * @param {object} adapter - Database adapter; only `executeReadQuery(sql)` is used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 */
function createFrequencyTool(adapter) {
    return {
        name: "sqlite_stats_frequency",
        description: "Get frequency distribution of values in a column.",
        group: "stats",
        inputSchema: FrequencySchema,
        outputSchema: StatsFrequencyOutputSchema,
        requiredScopes: ["read"],
        annotations: readOnly("Frequency"),
        handler: async (params, _context) => {
            const input = FrequencySchema.parse(params);
            // Validate and quote identifiers
            const table = sanitizeIdentifier(input.table);
            const column = sanitizeIdentifier(input.column);
            let sql = `SELECT ${column} as value, COUNT(*) as frequency
                FROM ${table}`;
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                sql += ` WHERE ${input.whereClause}`;
            }
            sql += ` GROUP BY ${column} ORDER BY frequency DESC LIMIT ${input.limit}`;
            const result = await adapter.executeReadQuery(sql);
            // Normalize a missing row set so `distribution` is always an array
            const distribution = result.rows ?? [];
            return {
                success: true,
                column: input.column,
                distinctValues: distribution.length,
                distribution,
            };
        },
    };
}
|
|
697
|
+
// =============================================================================
|
|
698
|
+
// New Statistical Tools
|
|
699
|
+
// =============================================================================
|
|
700
|
+
/**
 * Approximate the standard normal cumulative distribution function.
 *
 * Evaluates a five-coefficient polynomial in t = 1 / (1 + p·z), with
 * z = |x| / √2, multiplied by exp(-z²), and mirrors the result for
 * negative inputs.
 *
 * @param {number} x - Point at which to evaluate the CDF.
 * @returns {number} Approximate probability that a standard normal variable is ≤ x.
 */
function normalCDF(x) {
    // Polynomial coefficients, highest order first, for Horner evaluation
    const hornerCoefficients = [
        1.061405429,
        -1.453152027,
        1.421413741,
        -0.284496736,
        0.254829592,
    ];
    const scale = 0.3275911;
    let direction = 1;
    if (x < 0) {
        direction = -1;
    }
    const z = Math.abs(x) / Math.sqrt(2);
    const t = 1.0 / (1.0 + scale * z);
    const polynomial = hornerCoefficients.reduce((acc, coefficient) => acc * t + coefficient, 0);
    const erfApprox = 1.0 - polynomial * t * Math.exp(-z * z);
    return 0.5 * (1.0 + direction * erfApprox);
}
|
|
716
|
+
/**
 * Two-tailed p-value for a Student t statistic.
 *
 * For df > 30 the normal approximation is kept. For smaller df the p-value
 * is computed from the regularized incomplete beta function,
 * p = I_x(df/2, 1/2) with x = df / (df + t²). The previous small-df
 * shortcut, x^(df/2), is not a t-distribution tail probability.
 *
 * @param {number} t - Observed t statistic (the sign is irrelevant).
 * @param {number} df - Degrees of freedom; need not be an integer.
 * @returns {number} p-value in [0, 1]; NaN when `t` is NaN or `df` is not positive.
 */
function tDistPValue(t, df) {
    // Use normal approximation for large df
    if (df > 30) {
        return 2 * (1 - normalCDF(Math.abs(t)));
    }
    if (Number.isNaN(t) || !(df > 0)) {
        return Number.NaN;
    }
    // ln Γ(z) via the Lanczos approximation (g = 5, six coefficients)
    const logGamma = (z) => {
        const lanczos = [
            76.18009172947146,
            -86.50532032941677,
            24.01409824083091,
            -1.231739572450155,
            0.1208650973866179e-2,
            -0.5395239384953e-5,
        ];
        const shifted = z + 5.5;
        const base = shifted - (z + 0.5) * Math.log(shifted);
        let series = 1.000000000190015;
        let denominator = z;
        for (const coefficient of lanczos) {
            denominator += 1;
            series += coefficient / denominator;
        }
        return -base + Math.log((2.5066282746310005 * series) / z);
    };
    // Continued fraction for the incomplete beta function (modified Lentz method)
    const betaContinuedFraction = (a, b, x) => {
        const MAX_ITERATIONS = 500;
        const EPSILON = 1e-14;
        const TINY = 1e-300;
        const guard = (value) => (Math.abs(value) < TINY ? TINY : value);
        const qab = a + b;
        const qap = a + 1;
        const qam = a - 1;
        let c = 1;
        let d = 1 / guard(1 - (qab * x) / qap);
        let h = d;
        for (let m = 1; m <= MAX_ITERATIONS; m++) {
            const m2 = 2 * m;
            // Even step of the recurrence
            let term = (m * (b - m) * x) / ((qam + m2) * (a + m2));
            d = 1 / guard(1 + term * d);
            c = guard(1 + term / c);
            h *= d * c;
            // Odd step of the recurrence
            term = (-(a + m) * (qab + m) * x) / ((a + m2) * (qap + m2));
            d = 1 / guard(1 + term * d);
            c = guard(1 + term / c);
            const delta = d * c;
            h *= delta;
            if (Math.abs(delta - 1) < EPSILON) {
                break;
            }
        }
        return h;
    };
    const tSquared = t * t;
    const x = df / (df + tSquared);
    if (x <= 0) {
        return 0; // |t| is infinite
    }
    if (x >= 1) {
        return 1; // t is zero (or t² underflowed to zero)
    }
    // Compute 1 - x directly to avoid cancellation when t is tiny
    const oneMinusX = tSquared / (df + tSquared);
    const a = df / 2;
    const b = 0.5;
    const front = Math.exp(logGamma(a + b) - logGamma(a) - logGamma(b) +
        a * Math.log(x) + b * Math.log(oneMinusX));
    // Use whichever side of the symmetry relation converges quickly
    const p = x < (a + 1) / (a + b + 2)
        ? (front * betaContinuedFraction(a, b, x)) / a
        : 1 - (front * betaContinuedFraction(b, a, oneMinusX)) / b;
    return Math.min(1, Math.max(0, p));
}
|
|
730
|
+
/**
 * Build the `sqlite_stats_outliers` tool definition.
 *
 * Two methods are supported:
 *  - "zscore": bounds are mean ± threshold · sample standard deviation
 *    (threshold defaults to 3), with the statistics computed in SQL.
 *  - anything else (IQR): all non-null values are fetched in sorted order,
 *    Q1/Q3 are taken at floor(n·0.25) / floor(n·0.75), and bounds are
 *    Q1 − k·IQR and Q3 + k·IQR (k defaults to 1.5).
 *
 * `outlierCount` is the number of rows returned, which is capped by
 * `input.limit`.
 *
 * @param {object} adapter - Database adapter; only `executeReadQuery(sql)` is used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 */
function createOutlierTool(adapter) {
    return {
        name: "sqlite_stats_outliers",
        description: "Detect outliers using IQR (Interquartile Range) or Z-score method.",
        group: "stats",
        inputSchema: OutlierSchema,
        outputSchema: z.object({
            success: z.boolean(),
            method: z.string(),
            stats: z.object({
                mean: z.number().optional(),
                stdDev: z.number().optional(),
                q1: z.number().optional(),
                q3: z.number().optional(),
                iqr: z.number().optional(),
                lowerBound: z.number(),
                upperBound: z.number(),
            }),
            outlierCount: z.number(),
            totalRows: z.number(),
            outliers: z.array(z.object({
                value: z.number(),
                rowid: z.number().optional(),
            })),
        }),
        requiredScopes: ["read"],
        annotations: readOnly("Outlier Detection"),
        handler: async (params, _context) => {
            const input = OutlierSchema.parse(params);
            // Validate identifiers
            sanitizeIdentifier(input.table);
            sanitizeIdentifier(input.column);
            // Security: Validate WHERE clause if provided
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
            }
            const col = `"${input.column}"`;
            const tbl = `"${input.table}"`;
            // Parenthesize the caller's clause: AND binds tighter than OR, so an
            // unwrapped "a OR b" would bypass the NOT NULL and bounds conditions
            const rowFilter = input.whereClause
                ? `${col} IS NOT NULL AND (${input.whereClause})`
                : `${col} IS NOT NULL`;
            // Fetch rows whose value lies outside [lowerBound, upperBound]
            const fetchOutliers = async (lowerBound, upperBound) => {
                const outlierResult = await adapter.executeReadQuery(`SELECT rowid, ${col} as value FROM ${tbl}
                    WHERE ${rowFilter}
                    AND (${col} < ${lowerBound} OR ${col} > ${upperBound})
                    LIMIT ${input.limit}`);
                return (outlierResult.rows ?? []).map((row) => {
                    const rowid = row["rowid"];
                    return {
                        value: Number(row["value"]),
                        ...(typeof rowid === "number" ? { rowid } : {}),
                    };
                });
            };
            if (input.method === "zscore") {
                const threshold = input.threshold ?? 3;
                // Get mean and sample variance (n - 1 denominator) in a single query
                const meanSubquery = `(SELECT AVG(${col}) FROM ${tbl} WHERE ${rowFilter})`;
                const statsResult = await adapter.executeReadQuery(`SELECT AVG(${col}) as mean,
                    (SUM((${col} - ${meanSubquery}) *
                         (${col} - ${meanSubquery})) /
                     (COUNT(*) - 1)) as variance,
                    COUNT(*) as total
                    FROM ${tbl} WHERE ${rowFilter}`);
                const statsRow = statsResult.rows?.[0];
                const mean = Number(statsRow?.["mean"] ?? 0);
                const variance = Number(statsRow?.["variance"] ?? 0);
                const stdDev = Math.sqrt(variance);
                const total = Number(statsRow?.["total"] ?? 0);
                const lowerBound = mean - threshold * stdDev;
                const upperBound = mean + threshold * stdDev;
                const outliers = await fetchOutliers(lowerBound, upperBound);
                return {
                    success: true,
                    method: "zscore",
                    stats: { mean, stdDev, lowerBound, upperBound },
                    outlierCount: outliers.length,
                    totalRows: total,
                    outliers,
                };
            }
            // IQR method
            const multiplier = input.threshold ?? 1.5;
            // Get sorted values for percentile calculation
            const allResult = await adapter.executeReadQuery(`SELECT ${col} as value FROM ${tbl}
                WHERE ${rowFilter}
                ORDER BY ${col}`);
            const values = (allResult.rows ?? []).map((r) => Number(r["value"]));
            const n = values.length;
            if (n === 0) {
                return {
                    success: true,
                    method: "iqr",
                    stats: { q1: 0, q3: 0, iqr: 0, lowerBound: 0, upperBound: 0 },
                    outlierCount: 0,
                    totalRows: 0,
                    outliers: [],
                };
            }
            const q1 = values[Math.floor(n * 0.25)] ?? 0;
            const q3 = values[Math.floor(n * 0.75)] ?? 0;
            const iqr = q3 - q1;
            const lowerBound = q1 - multiplier * iqr;
            const upperBound = q3 + multiplier * iqr;
            const outliers = await fetchOutliers(lowerBound, upperBound);
            return {
                success: true,
                method: "iqr",
                stats: { q1, q3, iqr, lowerBound, upperBound },
                outlierCount: outliers.length,
                totalRows: n,
                outliers,
            };
        },
    };
}
|
|
856
|
+
/**
|
|
857
|
+
* Linear/polynomial regression analysis
|
|
858
|
+
*/
|
|
859
|
+
// Matrix utility functions for polynomial regression
|
|
860
|
+
/**
 * Transpose a matrix stored as an array of rows.
 *
 * The column count is taken from the first row; missing entries read as 0.
 *
 * @param {number[][]} A - Input matrix.
 * @returns {number[][]} New matrix whose rows are the columns of `A`.
 */
function matrixTranspose(A) {
    const height = A.length;
    const width = A[0]?.length ?? 0;
    return Array.from({ length: width }, (_unusedCol, c) => Array.from({ length: height }, (_unusedRow, r) => A[r]?.[c] ?? 0));
}
|
|
873
|
+
/**
 * Multiply two matrices stored as arrays of rows.
 *
 * The shared dimension is taken from the first row of `A` and the result
 * width from the first row of `B`; missing entries read as 0.
 *
 * @param {number[][]} A - Left operand.
 * @param {number[][]} B - Right operand.
 * @returns {number[][]} The product A·B as a new matrix.
 */
function matrixMultiply(A, B) {
    const sharedDim = A[0]?.length ?? 0;
    const outWidth = B[0]?.length ?? 0;
    // Dot product of row r of A with column c of B, accumulated left to right
    const cell = (r, c) => {
        let total = 0;
        for (let k = 0; k < sharedDim; k++) {
            total += (A[r]?.[k] ?? 0) * (B[k]?.[c] ?? 0);
        }
        return total;
    };
    return Array.from({ length: A.length }, (_unusedRow, r) => Array.from({ length: outWidth }, (_unusedCol, c) => cell(r, c)));
}
|
|
891
|
+
/**
 * Invert a square matrix with Gauss-Jordan elimination and partial pivoting.
 *
 * Works on an augmented copy [A | I], so the input rows are not modified.
 *
 * @param {number[][]} A - Square matrix stored as an array of rows.
 * @returns {number[][]} The inverse of `A`.
 * @throws {Error} When a pivot has magnitude below 1e-10 (matrix treated as singular).
 */
function matrixInverse(A) {
    const size = A.length;
    const width = 2 * size;
    // Augmented working copy: each row of A followed by the matching identity row
    const work = A.map((sourceRow, r) => {
        const identityRow = Array.from({ length: size }, (_unused, c) => (r === c ? 1 : 0));
        return [...sourceRow, ...identityRow];
    });
    for (let k = 0; k < size; k++) {
        // Partial pivoting: pick the row at or below k with the largest |entry| in column k
        let pivotIndex = k;
        let largest = Math.abs(work[k]?.[k] ?? 0);
        for (let r = k + 1; r < size; r++) {
            const magnitude = Math.abs(work[r]?.[k] ?? 0);
            if (magnitude > largest) {
                largest = magnitude;
                pivotIndex = r;
            }
        }
        const upper = work[k];
        const chosen = work[pivotIndex];
        if (upper && chosen) {
            work[k] = chosen;
            work[pivotIndex] = upper;
        }
        const pivotRow = work[k];
        if (!pivotRow) {
            continue;
        }
        const pivot = pivotRow[k] ?? 0;
        if (Math.abs(pivot) < 1e-10) {
            throw new Error("Matrix is singular, cannot compute inverse");
        }
        // Normalize the pivot row so the pivot entry becomes 1
        for (let j = 0; j < width; j++) {
            pivotRow[j] = (pivotRow[j] ?? 0) / pivot;
        }
        // Clear column k in every other row
        for (let r = 0; r < size; r++) {
            if (r === k) {
                continue;
            }
            const target = work[r];
            if (!target) {
                continue;
            }
            const factor = target[k] ?? 0;
            for (let j = 0; j < width; j++) {
                target[j] = (target[j] ?? 0) - factor * (pivotRow[j] ?? 0);
            }
        }
    }
    // The right half of the augmented matrix now holds the inverse
    return work.map((augmentedRow) => augmentedRow.slice(size));
}
|
|
943
|
+
/**
 * Build the `sqlite_stats_regression` tool definition.
 *
 * The handler fetches (x, y) pairs where both columns are non-null, fits a
 * polynomial of the requested degree (default 1) by solving the normal
 * equation β = (XᵀX)⁻¹Xᵀy, and reports coefficients, R² (rounded to four
 * decimals) and a printable equation. Only terms up to the cubic are named
 * in `coefficients` and shown in `equation`.
 *
 * @param {object} adapter - Database adapter; only `executeReadQuery(sql)` is used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 *   The handler throws when there are fewer than degree + 1 data points, and
 *   `matrixInverse` throws when XᵀX is singular.
 */
function createRegressionTool(adapter) {
    return {
        name: "sqlite_stats_regression",
        description: "Perform linear or polynomial regression analysis between two columns.",
        group: "stats",
        inputSchema: RegressionSchema,
        outputSchema: z.object({
            success: z.boolean(),
            type: z.string(),
            sampleSize: z.number(),
            coefficients: z.object({
                intercept: z.number(),
                linear: z.number().optional(),
                quadratic: z.number().optional(),
                cubic: z.number().optional(),
            }),
            rSquared: z.number(),
            equation: z.string(),
        }),
        requiredScopes: ["read"],
        annotations: readOnly("Regression Analysis"),
        handler: async (params, _context) => {
            const input = RegressionSchema.parse(params);
            // Validate identifiers
            sanitizeIdentifier(input.table);
            sanitizeIdentifier(input.xColumn);
            sanitizeIdentifier(input.yColumn);
            // Security: validate the WHERE clause like the sibling stats tools do, and
            // parenthesize it so an OR inside it cannot escape the NOT NULL conditions
            let andClause = "";
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                andClause = ` AND (${input.whereClause})`;
            }
            const degree = input.degree ?? 1;
            // Fetch data points
            const sql = `
                SELECT "${input.xColumn}" as x, "${input.yColumn}" as y
                FROM "${input.table}"
                WHERE "${input.xColumn}" IS NOT NULL AND "${input.yColumn}" IS NOT NULL${andClause}
            `;
            const result = await adapter.executeReadQuery(sql);
            const pairs = (result.rows ?? []).map((r) => ({
                x: Number(r["x"]),
                y: Number(r["y"]),
            }));
            if (pairs.length < degree + 1) {
                throw new Error(`Insufficient data for degree ${degree} regression (need at least ${degree + 1} points, got ${pairs.length})`);
            }
            // Build design matrix X = [[1, x, x², ...], ...]
            const X = pairs.map((p) => Array.from({ length: degree + 1 }, (_, i) => Math.pow(p.x, i)));
            const y = pairs.map((p) => [p.y]);
            // Solve β = (XᵀX)⁻¹Xᵀy using normal equation
            const Xt = matrixTranspose(X);
            const XtXInv = matrixInverse(matrixMultiply(Xt, X));
            const XtY = matrixMultiply(Xt, y);
            const beta = matrixMultiply(XtXInv, XtY).map((r) => r[0] ?? 0);
            // Calculate R² (coefficient of determination)
            const meanY = pairs.reduce((s, p) => s + p.y, 0) / pairs.length;
            let ssRes = 0; // Sum of squared residuals
            let ssTot = 0; // Total sum of squares
            for (const p of pairs) {
                // Predicted value: β₀ + β₁x + β₂x² + ...
                let predicted = 0;
                for (let i = 0; i <= degree; i++) {
                    predicted += (beta[i] ?? 0) * Math.pow(p.x, i);
                }
                ssRes += Math.pow(p.y - predicted, 2);
                ssTot += Math.pow(p.y - meanY, 2);
            }
            // A constant y has no variance to explain; report a perfect fit
            const rSquared = ssTot === 0 ? 1 : 1 - ssRes / ssTot;
            // Build coefficients object
            const coefficients = {
                intercept: beta[0] ?? 0,
            };
            if (degree >= 1) {
                coefficients.linear = beta[1] ?? 0;
            }
            if (degree >= 2) {
                coefficients.quadratic = beta[2] ?? 0;
            }
            if (degree >= 3) {
                coefficients.cubic = beta[3] ?? 0;
            }
            // Build equation string, highest power first. Every term after the first
            // is joined with " + " or " - " so negative terms are spaced consistently.
            const terms = [];
            const appendTerm = (coefficient, suffix) => {
                const magnitude = Math.abs(coefficient).toFixed(4);
                if (terms.length === 0) {
                    terms.push(`${coefficient < 0 ? "-" : ""}${magnitude}${suffix}`);
                }
                else {
                    terms.push(`${coefficient < 0 ? " - " : " + "}${magnitude}${suffix}`);
                }
            };
            if (degree >= 3 && beta[3] !== undefined) {
                appendTerm(beta[3], "x³");
            }
            if (degree >= 2 && beta[2] !== undefined) {
                appendTerm(beta[2], "x²");
            }
            if (degree >= 1 && beta[1] !== undefined) {
                appendTerm(beta[1], "x");
            }
            appendTerm(beta[0] ?? 0, "");
            const equation = `y = ${terms.join("")}`;
            return {
                success: true,
                type: degree === 1 ? "linear" : `polynomial_${degree}`,
                sampleSize: pairs.length,
                coefficients,
                rSquared: Math.round(rSquared * 10000) / 10000,
                equation,
            };
        },
    };
}
|
|
1046
|
+
/**
 * Build the `sqlite_stats_hypothesis` tool definition.
 *
 * Supported `testType` values:
 *  - "ttest_one": one-sample t-test of `column` against `expectedMean` (default 0).
 *  - "ttest_two": Welch's two-sample t-test between `column` and `column2`.
 *  - anything else: chi-square test of independence between `column` and
 *    `groupColumn`.
 *
 * `significant` is always `pValue < 0.05`.
 *
 * @param {object} adapter - Database adapter; only `executeReadQuery(sql)` is used here.
 * @returns {object} Tool definition (name, schemas, required scopes, async `handler`).
 *   The handler throws on missing required columns, insufficient sample size
 *   or categories, and non-finite t statistics.
 */
function createHypothesisTool(adapter) {
    // ln Γ(z) via the Lanczos approximation (g = 5, six coefficients)
    const logGamma = (z) => {
        const lanczos = [
            76.18009172947146,
            -86.50532032941677,
            24.01409824083091,
            -1.231739572450155,
            0.1208650973866179e-2,
            -0.5395239384953e-5,
        ];
        const shifted = z + 5.5;
        const base = shifted - (z + 0.5) * Math.log(shifted);
        let series = 1.000000000190015;
        let denominator = z;
        for (const coefficient of lanczos) {
            denominator += 1;
            series += coefficient / denominator;
        }
        return -base + Math.log((2.5066282746310005 * series) / z);
    };
    // Upper-tail probability of the chi-square distribution:
    // Q(df/2, statistic/2), the regularized upper incomplete gamma function.
    const chiSquarePValue = (statistic, df) => {
        const MAX_ITERATIONS = 10000;
        const EPSILON = 1e-14;
        const TINY = 1e-300;
        const a = df / 2;
        const x = statistic / 2;
        if (Number.isNaN(x)) {
            return Number.NaN;
        }
        if (x <= 0) {
            return 1;
        }
        const prefactor = Math.exp(-x + a * Math.log(x) - logGamma(a));
        let upperTail;
        if (x < a + 1) {
            // Series for the lower tail converges quickly here
            let denominator = a;
            let term = 1 / a;
            let sum = term;
            for (let i = 0; i < MAX_ITERATIONS; i++) {
                denominator += 1;
                term *= x / denominator;
                sum += term;
                if (Math.abs(term) < Math.abs(sum) * EPSILON) {
                    break;
                }
            }
            upperTail = 1 - sum * prefactor;
        }
        else {
            // Continued fraction for the upper tail (modified Lentz method)
            let b = x + 1 - a;
            let c = 1 / TINY;
            let d = 1 / b;
            let h = d;
            for (let i = 1; i <= MAX_ITERATIONS; i++) {
                const an = -i * (i - a);
                b += 2;
                d = an * d + b;
                if (Math.abs(d) < TINY) {
                    d = TINY;
                }
                c = b + an / c;
                if (Math.abs(c) < TINY) {
                    c = TINY;
                }
                d = 1 / d;
                const delta = d * c;
                h *= delta;
                if (Math.abs(delta - 1) < EPSILON) {
                    break;
                }
            }
            upperTail = prefactor * h;
        }
        return Math.min(1, Math.max(0, upperTail));
    };
    return {
        name: "sqlite_stats_hypothesis",
        description: "Perform statistical hypothesis tests: one-sample t-test, two-sample t-test, or chi-square test.",
        group: "stats",
        inputSchema: HypothesisSchema,
        outputSchema: z.object({
            success: z.boolean(),
            testType: z.string(),
            statistic: z.number(),
            pValue: z.number(),
            degreesOfFreedom: z.number(),
            significant: z.boolean(),
            details: z.record(z.string(), z.unknown()),
        }),
        requiredScopes: ["read"],
        annotations: readOnly("Hypothesis Testing"),
        handler: async (params, _context) => {
            const input = HypothesisSchema.parse(params);
            // Validate identifiers
            sanitizeIdentifier(input.table);
            sanitizeIdentifier(input.column);
            // Security: validate the WHERE clause like the sibling stats tools do, and
            // parenthesize it so an OR inside it cannot escape the other conditions
            let whereClause = "";
            if (input.whereClause) {
                validateWhereClause(input.whereClause);
                whereClause = ` AND (${input.whereClause})`;
            }
            const tbl = `"${input.table}"`;
            // SQL fragment: sum of squared deviations of a column from its filtered mean
            const sumSquaredDeviations = (name) => {
                const mean = `(SELECT AVG("${name}") FROM ${tbl} WHERE "${name}" IS NOT NULL${whereClause})`;
                return `SUM(("${name}" - ${mean}) * ("${name}" - ${mean}))`;
            };
            if (input.testType === "ttest_one") {
                const expectedMean = input.expectedMean ?? 0;
                const statsResult = await adapter.executeReadQuery(`SELECT COUNT(*) as n, AVG("${input.column}") as mean,
                    ${sumSquaredDeviations(input.column)} /
                    (COUNT(*) - 1) as variance
                    FROM ${tbl} WHERE "${input.column}" IS NOT NULL${whereClause}`);
                const statsRow = statsResult.rows?.[0];
                const n = Number(statsRow?.["n"] ?? 0);
                const mean = Number(statsRow?.["mean"] ?? 0);
                const variance = Number(statsRow?.["variance"] ?? 0);
                const stdDev = Math.sqrt(variance);
                const df = n - 1;
                if (n < 2) {
                    throw new Error("Insufficient sample size for t-test");
                }
                const tStatistic = (mean - expectedMean) / (stdDev / Math.sqrt(n));
                // Validate result - Infinity or NaN indicates data issues (zero variance, non-numeric column, etc.)
                if (!Number.isFinite(tStatistic)) {
                    throw new Error(`Cannot compute t-statistic: stdDev=${stdDev.toFixed(4)}, n=${n}. ` +
                        `This may indicate zero variance, non-numeric data, or that column "${input.column}" does not exist.`);
                }
                const pValue = tDistPValue(tStatistic, df);
                return {
                    success: true,
                    testType: "ttest_one",
                    statistic: tStatistic,
                    pValue,
                    degreesOfFreedom: df,
                    significant: pValue < 0.05,
                    details: {
                        sampleMean: mean,
                        sampleStdDev: stdDev,
                        sampleSize: n,
                        expectedMean,
                    },
                };
            }
            if (input.testType === "ttest_two") {
                if (!input.column2) {
                    throw new Error("column2 is required for two-sample t-test");
                }
                sanitizeIdentifier(input.column2);
                // Get stats for both columns
                const statsResult = await adapter.executeReadQuery(`SELECT
                    COUNT("${input.column}") as n1, AVG("${input.column}") as mean1,
                    ${sumSquaredDeviations(input.column)} /
                    (COUNT("${input.column}") - 1) as var1,
                    COUNT("${input.column2}") as n2, AVG("${input.column2}") as mean2,
                    ${sumSquaredDeviations(input.column2)} /
                    (COUNT("${input.column2}") - 1) as var2
                    FROM ${tbl} WHERE 1=1${whereClause}`);
                const statsRow = statsResult.rows?.[0];
                const n1 = Number(statsRow?.["n1"] ?? 0);
                const n2 = Number(statsRow?.["n2"] ?? 0);
                const mean1 = Number(statsRow?.["mean1"] ?? 0);
                const mean2 = Number(statsRow?.["mean2"] ?? 0);
                const var1 = Number(statsRow?.["var1"] ?? 0);
                const var2 = Number(statsRow?.["var2"] ?? 0);
                if (n1 < 2 || n2 < 2) {
                    throw new Error("Insufficient sample size for t-test");
                }
                // Welch's t-test
                const tStatistic = (mean1 - mean2) / Math.sqrt(var1 / n1 + var2 / n2);
                // Validate result - Infinity or NaN indicates data issues
                if (!Number.isFinite(tStatistic)) {
                    throw new Error(`Cannot compute t-statistic: var1=${var1.toFixed(4)}, var2=${var2.toFixed(4)}. ` +
                        `This may indicate zero variance or non-numeric data.`);
                }
                // Welch-Satterthwaite degrees of freedom, with the pooled df as fallback
                const dfNum = Math.pow(var1 / n1 + var2 / n2, 2);
                const dfDen = Math.pow(var1 / n1, 2) / (n1 - 1) + Math.pow(var2 / n2, 2) / (n2 - 1);
                const df = Number.isFinite(dfNum / dfDen) ? dfNum / dfDen : n1 + n2 - 2;
                const pValue = tDistPValue(tStatistic, df);
                return {
                    success: true,
                    testType: "ttest_two",
                    statistic: tStatistic,
                    pValue,
                    degreesOfFreedom: df,
                    significant: pValue < 0.05,
                    details: {
                        group1: { mean: mean1, variance: var1, n: n1 },
                        group2: { mean: mean2, variance: var2, n: n2 },
                    },
                };
            }
            // Chi-square test
            if (!input.groupColumn) {
                throw new Error("groupColumn is required for chi-square test");
            }
            if (!/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(input.groupColumn)) {
                throw new Error("Invalid groupColumn name");
            }
            // Get contingency table
            const freqResult = await adapter.executeReadQuery(`SELECT "${input.column}" as col1, "${input.groupColumn}" as col2, COUNT(*) as observed
                FROM ${tbl}
                WHERE "${input.column}" IS NOT NULL AND "${input.groupColumn}" IS NOT NULL${whereClause}
                GROUP BY "${input.column}", "${input.groupColumn}"`);
            // Calculate totals and remember each observed cell
            const rowTotals = new Map();
            const colTotals = new Map();
            const cells = new Map();
            let grandTotal = 0;
            for (const row of freqResult.rows ?? []) {
                const col1 = String(row["col1"]);
                const col2 = String(row["col2"]);
                const observed = Number(row["observed"]);
                rowTotals.set(col1, (rowTotals.get(col1) ?? 0) + observed);
                colTotals.set(col2, (colTotals.get(col2) ?? 0) + observed);
                const cellRow = cells.get(col1) ?? new Map();
                cellRow.set(col2, (cellRow.get(col2) ?? 0) + observed);
                cells.set(col1, cellRow);
                grandTotal += observed;
            }
            // Validate sufficient categories for chi-square test. This also rejects an
            // empty result, where (0 - 1) * (0 - 1) would otherwise give df = 1.
            if (rowTotals.size < 2 || colTotals.size < 2) {
                throw new Error(`Insufficient categories for chi-square test: "${input.column}" has ${rowTotals.size} category(s), "${input.groupColumn}" has ${colTotals.size} category(s). Both columns must have at least 2 distinct values.`);
            }
            // Calculate chi-square statistic over the full row x column grid.
            // GROUP BY never returns combinations with zero rows, yet those
            // cells still contribute (0 - expected)² / expected.
            let chiSquare = 0;
            for (const [rowKey, rowTotal] of rowTotals) {
                for (const [colKey, colTotal] of colTotals) {
                    const expected = (rowTotal * colTotal) / grandTotal;
                    if (expected > 0) {
                        const observed = cells.get(rowKey)?.get(colKey) ?? 0;
                        chiSquare += Math.pow(observed - expected, 2) / expected;
                    }
                }
            }
            const df = (rowTotals.size - 1) * (colTotals.size - 1);
            const pValue = chiSquarePValue(chiSquare, df);
            return {
                success: true,
                testType: "chi_square",
                statistic: chiSquare,
                pValue,
                degreesOfFreedom: df,
                significant: pValue < 0.05,
                details: {
                    grandTotal,
                    rowCategories: rowTotals.size,
                    colCategories: colTotals.size,
                },
            };
        },
    };
}
|
|
1219
|
+
//# sourceMappingURL=stats.js.map
|