mcp-data-pipeline-connector 1.0.0

package/.env.example ADDED
@@ -0,0 +1,44 @@
+ # ─────────────────────────────────────────────────────────────────────────────
+ # mcp-data-pipeline-connector — Environment Variables
+ #
+ # Copy this file to .env and fill in the values for your environment.
+ # Never commit your .env file to version control.
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ # ── HTTP Transport Auth (src/auth.ts) ─────────────────────────────────────────
+ # Set ONE of the following to protect the /mcp HTTP endpoint.
+ # If neither is set, all requests pass through (open access — only safe on localhost).
+
+ # API key guard: clients must send this value in the X-API-Key request header.
+ MCP_API_KEY=your-api-key-here
+
+ # JWT secret guard: clients must send a valid HMAC-SHA256 signed Bearer token.
+ # If both MCP_API_KEY and MCP_JWT_SECRET are set, BOTH checks are enforced.
+ MCP_JWT_SECRET=your-jwt-secret-here
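+ # Example (hypothetical, not shipped with this package): mint a test token
+ # signed with the secret above, matching the HMAC-SHA256 check in src/auth.ts:
+ #   node -e "const c=require('node:crypto');const e=o=>Buffer.from(JSON.stringify(o)).toString('base64url');const h=e({alg:'HS256',typ:'JWT'}),p=e({sub:'dev'});console.log(h+'.'+p+'.'+c.createHmac('sha256',process.env.MCP_JWT_SECRET).update(h+'.'+p).digest('base64url'))"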
+
+ # ── PostgreSQL Connector (src/config-loader.ts / data-sources.yaml) ──────────
+ # Connection strings for PostgreSQL sources are referenced from your
+ # ~/.mcp/data-sources.yaml config file using ${ENV_VAR} substitution.
+ # Define your Postgres DSNs here and reference them in the YAML, e.g.:
+ #   connection_string: "${POSTGRES_CONNECTION_STRING}"
+ POSTGRES_CONNECTION_STRING=postgresql://user:password@localhost:5432/mydb
+
+ # ── REST API Connector (src/connectors/rest-connector.ts) ────────────────────
+ # REST sources can optionally send an Authorization header for authenticated APIs.
+ # In your data-sources.yaml set `auth_header: REST_API_AUTH_HEADER` (the env var name).
+ # The value of that env var is sent verbatim as the Authorization header, e.g.:
+ #   REST_API_AUTH_HEADER=Bearer my-token
+ #   REST_API_AUTH_HEADER=ApiKey my-key
+ REST_API_AUTH_HEADER=Bearer your-rest-api-token-here
+
+ # ── Airtable Connector (src/connectors/airtable-connector.ts) ────────────────
+ # Required when connecting any source of type "airtable".
+ # Generate a Personal Access Token at https://airtable.com/create/tokens
+ AIRTABLE_API_KEY=your-airtable-api-key-here
+
+ # ── Google Sheets Connector (src/connectors/sheets-connector.ts) ─────────────
+ # Required when connecting any source of type "sheets".
+ # Provide the full JSON content of a Google service account key file, as a
+ # single-line JSON string (replace newlines in the private_key with \n).
+ # Create a service account at https://console.cloud.google.com/iam-admin/serviceaccounts
+ GOOGLE_SERVICE_ACCOUNT_JSON={"type":"service_account","project_id":"your-project","private_key_id":"key-id","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"your-sa@your-project.iam.gserviceaccount.com","client_id":"123456789","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token"}
package/CHANGELOG.md ADDED
@@ -0,0 +1,82 @@
+ # Changelog
+
+ All notable changes to MCP Data Pipeline Connector will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [Unreleased]
+
+ ## [1.0.1] - 2026-03-23
+
+ ### Fixed
+
+ - **BigInt serialization crash in `query` tool**: DuckDB returns integer columns as JavaScript `BigInt` values, which `JSON.stringify` cannot serialize, causing a `"Do not know how to serialize a BigInt"` error. Fixed by converting `BigInt` values to `Number` in the row-mapping pass inside `CsvConnector.query()` (`src/connectors/csv-connector.ts`) and in the cross-source query path in `createServer()` (`src/server.ts`). Updated the corresponding test to assert integer columns are returned as plain numbers.
+
+ ## [1.0.0] - 2026-03-12
+
+ ### Added
+
+ - `.env.example` documenting `MCP_API_KEY`, `MCP_JWT_SECRET`, `POSTGRES_CONNECTION_STRING`, `REST_API_AUTH_HEADER`, `AIRTABLE_API_KEY`, and `GOOGLE_SERVICE_ACCOUNT_JSON`.
+
+ ### Changed
+
+ - `express` upgraded from `^4.x` to `^5.2.1`.
+ - `@types/node` upgraded from `^20.x` to `^24.12.0` (Node 24 LTS).
+ - `yargs` upgraded from `^17.x` to `^18.0.0`.
+ - Added `author`, `license`, `repository`, and `homepage` fields to `package.json`.
+
+ ### Security
+
+ - Resolved **GHSA-67mh-4wv8-2f99** (`esbuild` ≤ 0.24.2 dev-server cross-origin exposure) by upgrading `vitest` and `@vitest/coverage-v8` to `^4.1.0`. Affects local development only; not a production runtime concern.
+
+ ## [0.2.0] - 2026-03-12
+
+ ### Added
+
+ - **REST API connector** (`src/connectors/rest-connector.ts`): Connect GET endpoints with optional auth header (from env var), configurable pagination parameter, and in-memory TTL caching.
+ - **`--rest-cache-ttl` flag**: Controls REST response cache TTL in seconds (default 60). Pass `0` to disable caching entirely.
+ - **Cross-source joins via DuckDB**: `query` tool now accepts `source='_all'` or a `sources` array to query across multiple CSV sources in a single SQL statement using a shared in-memory DuckDB instance.
+ - **`transform` tool**: Apply filter, select, rename, and aggregate operations to a source table. Returns results as JSON or CSV.
+ - **`check_health` tool**: Check whether registered data sources are reachable. Runs file existence check (CSV), `SELECT 1` (Postgres), or HEAD request (REST). Returns per-source health status with timestamp.
+ - **Schema normalization** (`normalizeType` in `src/connectors/base.ts`): Maps source-specific types to a standard set — `string`, `integer`, `number`, `boolean`, `datetime`, `json`, `unknown`. All `getSchema()` responses now include a `normalized_type` field.
+ - **`healthCheck()` method** on all connectors (`DataConnector` interface extended).
+ - **`SourceRegistry.checkHealth()`**: Run health checks across all or a specific named source.
+ - **`SourceRegistry.getCrossSourceDb()`**: Returns a shared DuckDB in-memory connection with all CSV sources attached as views for cross-source query execution.
+ - **Result pagination**: `query` tool accepts `limit` and `offset` parameters. Response includes `total_returned`, `offset`, and `has_more` fields.
+ - **MCP Resources primitive**: `data://{source_name}/{table_name}` URIs expose source schemas as browsable MCP resources. Agents can list and read schemas without calling `get_schema` explicitly.
+ - **MCP Prompts primitive**: `explore-data` prompt template guides agents through schema discovery (list_sources → list_tables → get_schema) before writing any queries.
+ - **MCP logging notifications** (`notifications/message`): Emits `info` on source connect, `debug` with query execution time, and `warning` when health checks fail.
+ - **Progress notifications** (`notifications/progress`): Emitted for complex queries (JOIN, GROUP BY, ORDER BY, DISTINCT, UNION) at 0%, 30%, 60%, and 100%.
+ - **Streamable HTTP transport** (`src/http-server.ts`): `--http-port` flag (default 0 = disabled) exposes the MCP server via the MCP 2025 Streamable HTTP spec for shared team deployments.
+ - **ESLint + Prettier**: `eslint.config.js` and `.prettierrc.json` configured. `npm run lint` and `npm run format` scripts added.
+ - **GitHub Actions CI** (`.github/workflows/ci.yml`): Runs build, test, and lint on push/PR to `main`.
+ - **Expanded test coverage**: Phase 2 tests covering REST connector (mocked fetch), schema normalization, health checks, pagination, transform, and cross-source queries. Total: 61 tests across 3 suites.
+ - **`CsvConnector.getResolvedPath()`**: Exposes the resolved file path for cross-source DuckDB view registration.
+ - **`url` parameter** accepted by `connect_source` tool for REST sources.
+ - **Server version** bumped to `0.2.0`.
+
+ ## [0.1.0] - 2026-03-12
+
+ ### Added
+
+ - Initial public release of `mcp-data-pipeline-connector` (Phase 1 MVP).
+ - **CSV/JSON connector**: auto-schema detection via DuckDB `read_csv_auto` / `read_json_auto`.
+ - **PostgreSQL connector**: table and view enumeration via DuckDB postgres extension.
+ - **DuckDB-powered query engine**: embedded in-process SQL execution — no separate service required.
+ - **`connect_source` tool**: register data sources at runtime or from `~/.mcp/data-sources.yaml`.
+ - **`list_sources` tool**: show all registered sources and their connection status.
+ - **`list_tables` tool**: enumerate available tables per source or across all sources.
+ - **`get_schema` tool**: return column names and normalized types (`string`, `number`, `boolean`, `date`, `json`).
+ - **`query` tool**: DuckDB-powered SQL with `--max-rows` and `--read-only` flag support.
+ - **`~/.mcp/data-sources.yaml` config format** with `${ENV_VAR}` substitution for credentials.
+ - **`--read-only` flag** (default `true`): rejects non-SELECT statements at the SQL layer.
+ - **`--max-rows` flag** (default `1000`): prevents accidental large result sets.
+ - **`--config` / `--sources-config` flag**: custom path for the data sources YAML file.
+ - **Credential safety**: connection strings are never accepted as tool input arguments; never logged; sanitized in all output.
+ - stdio transport compliant with the MCP protocol.
+ - Strict JSON Schema validation on all tool inputs.
+ - Tool annotations: `query`, `list_sources`, `list_tables`, `get_schema` marked `readOnlyHint: true`; `connect_source` marked `readOnlyHint: false`.
+ - Proper MCP error codes: `invalid_params` for bad SQL or unknown source; `internal_error` for connection/query failures.
+ - TypeScript strict mode throughout.
+ - Vitest test suite with fixture CSV datasets covering connector behavior, read-only enforcement, max-rows truncation, and env var substitution.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 MCP Data Pipeline Connector contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,197 @@
+ # MCP Data Pipeline Connector
+
+ One MCP server for all your data sources — with cross-source SQL joins and no external query service. DuckDB runs embedded in-process, so you can join a CSV file against a Postgres table against a REST API response in a single query, entirely on your machine. Agents work with your data without needing source-specific knowledge or multiple MCP server configs.
+
+ [Tool reference](#tools) | [Configuration](#configuration) | [Contributing](#contributing) | [Troubleshooting](#troubleshooting)
+
+ ## Key features
+
+ - **Unified query interface**: SQL across all connected sources via DuckDB — including cross-source joins.
+ - **Multiple source types**: CSV/JSON files, PostgreSQL databases, and REST API endpoints in a single server.
+ - **Auto schema detection**: Infers column names and types from CSV headers and Postgres metadata.
+ - **REST caching**: REST API responses are cached with a configurable TTL to avoid redundant calls.
+ - **Schema normalization**: Maps source-specific types to a standard set (string, integer, number, boolean, datetime, json, unknown).
+ - **In-process query engine**: DuckDB runs embedded — no separate query service to install or manage.
+
+ ## Why this over separate per-source MCP servers?
+
+ The common alternative is running one MCP server per data source — a postgres MCP server, a CSV MCP server, a REST MCP server. Each works fine in isolation, but they can't talk to each other.
+
+ | | mcp-data-pipeline-connector | Separate per-source servers |
+ | ------------------ | --------------------------------------------------------------------------- | ------------------------------------------------- |
+ | Cross-source joins | Native SQL via embedded DuckDB | Not possible — agent must fetch and join manually |
+ | Config complexity | One server entry in your MCP config | One entry per source type |
+ | Query engine | DuckDB in-process — no install, no service | Depends on each source's query capabilities |
+ | Schema unification | Normalizes all types to string/integer/number/datetime/boolean/json/unknown | Each source uses its own type system |
+ | Data residency | All queries run locally | Depends on each connector's implementation |
+
+ If you're asking questions that span multiple data sources — "join my sales CSV with the users table" — this is the right tool. If you only ever query one source type, a dedicated single-source server is simpler.
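+
+ For example, a single `query` call can join two registered CSV sources. A sketch of the tool arguments, assuming hypothetical sources named `sales` and `regions` with illustrative column names:
+
+ ```json
+ {
+   "source": "_all",
+   "sql": "SELECT r.region, SUM(s.amount) AS total FROM sales s JOIN regions r ON s.region_id = r.id GROUP BY r.region"
+ }
+ ```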
+
+ ## Disclaimers
+
+ `mcp-data-pipeline-connector` connects to data sources you configure and executes queries against them on behalf of your agent. Ensure agents only have the database permissions they need. Connection strings are never logged or transmitted; keep them out of version-controlled config files. Use environment variables for credentials.
+
+ ## Requirements
+
+ - Node.js v20.19 or newer.
+ - npm.
+ - Optional: A running PostgreSQL instance for the Postgres connector.
+
+ ## Getting started
+
+ Add the following config to your MCP client:
+
+ ```json
+ {
+   "mcpServers": {
+     "data-connector": {
+       "command": "npx",
+       "args": ["-y", "mcp-data-pipeline-connector@latest"]
+     }
+   }
+ }
+ ```
+
+ Define your data sources in `~/.mcp/data-sources.yaml`:
+
+ ```yaml
+ sources:
+   - name: sales
+     type: csv
+     path: ~/data/sales-2025.csv
+   - name: users
+     type: postgres
+     connection_string: "${POSTGRES_URL}"
+     tables: [users, subscriptions]
+ ```
+
+ > Store connection strings in environment variables, not directly in the YAML file.
+
+ ### MCP Client configuration
+
+ Amp · Claude Code · Cline · Cursor · VS Code · Windsurf · Zed
+
+ ## Your first prompt
+
+ Place a CSV file at `~/data/sample.csv`, add it as a source in your config, then enter:
+
+ ```
+ What columns are in the sample table? Show me the first 5 rows.
+ ```
+
+ Your client should return the schema and a preview of the data.
+
+ ## Tools
+
+ ### Sources (2 tools)
+
+ - `connect_source`
+ - `list_sources`
+
+ ### Schema (2 tools)
+
+ - `list_tables`
+ - `get_schema`
+
+ ### Data (2 tools)
+
+ - `query`
+ - `transform`
+
+ ### Health (1 tool)
+
+ - `check_health`
+
+ ## Configuration
+
+ ### `--config` / `--sources-config`
+
+ Path to the YAML file defining data sources.
+
+ Type: `string`
+ Default: `~/.mcp/data-sources.yaml`
+
+ ### `--rest-cache-ttl`
+
+ Time-to-live in seconds for cached REST API responses. Set to `0` to disable caching.
+
+ Type: `number`
+ Default: `300`
+
+ ### `--max-rows`
+
+ Maximum number of rows returned by a single `query` call. Prevents accidental large result sets.
+
+ Type: `number`
+ Default: `1000`
+
+ ### `--read-only`
+
+ Reject any SQL statements that are not `SELECT` queries. Enforces read-only access across all sources.
+
+ Type: `boolean`
+ Default: `true`
+
+ Pass flags via the `args` property in your JSON config:
+
+ ```json
+ {
+   "mcpServers": {
+     "data-connector": {
+       "command": "npx",
+       "args": ["-y", "mcp-data-pipeline-connector@latest", "--max-rows=5000", "--rest-cache-ttl=60"]
+     }
+   }
+ }
+ ```
+
+ ## Verification
+
+ Before publishing a new version, verify the server with MCP Inspector to confirm all tools are exposed correctly and the protocol handshake succeeds.
+
+ **Interactive UI** (opens browser):
+
+ ```bash
+ npm run build && npm run inspect
+ ```
+
+ **CLI mode** (scripted / CI-friendly):
+
+ ```bash
+ # List all tools
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method tools/list
+
+ # List resources and prompts
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method resources/list
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method prompts/list
+
+ # Call a read-only tool
+ npx @modelcontextprotocol/inspector --cli node dist/index.js \
+   --method tools/call --tool-name list_sources
+
+ # Call a tool with arguments (key=value is a placeholder)
+ npx @modelcontextprotocol/inspector --cli node dist/index.js \
+   --method tools/call --tool-name list_sources --tool-arg key=value
+ ```
+
+ Run before publishing to catch regressions in tool registration and runtime startup.
+
+ ## Contributing
+
+ Each connector lives in `src/connectors/` and must implement the `DataConnector` interface; a minimal skeleton is sketched below. Add fixture data files under `tests/fixtures/` for integration tests. Never log connection strings or credentials — sanitize before any output or error message.
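+
+ The sketch below shows the connector surface as inferred from the published `AirtableConnector` type declarations; the `mysource` type tag and file location (e.g. `src/connectors/my-source.ts`) are illustrative, not part of the package:
+
+ ```typescript
+ import type { DataConnector } from "./base.js";
+ import type { ColumnInfo, QueryResult } from "../types.js";
+
+ export class MySourceConnector implements DataConnector {
+   readonly name: string;
+   readonly type = "mysource";
+   private connected = false;
+
+   constructor(config: { name: string }) {
+     this.name = config.name;
+   }
+
+   async connect(): Promise<void> {
+     // Open handles / validate credentials here.
+     this.connected = true;
+   }
+
+   async disconnect(): Promise<void> {
+     this.connected = false;
+   }
+
+   isConnected(): boolean {
+     return this.connected;
+   }
+
+   async healthCheck(): Promise<boolean> {
+     // Cheap reachability probe, surfaced through the check_health tool.
+     return this.connected;
+   }
+
+   async listTables(): Promise<string[]> {
+     return ["my_table"];
+   }
+
+   async getSchema(_table: string): Promise<ColumnInfo[]> {
+     // Include a normalized_type per the schema normalization rules.
+     return [];
+   }
+
+   async query(_sql: string, _maxRows: number): Promise<QueryResult> {
+     throw new Error(`query not implemented for ${this.name}`);
+   }
+ }
+ ```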
+
+ ```bash
+ npm install && npm test
+ ```
+
+ ## Listings
+
+ `mcp-data-pipeline-connector` is listed on [MCP Registry](https://registry.modelcontextprotocol.io) and [MCP Market](https://mcpmarket.io).
+
+ ## Troubleshooting
+
+ - **REST source fails to connect**: Confirm the URL is reachable and any auth env var is set. Use `check_health` to retest after startup.
+ - **Cross-source join returns no results**: Ensure both sources are CSV type and registered before using `source='_all'`.
+ - **Query returns `truncated: true`**: Increase `--max-rows` or add a `LIMIT` clause to your SQL.
@@ -0,0 +1,19 @@
+ export interface AccessPolicy {
+     source: string;
+     deny_columns?: string[];
+     row_filter?: string;
+ }
+ /**
+  * Apply column-level and row-level access control to a SQL query.
+  *
+  * - Removes denied columns from the SELECT list (regex-based).
+  * - Appends a WHERE clause for the row_filter.
+  *
+  * If no matching policy exists or the policy file is missing, returns sql unchanged.
+  *
+  * @param sql The original SQL query string.
+  * @param source The data source name being queried.
+  * @param _userClaims JWT claims from the authenticated user (reserved for future use).
+  * @param policyPath Optional path to the YAML policy file.
+  */
+ export declare function applyAccessControl(sql: string, source: string, _userClaims: Record<string, string>, policyPath?: string): string;
@@ -0,0 +1,81 @@
+ import * as fs from "node:fs";
+ import * as os from "node:os";
+ import * as path from "node:path";
+ import yaml from "js-yaml";
+ /**
+  * Load the access policy from the given path, or from the default location
+  * (~/.mcp/data-access-policy.yaml). Returns an empty array if the file does
+  * not exist or cannot be parsed.
+  */
+ function loadPolicy(policyPath) {
+     const resolvedPath = policyPath ?? path.join(os.homedir(), ".mcp", "data-access-policy.yaml");
+     if (!fs.existsSync(resolvedPath)) {
+         return [];
+     }
+     try {
+         // js-yaml is already a runtime dependency (config-loader uses it), so
+         // parsing the policy file adds no new dependency.
+         const content = fs.readFileSync(resolvedPath, "utf-8");
+         const parsed = yaml.load(content);
+         return parsed?.policies ?? [];
+     }
+     catch {
+         return [];
+     }
+ }
+ /**
+  * Apply column-level and row-level access control to a SQL query.
+  *
+  * - Removes denied columns from the SELECT list (regex-based).
+  * - Appends a WHERE clause for the row_filter.
+  *
+  * If no matching policy exists or the policy file is missing, returns sql unchanged.
+  *
+  * @param sql The original SQL query string.
+  * @param source The data source name being queried.
+  * @param _userClaims JWT claims from the authenticated user (reserved for future use).
+  * @param policyPath Optional path to the YAML policy file.
+  */
+ export function applyAccessControl(sql, source, _userClaims, policyPath) {
+     const policies = loadPolicy(policyPath);
+     const policy = policies.find((p) => p.source === source);
+     if (!policy) {
+         return sql;
+     }
+     let result = sql;
+     // ── Column-level filtering ────────────────────────────────────────────────
+     if (policy.deny_columns && policy.deny_columns.length > 0) {
+         for (const col of policy.deny_columns) {
+             // Match the column name (with optional alias, quotes, backticks or plain) in a SELECT list.
+             // Patterns handled:
+             //   `col`, "col", col, table.col, "table"."col", col AS alias, col alias
+             // We remove the column token plus any surrounding comma, being careful about
+             // SELECT * (we leave * unchanged — the policy is best-effort for explicit lists).
+             const escaped = col.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+             // Match: optional comma+space before, or comma+space after the column expression
+             const pattern = new RegExp(`(,\\s*(?:"?${escaped}"?|\\w+\\.${escaped})(?:\\s+(?:AS\\s+)?\\w+)?|(?:"?${escaped}"?|\\w+\\.${escaped})(?:\\s+(?:AS\\s+)?\\w+)?\\s*,?)`, "gi");
+             result = result.replace(pattern, "");
+         }
+         // Clean up any double commas or trailing commas before FROM
+         result = result.replace(/,\s*,/g, ",");
+         result = result.replace(/,\s*(FROM\b)/gi, " $1");
+         result = result.replace(/(SELECT\s+),/gi, "$1");
+     }
+     // ── Row-level filtering ───────────────────────────────────────────────────
+     if (policy.row_filter) {
+         if (/\bWHERE\b/i.test(result)) {
+             result = result.replace(/\bWHERE\b/i, `WHERE (${policy.row_filter}) AND`);
+         }
+         else {
+             // Insert WHERE before ORDER BY / GROUP BY / LIMIT / end of string
+             if (/\b(ORDER BY|GROUP BY|LIMIT|OFFSET)\b/i.test(result)) {
+                 result = result.replace(/\b(ORDER BY|GROUP BY|LIMIT|OFFSET)\b/i, `WHERE ${policy.row_filter} $1`);
+             }
+             else {
+                 result = `${result.trimEnd()} WHERE ${policy.row_filter}`;
+             }
+         }
+     }
+     return result;
+ }
package/dist/auth.d.ts ADDED
@@ -0,0 +1,9 @@
+ import type { RequestHandler } from "express";
+ /**
+  * Express middleware for API key and JWT authentication.
+  *
+  * - If MCP_API_KEY is set, validates the X-API-Key header.
+  * - If MCP_JWT_SECRET is set, validates the Authorization: Bearer <token> header using HMAC-SHA256.
+  * - If neither env var is set, all requests pass through (open access).
+  */
+ export declare function createAuthMiddleware(): RequestHandler;
package/dist/auth.js ADDED
@@ -0,0 +1,51 @@
+ import * as crypto from "node:crypto";
+ /**
+  * Express middleware for API key and JWT authentication.
+  *
+  * - If MCP_API_KEY is set, validates the X-API-Key header.
+  * - If MCP_JWT_SECRET is set, validates the Authorization: Bearer <token> header using HMAC-SHA256.
+  * - If neither env var is set, all requests pass through (open access).
+  */
+ export function createAuthMiddleware() {
+     return (req, res, next) => {
+         const apiKey = process.env["MCP_API_KEY"];
+         const jwtSecret = process.env["MCP_JWT_SECRET"];
+         // If neither guard is configured, pass through
+         if (!apiKey && !jwtSecret) {
+             next();
+             return;
+         }
+         // Check API key if configured
+         if (apiKey) {
+             const providedKey = req.headers["x-api-key"];
+             if (!providedKey || providedKey !== apiKey) {
+                 res.status(401).json({ error: "Unauthorized: invalid or missing API key" });
+                 return;
+             }
+         }
+         // Check JWT if configured
+         if (jwtSecret) {
+             const authHeader = req.headers["authorization"];
+             if (!authHeader || !authHeader.startsWith("Bearer ")) {
+                 res.status(401).json({ error: "Unauthorized: missing Bearer token" });
+                 return;
+             }
+             const token = authHeader.slice("Bearer ".length);
+             const parts = token.split(".");
+             if (parts.length !== 3) {
+                 res.status(401).json({ error: "Unauthorized: malformed JWT" });
+                 return;
+             }
+             const [h, p, s] = parts;
+             const expected = crypto
+                 .createHmac("sha256", jwtSecret)
+                 .update(`${h}.${p}`)
+                 .digest("base64url");
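+             // Note: only the HMAC signature is verified here; payload claims
+             // such as exp are not checked by this middleware.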
+             if (expected !== s) {
+                 res.status(401).json({ error: "Unauthorized: invalid JWT signature" });
+                 return;
+             }
+         }
+         next();
+     };
+ }
@@ -0,0 +1,14 @@
+ import type { DataSourcesConfig } from "./types.js";
+ /**
+  * Substitute ${ENV_VAR_NAME} placeholders with their environment variable values.
+  * Throws a descriptive error if a referenced variable is not set.
+  */
+ export declare function substituteEnvVars(value: string): string;
+ /**
+  * Load data sources from a YAML config file.
+  * Performs env var substitution on all string values.
+  *
+  * @param configPath Path to the YAML file. Defaults to ~/.mcp/data-sources.yaml
+  * @returns Parsed and substituted DataSourcesConfig, or null if the file doesn't exist
+  */
+ export declare function loadConfig(configPath?: string): DataSourcesConfig | null;
@@ -0,0 +1,91 @@
+ import { readFileSync, existsSync } from "node:fs";
+ import { resolve } from "node:path";
+ import { homedir } from "node:os";
+ import yaml from "js-yaml";
+ const DEFAULT_CONFIG_PATH = "~/.mcp/data-sources.yaml";
+ /**
+  * Substitute ${ENV_VAR_NAME} placeholders with their environment variable values.
+  * Throws a descriptive error if a referenced variable is not set.
+  */
+ export function substituteEnvVars(value) {
+     return value.replace(/\$\{([^}]+)\}/g, (_, name) => {
+         const val = process.env[name];
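+         // Empty strings are treated as unset so that misconfigured
+         // credentials fail fast rather than producing a broken DSN.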
+         if (val === undefined || val === "") {
+             throw new Error(`Environment variable ${name} is not set`);
+         }
+         return val;
+     });
+ }
+ /**
+  * Recursively walk an object/array and apply env var substitution to all string values.
+  */
+ function substituteDeep(obj) {
+     if (typeof obj === "string") {
+         return substituteEnvVars(obj);
+     }
+     if (Array.isArray(obj)) {
+         return obj.map(substituteDeep);
+     }
+     if (obj !== null && typeof obj === "object") {
+         const result = {};
+         for (const [key, val] of Object.entries(obj)) {
+             result[key] = substituteDeep(val);
+         }
+         return result;
+     }
+     return obj;
+ }
+ /**
+  * Resolve ~ to the user's home directory in a file path string.
+  */
+ function resolveConfigPath(configPath) {
+     if (configPath.startsWith("~")) {
+         return resolve(homedir(), configPath.slice(2));
+     }
+     return resolve(configPath);
+ }
+ /**
+  * Load data sources from a YAML config file.
+  * Performs env var substitution on all string values.
+  *
+  * @param configPath Path to the YAML file. Defaults to ~/.mcp/data-sources.yaml
+  * @returns Parsed and substituted DataSourcesConfig, or null if the file doesn't exist
+  */
+ export function loadConfig(configPath) {
+     const effectivePath = resolveConfigPath(configPath ?? DEFAULT_CONFIG_PATH);
+     if (!existsSync(effectivePath)) {
+         return null;
+     }
+     let raw;
+     try {
+         raw = readFileSync(effectivePath, "utf-8");
+     }
+     catch (err) {
+         throw new Error(`Failed to read config file '${effectivePath}': ${err.message}`);
+     }
+     let parsed;
+     try {
+         parsed = yaml.load(raw);
+     }
+     catch (err) {
+         throw new Error(`Failed to parse YAML config '${effectivePath}': ${err.message}`);
+     }
+     if (!parsed || typeof parsed !== "object" || !("sources" in parsed)) {
+         throw new Error(`Config file '${effectivePath}' must have a top-level 'sources' array`);
+     }
+     // Apply env var substitution to all string values
+     const substituted = substituteDeep(parsed);
+     if (!Array.isArray(substituted.sources)) {
+         throw new Error(`Config file '${effectivePath}': 'sources' must be an array`);
+     }
+     // Validate each source has required fields
+     for (const source of substituted.sources) {
+         if (!source.name || typeof source.name !== "string") {
+             throw new Error(`Each source in config must have a 'name' string field`);
+         }
+         if (!source.type || !["csv", "postgres", "rest"].includes(source.type)) {
+             throw new Error(`Source '${source.name}': type must be one of 'csv', 'postgres', 'rest'`);
+         }
+     }
+     return substituted;
+ }
@@ -0,0 +1,24 @@
+ import type { DataConnector } from "./base.js";
+ import type { ColumnInfo, QueryResult } from "../types.js";
+ export interface AirtableConfig {
+     type: "airtable";
+     base_id: string;
+     table_name: string;
+ }
+ export declare class AirtableConnector implements DataConnector {
+     readonly name: string;
+     readonly type = "airtable";
+     private config;
+     private connected;
+     private apiKey;
+     constructor(config: AirtableConfig & {
+         name: string;
+     });
+     connect(config?: AirtableConfig): Promise<void>;
+     disconnect(): Promise<void>;
+     isConnected(): boolean;
+     healthCheck(): Promise<boolean>;
+     listTables(): Promise<string[]>;
+     getSchema(table: string): Promise<ColumnInfo[]>;
+     query(sql: string, maxRows: number): Promise<QueryResult>;
+ }