mcp-data-pipeline-connector 1.0.0

package/.env.example ADDED
@@ -0,0 +1,44 @@
+ # ─────────────────────────────────────────────────────────────────────────────
+ # mcp-data-pipeline-connector — Environment Variables
+ #
+ # Copy this file to .env and fill in the values for your environment.
+ # Never commit your .env file to version control.
+ # ─────────────────────────────────────────────────────────────────────────────
+
+ # ── HTTP Transport Auth (src/auth.ts) ─────────────────────────────────────────
+ # Set ONE of the following to protect the /mcp HTTP endpoint.
+ # If neither is set, all requests pass through (open access — only safe on localhost).
+
+ # API key guard: clients must send this value in the X-API-Key request header.
+ MCP_API_KEY=your-api-key-here
+
+ # JWT secret guard: clients must send a valid HMAC-SHA256 signed Bearer token.
+ # If both MCP_API_KEY and MCP_JWT_SECRET are set, BOTH checks are enforced.
+ MCP_JWT_SECRET=your-jwt-secret-here
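+ # Example (hypothetical, not shipped with this package): mint a test token
+ # signed with the secret above, matching the HMAC-SHA256 check in src/auth.ts:
+ #   node -e "const c=require('node:crypto');const e=o=>Buffer.from(JSON.stringify(o)).toString('base64url');const h=e({alg:'HS256',typ:'JWT'}),p=e({sub:'dev'});console.log(h+'.'+p+'.'+c.createHmac('sha256',process.env.MCP_JWT_SECRET).update(h+'.'+p).digest('base64url'))"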
+
+ # ── PostgreSQL Connector (src/config-loader.ts / data-sources.yaml) ──────────
+ # Connection strings for PostgreSQL sources are referenced from your
+ # ~/.mcp/data-sources.yaml config file using ${ENV_VAR} substitution.
+ # Define your Postgres DSNs here and reference them in the YAML, e.g.:
+ #   connection_string: "${POSTGRES_CONNECTION_STRING}"
+ POSTGRES_CONNECTION_STRING=postgresql://user:password@localhost:5432/mydb
+
+ # ── REST API Connector (src/connectors/rest-connector.ts) ────────────────────
+ # REST sources can optionally send an Authorization header for authenticated APIs.
+ # In your data-sources.yaml set `auth_header: REST_API_AUTH_HEADER` (the env var name).
+ # The value of that env var is sent verbatim as the Authorization header, e.g.:
+ #   REST_API_AUTH_HEADER=Bearer my-token
+ #   REST_API_AUTH_HEADER=ApiKey my-key
+ REST_API_AUTH_HEADER=Bearer your-rest-api-token-here
+
+ # ── Airtable Connector (src/connectors/airtable-connector.ts) ────────────────
+ # Required when connecting any source of type "airtable".
+ # Generate a Personal Access Token at https://airtable.com/create/tokens
+ AIRTABLE_API_KEY=your-airtable-api-key-here
+
+ # ── Google Sheets Connector (src/connectors/sheets-connector.ts) ─────────────
+ # Required when connecting any source of type "sheets".
+ # Provide the full JSON content of a Google service account key file, as a
+ # single-line JSON string (replace newlines in the private_key with \n).
+ # Create a service account at https://console.cloud.google.com/iam-admin/serviceaccounts
+ GOOGLE_SERVICE_ACCOUNT_JSON={"type":"service_account","project_id":"your-project","private_key_id":"key-id","private_key":"-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n","client_email":"your-sa@your-project.iam.gserviceaccount.com","client_id":"123456789","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token"}
package/CHANGELOG.md ADDED
@@ -0,0 +1,82 @@
+ # Changelog
+
+ All notable changes to MCP Data Pipeline Connector will be documented in this file.
+
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+ ## [Unreleased]
+
+ ## [1.0.1] - 2026-03-23
+
+ ### Fixed
+
+ - **BigInt serialization crash in `query` tool**: DuckDB returns integer columns as JavaScript `BigInt` values, which `JSON.stringify` cannot serialize, causing a `"Do not know how to serialize a BigInt"` error. Fixed by converting `BigInt` values to `Number` in the row-mapping pass inside `CsvConnector.query()` (`src/connectors/csv-connector.ts`) and in the cross-source query path in `createServer()` (`src/server.ts`). Updated the corresponding test to assert integer columns are returned as plain numbers.
+
+ ## [1.0.0] - 2026-03-12
+
+ ### Added
+
+ - `.env.example` documenting `MCP_API_KEY`, `MCP_JWT_SECRET`, `POSTGRES_CONNECTION_STRING`, `REST_API_AUTH_HEADER`, `AIRTABLE_API_KEY`, and `GOOGLE_SERVICE_ACCOUNT_JSON`.
+
+ ### Changed
+
+ - `express` upgraded from `^4.x` to `^5.2.1`.
+ - `@types/node` upgraded from `^20.x` to `^24.12.0` (Node 24 LTS).
+ - `yargs` upgraded from `^17.x` to `^18.0.0`.
+ - Added `author`, `license`, `repository`, and `homepage` fields to `package.json`.
+
+ ### Security
+
+ - Resolved **GHSA-67mh-4wv8-2f99** (`esbuild` ≤ 0.24.2 dev-server cross-origin exposure) by upgrading `vitest` and `@vitest/coverage-v8` to `^4.1.0`. Affects local development only; not a production runtime concern.
+
+ ## [0.2.0] - 2026-03-12
+
+ ### Added
+
+ - **REST API connector** (`src/connectors/rest-connector.ts`): Connect GET endpoints with optional auth header (from env var), configurable pagination parameter, and in-memory TTL caching.
+ - **`--rest-cache-ttl` flag**: Controls REST response cache TTL in seconds (default 60). Pass `0` to disable caching entirely.
+ - **Cross-source joins via DuckDB**: `query` tool now accepts `source='_all'` or a `sources` array to query across multiple CSV sources in a single SQL statement using a shared in-memory DuckDB instance.
+ - **`transform` tool**: Apply filter, select, rename, and aggregate operations to a source table. Returns results as JSON or CSV.
+ - **`check_health` tool**: Check whether registered data sources are reachable. Runs file existence check (CSV), `SELECT 1` (Postgres), or HEAD request (REST). Returns per-source health status with timestamp.
+ - **Schema normalization** (`normalizeType` in `src/connectors/base.ts`): Maps source-specific types to a standard set — `string`, `integer`, `number`, `boolean`, `datetime`, `json`, `unknown`. All `getSchema()` responses now include a `normalized_type` field.
+ - **`healthCheck()` method** on all connectors (`DataConnector` interface extended).
+ - **`SourceRegistry.checkHealth()`**: Run health checks across all or a specific named source.
+ - **`SourceRegistry.getCrossSourceDb()`**: Returns a shared DuckDB in-memory connection with all CSV sources attached as views for cross-source query execution.
+ - **Result pagination**: `query` tool accepts `limit` and `offset` parameters. Response includes `total_returned`, `offset`, and `has_more` fields.
+ - **MCP Resources primitive**: `data://{source_name}/{table_name}` URIs expose source schemas as browsable MCP resources. Agents can list and read schemas without calling `get_schema` explicitly.
+ - **MCP Prompts primitive**: `explore-data` prompt template guides agents through schema discovery (list_sources → list_tables → get_schema) before writing any queries.
+ - **MCP logging notifications** (`notifications/message`): Emits `info` on source connect, `debug` with query execution time, and `warning` when health checks fail.
+ - **Progress notifications** (`notifications/progress`): Emitted for complex queries (JOIN, GROUP BY, ORDER BY, DISTINCT, UNION) at 0%, 30%, 60%, and 100%.
+ - **Streamable HTTP transport** (`src/http-server.ts`): `--http-port` flag (default 0 = disabled) exposes the MCP server via the MCP 2025 Streamable HTTP spec for shared team deployments.
+ - **ESLint + Prettier**: `eslint.config.js` and `.prettierrc.json` configured. `npm run lint` and `npm run format` scripts added.
+ - **GitHub Actions CI** (`.github/workflows/ci.yml`): Runs build, test, and lint on push/PR to `main`.
+ - **Expanded test coverage**: Phase 2 tests covering REST connector (mocked fetch), schema normalization, health checks, pagination, transform, and cross-source queries. Total: 61 tests across 3 suites.
+ - **`CsvConnector.getResolvedPath()`**: Exposes the resolved file path for cross-source DuckDB view registration.
+ - **`url` parameter** accepted by `connect_source` tool for REST sources.
+ - **Server version** bumped to `0.2.0`.
+
+ ## [0.1.0] - 2026-03-12
+
+ ### Added
+
+ - Initial public release of `mcp-data-pipeline-connector` (Phase 1 MVP).
+ - **CSV/JSON connector**: auto-schema detection via DuckDB `read_csv_auto` / `read_json_auto`.
+ - **PostgreSQL connector**: table and view enumeration via DuckDB postgres extension.
+ - **DuckDB-powered query engine**: embedded in-process SQL execution — no separate service required.
+ - **`connect_source` tool**: register data sources at runtime or from `~/.mcp/data-sources.yaml`.
+ - **`list_sources` tool**: show all registered sources and their connection status.
+ - **`list_tables` tool**: enumerate available tables per source or across all sources.
+ - **`get_schema` tool**: return column names and normalized types (`string`, `number`, `boolean`, `date`, `json`).
+ - **`query` tool**: DuckDB-powered SQL with `--max-rows` and `--read-only` flag support.
+ - **`~/.mcp/data-sources.yaml` config format** with `${ENV_VAR}` substitution for credentials.
+ - **`--read-only` flag** (default `true`): rejects non-SELECT statements at the SQL layer.
+ - **`--max-rows` flag** (default `1000`): prevents accidental large result sets.
+ - **`--config` / `--sources-config` flag**: custom path for the data sources YAML file.
+ - **Credential safety**: connection strings are never accepted as tool input arguments; never logged; sanitized in all output.
+ - stdio transport compliant with the MCP protocol.
+ - Strict JSON Schema validation on all tool inputs.
+ - Tool annotations: `query`, `list_sources`, `list_tables`, `get_schema` marked `readOnlyHint: true`; `connect_source` marked `readOnlyHint: false`.
+ - Proper MCP error codes: `invalid_params` for bad SQL or unknown source; `internal_error` for connection/query failures.
+ - TypeScript strict mode throughout.
+ - Vitest test suite with fixture CSV datasets covering connector behavior, read-only enforcement, max-rows truncation, and env var substitution.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 MCP Data Pipeline Connector contributors
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,197 @@
+ # MCP Data Pipeline Connector
+
+ One MCP server for all your data sources — with cross-source SQL joins and no external query service. DuckDB runs embedded in-process, so you can join a CSV file against a Postgres table against a REST API response in a single query, entirely on your machine. Agents work with your data without needing source-specific knowledge or multiple MCP server configs.
+
+ [Tool reference](#tools) | [Configuration](#configuration) | [Contributing](#contributing) | [Troubleshooting](#troubleshooting)
+
+ ## Key features
+
+ - **Unified query interface**: SQL across all connected sources via DuckDB — including cross-source joins.
+ - **Multiple source types**: CSV/JSON files, PostgreSQL databases, and REST API endpoints in a single server.
+ - **Auto schema detection**: Infers column names and types from CSV headers and Postgres metadata.
+ - **REST caching**: REST API responses are cached with a configurable TTL to avoid redundant calls.
+ - **Schema normalization**: Maps source-specific types to a standard set (string, integer, number, boolean, datetime, json, unknown).
+ - **In-process query engine**: DuckDB runs embedded — no separate query service to install or manage.
+
+ ## Why this over separate per-source MCP servers?
+
+ The common alternative is running one MCP server per data source — a postgres MCP server, a CSV MCP server, a REST MCP server. Each works fine in isolation, but they can't talk to each other.
+
+ | | mcp-data-pipeline-connector | Separate per-source servers |
+ | ------------------ | --------------------------------------------------------------------------- | ------------------------------------------------- |
+ | Cross-source joins | Native SQL via embedded DuckDB | Not possible — agent must fetch and join manually |
+ | Config complexity | One server entry in your MCP config | One entry per source type |
+ | Query engine | DuckDB in-process — no install, no service | Depends on each source's query capabilities |
+ | Schema unification | Normalizes all types to string/integer/number/datetime/boolean/json/unknown | Each source uses its own type system |
+ | Data residency | All queries run locally | Depends on each connector's implementation |
+
+ If you're asking questions that span multiple data sources — "join my sales CSV with the users table" — this is the right tool. If you only ever query one source type, a dedicated single-source server is simpler.
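+
+ For example, a single `query` call can join two registered CSV sources. A sketch of the tool arguments, assuming hypothetical sources named `sales` and `regions` with illustrative column names:
+
+ ```json
+ {
+   "source": "_all",
+   "sql": "SELECT r.region, SUM(s.amount) AS total FROM sales s JOIN regions r ON s.region_id = r.id GROUP BY r.region"
+ }
+ ```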
+
+ ## Disclaimers
+
+ `mcp-data-pipeline-connector` connects to data sources you configure and executes queries against them on behalf of your agent. Ensure agents only have the database permissions they need. Connection strings are never logged or transmitted; keep them out of version-controlled config files. Use environment variables for credentials.
+
+ ## Requirements
+
+ - Node.js v20.19 or newer.
+ - npm.
+ - Optional: A running PostgreSQL instance for the Postgres connector.
+
+ ## Getting started
+
+ Add the following config to your MCP client:
+
+ ```json
+ {
+   "mcpServers": {
+     "data-connector": {
+       "command": "npx",
+       "args": ["-y", "mcp-data-pipeline-connector@latest"]
+     }
+   }
+ }
+ ```
+
+ Define your data sources in `~/.mcp/data-sources.yaml`:
+
+ ```yaml
+ sources:
+   - name: sales
+     type: csv
+     path: ~/data/sales-2025.csv
+   - name: users
+     type: postgres
+     connection_string: "${POSTGRES_URL}"
+     tables: [users, subscriptions]
+ ```
+
+ > Store connection strings in environment variables, not directly in the YAML file.
+
+ ### MCP Client configuration
+
+ Amp · Claude Code · Cline · Cursor · VS Code · Windsurf · Zed
+
+ ## Your first prompt
+
+ Place a CSV file at `~/data/sample.csv`, add it as a source in your config, then enter:
+
+ ```
+ What columns are in the sample table? Show me the first 5 rows.
+ ```
+
+ Your client should return the schema and a preview of the data.
+
+ ## Tools
+
+ ### Sources (2 tools)
+
+ - `connect_source`
+ - `list_sources`
+
+ ### Schema (2 tools)
+
+ - `list_tables`
+ - `get_schema`
+
+ ### Data (2 tools)
+
+ - `query`
+ - `transform`
+
+ ### Health (1 tool)
+
+ - `check_health`
+
+ ## Configuration
+
+ ### `--config` / `--sources-config`
+
+ Path to the YAML file defining data sources.
+
+ Type: `string`
+ Default: `~/.mcp/data-sources.yaml`
+
+ ### `--rest-cache-ttl`
+
+ Time-to-live in seconds for cached REST API responses. Set to `0` to disable caching.
+
+ Type: `number`
+ Default: `300`
+
+ ### `--max-rows`
+
+ Maximum number of rows returned by a single `query` call. Prevents accidental large result sets.
+
+ Type: `number`
+ Default: `1000`
+
+ ### `--read-only`
+
+ Reject any SQL statements that are not `SELECT` queries. Enforces read-only access across all sources.
+
+ Type: `boolean`
+ Default: `true`
+
+ Pass flags via the `args` property in your JSON config:
+
+ ```json
+ {
+   "mcpServers": {
+     "data-connector": {
+       "command": "npx",
+       "args": ["-y", "mcp-data-pipeline-connector@latest", "--max-rows=5000", "--rest-cache-ttl=60"]
+     }
+   }
+ }
+ ```
+
+ ## Verification
+
+ Before publishing a new version, verify the server with MCP Inspector to confirm all tools are exposed correctly and the protocol handshake succeeds.
+
+ **Interactive UI** (opens browser):
+
+ ```bash
+ npm run build && npm run inspect
+ ```
+
+ **CLI mode** (scripted / CI-friendly):
+
+ ```bash
+ # List all tools
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method tools/list
+
+ # List resources and prompts
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method resources/list
+ npx @modelcontextprotocol/inspector --cli node dist/index.js --method prompts/list
+
+ # Call a read-only tool
+ npx @modelcontextprotocol/inspector --cli node dist/index.js \
+   --method tools/call --tool-name list_sources
+
+ # Call a tool with arguments (key=value is a placeholder)
+ npx @modelcontextprotocol/inspector --cli node dist/index.js \
+   --method tools/call --tool-name list_sources --tool-arg key=value
+ ```
+
+ Run before publishing to catch regressions in tool registration and runtime startup.
+
+ ## Contributing
+
+ Each connector lives in `src/connectors/` and must implement the `DataConnector` interface; a minimal skeleton is sketched below. Add fixture data files under `tests/fixtures/` for integration tests. Never log connection strings or credentials — sanitize before any output or error message.
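+
+ The sketch below shows the connector surface as inferred from the published `AirtableConnector` type declarations; the `mysource` type tag and file location (e.g. `src/connectors/my-source.ts`) are illustrative, not part of the package:
+
+ ```typescript
+ import type { DataConnector } from "./base.js";
+ import type { ColumnInfo, QueryResult } from "../types.js";
+
+ export class MySourceConnector implements DataConnector {
+   readonly name: string;
+   readonly type = "mysource";
+   private connected = false;
+
+   constructor(config: { name: string }) {
+     this.name = config.name;
+   }
+
+   async connect(): Promise<void> {
+     // Open handles / validate credentials here.
+     this.connected = true;
+   }
+
+   async disconnect(): Promise<void> {
+     this.connected = false;
+   }
+
+   isConnected(): boolean {
+     return this.connected;
+   }
+
+   async healthCheck(): Promise<boolean> {
+     // Cheap reachability probe, surfaced through the check_health tool.
+     return this.connected;
+   }
+
+   async listTables(): Promise<string[]> {
+     return ["my_table"];
+   }
+
+   async getSchema(_table: string): Promise<ColumnInfo[]> {
+     // Include a normalized_type per the schema normalization rules.
+     return [];
+   }
+
+   async query(_sql: string, _maxRows: number): Promise<QueryResult> {
+     throw new Error(`query not implemented for ${this.name}`);
+   }
+ }
+ ```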
+
+ ```bash
+ npm install && npm test
+ ```
+
+ ## Listings
+
+ `mcp-data-pipeline-connector` is listed on [MCP Registry](https://registry.modelcontextprotocol.io) and [MCP Market](https://mcpmarket.io).
+
+ ## Troubleshooting
+
+ - **REST source fails to connect**: Confirm the URL is reachable and any auth env var is set. Use `check_health` to retest after startup.
+ - **Cross-source join returns no results**: Ensure both sources are CSV type and registered before using `source='_all'`.
+ - **Query returns `truncated: true`**: Increase `--max-rows` or add a `LIMIT` clause to your SQL.
@@ -0,0 +1,19 @@
+ export interface AccessPolicy {
+     source: string;
+     deny_columns?: string[];
+     row_filter?: string;
+ }
+ /**
+  * Apply column-level and row-level access control to a SQL query.
+  *
+  * - Removes denied columns from the SELECT list (regex-based).
+  * - Appends a WHERE clause for the row_filter.
+  *
+  * If no matching policy exists or the policy file is missing, returns sql unchanged.
+  *
+  * @param sql The original SQL query string.
+  * @param source The data source name being queried.
+  * @param _userClaims JWT claims from the authenticated user (reserved for future use).
+  * @param policyPath Optional path to the YAML policy file.
+  */
+ export declare function applyAccessControl(sql: string, source: string, _userClaims: Record<string, string>, policyPath?: string): string;
@@ -0,0 +1,81 @@
+ import * as fs from "node:fs";
+ import * as os from "node:os";
+ import * as path from "node:path";
+ import yaml from "js-yaml";
+ /**
+  * Load the access policy from the given path, or from the default location
+  * (~/.mcp/data-access-policy.yaml). Returns an empty array if the file does
+  * not exist or cannot be parsed.
+  */
+ function loadPolicy(policyPath) {
+     const resolvedPath = policyPath ?? path.join(os.homedir(), ".mcp", "data-access-policy.yaml");
+     if (!fs.existsSync(resolvedPath)) {
+         return [];
+     }
+     try {
+         // js-yaml is already a runtime dependency (config-loader uses it), so
+         // parsing the policy file adds no new dependency.
+         const content = fs.readFileSync(resolvedPath, "utf-8");
+         const parsed = yaml.load(content);
+         return parsed?.policies ?? [];
+     }
+     catch {
+         return [];
+     }
+ }
+ /**
+  * Apply column-level and row-level access control to a SQL query.
+  *
+  * - Removes denied columns from the SELECT list (regex-based).
+  * - Appends a WHERE clause for the row_filter.
+  *
+  * If no matching policy exists or the policy file is missing, returns sql unchanged.
+  *
+  * @param sql The original SQL query string.
+  * @param source The data source name being queried.
+  * @param _userClaims JWT claims from the authenticated user (reserved for future use).
+  * @param policyPath Optional path to the YAML policy file.
+  */
+ export function applyAccessControl(sql, source, _userClaims, policyPath) {
+     const policies = loadPolicy(policyPath);
+     const policy = policies.find((p) => p.source === source);
+     if (!policy) {
+         return sql;
+     }
+     let result = sql;
+     // ── Column-level filtering ────────────────────────────────────────────────
+     if (policy.deny_columns && policy.deny_columns.length > 0) {
+         for (const col of policy.deny_columns) {
+             // Match the column name (with optional alias, quotes, backticks or plain) in a SELECT list.
+             // Patterns handled:
+             //   `col`, "col", col, table.col, "table"."col", col AS alias, col alias
+             // We remove the column token plus any surrounding comma, being careful about
+             // SELECT * (we leave * unchanged — the policy is best-effort for explicit lists).
+             const escaped = col.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+             // Match: optional comma+space before, or comma+space after the column expression
+             const pattern = new RegExp(`(,\\s*(?:"?${escaped}"?|\\w+\\.${escaped})(?:\\s+(?:AS\\s+)?\\w+)?|(?:"?${escaped}"?|\\w+\\.${escaped})(?:\\s+(?:AS\\s+)?\\w+)?\\s*,?)`, "gi");
+             result = result.replace(pattern, "");
+         }
+         // Clean up any double commas or trailing commas before FROM
+         result = result.replace(/,\s*,/g, ",");
+         result = result.replace(/,\s*(FROM\b)/gi, " $1");
+         result = result.replace(/(SELECT\s+),/gi, "$1");
+     }
+     // ── Row-level filtering ───────────────────────────────────────────────────
+     if (policy.row_filter) {
+         if (/\bWHERE\b/i.test(result)) {
+             result = result.replace(/\bWHERE\b/i, `WHERE (${policy.row_filter}) AND`);
+         }
+         else {
+             // Insert WHERE before ORDER BY / GROUP BY / LIMIT / end of string
+             if (/\b(ORDER BY|GROUP BY|LIMIT|OFFSET)\b/i.test(result)) {
+                 result = result.replace(/\b(ORDER BY|GROUP BY|LIMIT|OFFSET)\b/i, `WHERE ${policy.row_filter} $1`);
+             }
+             else {
+                 result = `${result.trimEnd()} WHERE ${policy.row_filter}`;
+             }
+         }
+     }
+     return result;
+ }
package/dist/auth.d.ts ADDED
@@ -0,0 +1,9 @@
+ import type { RequestHandler } from "express";
+ /**
+  * Express middleware for API key and JWT authentication.
+  *
+  * - If MCP_API_KEY is set, validates the X-API-Key header.
+  * - If MCP_JWT_SECRET is set, validates the Authorization: Bearer <token> header using HMAC-SHA256.
+  * - If neither env var is set, all requests pass through (open access).
+  */
+ export declare function createAuthMiddleware(): RequestHandler;
package/dist/auth.js ADDED
@@ -0,0 +1,51 @@
+ import * as crypto from "node:crypto";
+ /**
+  * Express middleware for API key and JWT authentication.
+  *
+  * - If MCP_API_KEY is set, validates the X-API-Key header.
+  * - If MCP_JWT_SECRET is set, validates the Authorization: Bearer <token> header using HMAC-SHA256.
+  * - If neither env var is set, all requests pass through (open access).
+  */
+ export function createAuthMiddleware() {
+     return (req, res, next) => {
+         const apiKey = process.env["MCP_API_KEY"];
+         const jwtSecret = process.env["MCP_JWT_SECRET"];
+         // If neither guard is configured, pass through
+         if (!apiKey && !jwtSecret) {
+             next();
+             return;
+         }
+         // Check API key if configured
+         if (apiKey) {
+             const providedKey = req.headers["x-api-key"];
+             if (!providedKey || providedKey !== apiKey) {
+                 res.status(401).json({ error: "Unauthorized: invalid or missing API key" });
+                 return;
+             }
+         }
+         // Check JWT if configured
+         if (jwtSecret) {
+             const authHeader = req.headers["authorization"];
+             if (!authHeader || !authHeader.startsWith("Bearer ")) {
+                 res.status(401).json({ error: "Unauthorized: missing Bearer token" });
+                 return;
+             }
+             const token = authHeader.slice("Bearer ".length);
+             const parts = token.split(".");
+             if (parts.length !== 3) {
+                 res.status(401).json({ error: "Unauthorized: malformed JWT" });
+                 return;
+             }
+             const [h, p, s] = parts;
+             const expected = crypto
+                 .createHmac("sha256", jwtSecret)
+                 .update(`${h}.${p}`)
+                 .digest("base64url");
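+             // Note: only the HMAC signature is verified here; payload claims
+             // such as exp are not checked by this middleware.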
+             if (expected !== s) {
+                 res.status(401).json({ error: "Unauthorized: invalid JWT signature" });
+                 return;
+             }
+         }
+         next();
+     };
+ }
@@ -0,0 +1,14 @@
+ import type { DataSourcesConfig } from "./types.js";
+ /**
+  * Substitute ${ENV_VAR_NAME} placeholders with their environment variable values.
+  * Throws a descriptive error if a referenced variable is not set.
+  */
+ export declare function substituteEnvVars(value: string): string;
+ /**
+  * Load data sources from a YAML config file.
+  * Performs env var substitution on all string values.
+  *
+  * @param configPath Path to the YAML file. Defaults to ~/.mcp/data-sources.yaml
+  * @returns Parsed and substituted DataSourcesConfig, or null if the file doesn't exist
+  */
+ export declare function loadConfig(configPath?: string): DataSourcesConfig | null;
@@ -0,0 +1,91 @@
+ import { readFileSync, existsSync } from "node:fs";
+ import { resolve } from "node:path";
+ import { homedir } from "node:os";
+ import yaml from "js-yaml";
+ const DEFAULT_CONFIG_PATH = "~/.mcp/data-sources.yaml";
+ /**
+  * Substitute ${ENV_VAR_NAME} placeholders with their environment variable values.
+  * Throws a descriptive error if a referenced variable is not set.
+  */
+ export function substituteEnvVars(value) {
+     return value.replace(/\$\{([^}]+)\}/g, (_, name) => {
+         const val = process.env[name];
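+         // Empty strings are treated as unset so that misconfigured
+         // credentials fail fast rather than producing a broken DSN.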
+         if (val === undefined || val === "") {
+             throw new Error(`Environment variable ${name} is not set`);
+         }
+         return val;
+     });
+ }
+ /**
+  * Recursively walk an object/array and apply env var substitution to all string values.
+  */
+ function substituteDeep(obj) {
+     if (typeof obj === "string") {
+         return substituteEnvVars(obj);
+     }
+     if (Array.isArray(obj)) {
+         return obj.map(substituteDeep);
+     }
+     if (obj !== null && typeof obj === "object") {
+         const result = {};
+         for (const [key, val] of Object.entries(obj)) {
+             result[key] = substituteDeep(val);
+         }
+         return result;
+     }
+     return obj;
+ }
+ /**
+  * Resolve ~ to the user's home directory in a file path string.
+  */
+ function resolveConfigPath(configPath) {
+     if (configPath.startsWith("~")) {
+         return resolve(homedir(), configPath.slice(2));
+     }
+     return resolve(configPath);
+ }
+ /**
+  * Load data sources from a YAML config file.
+  * Performs env var substitution on all string values.
+  *
+  * @param configPath Path to the YAML file. Defaults to ~/.mcp/data-sources.yaml
+  * @returns Parsed and substituted DataSourcesConfig, or null if the file doesn't exist
+  */
+ export function loadConfig(configPath) {
+     const effectivePath = resolveConfigPath(configPath ?? DEFAULT_CONFIG_PATH);
+     if (!existsSync(effectivePath)) {
+         return null;
+     }
+     let raw;
+     try {
+         raw = readFileSync(effectivePath, "utf-8");
+     }
+     catch (err) {
+         throw new Error(`Failed to read config file '${effectivePath}': ${err.message}`);
+     }
+     let parsed;
+     try {
+         parsed = yaml.load(raw);
+     }
+     catch (err) {
+         throw new Error(`Failed to parse YAML config '${effectivePath}': ${err.message}`);
+     }
+     if (!parsed || typeof parsed !== "object" || !("sources" in parsed)) {
+         throw new Error(`Config file '${effectivePath}' must have a top-level 'sources' array`);
+     }
+     // Apply env var substitution to all string values
+     const substituted = substituteDeep(parsed);
+     if (!Array.isArray(substituted.sources)) {
+         throw new Error(`Config file '${effectivePath}': 'sources' must be an array`);
+     }
+     // Validate each source has required fields
+     for (const source of substituted.sources) {
+         if (!source.name || typeof source.name !== "string") {
+             throw new Error(`Each source in config must have a 'name' string field`);
+         }
+         if (!source.type || !["csv", "postgres", "rest"].includes(source.type)) {
+             throw new Error(`Source '${source.name}': type must be one of 'csv', 'postgres', 'rest'`);
+         }
+     }
+     return substituted;
+ }
@@ -0,0 +1,24 @@
+ import type { DataConnector } from "./base.js";
+ import type { ColumnInfo, QueryResult } from "../types.js";
+ export interface AirtableConfig {
+     type: "airtable";
+     base_id: string;
+     table_name: string;
+ }
+ export declare class AirtableConnector implements DataConnector {
+     readonly name: string;
+     readonly type = "airtable";
+     private config;
+     private connected;
+     private apiKey;
+     constructor(config: AirtableConfig & {
+         name: string;
+     });
+     connect(config?: AirtableConfig): Promise<void>;
+     disconnect(): Promise<void>;
+     isConnected(): boolean;
+     healthCheck(): Promise<boolean>;
+     listTables(): Promise<string[]>;
+     getSchema(table: string): Promise<ColumnInfo[]>;
+     query(sql: string, maxRows: number): Promise<QueryResult>;
+ }