@pdpp/mcp-server 0.1.1 → 0.2.0

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -70,10 +70,15 @@ Plus one resource template: `pdpp://stream/{name}` → `GET /v1/streams/{name}`.
70
70
 
71
71
  `search` preserves the RS envelope in `structuredContent.data` and also returns
72
72
  ChatGPT-compatible `structuredContent.results[]` entries with `id`, `title`, `url`,
73
- and available source handles such as `connection_id`. Its `content[]` text also
73
+ and available source handles such as `connection_id`. Result ids are
74
+ self-contained fetch handles: when a hit carries a connection, the id is
75
+ `connection_id/stream:record_id`, so `fetch(id)` needs no separate
76
+ `connection_id` argument even on multi-source grants. Its `content[]` text also
74
77
  previews a bounded set of top hits so clients that cannot inspect structured
75
78
  tool output can still fetch a result.
76
- `fetch` accepts result ids in `stream:record_id` form and follows the
79
+ `fetch` accepts result ids in both the self-contained
80
+ `connection_id/stream:record_id` form and the legacy `stream:record_id` form
81
+ (optionally scoped by a `connection_id` argument) and follows the
77
82
  MCP/OpenAI search-fetch document contract: `structuredContent` is exactly
78
83
  `id`, `title`, `text`, `url`, and `metadata`, and `content[]` contains the same
79
84
  object as JSON text for hosts that hide structured output. It does not return a
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pdpp/mcp-server",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "Local stdio MCP adapter for grant-scoped PDPP reads and event-subscription management.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/server.js CHANGED
@@ -18,7 +18,7 @@ export const DEFAULT_SERVER_VERSION = '0.0.0';
18
18
  // here; tool descriptions stay concise and routing-specific.
19
19
  export const PDPP_MCP_INSTRUCTIONS =
20
20
  'PDPP tools are grant-scoped. Start with `schema`, then call `schema(stream)` after choosing a stream; add `connection_id` when a stream name appears under multiple sources or before full schema. Use `connection_id` from schema results or `available_connections` errors to disambiguate sources. Filters must be typed objects, not bracket strings. Page and narrow with `limit`, `cursor`, and `fields`; prefer `aggregate` or lexical `search` for exact terms. ' +
21
- 'The configured bearer limits every result; do not use owner or control-plane tokens for normal MCP access. Schema advertises valid fields, filter operators, expand relations, sort/count support, connection identities, and connector keys. Persist `connection_id`, not `grant_id`, across reconnects. ' +
21
+ 'The configured bearer limits every result; do not use owner or control-plane tokens for normal MCP access. Schema advertises valid fields, filter operators, expand relations, sort/count support, connection identities, and connector keys. Persist `connection_id`, not `grant_id`, across reconnects. Search result ids are self-contained `fetch` handles; pass them to `fetch` unchanged. ' +
22
22
  '`content[]` is the reliable model-visible guide and includes next cursors/bookmarks when present; `structuredContent` is a host-dependent machine envelope, not the only place to find next-step handles.';
23
23
 
24
24
  /**
package/src/tools.js CHANGED
@@ -156,6 +156,20 @@ class MalformedExpandLimitError extends Error {
156
156
  }
157
157
  }
158
158
 
159
+ // Thrown when a self-contained fetch id embeds one connection while the
160
+ // explicit `connection_id` argument names another. Silently preferring either
161
+ // handle could read the wrong source, so the disagreement is rejected with a
162
+ // typed, actionable error (`server.js` `toolHandlerError` reads `.code`).
163
+ class ConflictingConnectionIdError extends Error {
164
+ constructor(embedded, explicit) {
165
+ super(
166
+ `id embeds connection_id '${embedded}' but the connection_id argument is '${explicit}'; pass the self-contained id alone, or make both handles agree`,
167
+ );
168
+ this.name = 'ConflictingConnectionIdError';
169
+ this.code = 'conflicting_connection_id';
170
+ }
171
+ }
172
+
159
173
  // Translate a typed filter object into `[bracketKey, value]` query entries the
160
174
  // RsClient appends verbatim (`filter[field]=value`, `filter[field][op]=value`).
161
175
  function filterObjectToBracketEntries(filter) {
@@ -273,7 +287,7 @@ const SEARCH_OUTPUT_SCHEMA_SHAPE = {
273
287
  }).passthrough(),
274
288
  )
275
289
  .describe(
276
- 'ChatGPT-compatible flattened search results. Each entry carries `id` (default `stream:record_id`), `title`, `url`, and available source handles such as `connection_id`. Use `data` for compact envelope metadata.',
290
+ 'ChatGPT-compatible flattened search results. Each entry carries `id` (a self-contained fetch handle, `connection_id/stream:record_id` when the hit has a connection), `title`, `url`, and available source handles such as `connection_id`. Use `data` for compact envelope metadata.',
277
291
  ),
278
292
  };
279
293
 
@@ -494,7 +508,7 @@ export function buildTools({ rs, providerUrl }) {
494
508
  name: 'search',
495
509
  title: 'Search PDPP records',
496
510
  description:
497
- 'Search records via `GET /v1/search` (lexical), `/v1/search/semantic`, or `/v1/search/hybrid` per `mode`. Use lexical for exact known terms; semantic is approximate retrieval for conceptual matches. `structuredContent.results` carries the flattened page; `structuredContent.data` carries compact envelope metadata, not a duplicate hit array. Hits carry `connection_id` and `connector_key`. Pass `connection_id` to scope, omit to fan in. Page default is 25 hits; `limit` is capped at 100 (enforced at input, and fan-in packages apply it globally). Page forward with `cursor` (lexical/semantic; hybrid does not page). Per-mode capability support is advertised by `GET /v1/schema`. Read-only.',
511
+ 'Search records via `GET /v1/search` (lexical), `/v1/search/semantic`, or `/v1/search/hybrid` per `mode`. Use lexical for exact known terms; semantic is approximate retrieval for conceptual matches. `structuredContent.results` carries the flattened page; `structuredContent.data` carries compact envelope metadata, not a duplicate hit array. Hit ids are self-contained `fetch` handles (the connection is encoded in the id); hits also carry `connection_id` and `connector_key`. Pass `connection_id` to scope, omit to fan in. Page default is 25 hits; `limit` is capped at 100 (enforced at input, and fan-in packages apply it globally). Page forward with `cursor` (lexical/semantic; hybrid does not page). Per-mode capability support is advertised by `GET /v1/schema`. Read-only.',
498
512
  annotations: READ_ONLY_ANNOTATIONS,
499
513
  inputSchema: z
500
514
  .object({
@@ -528,11 +542,14 @@ export function buildTools({ rs, providerUrl }) {
528
542
  name: 'fetch',
529
543
  title: 'Fetch PDPP search result',
530
544
  description:
531
- 'Fetch a single OpenAI-compatible document by a result id from `search`. Id format: `stream:record_id` `GET /v1/streams/{stream}/records/{record_id}`. Returns document fields only (`id`, `title`, `text`, `url`, `metadata`); use `query_records` for canonical PDPP record envelopes. Use `fields` to project the source record before rendering document text/metadata; if the projection excludes every text-like field (`text`, `content`, `body`, `summary`), `text` contains compact JSON for the projected record rather than the full document body. Operational source handles (`id`, stream, `connection_id`, `connector_key`) remain available in `metadata`. On `ambiguous_connection` (409), pick a `connection_id` from `available_connections` in the error and retry. Read-only.',
545
+ 'Fetch a single OpenAI-compatible document by a result id from `search`. Id formats: self-contained `connection_id/stream:record_id` (pass it unchanged — no other argument is needed) or legacy `stream:record_id` plus an optional `connection_id` argument. Both resolve to `GET /v1/streams/{stream}/records/{record_id}`. Returns document fields only (`id`, `title`, `text`, `url`, `metadata`); use `query_records` for canonical PDPP record envelopes. Use `fields` to project the source record before rendering document text/metadata; if the projection excludes every text-like field (`text`, `content`, `body`, `summary`), `text` contains compact JSON for the projected record rather than the full document body. Operational source handles (`id`, stream, `connection_id`, `connector_key`) remain available in `metadata`. On `ambiguous_connection` (409), pick a `connection_id` from `available_connections` in the error and retry. Read-only.',
532
546
  annotations: READ_ONLY_ANNOTATIONS,
533
547
  inputSchema: z
534
548
  .object({
535
- id: z.string().min(1).describe('Search result id, usually `stream:record_id`.'),
549
+ id: z
550
+ .string()
551
+ .min(1)
552
+ .describe('Search result id: `connection_id/stream:record_id` (self-contained) or legacy `stream:record_id`.'),
536
553
  expand: z.array(z.string()).optional().describe(EXPAND_DESCRIPTION),
537
554
  expand_limit: z
538
555
  .record(z.string(), z.number().int().positive())
@@ -545,7 +562,15 @@ export function buildTools({ rs, providerUrl }) {
545
562
  outputSchema: z.object(FETCH_OUTPUT_SCHEMA_SHAPE),
546
563
  handler: async (args) => {
547
564
  const ref = parseRecordResultId(args.id);
565
+ if (ref.connectionId && typeof args?.connection_id === 'string' && args.connection_id !== ref.connectionId) {
566
+ throw new ConflictingConnectionIdError(ref.connectionId, args.connection_id);
567
+ }
548
568
  const query = applyExpandLimitToQuery(pickQuery(args, SUPPORTED_QUERY_KEYS), args?.expand_limit);
569
+ // A self-contained id carries its own connection scope; forward it so a
570
+ // multi-source grant resolves without a second model-carried handle.
571
+ if (ref.connectionId && query.connection_id === undefined) {
572
+ query.connection_id = ref.connectionId;
573
+ }
549
574
  const response = await rs.getJson(
550
575
  `/v1/streams/${encodeURIComponent(ref.stream)}/records/${encodeURIComponent(ref.recordId)}`,
551
576
  { query }
@@ -1576,6 +1601,10 @@ function truncateText(value, limit) {
1576
1601
  const SEARCH_TEXT_PREVIEW_LIMIT = 3;
1577
1602
  const SEARCH_TEXT_SNIPPET_CHAR_LIMIT = 140;
1578
1603
  const SEARCH_RESULT_SNIPPET_CHAR_LIMIT = 320;
1604
+ // A truncated id is a dead fetch handle, so the preview id bound must
1605
+ // comfortably exceed realistic `{connection_id}/{stream}:{record_id}` handles;
1606
+ // it exists only to keep pathological record keys from blowing the text budget.
1607
+ const SEARCH_TEXT_ID_CHAR_LIMIT = 200;
1579
1608
 
1580
1609
  function summarizeSearch(body, results) {
1581
1610
  const hasMore = envelopeField(body, 'has_more') === true ? ' has_more=true.' : '';
@@ -1585,7 +1614,7 @@ function summarizeSearch(body, results) {
1585
1614
  const previews = results.slice(0, SEARCH_TEXT_PREVIEW_LIMIT).map(formatSearchPreviewLine);
1586
1615
  const previewText = previews.length > 0 ? ` Top results:\n${previews.join('\n')}` : '';
1587
1616
  const fetchHint = previews.length > 0
1588
- ? '\nFetch a hit with `fetch` using the shown id; include connection_id when shown.'
1617
+ ? '\nFetch a hit with `fetch` using the shown id as-is; ids are self-contained. Pass connection_id only when shown separately.'
1589
1618
  : '';
1590
1619
  return `search: ${results.length} hit(s).${hasMore}${cursorText}${sourceMixText}${previewText}${fetchHint} Search envelope metadata: structuredContent.data; flattened results: structuredContent.results.`;
1591
1620
  }
@@ -1609,8 +1638,12 @@ function formatSearchSourceMix(body) {
1609
1638
  }
1610
1639
 
1611
1640
  function formatSearchPreviewLine(result, index) {
1612
- const parts = [`${index + 1}. id=${formatInlineValue(truncateText(result.id, 80))}`];
1613
- if (result.connection_id) parts.push(`connection_id=${formatInlineValue(truncateText(result.connection_id, 80))}`);
1641
+ const parts = [`${index + 1}. id=${formatInlineValue(truncateText(result.id, SEARCH_TEXT_ID_CHAR_LIMIT))}`];
1642
+ // The connection is normally embedded in the self-contained id; repeat it as
1643
+ // a separate handle only when the id could not encode it.
1644
+ if (result.connection_id && !String(result.id).startsWith(`${result.connection_id}/`)) {
1645
+ parts.push(`connection_id=${formatInlineValue(truncateText(result.connection_id, 80))}`);
1646
+ }
1614
1647
  if (result.connector_key) parts.push(`connector_key=${formatInlineValue(truncateText(result.connector_key, 60))}`);
1615
1648
  if (result.stream) parts.push(`stream=${formatInlineValue(truncateText(result.stream, 60))}`);
1616
1649
  if (result.title && result.title !== result.id) parts.push(`title=${formatScalar(truncateText(result.title, 80))}`);
@@ -1648,11 +1681,14 @@ function envelopeCount(body) {
1648
1681
  function normalizeSearchResults(body) {
1649
1682
  const candidates = searchCandidatesFromBody(body);
1650
1683
  return candidates.map((hit, index) => {
1651
- const id = resultIdForHit(hit, index);
1652
1684
  const source = objectValue(hit?.source) || {};
1653
1685
  const stream = streamForHit(hit);
1654
1686
  const recordKey = recordKeyForHit(hit);
1655
1687
  const connectionId = firstString(hit?.connection_id, hit?.connector_instance_id, source.connection_id);
1688
+ // The id is the single opaque handle a model carries into `fetch`; encode
1689
+ // the hit's connection so multi-source grants resolve without a second
1690
+ // model-carried `connection_id` field.
1691
+ const id = selfContainedResultId(resultIdForHit(hit, index), connectionId);
1656
1692
  const displayName = firstString(hit?.display_name, source.display_name);
1657
1693
  const connectorKey = firstString(hit?.connector_key, hit?.connector_id, source.connector_key, source.connector_id);
1658
1694
  const snippet = snippetForSearchHit(hit);
@@ -1767,11 +1803,34 @@ function snippetForSearchHit(hit) {
1767
1803
  );
1768
1804
  }
1769
1805
 
1806
+ // Result-id grammar. Two forms round-trip through `search` -> `fetch`:
1807
+ // self-contained: `{connection_id}/{stream}:{record_id}`
1808
+ // legacy: `{stream}:{record_id}`
1809
+ // `/` is the connection separator because `requireSafeName` guarantees it can
1810
+ // never appear inside a connection id, stream name, or record id — so a `/`
1811
+ // unambiguously marks the self-contained form and every legacy id keeps
1812
+ // parsing exactly as before. See:
1813
+ // openspec/changes/make-mcp-result-ids-self-contained
1770
1814
  function parseRecordResultId(id) {
1815
+ if (typeof id !== 'string' || id.length === 0) {
1816
+ throw new Error('id is required');
1817
+ }
1818
+ const connectionSeparator = id.indexOf('/');
1819
+ if (connectionSeparator === -1) {
1820
+ return { ...parseStreamRecordId(id), connectionId: null };
1821
+ }
1822
+ const connectionId = requireSafeName(id.slice(0, connectionSeparator), 'connection_id');
1823
+ return {
1824
+ ...parseStreamRecordId(id.slice(connectionSeparator + 1)),
1825
+ connectionId,
1826
+ };
1827
+ }
1828
+
1829
+ function parseStreamRecordId(id) {
1771
1830
  const value = requireSafeName(id, 'id');
1772
1831
  const separator = value.indexOf(':');
1773
1832
  if (separator <= 0 || separator === value.length - 1) {
1774
- throw new Error('id must use stream:record_id format');
1833
+ throw new Error('id must use connection_id/stream:record_id or stream:record_id format');
1775
1834
  }
1776
1835
  return {
1777
1836
  stream: requireSafeName(value.slice(0, separator), 'stream'),
@@ -1779,6 +1838,20 @@ function parseRecordResultId(id) {
1779
1838
  };
1780
1839
  }
1781
1840
 
1841
+ // Build the self-contained result id for a search hit. Only record-shaped
1842
+ // base ids (`stream:record_id`) are wrapped — opaque fallbacks (URLs,
1843
+ // `result:N`) and ids that already carry a connection segment pass through —
1844
+ // and connection ids that could not survive the grammar (`/` or `:` inside)
1845
+ // never produce a malformed handle.
1846
+ function selfContainedResultId(baseId, connectionId) {
1847
+ if (typeof baseId !== 'string' || baseId.length === 0) return baseId;
1848
+ if (typeof connectionId !== 'string' || connectionId.length === 0) return baseId;
1849
+ if (connectionId.includes('/') || connectionId.includes(':')) return baseId;
1850
+ if (baseId.includes('/')) return baseId;
1851
+ if (!parseRecordResultIdOrNull(baseId)) return baseId;
1852
+ return `${connectionId}/${baseId}`;
1853
+ }
1854
+
1782
1855
  function normalizeFetchedDocument(record, requestedId, providerUrl) {
1783
1856
  const payload = objectValue(record?.data);
1784
1857
  const id =
@@ -1953,7 +2026,10 @@ function urlForRecord(record, fallbackId, providerUrl) {
1953
2026
  const recordRef = parseRecordResultIdOrNull(fallbackId);
1954
2027
  if (recordRef) {
1955
2028
  const base = providerUrl.replace(/\/$/, '');
1956
- return `${base}/v1/streams/${encodeURIComponent(recordRef.stream)}/records/${encodeURIComponent(recordRef.recordId)}`;
2029
+ const recordUrl = `${base}/v1/streams/${encodeURIComponent(recordRef.stream)}/records/${encodeURIComponent(recordRef.recordId)}`;
2030
+ return recordRef.connectionId
2031
+ ? `${recordUrl}?connection_id=${encodeURIComponent(recordRef.connectionId)}`
2032
+ : recordUrl;
1957
2033
  }
1958
2034
  }
1959
2035
  return `pdpp://record/${encodeURIComponent(fallbackId)}`;