@pdpp/mcp-server 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -2
- package/package.json +1 -1
- package/src/server.js +1 -1
- package/src/tools.js +86 -10
package/README.md
CHANGED
|
@@ -70,10 +70,15 @@ Plus one resource template: `pdpp://stream/{name}` → `GET /v1/streams/{name}`.
|
|
|
70
70
|
|
|
71
71
|
`search` preserves the RS envelope in `structuredContent.data` and also returns
|
|
72
72
|
ChatGPT-compatible `structuredContent.results[]` entries with `id`, `title`, `url`,
|
|
73
|
-
and available source handles such as `connection_id`.
|
|
73
|
+
and available source handles such as `connection_id`. Result ids are
|
|
74
|
+
self-contained fetch handles: when a hit carries a connection, the id is
|
|
75
|
+
`connection_id/stream:record_id`, so `fetch(id)` needs no separate
|
|
76
|
+
`connection_id` argument even on multi-source grants. The `search` tool's `content[]` text also
|
|
74
77
|
previews a bounded set of top hits so clients that cannot inspect structured
|
|
75
78
|
tool output can still fetch a result.
|
|
76
|
-
`fetch` accepts result ids in
|
|
79
|
+
`fetch` accepts result ids in both the self-contained
|
|
80
|
+
`connection_id/stream:record_id` form and the legacy `stream:record_id` form
|
|
81
|
+
(optionally scoped by a `connection_id` argument) and follows the
|
|
77
82
|
MCP/OpenAI search-fetch document contract: `structuredContent` is exactly
|
|
78
83
|
`id`, `title`, `text`, `url`, and `metadata`, and `content[]` contains the same
|
|
79
84
|
object as JSON text for hosts that hide structured output. It does not return a
|
package/package.json
CHANGED
package/src/server.js
CHANGED
|
@@ -18,7 +18,7 @@ export const DEFAULT_SERVER_VERSION = '0.0.0';
|
|
|
18
18
|
// here; tool descriptions stay concise and routing-specific.
|
|
19
19
|
export const PDPP_MCP_INSTRUCTIONS =
|
|
20
20
|
'PDPP tools are grant-scoped. Start with `schema`, then call `schema(stream)` after choosing a stream; add `connection_id` when a stream name appears under multiple sources or before full schema. Use `connection_id` from schema results or `available_connections` errors to disambiguate sources. Filters must be typed objects, not bracket strings. Page and narrow with `limit`, `cursor`, and `fields`; prefer `aggregate` or lexical `search` for exact terms. ' +
|
|
21
|
-
'The configured bearer limits every result; do not use owner or control-plane tokens for normal MCP access. Schema advertises valid fields, filter operators, expand relations, sort/count support, connection identities, and connector keys. Persist `connection_id`, not `grant_id`, across reconnects. ' +
|
|
21
|
+
'The configured bearer limits every result; do not use owner or control-plane tokens for normal MCP access. Schema advertises valid fields, filter operators, expand relations, sort/count support, connection identities, and connector keys. Persist `connection_id`, not `grant_id`, across reconnects. Search result ids are self-contained `fetch` handles; pass them to `fetch` unchanged. ' +
|
|
22
22
|
'`content[]` is the reliable model-visible guide and includes next cursors/bookmarks when present; `structuredContent` is a host-dependent machine envelope, not the only place to find next-step handles.';
|
|
23
23
|
|
|
24
24
|
/**
|
package/src/tools.js
CHANGED
|
@@ -156,6 +156,20 @@ class MalformedExpandLimitError extends Error {
|
|
|
156
156
|
}
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
+
// Thrown when a self-contained fetch id embeds one connection while the
|
|
160
|
+
// explicit `connection_id` argument names another. Silently preferring either
|
|
161
|
+
// handle could read the wrong source, so the disagreement is rejected with a
|
|
162
|
+
// typed, actionable error (`server.js` `toolHandlerError` reads `.code`).
|
|
163
|
+
class ConflictingConnectionIdError extends Error {
|
|
164
|
+
constructor(embedded, explicit) {
|
|
165
|
+
super(
|
|
166
|
+
`id embeds connection_id '${embedded}' but the connection_id argument is '${explicit}'; pass the self-contained id alone, or make both handles agree`,
|
|
167
|
+
);
|
|
168
|
+
this.name = 'ConflictingConnectionIdError';
|
|
169
|
+
this.code = 'conflicting_connection_id';
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
159
173
|
// Translate a typed filter object into `[bracketKey, value]` query entries the
|
|
160
174
|
// RsClient appends verbatim (`filter[field]=value`, `filter[field][op]=value`).
|
|
161
175
|
function filterObjectToBracketEntries(filter) {
|
|
@@ -273,7 +287,7 @@ const SEARCH_OUTPUT_SCHEMA_SHAPE = {
|
|
|
273
287
|
}).passthrough(),
|
|
274
288
|
)
|
|
275
289
|
.describe(
|
|
276
|
-
'ChatGPT-compatible flattened search results. Each entry carries `id` (
|
|
290
|
+
'ChatGPT-compatible flattened search results. Each entry carries `id` (a self-contained fetch handle, `connection_id/stream:record_id` when the hit has a connection), `title`, `url`, and available source handles such as `connection_id`. Use `data` for compact envelope metadata.',
|
|
277
291
|
),
|
|
278
292
|
};
|
|
279
293
|
|
|
@@ -494,7 +508,7 @@ export function buildTools({ rs, providerUrl }) {
|
|
|
494
508
|
name: 'search',
|
|
495
509
|
title: 'Search PDPP records',
|
|
496
510
|
description:
|
|
497
|
-
'Search records via `GET /v1/search` (lexical), `/v1/search/semantic`, or `/v1/search/hybrid` per `mode`. Use lexical for exact known terms; semantic is approximate retrieval for conceptual matches. `structuredContent.results` carries the flattened page; `structuredContent.data` carries compact envelope metadata, not a duplicate hit array.
|
|
511
|
+
'Search records via `GET /v1/search` (lexical), `/v1/search/semantic`, or `/v1/search/hybrid` per `mode`. Use lexical for exact known terms; semantic is approximate retrieval for conceptual matches. `structuredContent.results` carries the flattened page; `structuredContent.data` carries compact envelope metadata, not a duplicate hit array. Hit ids are self-contained `fetch` handles (the connection is encoded in the id); hits also carry `connection_id` and `connector_key`. Pass `connection_id` to scope, omit to fan in. Page default is 25 hits; `limit` is capped at 100 (enforced at input, and fan-in packages apply it globally). Page forward with `cursor` (lexical/semantic; hybrid does not page). Per-mode capability support is advertised by `GET /v1/schema`. Read-only.',
|
|
498
512
|
annotations: READ_ONLY_ANNOTATIONS,
|
|
499
513
|
inputSchema: z
|
|
500
514
|
.object({
|
|
@@ -528,11 +542,14 @@ export function buildTools({ rs, providerUrl }) {
|
|
|
528
542
|
name: 'fetch',
|
|
529
543
|
title: 'Fetch PDPP search result',
|
|
530
544
|
description:
|
|
531
|
-
'Fetch a single OpenAI-compatible document by a result id from `search`. Id
|
|
545
|
+
'Fetch a single OpenAI-compatible document by a result id from `search`. Id formats: self-contained `connection_id/stream:record_id` (pass it unchanged — no other argument is needed) or legacy `stream:record_id` plus an optional `connection_id` argument. Both resolve to `GET /v1/streams/{stream}/records/{record_id}`. Returns document fields only (`id`, `title`, `text`, `url`, `metadata`); use `query_records` for canonical PDPP record envelopes. Use `fields` to project the source record before rendering document text/metadata; if the projection excludes every text-like field (`text`, `content`, `body`, `summary`), `text` contains compact JSON for the projected record rather than the full document body. Operational source handles (`id`, stream, `connection_id`, `connector_key`) remain available in `metadata`. On `ambiguous_connection` (409), pick a `connection_id` from `available_connections` in the error and retry. Read-only.',
|
|
532
546
|
annotations: READ_ONLY_ANNOTATIONS,
|
|
533
547
|
inputSchema: z
|
|
534
548
|
.object({
|
|
535
|
-
id: z
|
|
549
|
+
id: z
|
|
550
|
+
.string()
|
|
551
|
+
.min(1)
|
|
552
|
+
.describe('Search result id: `connection_id/stream:record_id` (self-contained) or legacy `stream:record_id`.'),
|
|
536
553
|
expand: z.array(z.string()).optional().describe(EXPAND_DESCRIPTION),
|
|
537
554
|
expand_limit: z
|
|
538
555
|
.record(z.string(), z.number().int().positive())
|
|
@@ -545,7 +562,15 @@ export function buildTools({ rs, providerUrl }) {
|
|
|
545
562
|
outputSchema: z.object(FETCH_OUTPUT_SCHEMA_SHAPE),
|
|
546
563
|
handler: async (args) => {
|
|
547
564
|
const ref = parseRecordResultId(args.id);
|
|
565
|
+
if (ref.connectionId && typeof args?.connection_id === 'string' && args.connection_id !== ref.connectionId) {
|
|
566
|
+
throw new ConflictingConnectionIdError(ref.connectionId, args.connection_id);
|
|
567
|
+
}
|
|
548
568
|
const query = applyExpandLimitToQuery(pickQuery(args, SUPPORTED_QUERY_KEYS), args?.expand_limit);
|
|
569
|
+
// A self-contained id carries its own connection scope; forward it so a
|
|
570
|
+
// multi-source grant resolves without a second model-carried handle.
|
|
571
|
+
if (ref.connectionId && query.connection_id === undefined) {
|
|
572
|
+
query.connection_id = ref.connectionId;
|
|
573
|
+
}
|
|
549
574
|
const response = await rs.getJson(
|
|
550
575
|
`/v1/streams/${encodeURIComponent(ref.stream)}/records/${encodeURIComponent(ref.recordId)}`,
|
|
551
576
|
{ query }
|
|
@@ -1576,6 +1601,10 @@ function truncateText(value, limit) {
|
|
|
1576
1601
|
const SEARCH_TEXT_PREVIEW_LIMIT = 3;
|
|
1577
1602
|
const SEARCH_TEXT_SNIPPET_CHAR_LIMIT = 140;
|
|
1578
1603
|
const SEARCH_RESULT_SNIPPET_CHAR_LIMIT = 320;
|
|
1604
|
+
// A truncated id is a dead fetch handle, so the preview id bound must
|
|
1605
|
+
// comfortably exceed realistic `{connection_id}/{stream}:{record_id}` handles;
|
|
1606
|
+
// it exists only to keep pathological record keys from blowing the text budget.
|
|
1607
|
+
const SEARCH_TEXT_ID_CHAR_LIMIT = 200;
|
|
1579
1608
|
|
|
1580
1609
|
function summarizeSearch(body, results) {
|
|
1581
1610
|
const hasMore = envelopeField(body, 'has_more') === true ? ' has_more=true.' : '';
|
|
@@ -1585,7 +1614,7 @@ function summarizeSearch(body, results) {
|
|
|
1585
1614
|
const previews = results.slice(0, SEARCH_TEXT_PREVIEW_LIMIT).map(formatSearchPreviewLine);
|
|
1586
1615
|
const previewText = previews.length > 0 ? ` Top results:\n${previews.join('\n')}` : '';
|
|
1587
1616
|
const fetchHint = previews.length > 0
|
|
1588
|
-
? '\nFetch a hit with `fetch` using the shown id;
|
|
1617
|
+
? '\nFetch a hit with `fetch` using the shown id as-is; ids are self-contained. Pass connection_id only when shown separately.'
|
|
1589
1618
|
: '';
|
|
1590
1619
|
return `search: ${results.length} hit(s).${hasMore}${cursorText}${sourceMixText}${previewText}${fetchHint} Search envelope metadata: structuredContent.data; flattened results: structuredContent.results.`;
|
|
1591
1620
|
}
|
|
@@ -1609,8 +1638,12 @@ function formatSearchSourceMix(body) {
|
|
|
1609
1638
|
}
|
|
1610
1639
|
|
|
1611
1640
|
function formatSearchPreviewLine(result, index) {
|
|
1612
|
-
const parts = [`${index + 1}. id=${formatInlineValue(truncateText(result.id,
|
|
1613
|
-
|
|
1641
|
+
const parts = [`${index + 1}. id=${formatInlineValue(truncateText(result.id, SEARCH_TEXT_ID_CHAR_LIMIT))}`];
|
|
1642
|
+
// The connection is normally embedded in the self-contained id; repeat it as
|
|
1643
|
+
// a separate handle only when the id could not encode it.
|
|
1644
|
+
if (result.connection_id && !String(result.id).startsWith(`${result.connection_id}/`)) {
|
|
1645
|
+
parts.push(`connection_id=${formatInlineValue(truncateText(result.connection_id, 80))}`);
|
|
1646
|
+
}
|
|
1614
1647
|
if (result.connector_key) parts.push(`connector_key=${formatInlineValue(truncateText(result.connector_key, 60))}`);
|
|
1615
1648
|
if (result.stream) parts.push(`stream=${formatInlineValue(truncateText(result.stream, 60))}`);
|
|
1616
1649
|
if (result.title && result.title !== result.id) parts.push(`title=${formatScalar(truncateText(result.title, 80))}`);
|
|
@@ -1648,11 +1681,14 @@ function envelopeCount(body) {
|
|
|
1648
1681
|
function normalizeSearchResults(body) {
|
|
1649
1682
|
const candidates = searchCandidatesFromBody(body);
|
|
1650
1683
|
return candidates.map((hit, index) => {
|
|
1651
|
-
const id = resultIdForHit(hit, index);
|
|
1652
1684
|
const source = objectValue(hit?.source) || {};
|
|
1653
1685
|
const stream = streamForHit(hit);
|
|
1654
1686
|
const recordKey = recordKeyForHit(hit);
|
|
1655
1687
|
const connectionId = firstString(hit?.connection_id, hit?.connector_instance_id, source.connection_id);
|
|
1688
|
+
// The id is the single opaque handle a model carries into `fetch`; encode
|
|
1689
|
+
// the hit's connection so multi-source grants resolve without a second
|
|
1690
|
+
// model-carried `connection_id` field.
|
|
1691
|
+
const id = selfContainedResultId(resultIdForHit(hit, index), connectionId);
|
|
1656
1692
|
const displayName = firstString(hit?.display_name, source.display_name);
|
|
1657
1693
|
const connectorKey = firstString(hit?.connector_key, hit?.connector_id, source.connector_key, source.connector_id);
|
|
1658
1694
|
const snippet = snippetForSearchHit(hit);
|
|
@@ -1767,11 +1803,34 @@ function snippetForSearchHit(hit) {
|
|
|
1767
1803
|
);
|
|
1768
1804
|
}
|
|
1769
1805
|
|
|
1806
|
+
// Result-id grammar. Two forms round-trip through `search` -> `fetch`:
|
|
1807
|
+
// self-contained: `{connection_id}/{stream}:{record_id}`
|
|
1808
|
+
// legacy: `{stream}:{record_id}`
|
|
1809
|
+
// `/` is the connection separator because `requireSafeName` guarantees it can
|
|
1810
|
+
// never appear inside a connection id, stream name, or record id — so a `/`
|
|
1811
|
+
// unambiguously marks the self-contained form and every legacy id keeps
|
|
1812
|
+
// parsing exactly as before. See:
|
|
1813
|
+
// openspec/changes/make-mcp-result-ids-self-contained
|
|
1770
1814
|
function parseRecordResultId(id) {
|
|
1815
|
+
if (typeof id !== 'string' || id.length === 0) {
|
|
1816
|
+
throw new Error('id is required');
|
|
1817
|
+
}
|
|
1818
|
+
const connectionSeparator = id.indexOf('/');
|
|
1819
|
+
if (connectionSeparator === -1) {
|
|
1820
|
+
return { ...parseStreamRecordId(id), connectionId: null };
|
|
1821
|
+
}
|
|
1822
|
+
const connectionId = requireSafeName(id.slice(0, connectionSeparator), 'connection_id');
|
|
1823
|
+
return {
|
|
1824
|
+
...parseStreamRecordId(id.slice(connectionSeparator + 1)),
|
|
1825
|
+
connectionId,
|
|
1826
|
+
};
|
|
1827
|
+
}
|
|
1828
|
+
|
|
1829
|
+
function parseStreamRecordId(id) {
|
|
1771
1830
|
const value = requireSafeName(id, 'id');
|
|
1772
1831
|
const separator = value.indexOf(':');
|
|
1773
1832
|
if (separator <= 0 || separator === value.length - 1) {
|
|
1774
|
-
throw new Error('id must use stream:record_id format');
|
|
1833
|
+
throw new Error('id must use connection_id/stream:record_id or stream:record_id format');
|
|
1775
1834
|
}
|
|
1776
1835
|
return {
|
|
1777
1836
|
stream: requireSafeName(value.slice(0, separator), 'stream'),
|
|
@@ -1779,6 +1838,20 @@ function parseRecordResultId(id) {
|
|
|
1779
1838
|
};
|
|
1780
1839
|
}
|
|
1781
1840
|
|
|
1841
|
+
// Build the self-contained result id for a search hit. Only record-shaped
|
|
1842
|
+
// base ids (`stream:record_id`) are wrapped — opaque fallbacks (URLs,
|
|
1843
|
+
// `result:N`) and ids that already carry a connection segment pass through —
|
|
1844
|
+
// and connection ids that could not survive the grammar (`/` or `:` inside)
|
|
1845
|
+
// never produce a malformed handle.
|
|
1846
|
+
function selfContainedResultId(baseId, connectionId) {
|
|
1847
|
+
if (typeof baseId !== 'string' || baseId.length === 0) return baseId;
|
|
1848
|
+
if (typeof connectionId !== 'string' || connectionId.length === 0) return baseId;
|
|
1849
|
+
if (connectionId.includes('/') || connectionId.includes(':')) return baseId;
|
|
1850
|
+
if (baseId.includes('/')) return baseId;
|
|
1851
|
+
if (!parseRecordResultIdOrNull(baseId)) return baseId;
|
|
1852
|
+
return `${connectionId}/${baseId}`;
|
|
1853
|
+
}
|
|
1854
|
+
|
|
1782
1855
|
function normalizeFetchedDocument(record, requestedId, providerUrl) {
|
|
1783
1856
|
const payload = objectValue(record?.data);
|
|
1784
1857
|
const id =
|
|
@@ -1953,7 +2026,10 @@ function urlForRecord(record, fallbackId, providerUrl) {
|
|
|
1953
2026
|
const recordRef = parseRecordResultIdOrNull(fallbackId);
|
|
1954
2027
|
if (recordRef) {
|
|
1955
2028
|
const base = providerUrl.replace(/\/$/, '');
|
|
1956
|
-
|
|
2029
|
+
const recordUrl = `${base}/v1/streams/${encodeURIComponent(recordRef.stream)}/records/${encodeURIComponent(recordRef.recordId)}`;
|
|
2030
|
+
return recordRef.connectionId
|
|
2031
|
+
? `${recordUrl}?connection_id=${encodeURIComponent(recordRef.connectionId)}`
|
|
2032
|
+
: recordUrl;
|
|
1957
2033
|
}
|
|
1958
2034
|
}
|
|
1959
2035
|
return `pdpp://record/${encodeURIComponent(fallbackId)}`;
|