@pdpp/mcp-server 0.0.0 → 0.1.0-beta.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +213 -0
- package/bin/pdpp-mcp-server.js +12 -0
- package/package.json +47 -3
- package/src/credentials.js +99 -0
- package/src/index.js +141 -0
- package/src/rs-client.js +167 -0
- package/src/server.js +162 -0
- package/src/tools.js +2074 -0
package/src/tools.js
ADDED
|
@@ -0,0 +1,2074 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
// MCP tool annotation hints attached to the tool definitions in this module
// (each one sets `annotations: READ_ONLY_ANNOTATIONS`).
//
// The same object instance is shared by all of those definitions, so it is
// frozen: a consumer that mutated one tool's annotations would otherwise
// change the hints reported for every tool.
const READ_ONLY_ANNOTATIONS = Object.freeze({
  readOnlyHint: true,
  destructiveHint: false,
  idempotentHint: true,
  openWorldHint: false,
});
|
|
9
|
+
|
|
10
|
+
// Names of the tools that make up the normal MCP surface. `selectNormalTools`
// validates a tool list against exactly this set. Frozen so importers cannot
// alter the expected surface.
const NORMAL_SURFACE_TOOL_NAMES = ['schema', 'query_records', 'aggregate', 'search', 'fetch'];

export const PDPP_MCP_TOOL_NAMES = Object.freeze(NORMAL_SURFACE_TOOL_NAMES);
|
|
17
|
+
|
|
18
|
+
/**
 * Check a tool list against `PDPP_MCP_TOOL_NAMES` and return the matching tools.
 *
 * @param {Array<{ name: string }>} tools - Candidate tool definitions.
 * @returns {Array<{ name: string }>} The tools whose names are expected, in input order.
 * @throws {Error} When an expected name is absent (reported first), or when the
 *   list contains a name outside the expected set.
 */
function selectNormalTools(tools) {
  const allowed = new Set(PDPP_MCP_TOOL_NAMES);
  const kept = [];
  const seenNames = new Set();
  const strayNames = [];

  // Single pass: sort every tool into "kept" or "stray" by its name.
  tools.forEach((tool) => {
    const { name } = tool;
    if (allowed.has(name)) {
      kept.push(tool);
      seenNames.add(name);
    } else {
      strayNames.push(name);
    }
  });

  // Missing tools take precedence over unexpected ones.
  const absentNames = PDPP_MCP_TOOL_NAMES.filter((name) => !seenNames.has(name));
  if (absentNames.length > 0) {
    throw new Error(`MCP normal surface is missing expected tools: ${absentNames.join(', ')}`);
  }
  if (strayNames.length > 0) {
    throw new Error(`MCP normal surface has unexpected tools: ${strayNames.join(', ')}`);
  }
  return kept;
}
|
|
33
|
+
|
|
34
|
+
// Query-param keys the MCP layer exposes from the REST public read
// vocabulary. Every member is forwarded to the RS; none is silently dropped
// by the MCP layer. `sort` and `count` are canonical public read primitives:
// `GET /v1/schema` advertises them and the reference runtime implements them
// where declared.
const PUBLIC_READ_QUERY_KEY_LIST = [
  'limit',
  'cursor',
  'order',
  'sort',
  'count',
  'filter',
  'fields',
  'view',
  'expand',
  'expand_limit',
  'changes_since',
  // Optional public connection identity. It is passed through verbatim so the
  // resource server is the one enforcing grant scope; the MCP layer never
  // invents or rewrites a connection_id. See:
  //   openspec/changes/expose-connection-identity-on-public-read
  'connection_id',
];

const SUPPORTED_QUERY_KEYS = new Set(PUBLIC_READ_QUERY_KEY_LIST);
|
|
56
|
+
|
|
57
|
+
// Query-param keys accepted for the REST aggregate endpoint
// (`/v1/streams/{stream}/aggregate`). They are forwarded verbatim so the
// resource server owns metric/grouping validation; the MCP layer never
// silently drops a member. See:
//   openspec/changes/add-aggregate-time-buckets-and-distinct
const AGGREGATE_QUERY_KEY_LIST = [
  'metric',
  'field',
  'group_by',
  'group_by_time',
  'granularity',
  'time_zone',
  'limit',
  'filter',
  'connection_id',
];

const SUPPORTED_AGGREGATE_QUERY_KEYS = new Set(AGGREGATE_QUERY_KEY_LIST);
|
|
73
|
+
|
|
74
|
+
const CONNECTION_ID_DESCRIPTION =
|
|
75
|
+
'Optional. Scope this call to one connection. Omit to fan in across all granted connections. Obtain from `schema` or the `available_connections` field in a typed 409 error — each entry includes `connector_key` and `connection_id`. Persist `connection_id` (not `grant_id`) across reconnects.';
|
|
76
|
+
|
|
77
|
+
const LIMIT_DESCRIPTION =
|
|
78
|
+
'Records per page. Omit for the default page of 25; the maximum is 100 (the spec-core §8 contract). Values above 100 are rejected here rather than silently clamped, so the page size you request is always the page size you get. Page forward with the returned `cursor` instead of asking for a larger page.';
|
|
79
|
+
|
|
80
|
+
const SEARCH_LIMIT_DESCRIPTION =
|
|
81
|
+
'Hits per page. Omit for the default page of 25; the maximum is 100 — the bound the published `/v1/search`, `/v1/search/semantic`, and `/v1/search/hybrid` contract declares and every mode honors (mirrored as `capabilities.{lexical,semantic,hybrid}_retrieval.max_limit` in `/.well-known/oauth-protected-resource` and `GET /v1/schema`). Values above 100 are rejected here rather than forwarded to be silently clamped by the RS, so the page size you request is always the page size you get. Page forward with the returned `cursor` (lexical and semantic page; hybrid does not) instead of asking for a larger page.';
|
|
82
|
+
|
|
83
|
+
const FIELDS_DESCRIPTION =
|
|
84
|
+
'Field allowlist for projection. Field paths must be declared by the stream; advertised by `GET /v1/schema` (`field_capabilities`). Unknown paths are rejected by the RS rather than silently widened.';
|
|
85
|
+
|
|
86
|
+
const VIEW_DESCRIPTION =
|
|
87
|
+
'Named projection. A stream-declared view id (advertised by `GET /v1/schema` under each stream\'s `views`) that projects the returned records down to the view\'s field set. Mutually exclusive with `fields` (passing both is rejected by the RS); an unknown view id is rejected rather than silently ignored. Use `view` for a curated projection and `fields` for an ad-hoc one.';
|
|
88
|
+
|
|
89
|
+
const FILTER_DESCRIPTION =
|
|
90
|
+
'Typed per-field filter. Pass an OBJECT keyed by field name — never a pre-encoded query string. Exact match: `{ "user_id": "U123" }`. Range: `{ "created_at": { "gte": "2026-01-01T00:00:00Z", "lt": "2026-02-01T00:00:00Z" } }`, where the operator is one of `gte`, `gt`, `lte`, `lt`. Multiple fields AND together. The adapter encodes this into the RS `filter[field]=value` / `filter[field][op]=value` query shape for you. Allowed fields and operators are advertised by `GET /v1/schema` (`field_capabilities`); unsupported fields or operators are rejected by the RS rather than silently ignored.';
|
|
91
|
+
|
|
92
|
+
const EXPAND_DESCRIPTION =
|
|
93
|
+
'One-hop inline expansion list. Each entry is a manifest-declared parent-to-child relation. Expandable relations and per-relation `expand_limit` caps are advertised by `GET /v1/schema` (`expand_capabilities`); unadvertised relations are rejected by the RS.';
|
|
94
|
+
|
|
95
|
+
const EXPAND_LIMIT_DESCRIPTION =
|
|
96
|
+
'Typed per-relation cap for has-many expansion, keyed by relation name. Pass an object such as `{ "messages": 3 }`; the adapter encodes it into the RS `expand_limit[relation]=N` query shape. The RS clamps to the per-relation `max_limit` advertised by `GET /v1/schema`.';
|
|
97
|
+
|
|
98
|
+
const ORDER_DESCRIPTION =
|
|
99
|
+
'Legacy page order for cursor-based pagination: `asc` or `desc`. Prefer canonical `sort` when `/v1/schema` advertises sortable fields; `order` remains accepted for clients that have not migrated.';
|
|
100
|
+
|
|
101
|
+
const SORT_DESCRIPTION =
|
|
102
|
+
'Canonical sign-prefix sort spec advertised by `GET /v1/schema` (e.g. `sort=-emitted_at`). The reference runtime supports the advertised cursor field; unsupported fields, conflicting directions, or sort/order disagreement are rejected with typed errors rather than treated as no-ops.';
|
|
103
|
+
|
|
104
|
+
const COUNT_DESCRIPTION =
|
|
105
|
+
'Canonical opt-in count grade (`none`, `estimated`, `exact`). Omit or use `none` for no count. `exact` returns `meta.count.kind="exact"` when supported; `estimated` may be upgraded to an exact count. Counts are page-independent and may be more expensive than the page itself.';
|
|
106
|
+
|
|
107
|
+
const CHANGES_SINCE_DESCRIPTION =
|
|
108
|
+
'Projection-safe incremental-sync bookmark. Use `beginning` for the initial changes feed, then pass the opaque `next_changes_since` value returned in the prior response. Do not pass an ISO timestamp; malformed bookmarks are rejected as `invalid_cursor`.';
|
|
109
|
+
|
|
110
|
+
// Range operators a typed filter may use. Kept in step with the RS
// (`record-filters.js` SUPPORTED_RANGE_OPERATORS) and with the published query
// contract (`apps/site/content/docs/spec-data-query-api.md`).
const SUPPORTED_RANGE_OPERATORS = new Set([
  'gte',
  'gt',
  'lte',
  'lt',
]);
|
|
114
|
+
|
|
115
|
+
// A single exact-filter value. The RS coerces by the field's declared JSON
|
|
116
|
+
// Schema type, so a scalar is the only meaningful shape; arrays/objects are not
|
|
117
|
+
// exact matches.
|
|
118
|
+
const FilterScalar = z.union([z.string(), z.number(), z.boolean()]);
|
|
119
|
+
|
|
120
|
+
// Typed filter input object. Each field maps either to a scalar (exact match)
|
|
121
|
+
// or to a range object keyed by `gte`/`gt`/`lte`/`lt`. This mirrors the parsed
|
|
122
|
+
// shape the RS receives from `qs.parse(filter[field][op]=value)`, so the
|
|
123
|
+
// adapter can encode it back into bracket query params with no semantic
|
|
124
|
+
// invention.
|
|
125
|
+
const TypedFilterInput = z.record(
|
|
126
|
+
z.string().min(1),
|
|
127
|
+
z.union([
|
|
128
|
+
FilterScalar,
|
|
129
|
+
z
|
|
130
|
+
.object({
|
|
131
|
+
gte: FilterScalar.optional(),
|
|
132
|
+
gt: FilterScalar.optional(),
|
|
133
|
+
lte: FilterScalar.optional(),
|
|
134
|
+
lt: FilterScalar.optional(),
|
|
135
|
+
})
|
|
136
|
+
.strict(),
|
|
137
|
+
]),
|
|
138
|
+
);
|
|
139
|
+
|
|
140
|
+
// Error raised for a structurally ambiguous typed filter object. It carries
// `code = 'invalid_filter'` so it can be surfaced as a typed MCP tool error
// (`server.js` `toolHandlerError` reads `.code`), giving the agent an
// actionable instruction rather than a silently-ignored filter.
class MalformedFilterError extends Error {
  constructor(message) {
    super(message);
    Object.assign(this, {
      name: 'MalformedFilterError',
      code: 'invalid_filter',
    });
  }
}
|
|
150
|
+
|
|
151
|
+
// Error raised for an unusable `expand_limit` argument. Carries
// `code = 'invalid_expand'` alongside the usual `name` and `message`.
class MalformedExpandLimitError extends Error {
  constructor(message) {
    super(message);
    Object.assign(this, {
      name: 'MalformedExpandLimitError',
      code: 'invalid_expand',
    });
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Translate a typed filter object into `[bracketKey, value]` query entries the
 * RsClient appends verbatim (`filter[field]=value`, `filter[field][op]=value`).
 *
 * Each field maps either to a scalar (exact match) or to a range object keyed
 * by the operators in `SUPPORTED_RANGE_OPERATORS`. Fields whose value is
 * `null`/`undefined` are skipped, as are `null`/`undefined` range operands.
 *
 * @param {Record<string, unknown>} filter - Typed filter object (non-null).
 * @returns {Array<[string, string]>} Bracket-encoded entries; values are stringified.
 * @throws {MalformedFilterError} When the object is empty, a field name holds
 *   bracket syntax, a value is an array, a range object has no usable operator,
 *   an operator is unsupported, or a range operand is not a scalar.
 */
function filterObjectToBracketEntries(filter) {
  if (Object.keys(filter).length === 0) {
    throw new MalformedFilterError(
      'filter object must include at least one field; omit filter entirely or pass a typed object such as filter: { "field": "value" }',
    );
  }
  const entries = [];
  for (const [field, spec] of Object.entries(filter)) {
    if (field.includes('[') || field.includes(']')) {
      throw new MalformedFilterError(
        `filter field '${field}' must be an advertised field name, not pre-encoded bracket syntax; pass filter: { "field": "value" }`,
      );
    }
    if (spec === undefined || spec === null) continue;
    // An array is neither an exact-match scalar nor a range object. Without
    // this guard it would be stringified ("a,b") and sent as an exact match.
    if (Array.isArray(spec)) {
      throw new MalformedFilterError(
        `filter value for '${field}' must be a scalar or a range object, not an array; pass filter: { "${field}": "value" } or filter: { "${field}": { "gte": <value> } }`,
      );
    }
    if (typeof spec === 'object') {
      const opEntries = Object.entries(spec).filter(([, v]) => v !== undefined && v !== null);
      if (opEntries.length === 0) {
        throw new MalformedFilterError(
          `filter range on '${field}' must include at least one of gte/gt/lte/lt; use the typed filter object, e.g. filter: { "${field}": { "gte": <value> } }`,
        );
      }
      for (const [op, value] of opEntries) {
        if (!SUPPORTED_RANGE_OPERATORS.has(op)) {
          throw new MalformedFilterError(
            `unsupported range operator '${op}' on '${field}'; supported operators are gte, gt, lte, lt`,
          );
        }
        // Nullish operands were filtered out above, so `object` here means a
        // nested object or array, which would stringify to garbage.
        if (typeof value === 'object') {
          throw new MalformedFilterError(
            `filter range operand '${op}' on '${field}' must be a scalar value, e.g. filter: { "${field}": { "${op}": <value> } }`,
          );
        }
        entries.push([`filter[${field}][${op}]`, String(value)]);
      }
      continue;
    }
    // Scalar exact match.
    entries.push([`filter[${field}]`, String(spec)]);
  }
  return entries;
}
|
|
197
|
+
|
|
198
|
+
/**
 * Turn a tool's `filter` argument into the `filter[...]` query entries the RS
 * expects.
 *
 * @param {unknown} filter - Raw `filter` argument; may be absent.
 * @returns {Array<[string, string]>} `[]` when no filter was supplied, otherwise
 *   whatever `filterObjectToBracketEntries` produces for the object.
 * @throws {MalformedFilterError} When `filter` is present but is not a
 *   non-array object.
 */
function resolveFilterQueryEntries(filter) {
  if (filter == null) {
    return [];
  }
  const isTypedObject = typeof filter === 'object' && !Array.isArray(filter);
  if (!isTypedObject) {
    throw new MalformedFilterError(
      'filter must be a typed object, e.g. filter: { "field": "value" } or filter: { "field": { "gte": <value> } }',
    );
  }
  return filterObjectToBracketEntries(filter);
}
|
|
209
|
+
|
|
210
|
+
/**
 * Write the bracket entries resolved from `filter` onto a query object built
 * by `pickQuery` (which deliberately drops the raw `filter` key).
 *
 * @param {Record<string, unknown>} query - Query object; mutated in place.
 * @param {unknown} filter - Raw tool `filter` argument, possibly absent.
 * @returns {Record<string, unknown>} The same `query` object, for call-site brevity.
 */
function applyFilterToQuery(query, filter) {
  const bracketEntries = resolveFilterQueryEntries(filter);
  bracketEntries.forEach(([param, encoded]) => {
    query[param] = encoded;
  });
  return query;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Encode a typed `expand_limit` object onto a query object as
 * `expand_limit[relation]=N` entries.
 *
 * @param {Record<string, unknown>} query - Query object; mutated in place on success.
 * @param {unknown} expandLimit - Object keyed by relation name, or
 *   `null`/`undefined` when no cap is being set.
 * @returns {Record<string, unknown>} The same `query` object.
 * @throws {MalformedExpandLimitError} When `expandLimit` is not a non-array
 *   object, is empty, or has a relation key containing bracket syntax.
 */
function applyExpandLimitToQuery(query, expandLimit) {
  if (expandLimit === undefined || expandLimit === null) return query;
  // Reject strings, arrays and other non-objects up front. `Object.entries`
  // would otherwise explode a pre-encoded string into per-character params
  // (`expand_limit[0]=m`, ...) or key an array by its indices.
  if (typeof expandLimit !== 'object' || Array.isArray(expandLimit)) {
    throw new MalformedExpandLimitError(
      'expand_limit must be a typed object keyed by relation name, e.g. expand_limit: { "relation": 3 }',
    );
  }
  const entries = Object.entries(expandLimit);
  if (entries.length === 0) {
    throw new MalformedExpandLimitError(
      'expand_limit must include at least one relation; omit expand_limit entirely when not setting a cap',
    );
  }
  // Validate every relation before writing anything, so a rejected argument
  // never leaves `query` partially populated.
  for (const [relation] of entries) {
    if (relation.includes('[') || relation.includes(']')) {
      throw new MalformedExpandLimitError(
        `expand_limit relation '${relation}' must be a relation name, not pre-encoded bracket syntax; pass expand_limit: { "relation": 3 }`,
      );
    }
  }
  for (const [relation, limit] of entries) {
    query[`expand_limit[${relation}]`] = String(limit);
  }
  return query;
}
|
|
238
|
+
|
|
239
|
+
const ConnectionIdInputShape = {
|
|
240
|
+
connection_id: z.string().min(1).describe(CONNECTION_ID_DESCRIPTION).optional(),
|
|
241
|
+
};
|
|
242
|
+
|
|
243
|
+
// Canonical envelope summary referenced from tool descriptions. Kept terse to
|
|
244
|
+
// stay within MCP token-budget norms; the authoritative schema vocabulary
|
|
245
|
+
// lives at `GET /v1/schema` and in the OpenAPI artifacts published by the
|
|
246
|
+
// reference-contract package.
|
|
247
|
+
const CANONICAL_SCHEMA_HINT =
|
|
248
|
+
'Per-stream filter operators, expandable relations, projection support, search modes, count support, granted `connection_id` values, and canonical `connector_key` metadata are advertised by `GET /v1/schema`. Consult it before constructing filter, sort, expand, fields, count, or source-disambiguation arguments.';
|
|
249
|
+
|
|
250
|
+
// outputSchema describes the MCP wrapper around the RS response body. We do
|
|
251
|
+
// NOT bake the RS body shape into the outputSchema because the canonical
|
|
252
|
+
// envelope is the contract source of truth and the RS still ships legacy
|
|
253
|
+
// envelopes during the migration window. Validating `data` as a generic
|
|
254
|
+
// object keeps the MCP wrapper honest without over-promising RS structure.
|
|
255
|
+
const READ_OUTPUT_SCHEMA_SHAPE = {
|
|
256
|
+
data: z
|
|
257
|
+
.union([z.record(z.string(), z.unknown()), z.array(z.unknown())])
|
|
258
|
+
.describe(
|
|
259
|
+
'Canonical RS response body. Follows the public read envelope advertised by `GET /v1/schema` plus operation-specific extensions; source metadata uses canonical `connector_key` and concrete `connection_id` values when present.',
|
|
260
|
+
),
|
|
261
|
+
provider_url: z.string().describe('RS base URL the MCP server was configured with.'),
|
|
262
|
+
request_id: z.string().nullable().describe('RS x-request-id when present.'),
|
|
263
|
+
};
|
|
264
|
+
|
|
265
|
+
const SEARCH_OUTPUT_SCHEMA_SHAPE = {
|
|
266
|
+
...READ_OUTPUT_SCHEMA_SHAPE,
|
|
267
|
+
results: z
|
|
268
|
+
.array(
|
|
269
|
+
z.object({
|
|
270
|
+
id: z.string(),
|
|
271
|
+
title: z.string(),
|
|
272
|
+
url: z.string(),
|
|
273
|
+
}).passthrough(),
|
|
274
|
+
)
|
|
275
|
+
.describe(
|
|
276
|
+
'ChatGPT-compatible flattened search results. Each entry carries `id` (default `stream:record_id`), `title`, `url`, and available source handles such as `connection_id`. Use `data` for compact envelope metadata.',
|
|
277
|
+
),
|
|
278
|
+
};
|
|
279
|
+
|
|
280
|
+
const FETCH_OUTPUT_SCHEMA_SHAPE = {
|
|
281
|
+
id: z.string(),
|
|
282
|
+
title: z.string(),
|
|
283
|
+
text: z.string(),
|
|
284
|
+
url: z.string(),
|
|
285
|
+
metadata: z.record(z.string(), z.unknown()),
|
|
286
|
+
};
|
|
287
|
+
|
|
288
|
+
const DISCOVERY_STREAM_SUMMARY_LIMIT = 50;
|
|
289
|
+
const DISCOVERY_FIELD_SUMMARY_LIMIT = 16;
|
|
290
|
+
const DISCOVERY_CONNECTION_SUMMARY_LIMIT = 8;
|
|
291
|
+
const FIELD_CAPABILITY_FLAG_LEGEND = {
|
|
292
|
+
t: 'declared type',
|
|
293
|
+
eq: 'exact filter supported',
|
|
294
|
+
r: 'range filter operators',
|
|
295
|
+
lex: 'lexical search field',
|
|
296
|
+
sem: 'semantic search field',
|
|
297
|
+
a: 'aggregation capabilities',
|
|
298
|
+
'g=false': 'field is not granted',
|
|
299
|
+
};
|
|
300
|
+
|
|
301
|
+
// The `schema` tool's default `structuredContent.data` is a COMPACT projection
|
|
302
|
+
// of the RS `/v1/schema` document, not the verbatim body. A real owner's
|
|
303
|
+
// grant-scoped schema can exceed 2 MB once every connector advertises
|
|
304
|
+
// per-field JSON Schema, so returning it verbatim as the default agent-facing
|
|
305
|
+
// payload blows the context budget. The compact projection keeps the discovery
|
|
306
|
+
// path `schema -> schema(stream) -> schema(stream, connection_id) -> query_records`
|
|
307
|
+
// cheap by dropping the heavy per-field JSON Schema blobs while preserving the
|
|
308
|
+
// capability flags, connection identity, and connector metadata an agent needs
|
|
309
|
+
// to build a query.
|
|
310
|
+
// Exhaustive JSON remains available for one source via
|
|
311
|
+
// `schema(stream, connection_id, detail: "full")`. See:
|
|
312
|
+
// openspec/changes/expose-connection-identity-on-public-read/tasks.md (§7
|
|
313
|
+
// MCP discovery/schema token-efficiency target).
|
|
314
|
+
const SCHEMA_DETAIL_DESCRIPTION =
|
|
315
|
+
'Response detail grade for `structuredContent.data`. `compact` (default) returns a token-efficient projection: per-stream field names with capability flags, expandable relation names, connection identities, and connector metadata, with the heavy per-field JSON Schema blobs dropped. `full` returns deduped exhaustive schema for one source, preserving raw per-field JSON Schema while removing duplicate top-level stream arrays; it requires `stream`, and `connection_id` when that stream name is shared. The concise `content[]` text summary is identical for both grades.';
|
|
316
|
+
|
|
317
|
+
const SCHEMA_STREAM_DESCRIPTION =
|
|
318
|
+
'Optional stream name from the compact `schema` stream list. Omit to describe every granted stream. Stream names are not globally unique; pair with `connection_id` when you need one configured source.';
|
|
319
|
+
|
|
320
|
+
const SCHEMA_CONNECTION_ID_DESCRIPTION =
|
|
321
|
+
'Optional. Scope schema detail to one configured connection when a stream name is shared by multiple connectors or connections. Obtain from schema results or typed ambiguity errors. This is source identity, not a profile selector.';
|
|
322
|
+
|
|
323
|
+
/**
 * Defensively normalise the `schema` tool's `detail` grade.
 *
 * A missing value (`undefined` or `null`) selects the compact default, and the
 * two valid grades are returned unchanged. Any other value raises instead of
 * being quietly treated as `compact`, as defense-in-depth behind the Zod enum.
 *
 * @param {unknown} value - Raw `detail` argument.
 * @returns {'compact' | 'full'} The resolved grade.
 * @throws {Error} When `value` is present but is neither `'compact'` nor `'full'`.
 */
function resolveSchemaDetail(value) {
  switch (value) {
    case undefined:
    case null:
      return 'compact';
    case 'compact':
    case 'full':
      return value;
    default:
      throw new Error(`Invalid schema detail: ${JSON.stringify(value)} (expected 'compact' or 'full')`);
  }
}
|
|
333
|
+
|
|
334
|
+
/**
|
|
335
|
+
* Build the static tool definitions. Descriptions are constant — they are never derived
|
|
336
|
+
* from manifest, stream, or record data. RS payloads are returned as data; nothing is
|
|
337
|
+
* interpolated into instructions to the model.
|
|
338
|
+
*/
|
|
339
|
+
export function buildTools({ rs, providerUrl }) {
|
|
340
|
+
const tools = [
|
|
341
|
+
{
|
|
342
|
+
name: 'schema',
|
|
343
|
+
title: 'Get PDPP schema',
|
|
344
|
+
description:
|
|
345
|
+
'Return the grant-scoped PDPP schema document from `GET /v1/schema`. This is the canonical capability source: streams, canonical connector-type metadata (`connector_key`), per-field filter operators (`field_capabilities`), expandable relations (`expand_capabilities`), projection support, search modes, pagination support, count support, and granted connection identities (`connection_id`, `display_name`). Defaults to a compact, token-efficient projection (`detail: "compact"`) so the `schema -> schema(stream) -> schema(stream, connection_id) -> query_records` discovery path stays cheap. Stream names are not globally unique; add `connection_id` to narrow a shared stream to one configured source. `detail: "full"` is allowed only with `stream` and returns deduped exhaustive schema for matching stream rows, preserving raw per-field JSON Schema without duplicate stream arrays. Call this before issuing other tools to discover valid filter, sort, expand, fields, count, aggregate, stream, and connection-disambiguation arguments. Read-only.',
|
|
346
|
+
annotations: READ_ONLY_ANNOTATIONS,
|
|
347
|
+
inputSchema: z
|
|
348
|
+
.object({
|
|
349
|
+
detail: z.enum(['compact', 'full']).optional().describe(SCHEMA_DETAIL_DESCRIPTION),
|
|
350
|
+
stream: z.string().min(1).optional().describe(SCHEMA_STREAM_DESCRIPTION),
|
|
351
|
+
connection_id: z.string().min(1).optional().describe(SCHEMA_CONNECTION_ID_DESCRIPTION),
|
|
352
|
+
})
|
|
353
|
+
.strict(),
|
|
354
|
+
outputSchema: z.object(READ_OUTPUT_SCHEMA_SHAPE),
|
|
355
|
+
handler: async (args) => {
|
|
356
|
+
const stream = args?.stream ? requireSafeName(args.stream, 'stream') : null;
|
|
357
|
+
const connectionId = args?.connection_id ? requireSafeName(args.connection_id, 'connection_id') : null;
|
|
358
|
+
// `detail` is normally constrained by the Zod enum to `compact|full`,
|
|
359
|
+
// so a direct MCP call can only land here with `'compact'`, `'full'`,
|
|
360
|
+
// or `undefined` (→ compact default). Resolve it defensively rather
|
|
361
|
+
// than coercing any non-`full` value to `compact`: an unexpected value
|
|
362
|
+
// (a future enum loosening, or a caller that bypassed the Zod parse)
|
|
363
|
+
// fails loudly here instead of silently downgrading the response grade.
|
|
364
|
+
const detail = resolveSchemaDetail(args?.detail);
|
|
365
|
+
if (detail === 'compact') {
|
|
366
|
+
const compactResponse = await rs.getJson('/v1/schema', {
|
|
367
|
+
query: { view: 'compact', ...(stream ? { stream } : {}), ...(connectionId ? { connection_id: connectionId } : {}) },
|
|
368
|
+
});
|
|
369
|
+
if (compactResponse.ok) {
|
|
370
|
+
return toSchemaToolResult(compactResponse, providerUrl, {
|
|
371
|
+
detail,
|
|
372
|
+
stream,
|
|
373
|
+
connectionId,
|
|
374
|
+
alreadyCompact: isCompactSchemaBody(compactResponse.body),
|
|
375
|
+
});
|
|
376
|
+
}
|
|
377
|
+
if (!shouldFallbackFromCompactSchemaRequest(compactResponse)) {
|
|
378
|
+
return toSchemaToolResult(compactResponse, providerUrl, { detail, stream, connectionId });
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
const response = await rs.getJson('/v1/schema', {
|
|
382
|
+
query: {
|
|
383
|
+
...(detail === 'full' ? { detail: 'full' } : {}),
|
|
384
|
+
...(stream ? { stream } : {}),
|
|
385
|
+
...(connectionId ? { connection_id: connectionId } : {}),
|
|
386
|
+
},
|
|
387
|
+
});
|
|
388
|
+
return toSchemaToolResult(response, providerUrl, { detail, stream, connectionId });
|
|
389
|
+
},
|
|
390
|
+
},
|
|
391
|
+
{
|
|
392
|
+
name: 'query_records',
|
|
393
|
+
title: 'Query PDPP records',
|
|
394
|
+
description:
|
|
395
|
+
'Query records in a stream via `GET /v1/streams/{stream}/records`. Default returns at most 25 records; `limit` is capped at 100 (enforced at input — a REST client that sends `limit>100` gets `limit_clamped` in `meta.warnings[]`). Page forward with `cursor`; narrow with `fields`. `structuredContent.data` carries the machine envelope; with `fields`, record payloads are narrowed to those fields plus required operational handles. `content[]` previews up to the first 5 records. Forwards all args verbatim. ' +
|
|
396
|
+
CANONICAL_SCHEMA_HINT +
|
|
397
|
+
' Read-only.',
|
|
398
|
+
annotations: READ_ONLY_ANNOTATIONS,
|
|
399
|
+
inputSchema: z
|
|
400
|
+
.object({
|
|
401
|
+
stream: z.string().min(1).describe('Stream name advertised by `schema`.'),
|
|
402
|
+
limit: z.number().int().positive().max(100).optional().describe(LIMIT_DESCRIPTION),
|
|
403
|
+
cursor: z.string().optional(),
|
|
404
|
+
order: z.string().optional().describe(ORDER_DESCRIPTION),
|
|
405
|
+
sort: z.string().optional().describe(SORT_DESCRIPTION),
|
|
406
|
+
count: z.enum(['none', 'estimated', 'exact']).optional().describe(COUNT_DESCRIPTION),
|
|
407
|
+
filter: TypedFilterInput.optional().describe(FILTER_DESCRIPTION),
|
|
408
|
+
fields: z.array(z.string()).optional().describe(FIELDS_DESCRIPTION),
|
|
409
|
+
view: z.string().optional().describe(VIEW_DESCRIPTION),
|
|
410
|
+
expand: z.array(z.string()).optional().describe(EXPAND_DESCRIPTION),
|
|
411
|
+
expand_limit: z
|
|
412
|
+
.record(z.string(), z.number().int().positive())
|
|
413
|
+
.optional()
|
|
414
|
+
.describe(EXPAND_LIMIT_DESCRIPTION),
|
|
415
|
+
changes_since: z.string().optional().describe(CHANGES_SINCE_DESCRIPTION),
|
|
416
|
+
...ConnectionIdInputShape,
|
|
417
|
+
})
|
|
418
|
+
.strict(),
|
|
419
|
+
outputSchema: z.object(READ_OUTPUT_SCHEMA_SHAPE),
|
|
420
|
+
handler: async (args) => {
|
|
421
|
+
const stream = requireSafeName(args?.stream, 'stream');
|
|
422
|
+
const query = applyExpandLimitToQuery(
|
|
423
|
+
applyFilterToQuery(pickQuery(args, SUPPORTED_QUERY_KEYS), args?.filter),
|
|
424
|
+
args?.expand_limit,
|
|
425
|
+
);
|
|
426
|
+
const response = await rs.getJson(`/v1/streams/${encodeURIComponent(stream)}/records`, {
|
|
427
|
+
query,
|
|
428
|
+
});
|
|
429
|
+
return toToolResult(response, providerUrl, `records from stream "${stream}"`, {
|
|
430
|
+
previewRecords: true,
|
|
431
|
+
});
|
|
432
|
+
},
|
|
433
|
+
},
|
|
434
|
+
{
|
|
435
|
+
name: 'aggregate',
|
|
436
|
+
title: 'Aggregate PDPP records',
|
|
437
|
+
description:
|
|
438
|
+
'Compute a single-stream aggregation via `GET /v1/streams/{stream}/aggregate`. Prefer over `query_records` when you only need a count, sum, min/max, distinct count, or grouped/time-bucketed rollup — returns small bucket rows, never record bodies. Metrics: `count`, `sum`, `min`, `max`, `count_distinct` (`field` required for all but `count`). Group with one dimension: `group_by` XOR `group_by_time` (requires `granularity`). Groupable fields are advertised by `GET /v1/schema`. Forwards args verbatim. ' +
|
|
439
|
+
CANONICAL_SCHEMA_HINT +
|
|
440
|
+
' Read-only.',
|
|
441
|
+
annotations: READ_ONLY_ANNOTATIONS,
|
|
442
|
+
inputSchema: z
|
|
443
|
+
.object({
|
|
444
|
+
stream: z.string().min(1).describe('Stream name advertised by `schema`.'),
|
|
445
|
+
metric: z
|
|
446
|
+
.enum(['count', 'sum', 'min', 'max', 'count_distinct'])
|
|
447
|
+
.describe('Aggregation metric. `field` is required for sum, min, max, and count_distinct.'),
|
|
448
|
+
field: z
|
|
449
|
+
.string()
|
|
450
|
+
.min(1)
|
|
451
|
+
.optional()
|
|
452
|
+
.describe('Target field for sum/min/max/count_distinct. Must be declared for the metric in `GET /v1/schema`.'),
|
|
453
|
+
group_by: z
|
|
454
|
+
.string()
|
|
455
|
+
.min(1)
|
|
456
|
+
.optional()
|
|
457
|
+
.describe('Scalar field to group counts by. Mutually exclusive with `group_by_time`.'),
|
|
458
|
+
group_by_time: z
|
|
459
|
+
.string()
|
|
460
|
+
.min(1)
|
|
461
|
+
.optional()
|
|
462
|
+
.describe('Declared date/date-time field to bucket counts by. Requires `granularity`. Mutually exclusive with `group_by`.'),
|
|
463
|
+
granularity: z
|
|
464
|
+
.enum(['minute', 'hour', 'day', 'week', 'month', 'quarter', 'year'])
|
|
465
|
+
.optional()
|
|
466
|
+
.describe('Calendar bucket unit for `group_by_time`. Required with `group_by_time`, forbidden otherwise.'),
|
|
467
|
+
time_zone: z
|
|
468
|
+
.string()
|
|
469
|
+
.min(1)
|
|
470
|
+
.optional()
|
|
471
|
+
.describe('IANA time zone for `group_by_time` bucket boundaries. Defaults to UTC; the response echoes the effective zone.'),
|
|
472
|
+
limit: z
|
|
473
|
+
.number()
|
|
474
|
+
.int()
|
|
475
|
+
.positive()
|
|
476
|
+
.max(100)
|
|
477
|
+
.optional()
|
|
478
|
+
.describe('Maximum number of group buckets (1-100). Only valid with `group_by` or `group_by_time`.'),
|
|
479
|
+
filter: TypedFilterInput.optional().describe(FILTER_DESCRIPTION),
|
|
480
|
+
...ConnectionIdInputShape,
|
|
481
|
+
})
|
|
482
|
+
.strict(),
|
|
483
|
+
outputSchema: z.object(READ_OUTPUT_SCHEMA_SHAPE),
|
|
484
|
+
handler: async (args) => {
|
|
485
|
+
const stream = requireSafeName(args?.stream, 'stream');
|
|
486
|
+
const query = applyFilterToQuery(pickQuery(args, SUPPORTED_AGGREGATE_QUERY_KEYS), args?.filter);
|
|
487
|
+
const response = await rs.getJson(`/v1/streams/${encodeURIComponent(stream)}/aggregate`, {
|
|
488
|
+
query,
|
|
489
|
+
});
|
|
490
|
+
return toAggregateToolResult(response, providerUrl, stream);
|
|
491
|
+
},
|
|
492
|
+
},
|
|
493
|
+
{
|
|
494
|
+
name: 'search',
|
|
495
|
+
title: 'Search PDPP records',
|
|
496
|
+
description:
|
|
497
|
+
'Search records via `GET /v1/search` (lexical), `/v1/search/semantic`, or `/v1/search/hybrid` per `mode`. Use lexical for exact known terms; semantic is approximate retrieval for conceptual matches. `structuredContent.results` carries the flattened page; `structuredContent.data` carries compact envelope metadata, not a duplicate hit array. Hits carry `connection_id` and `connector_key`. Pass `connection_id` to scope, omit to fan in. Page default is 25 hits; `limit` is capped at 100 (enforced at input, and fan-in packages apply it globally). Page forward with `cursor` (lexical/semantic; hybrid does not page). Per-mode capability support is advertised by `GET /v1/schema`. Read-only.',
|
|
498
|
+
annotations: READ_ONLY_ANNOTATIONS,
|
|
499
|
+
inputSchema: z
|
|
500
|
+
.object({
|
|
501
|
+
q: z.string().min(1).describe('Search query string.'),
|
|
502
|
+
streams: z.array(z.string()).optional(),
|
|
503
|
+
limit: z.number().int().positive().max(100).optional().describe(SEARCH_LIMIT_DESCRIPTION),
|
|
504
|
+
cursor: z.string().optional(),
|
|
505
|
+
mode: z.enum(['lexical', 'semantic', 'hybrid']).optional(),
|
|
506
|
+
filter: TypedFilterInput.optional().describe(FILTER_DESCRIPTION),
|
|
507
|
+
...ConnectionIdInputShape,
|
|
508
|
+
})
|
|
509
|
+
.strict(),
|
|
510
|
+
outputSchema: z.object(SEARCH_OUTPUT_SCHEMA_SHAPE),
|
|
511
|
+
handler: async (args) => {
|
|
512
|
+
const path = searchPathForMode(args.mode);
|
|
513
|
+
const query = applyFilterToQuery(
|
|
514
|
+
{
|
|
515
|
+
q: args.q,
|
|
516
|
+
streams: args.streams,
|
|
517
|
+
limit: args.limit,
|
|
518
|
+
cursor: args.cursor,
|
|
519
|
+
connection_id: args.connection_id,
|
|
520
|
+
},
|
|
521
|
+
args.filter,
|
|
522
|
+
);
|
|
523
|
+
const response = await rs.getJson(path, { query });
|
|
524
|
+
return toSearchToolResult(response, providerUrl, { limit: args?.limit });
|
|
525
|
+
},
|
|
526
|
+
},
|
|
527
|
+
{
|
|
528
|
+
name: 'fetch',
|
|
529
|
+
title: 'Fetch PDPP search result',
|
|
530
|
+
description:
|
|
531
|
+
'Fetch a single OpenAI-compatible document by a result id from `search`. Id format: `stream:record_id` → `GET /v1/streams/{stream}/records/{record_id}`. Returns document fields only (`id`, `title`, `text`, `url`, `metadata`); use `query_records` for canonical PDPP record envelopes. Use `fields` to project the source record before rendering document text/metadata; if the projection excludes every text-like field (`text`, `content`, `body`, `summary`), `text` contains compact JSON for the projected record rather than the full document body. Operational source handles (`id`, stream, `connection_id`, `connector_key`) remain available in `metadata`. On `ambiguous_connection` (409), pick a `connection_id` from `available_connections` in the error and retry. Read-only.',
|
|
532
|
+
annotations: READ_ONLY_ANNOTATIONS,
|
|
533
|
+
inputSchema: z
|
|
534
|
+
.object({
|
|
535
|
+
id: z.string().min(1).describe('Search result id, usually `stream:record_id`.'),
|
|
536
|
+
expand: z.array(z.string()).optional().describe(EXPAND_DESCRIPTION),
|
|
537
|
+
expand_limit: z
|
|
538
|
+
.record(z.string(), z.number().int().positive())
|
|
539
|
+
.optional()
|
|
540
|
+
.describe(EXPAND_LIMIT_DESCRIPTION),
|
|
541
|
+
fields: z.array(z.string()).optional().describe(FIELDS_DESCRIPTION),
|
|
542
|
+
...ConnectionIdInputShape,
|
|
543
|
+
})
|
|
544
|
+
.strict(),
|
|
545
|
+
outputSchema: z.object(FETCH_OUTPUT_SCHEMA_SHAPE),
|
|
546
|
+
handler: async (args) => {
|
|
547
|
+
const ref = parseRecordResultId(args.id);
|
|
548
|
+
const query = applyExpandLimitToQuery(pickQuery(args, SUPPORTED_QUERY_KEYS), args?.expand_limit);
|
|
549
|
+
const response = await rs.getJson(
|
|
550
|
+
`/v1/streams/${encodeURIComponent(ref.stream)}/records/${encodeURIComponent(ref.recordId)}`,
|
|
551
|
+
{ query }
|
|
552
|
+
);
|
|
553
|
+
return toFetchToolResult(response, providerUrl, args.id);
|
|
554
|
+
},
|
|
555
|
+
},
|
|
556
|
+
];
|
|
557
|
+
|
|
558
|
+
return selectNormalTools(tools);
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/**
 * Choose the search endpoint path for a requested search mode.
 *
 * @param {string | undefined} mode - `'semantic'`, `'hybrid'`, or anything else.
 * @returns {string} `/v1/search/semantic` or `/v1/search/hybrid` for those two
 *   modes; `/v1/search` for every other value, including `undefined`.
 */
function searchPathForMode(mode) {
  switch (mode) {
    case 'semantic':
      return '/v1/search/semantic';
    case 'hybrid':
      return '/v1/search/hybrid';
    default:
      return '/v1/search';
  }
}
|
|
566
|
+
|
|
567
|
+
/**
 * Build the `pdpp://stream/{name}` resource template descriptor.
 *
 * The descriptor's `read(uri, variables)` resolves the stream name, issues
 * `rs.getJson('/v1/streams/{name}')`, and always resolves to a single
 * `application/json` content entry: the pretty-printed response body on
 * success, or `{ error, provider_url, http_status }` when `response.ok` is
 * falsy.
 *
 * @param {{ rs: { getJson: Function }, providerUrl: string }} deps - `rs` is
 *   the client used for the read; `providerUrl` is echoed in error payloads.
 * @returns {object} Descriptor with `uriTemplate`, `name`, `title`,
 *   `description`, `mimeType`, and an async `read` function.
 */
export function buildStreamResourceTemplate({ rs, providerUrl }) {
  // Both outcomes share the same one-entry JSON contents shape.
  const jsonContents = (uri, payload) => ({
    contents: [
      {
        uri,
        mimeType: 'application/json',
        text: JSON.stringify(payload, null, 2),
      },
    ],
  });

  const read = async (uri, variables) => {
    const streamName = resolveStreamName(uri, variables);
    const response = await rs.getJson(`/v1/streams/${encodeURIComponent(streamName)}`);
    if (!response.ok) {
      // Substitute a generic error object when the client supplied none.
      const error = response.error ?? { type: 'rs_error', code: 'unknown', message: 'Unknown RS error' };
      return jsonContents(uri, { error, provider_url: providerUrl, http_status: response.status });
    }
    return jsonContents(uri, response.body);
  };

  return {
    uriTemplate: 'pdpp://stream/{name}',
    name: 'pdpp-stream',
    title: 'PDPP stream metadata',
    description:
      'Returns the stream metadata document for a single stream (GET /v1/streams/{name}). Read-only.',
    mimeType: 'application/json',
    read,
  };
}
|
|
602
|
+
|
|
603
|
+
/**
 * Resolve the stream name addressed by a `pdpp://stream/{name}` resource read.
 *
 * The template variable `name` wins when it is a non-empty string; otherwise
 * the name is parsed out of `uri`. In both cases the decoded value is passed
 * through `requireSafeName` before being returned.
 *
 * @param {string} uri - Resource URI being read.
 * @param {{ name?: unknown } | null | undefined} variables - Template
 *   variables; only `name` is consulted.
 * @returns {string} The decoded stream name.
 * @throws {InvalidResourceUriError} When `uri` does not match
 *   `pdpp://stream/{name}` or its name segment is not valid percent-encoding.
 * @throws {Error} Propagated from `requireSafeName` when the name is rejected.
 */
function resolveStreamName(uri, variables) {
  const rawFromVariables = variables?.name;
  if (typeof rawFromVariables === 'string' && rawFromVariables.length > 0) {
    return requireSafeName(decodeIfEncoded(rawFromVariables), 'stream');
  }

  const match = /^pdpp:\/\/stream\/([^/]+)$/.exec(uri);
  if (!match) {
    throw new InvalidResourceUriError(`Resource URI ${uri} does not match pdpp://stream/{name}.`);
  }

  let decoded;
  try {
    decoded = decodeURIComponent(match[1]);
  } catch (err) {
    // `decodeURIComponent` raises URIError on malformed escapes such as `%zz`.
    // Report that as an invalid resource URI, like every other bad URI here,
    // instead of leaking a raw URIError to the caller.
    const invalid = new InvalidResourceUriError(
      `Resource URI ${uri} contains a malformed percent-encoding.`,
    );
    invalid.cause = err;
    throw invalid;
  }
  return requireSafeName(decoded, 'stream');
}
|
|
614
|
+
|
|
615
|
+
/**
 * Percent-decode `value` on a best-effort basis.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} The decoded text, or `value` unchanged when
 *   `decodeURIComponent` throws (for example on a lone `%`).
 */
function decodeIfEncoded(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Deliberate best-effort: malformed escapes fall back to the input as given.
  }
  return decoded;
}
|
|
622
|
+
|
|
623
|
+
/**
 * Error raised for a resource URI that cannot be used.
 *
 * Instances carry `name === 'InvalidResourceUriError'` so handlers can tell
 * them apart from other errors by name as well as by `instanceof`.
 */
export class InvalidResourceUriError extends Error {
  /**
   * @param {string} message - Human-readable description of the problem.
   */
  constructor(message) {
    super(message);
    // Override the inherited 'Error' name on the instance itself.
    this.name = 'InvalidResourceUriError';
  }
}
|
|
629
|
+
|
|
630
|
+
/**
 * Validate a name before it is placed into a request path.
 *
 * @param {unknown} value - Candidate name.
 * @param {string} label - Noun used in error messages (e.g. `'stream'`).
 * @returns {string} `value`, unchanged, when it passes.
 * @throws {Error} `"<label> is required"` for non-strings and empty strings;
 *   `"<label> contains invalid characters"` when the name is exactly `.` or
 *   contains `/`, `\` or `..` anywhere.
 */
function requireSafeName(value, label) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (!isNonEmptyString) {
    throw new Error(`${label} is required`);
  }
  // Reject path-traversal and slash-bearing inputs. The RS validates names too, but a
  // defensive check here keeps the resource template URI surface narrow.
  // (A bare '..' is already covered by the substring check for '..'.)
  const forbiddenFragments = ['/', '\\', '..'];
  const hasForbiddenFragment = forbiddenFragments.some((fragment) => value.includes(fragment));
  if (value === '.' || hasForbiddenFragment) {
    throw new Error(`${label} contains invalid characters`);
  }
  return value;
}
|
|
641
|
+
|
|
642
|
+
/**
 * Copy the forwardable query parameters out of a tool's arguments.
 *
 * @param {object | null | undefined} args - Tool arguments.
 * @param {{ has(key: string): boolean }} supportedKeys - Allow-list of keys.
 * @returns {object} New object holding each own enumerable key of `args` that
 *   is in `supportedKeys` and is not `stream`, `filter`, or `expand_limit`.
 *   Returns `{}` when `args` is not an object.
 */
function pickQuery(args, supportedKeys) {
  if (!args || typeof args !== 'object') {
    return {};
  }

  const isNeverForwarded = (key) =>
    key === 'stream' ||
    // `filter` is never forwarded as a flat param: the RS expects bracketed
    // `filter[field]=value` query keys, which callers build via
    // `applyFilterToQuery`. Forwarding the raw value here would re-introduce the
    // silent bare-`filter=` no-op this change fixes.
    key === 'filter' ||
    // `expand_limit` mirrors the same nested REST query shape:
    // `expand_limit[relation]=N`. Forwarding the raw object would become a JSON
    // string under URLSearchParams instead of the query key the RS parses.
    key === 'expand_limit';

  const forwarded = {};
  const candidateKeys = Object.keys(args).filter(
    (key) => !isNeverForwarded(key) && supportedKeys.has(key),
  );
  for (const key of candidateKeys) {
    forwarded[key] = args[key];
  }
  return forwarded;
}
|
|
663
|
+
|
|
664
|
+
// `content[]` carries only a concise human summary; programmatic consumers
// should rely on the canonical `structuredContent` envelope.
// See:
//   openspec/changes/canonicalize-public-read-contract (5.3 prose content[] is
//   a concise summary only and not a second divergent JSON contract).
/**
 * Convert a response into a tool result.
 *
 * @param {object} response - Response with `ok`, `body`, `requestId`.
 * @param {string} providerUrl - Echoed as `structuredContent.provider_url`.
 * @param {string} [label='response'] - Label passed to `summarizeBody`.
 * @param {object} [options={}] - Options passed to `summarizeBody`.
 * @returns {object} `{ content, structuredContent }` when `response.ok` is
 *   truthy, otherwise whatever `errorToolResult(response, providerUrl)` returns.
 */
function toToolResult(response, providerUrl, label = 'response', options = {}) {
  if (!response.ok) {
    return errorToolResult(response, providerUrl);
  }
  const payload = response.body;
  const summaryText = summarizeBody(payload, label, options);
  return {
    content: [{ type: 'text', text: summaryText }],
    structuredContent: {
      data: payload,
      provider_url: providerUrl,
      request_id: response.requestId,
    },
  };
}
|
|
684
|
+
|
|
685
|
+
// Build the `schema` tool result. The text summary is always the compact,
// parseable discovery line. `structuredContent.data` is the compact projection
// unless `detail === "full"`, in which case it is the response body after
// `dedupeFullSchemaDocument`. Supplying `stream` turns on per-field detail in
// both the summary and the structured payload; supplying `connectionId` adds
// the connection scope to the summary.
/**
 * @param {object} response - Response with `ok`, `body`, `requestId`.
 * @param {string} providerUrl - Echoed as `structuredContent.provider_url`.
 * @param {object} [options]
 * @param {string} [options.detail='compact'] - `'full'` selects the deduped
 *   full document; any other value selects the compact projection.
 * @param {string|null} [options.stream=null] - Truthy enables field detail.
 * @param {string|null} [options.connectionId=null] - Forwarded to the summary
 *   when truthy.
 * @param {boolean} [options.alreadyCompact=false] - NOTE(review): accepted but
 *   not read anywhere in this function; confirm whether callers rely on it.
 * @returns {object} `{ content, structuredContent }`, or the result of
 *   `errorToolResult` when `response.ok` is falsy.
 */
function toSchemaToolResult(response, providerUrl, { detail = 'compact', stream = null, connectionId = null, alreadyCompact = false } = {}) {
  if (!response.ok) {
    return errorToolResult(response, providerUrl);
  }

  const includeFieldDetail = Boolean(stream);
  let data;
  if (detail === 'full') {
    data = dedupeFullSchemaDocument(response.body);
  } else {
    data = compactSchemaDocument(response.body, { includeFieldDetail });
  }
  const schemaDocument = unwrapSchemaBody(data);

  const summaryOptions = { includeFieldDetail };
  if (connectionId) {
    summaryOptions.connectionId = connectionId;
  }
  const summaryText = summarizeSchemaDiscovery(schemaDocument, 'PDPP schema', summaryOptions);

  return {
    content: [{ type: 'text', text: summaryText }],
    structuredContent: {
      data: schemaDocument,
      provider_url: providerUrl,
      request_id: response.requestId,
    },
  };
}
|
|
713
|
+
|
|
714
|
+
/**
 * Report whether a schema body is marked as the compact projection.
 *
 * @param {unknown} body - Schema response body, wrapped or not.
 * @returns {boolean} True when `unwrapSchemaBody(body)` has
 *   `detail === 'compact'`.
 */
function isCompactSchemaBody(body) {
  const schema = unwrapSchemaBody(body);
  return schema?.detail === 'compact';
}
|
|
717
|
+
|
|
718
|
+
/**
 * Decide whether a failed compact-schema request qualifies for a fallback.
 *
 * @param {object} response - Response with `ok`, `status`, optional `error`.
 * @returns {boolean} True only for a non-ok response with status 400 whose
 *   `error.code` (or, when that is nullish, `error.type`) is `bad_request`,
 *   `invalid_request`, or `unsupported_query`.
 */
function shouldFallbackFromCompactSchemaRequest(response) {
  if (response.ok || response.status !== 400) {
    return false;
  }
  const { error } = response;
  const code = error?.code ?? error?.type ?? '';
  const fallbackCodes = ['bad_request', 'invalid_request', 'unsupported_query'];
  return fallbackCodes.includes(code);
}
|
|
723
|
+
|
|
724
|
+
// Compact projection of the schema document. Drops the heavy per-field JSON
// Schema (`field_capabilities.*.schema`) and other verbose nested blobs, while
// keeping the field name, declared type, grant flag, and the usable capability
// flags an agent needs to build filter/sort/expand/fields/count arguments.
// Connection identity (`connection_id`, `display_name`) and canonical connector
// metadata (`connector_key`) are preserved; deprecated REST aliases are left
// out of this default MCP projection. The envelope shape (top-level `data`
// wrapper, `connectors[]` grouping) is kept, so the payload is still
// structurally a schema document, just lighter.
/**
 * @param {unknown} body - Schema response body, optionally wrapped in `{ data }`.
 * @param {object} [options]
 * @param {boolean} [options.includeFieldDetail=false] - Forwarded to the
 *   per-connector / per-stream compaction helpers.
 * @returns {unknown} `body` itself when no schema object can be unwrapped;
 *   otherwise the projection tagged `detail: 'compact'`, re-wrapped in
 *   `{ ...body, data }` when the input was wrapped.
 */
function compactSchemaDocument(body, { includeFieldDetail = false } = {}) {
  const schema = unwrapSchemaBody(body);
  if (!schema || typeof schema !== 'object') {
    return body;
  }

  // Remember whether the input used the `{ data: {...} }` wrapper so the output matches.
  const wrapped =
    body && typeof body === 'object' && body.data && typeof body.data === 'object' && !Array.isArray(body.data);

  const connectors = extractSchemaConnectors(schema);
  let projection = schema;
  if (connectors.length > 0) {
    // Connector-grouped document: the flat `streams` array is dropped.
    projection = {
      ...stripSchemaStreamArrays(schema),
      field_capability_legend: FIELD_CAPABILITY_FLAG_LEGEND,
      connectors: connectors.map((connector) => compactSchemaConnector(connector, { includeFieldDetail })),
    };
  } else if (Array.isArray(schema.streams)) {
    // Flat document: compact each stream in place.
    projection = {
      ...schema,
      field_capability_legend: FIELD_CAPABILITY_FLAG_LEGEND,
      streams: schema.streams.map((entry) => compactSchemaStream(entry, { includeFieldDetail })),
    };
  }

  const compactSchema = { ...projection, detail: 'compact' };
  if (wrapped) {
    return { ...body, data: compactSchema };
  }
  return compactSchema;
}
|
|
760
|
+
|
|
761
|
+
/**
 * Remove the top-level `streams` array from a full schema document that also
 * groups its content under connectors.
 *
 * @param {unknown} body - Schema response body, optionally wrapped in `{ data }`.
 * @returns {unknown} `body` unchanged when no schema object can be unwrapped or
 *   `extractSchemaConnectors` finds none; otherwise the schema without
 *   `streams`, re-wrapped in `{ ...body, data }` when the input was wrapped.
 */
function dedupeFullSchemaDocument(body) {
  const schema = unwrapSchemaBody(body);
  const hasSchemaObject = Boolean(schema) && typeof schema === 'object';
  if (!hasSchemaObject || extractSchemaConnectors(schema).length === 0) {
    return body;
  }

  const deduped = stripSchemaStreamArrays(schema);
  const wrapped =
    body && typeof body === 'object' && body.data && typeof body.data === 'object' && !Array.isArray(body.data);
  if (wrapped) {
    return { ...body, data: deduped };
  }
  return deduped;
}
|
|
771
|
+
|
|
772
|
+
/**
 * Shallow-copy a schema object without its `streams` property.
 *
 * @param {object} schema - Source object; not mutated.
 * @returns {object} New object with every own enumerable property of `schema`
 *   except `streams`.
 */
function stripSchemaStreamArrays(schema) {
  // Parameter destructuring does the omit; `streams` is intentionally unused.
  const omitStreams = ({ streams, ...remaining }) => remaining;
  return omitStreams(schema);
}
|
|
776
|
+
|
|
777
|
+
/**
 * Compact one connector entry of a schema document.
 *
 * When `pickSharedGrantedConnections` returns a shared `granted_connections`
 * list, it is set on the connector, and `hasShared` / `sharedKey` are passed
 * down to each stream's compaction.
 *
 * @param {unknown} connector - Connector entry; non-objects are returned as-is.
 * @param {object} [options]
 * @param {boolean} [options.includeFieldDetail=false] - Forwarded to
 *   `compactSchemaStream`.
 * @returns {unknown} Copy of the connector with compacted `streams` (an empty
 *   array when the connector had no `streams` array).
 */
function compactSchemaConnector(connector, { includeFieldDetail = false } = {}) {
  if (!connector || typeof connector !== 'object') {
    return connector;
  }

  const streams = Array.isArray(connector.streams) ? connector.streams : [];
  const { shared, sharedKey } = pickSharedGrantedConnections(streams);
  const hasShared = shared !== null;
  const streamOptions = { hasShared, sharedKey, includeFieldDetail };

  const compacted = { ...connector };
  if (shared) {
    compacted.granted_connections = shared;
  }
  compacted.streams = streams.map((entry) => compactSchemaStream(entry, streamOptions));
  return compacted;
}
|
|
788
|
+
|
|
789
|
+
// Project one stream-metadata entry to its compact form. A fixed allow-list of
// identity/metadata fields passes through verbatim; `field_capabilities` and
// `expand_capabilities` are compacted (and only included on request);
// everything else is dropped to keep the payload bounded.
/**
 * @param {unknown} entry - Stream entry; non-objects are returned as-is.
 * @param {object} [options]
 * @param {boolean} [options.hasShared=false] - Whether the caller has a shared
 *   `granted_connections` list.
 * @param {string} [options.sharedKey=''] - `grantedConnectionsKey` of that
 *   shared list.
 * @param {boolean} [options.includeFieldDetail=false] - Include compacted
 *   `field_capabilities` / `expand_capabilities` when present.
 * @returns {unknown} New compact object for object entries.
 */
function compactSchemaStream(entry, { hasShared = false, sharedKey = '', includeFieldDetail = false } = {}) {
  if (!entry || typeof entry !== 'object') {
    return entry;
  }

  const passthroughKeys = [
    'name',
    'stream',
    'stream_name',
    'connector_key',
    'connector_id',
    'connector_display_name',
    'display_name',
    'connection_display_name',
    'connection_id',
    'record_count',
    'granted',
    'primary_key',
    'cursor_field',
    'source',
  ];
  const compact = Object.fromEntries(
    passthroughKeys
      .filter((key) => entry[key] !== undefined)
      .map((key) => [key, entry[key]]),
  );

  const granted = entry.granted_connections;
  if (granted !== undefined) {
    const streamKey = Array.isArray(granted) ? grantedConnectionsKey(granted) : null;
    // Omit the per-stream list only when it is an array whose key equals the
    // shared key supplied by the caller.
    const matchesShared = hasShared && streamKey !== null && streamKey === sharedKey;
    if (!matchesShared) {
      compact.granted_connections = compactSchemaGrantedConnections(granted);
    }
  }

  if (includeFieldDetail) {
    if (entry.field_capabilities !== undefined) {
      compact.field_capabilities = compactFieldCapabilities(entry.field_capabilities);
    }
    if (entry.expand_capabilities !== undefined) {
      compact.expand_capabilities = compactExpandCapabilities(entry.expand_capabilities);
    }
  }
  return compact;
}
|
|
832
|
+
|
|
833
|
+
// Compact a `field_capabilities` container. Each field collapses to the terse,
// agent-usable capability flag string that the `content[]` summary already
// advertises (e.g. `t=string,eq,r=gte|lt,a=group_by_time`).
// Two size drivers go away at the compact grade: the per-field JSON Schema blob
// and the five verbose `{declared, usable}` capability sub-objects per field.
// The flag string still carries every usable capability an agent needs to build
// filter / sort / expand / fields / count / aggregate arguments.
// `detail: "full"` remains the way to get the raw per-field JSON Schema and the
// structured capability sub-objects. The map-vs-array container shape is kept.
/**
 * @param {unknown} fieldCapabilities - Array or map of field capabilities.
 * @returns {unknown} The input itself when `fieldCapabilityEntries` yields no
 *   entries; an array of `{ name, flags }` for array input; otherwise an object
 *   mapping each field name to its flag string.
 */
function compactFieldCapabilities(fieldCapabilities) {
  const entries = fieldCapabilityEntries(fieldCapabilities);
  if (entries.length === 0) {
    return fieldCapabilities;
  }

  if (Array.isArray(fieldCapabilities)) {
    return entries.map(([name, capabilities]) => ({
      name,
      flags: formatFieldCapabilityFlags(capabilities),
    }));
  }

  return entries.reduce((flagsByField, [name, capabilities]) => {
    flagsByField[name] = formatFieldCapabilityFlags(capabilities);
    return flagsByField;
  }, {});
}
|
|
855
|
+
|
|
856
|
+
/**
 * Trim each expand-relation descriptor down to a fixed set of keys.
 *
 * @param {unknown} expandCapabilities - Expected to be an array of relations.
 * @returns {unknown} The input itself when it is not an array. Otherwise a new
 *   array where each object relation is replaced by a copy holding only its
 *   defined allow-listed keys. Non-object items, and objects with none of
 *   those keys, are passed through untouched.
 */
function compactExpandCapabilities(expandCapabilities) {
  if (!Array.isArray(expandCapabilities)) {
    return expandCapabilities;
  }

  const keptKeys = [
    'name',
    'relation',
    'stream',
    'target_stream',
    'cardinality',
    'granted',
    'usable',
    'foreign_key',
    'max_limit',
    'default_limit',
    'reason',
  ];

  const projectRelation = (relation) => {
    if (!relation || typeof relation !== 'object') {
      return relation;
    }
    const presentKeys = keptKeys.filter((key) => relation[key] !== undefined);
    if (presentKeys.length === 0) {
      // Nothing recognizable to keep: hand the original back rather than `{}`.
      return relation;
    }
    return Object.fromEntries(presentKeys.map((key) => [key, relation[key]]));
  };

  return expandCapabilities.map((relation) => projectRelation(relation));
}
|
|
879
|
+
|
|
880
|
+
/**
 * Build an order-insensitive comparison key for a `granted_connections` list.
 *
 * @param {unknown} value - Expected to be an array of connection entries.
 * @returns {string} `''` for non-arrays. Otherwise each entry is fingerprinted
 *   (objects by `[connection_id, display_name]`, with non-string members
 *   replaced by `''`; anything else by its JSON form), the fingerprints are
 *   sorted, and the result is joined with newlines.
 */
function grantedConnectionsKey(value) {
  if (!Array.isArray(value)) {
    return '';
  }

  const fingerprint = (entry) => {
    if (!entry || typeof entry !== 'object') {
      return JSON.stringify(entry);
    }
    const { connection_id: rawId, display_name: rawLabel } = entry;
    const id = typeof rawId === 'string' ? rawId : '';
    const label = typeof rawLabel === 'string' ? rawLabel : '';
    return JSON.stringify([id, label]);
  };

  // Sorting makes two lists with the same members in different order compare equal.
  return value
    .map((entry) => fingerprint(entry))
    .sort()
    .join('\n');
}
|
|
891
|
+
|
|
892
|
+
/**
 * Drop the deprecated `connector_instance_id` alias from each granted
 * connection entry.
 *
 * @param {unknown} value - Expected to be an array of connection entries.
 * @returns {unknown} The input itself when it is not an array; otherwise a new
 *   array where plain-object entries are shallow copies without
 *   `connector_instance_id` and all other entries are passed through.
 */
function compactSchemaGrantedConnections(value) {
  if (!Array.isArray(value)) {
    return value;
  }

  const isRecord = (entry) => Boolean(entry) && typeof entry === 'object' && !Array.isArray(entry);
  // Parameter destructuring performs the omit; the alias binding is unused on purpose.
  const withoutDeprecatedAlias = ({ connector_instance_id, ...kept }) => kept;

  return value.map((entry) => (isRecord(entry) ? withoutDeprecatedAlias(entry) : entry));
}
|
|
900
|
+
|
|
901
|
+
/**
 * Find the `granted_connections` list that the most streams have in common.
 *
 * Streams are grouped by `grantedConnectionsKey`; streams without a non-empty
 * `granted_connections` array are ignored. On a tie, the group seen first wins.
 *
 * @param {Iterable<unknown>} streams - Stream entries to inspect.
 * @returns {{ shared: unknown[] | null, sharedKey: string }} The compacted list
 *   of the winning group and its key, or `{ shared: null, sharedKey: '' }`
 *   when no stream qualified.
 */
function pickSharedGrantedConnections(streams) {
  const tallies = new Map();
  for (const stream of streams) {
    const granted = stream && typeof stream === 'object' ? stream.granted_connections : undefined;
    if (!Array.isArray(granted) || granted.length === 0) {
      continue;
    }
    const key = grantedConnectionsKey(granted);
    const tally = tallies.get(key);
    if (tally) {
      tally.count += 1;
    } else {
      // The first stream seen for a key supplies the representative value.
      tallies.set(key, { value: compactSchemaGrantedConnections(granted), count: 1 });
    }
  }

  let winner = null;
  let winnerKey = '';
  tallies.forEach((candidate, key) => {
    // Strict `>` keeps the earliest-inserted group when counts are equal.
    if (winner === null || candidate.count > winner.count) {
      winner = candidate;
      winnerKey = key;
    }
  });

  if (winner === null) {
    return { shared: null, sharedKey: '' };
  }
  return { shared: winner.value, sharedKey: winnerKey };
}
|
|
924
|
+
|
|
925
|
+
/**
 * Convert a search response into a tool result.
 *
 * The hits from `normalizeSearchResults` are cut to
 * `requestedSearchLimit(options.limit)`. When that drops any hit, the envelope
 * used for the summary and for `structuredContent.data` is a copy of the body
 * with `has_more: true`.
 *
 * @param {object} response - Response with `ok`, `body`, `requestId`.
 * @param {string} providerUrl - Echoed as `structuredContent.provider_url`.
 * @param {{ limit?: number }} [options={}] - Caller-requested page size.
 * @returns {object} `{ content, structuredContent: { data, results, provider_url, request_id } }`,
 *   or the result of `errorToolResult` when `response.ok` is falsy.
 */
function toSearchToolResult(response, providerUrl, options = {}) {
  if (!response.ok) {
    return errorToolResult(response, providerUrl);
  }

  const everyHit = normalizeSearchResults(response.body);
  const pageSize = requestedSearchLimit(options.limit);
  const results = everyHit.slice(0, pageSize);

  let summaryBody = response.body;
  if (everyHit.length > results.length) {
    // Hits were clipped locally, so advertise that more exist.
    summaryBody = { ...response.body, has_more: true };
  }

  const data = compactSearchEnvelope(summaryBody, { resultCount: results.length });
  const summaryText = summarizeSearch(summaryBody, results);
  return {
    content: [{ type: 'text', text: summaryText }],
    structuredContent: {
      data,
      results,
      provider_url: providerUrl,
      request_id: response.requestId,
    },
  };
}
|
|
951
|
+
|
|
952
|
+
/**
 * Convert a single-record response into a `fetch` tool result.
 *
 * The document built by `normalizeFetchedDocument` is used twice: as
 * `structuredContent` directly (no `data` wrapper) and, JSON-stringified, as
 * the text content.
 *
 * @param {object} response - Response with `ok` and `body`.
 * @param {string} providerUrl - Passed to `normalizeFetchedDocument` and
 *   `errorToolResult`.
 * @param {string} requestedId - The id the caller asked for.
 * @returns {object} `{ content, structuredContent }`, or the result of
 *   `errorToolResult` when `response.ok` is falsy.
 */
function toFetchToolResult(response, providerUrl, requestedId) {
  if (!response.ok) {
    return errorToolResult(response, providerUrl);
  }
  const fetchedDocument = normalizeFetchedDocument(response.body, requestedId, providerUrl);
  return {
    content: [{ type: 'text', text: JSON.stringify(fetchedDocument) }],
    structuredContent: fetchedDocument,
  };
}
|
|
968
|
+
|
|
969
|
+
// Aggregate results must put the numeric answer in the `content[]` text and not
// only in `structuredContent.data`, because some hosted agents cannot reliably
// read `structuredContent`. The text stays compact (metric, stream, scalar
// value or a short preview of grouped buckets); the full envelope remains
// canonical in `structuredContent.data`. See validation criterion 3 in the
// lane brief.
/**
 * @param {object} response - Response with `ok`, `body`, `requestId`.
 * @param {string} providerUrl - Echoed as `structuredContent.provider_url`.
 * @param {string} stream - Stream name shown in the text summary.
 * @returns {object} `{ content, structuredContent }`, or the result of
 *   `errorToolResult` when `response.ok` is falsy.
 */
function toAggregateToolResult(response, providerUrl, stream) {
  if (!response.ok) {
    return errorToolResult(response, providerUrl);
  }
  const envelope = response.body;
  const summaryText = summarizeAggregate(envelope, stream);
  return {
    content: [{ type: 'text', text: summaryText }],
    structuredContent: {
      data: envelope,
      provider_url: providerUrl,
      request_id: response.requestId,
    },
  };
}
|
|
988
|
+
|
|
989
|
+
// Maximum number of grouped buckets spelled out in the aggregate text summary.
const AGGREGATE_GROUP_PREVIEW_LIMIT = 5;

/**
 * Render the one-line text summary of an aggregate response.
 *
 * Grouped results (`groups` is an array) list up to
 * `AGGREGATE_GROUP_PREVIEW_LIMIT` buckets as `key=count`, with `?` for a
 * missing count. Ungrouped results print `value`, falling back to
 * `filtered_record_count` when `value` is `undefined`.
 *
 * @param {unknown} body - Aggregate response body (wrapped or unwrapped).
 * @param {string} stream - Stream name shown in the head of the summary.
 * @returns {string} The summary text.
 */
function summarizeAggregate(body, stream) {
  const agg = unwrapAggregateBody(body);
  const hasMetric = typeof agg.metric === 'string' && agg.metric.length > 0;
  const hasField = typeof agg.field === 'string' && agg.field.length > 0;
  const metric = hasMetric ? agg.metric : 'aggregate';
  const field = hasField ? ` field=${agg.field}` : '';
  const head = `${metric}(${stream})${field}`;

  if (!Array.isArray(agg.groups)) {
    // Ungrouped: the scalar answer lives in `value`. Fall back to
    // `filtered_record_count` for a count when `value` is absent.
    const value = agg.value !== undefined ? agg.value : agg.filtered_record_count;
    return `${head} = ${formatAggregateValue(value)}. canonical envelope in structuredContent.data`;
  }

  const groups = agg.groups;
  const timeZone = firstString(agg.effective_time_zone, agg.time_zone);
  let dimension;
  if (agg.group_by_time) {
    const timeZoneSuffix = timeZone ? ` time_zone=${formatScalar(timeZone)}` : '';
    dimension = `group_by_time=${formatScalar(agg.group_by_time)} granularity=${formatScalar(agg.granularity)}${timeZoneSuffix}`;
  } else {
    dimension = `group_by=${formatScalar(agg.group_by)}`;
  }

  if (groups.length === 0) {
    return `${head} ${dimension}: 0 group(s). See structuredContent.data for the canonical envelope.`;
  }

  const describeGroup = (group) => {
    const isObject = Boolean(group) && typeof group === 'object';
    const key = isObject ? group.key : group;
    const count = isObject ? group.count : undefined;
    return `${formatScalar(key)}=${count == null ? '?' : count}`;
  };
  const shown = groups.slice(0, AGGREGATE_GROUP_PREVIEW_LIMIT).map((group) => describeGroup(group));
  const hiddenCount = groups.length - AGGREGATE_GROUP_PREVIEW_LIMIT;
  const more = hiddenCount > 0 ? ` more_groups=${hiddenCount};` : '';
  return `${head} ${dimension}: ${groups.length} group(s) [${shown.join(', ')}]${more} canonical envelope in structuredContent.data`;
}
|
|
1021
|
+
|
|
1022
|
+
// Render the scalar aggregate answer. Finite numbers (the common
// count/sum/min/max case) are printed bare so the text reads as the numeric
// result; everything else goes through `formatScalar`, which quotes strings for
// disambiguation.
/**
 * @param {unknown} value - Scalar aggregate result.
 * @returns {string} `'null'` for `null`/`undefined`, `String(value)` for
 *   finite numbers, otherwise `formatScalar(value)`.
 */
function formatAggregateValue(value) {
  if (value == null) {
    return 'null';
  }
  const isFiniteNumber = typeof value === 'number' && Number.isFinite(value);
  return isFiniteNumber ? String(value) : formatScalar(value);
}
|
|
1030
|
+
|
|
1031
|
+
/**
 * Locate the aggregation object inside an aggregate response body.
 *
 * @param {unknown} body - Response body.
 * @returns {object} `{}` for non-objects. `body` itself when
 *   `body.object === 'aggregation'` or when it has no non-array object under
 *   `data`. Otherwise `body.data`.
 */
function unwrapAggregateBody(body) {
  if (!body || typeof body !== 'object') {
    return {};
  }
  const { data } = body;
  const isAlreadyAggregation = body.object === 'aggregation';
  const hasNestedObject = Boolean(data) && typeof data === 'object' && !Array.isArray(data);
  return !isAlreadyAggregation && hasNestedObject ? data : body;
}
|
|
1039
|
+
|
|
1040
|
+
/**
 * Produce the concise text summary for a successful response body.
 *
 * Dispatch order: the `'PDPP schema'` and `'PDPP streams'` labels go to their
 * discovery summarizers, `options.previewRecords` selects the record preview,
 * and everything else gets an item count (when a list can be found) or a plain
 * pointer to `structuredContent.data`.
 *
 * @param {unknown} body - Response body.
 * @param {string} label - Prefix for the summary; also selects the summarizer.
 * @param {{ previewRecords?: boolean }} [options={}]
 * @returns {string} The summary text.
 */
function summarizeBody(body, label, options = {}) {
  if (label === 'PDPP schema') {
    return summarizeSchemaDiscovery(body, label);
  }
  if (label === 'PDPP streams') {
    return summarizeStreamsDiscovery(body, label);
  }
  if (options.previewRecords) {
    return summarizeRecordEnvelope(body, label);
  }

  if (Array.isArray(body)) {
    return `${label}: ${body.length} item(s). See structuredContent.data for the canonical envelope.`;
  }

  const isObject = Boolean(body) && typeof body === 'object';
  // First array among `data`, `records`, `streams` (in that order) supplies the count.
  const list = isObject
    ? [body.data, body.records, body.streams].find((candidate) => Array.isArray(candidate))
    : undefined;
  if (list === undefined) {
    return `${label}: see structuredContent.data for the canonical envelope.`;
  }

  const hasMore = body.has_more === true ? ' has_more=true.' : '';
  return `${label}: ${list.length} item(s).${hasMore} See structuredContent.data for the canonical envelope.`;
}
|
|
1069
|
+
|
|
1070
|
+
// Maximum number of records rendered in the text preview.
const RECORD_PREVIEW_LIMIT = 5;
// Hard ceiling on the whole text preview, including the header and any trailing
// markers. The token-efficiency tests assert the preview stays below 1800
// chars, so this is the load-bearing bound.
const RECORD_PREVIEW_CHAR_LIMIT = 1792;
// Characters held back from record lines so a trailing marker still fits.
const RECORD_PREVIEW_FOOTER_RESERVE = 96;
// A record line is only started when at least this much budget is left for it.
const RECORD_PREVIEW_MIN_RECORD_CHARS = 24;
const RECORD_PREVIEW_TRUNCATED_MARKER =
  'record_preview_truncated=true; machine envelope in structuredContent.data';

/**
 * Render a bounded, line-oriented text preview of a record envelope.
 *
 * Output is a header line followed by up to `RECORD_PREVIEW_LIMIT`
 * `record[i] <json>` lines. Each record is truncated to whatever budget
 * remains under `RECORD_PREVIEW_CHAR_LIMIT - RECORD_PREVIEW_FOOTER_RESERVE`.
 * Rendering stops, and the truncation marker is appended, once the remaining
 * budget drops below `RECORD_PREVIEW_MIN_RECORD_CHARS`. Otherwise a
 * `more_records=` line is appended when records beyond the preview limit exist.
 *
 * @param {unknown} body - Record envelope.
 * @param {string} label - Prefix for the header line.
 * @returns {string} The preview text.
 */
function summarizeRecordEnvelope(body, label) {
  const records = extractRecordRows(body);
  const hasMore = envelopeField(body, 'has_more') === true ? ' has_more=true.' : '';
  const handles = formatRecordEnvelopeHandles(body);
  if (records.length === 0) {
    return `${label}: 0 record(s).${handles}`;
  }

  const shown = Math.min(records.length, RECORD_PREVIEW_LIMIT);
  const header = `${label}: ${records.length} record(s).${hasMore}${handles} Showing up to ${shown}:`;
  const lines = [header];

  // Track what is left of the content budget instead of what has been spent.
  let remaining = RECORD_PREVIEW_CHAR_LIMIT - RECORD_PREVIEW_FOOTER_RESERVE - header.length;
  let truncated = false;
  const preview = records.slice(0, RECORD_PREVIEW_LIMIT);
  for (let index = 0; index < preview.length; index += 1) {
    const prefix = `record[${index}] `;
    // The extra 1 accounts for the newline that joins this line to the previous one.
    const budget = remaining - prefix.length - 1;
    if (budget < RECORD_PREVIEW_MIN_RECORD_CHARS) {
      truncated = true;
      break;
    }
    const rendered = `${prefix}${truncateText(stableInlineJson(preview[index]), budget)}`;
    lines.push(rendered);
    remaining -= rendered.length + 1;
  }

  if (truncated) {
    lines.push(RECORD_PREVIEW_TRUNCATED_MARKER);
  } else if (records.length > RECORD_PREVIEW_LIMIT) {
    lines.push(`more_records=${records.length - RECORD_PREVIEW_LIMIT}; machine envelope in structuredContent.data`);
  }
  return lines.join('\n');
}
|
|
1110
|
+
|
|
1111
|
+
/**
 * Format the continuation handles of a record envelope for the preview header.
 *
 * @param {unknown} body - Record envelope.
 * @returns {string} `''` when none of `next_cursor`, `next_changes_since`, or
 *   the count is truthy. Otherwise a leading space, the space-separated
 *   `key=value` parts in that order, and a trailing period.
 */
function formatRecordEnvelopeHandles(body) {
  const nextCursor = envelopeStringField(body, 'next_cursor');
  const nextChangesSince = envelopeStringField(body, 'next_changes_since');
  const count = envelopeCount(body);

  const parts = [
    nextCursor ? `next_cursor=${formatScalar(nextCursor)}` : null,
    nextChangesSince ? `next_changes_since=${formatScalar(nextChangesSince)}` : null,
    // NOTE(review): a falsy count is omitted; confirm what `envelopeCount` returns for zero.
    count ? `count=${count}` : null,
  ].filter((part) => part !== null);

  if (parts.length === 0) {
    return '';
  }
  return ` ${parts.join(' ')}.`;
}
|
|
1121
|
+
|
|
1122
|
+
/**
 * Pull the record array out of a response body.
 *
 * @param {unknown} body - Response body.
 * @returns {unknown[]} `body` itself when it is an array; otherwise the first
 *   array among `body.records`, `body.data`, and `body.data.records`; `[]`
 *   when none is found or `body` is not an object.
 */
function extractRecordRows(body) {
  if (Array.isArray(body)) {
    return body;
  }
  if (!body || typeof body !== 'object') {
    return [];
  }

  const { records, data } = body;
  // Only look inside `data` when it is itself an object.
  const nestedRecords = data && typeof data === 'object' ? data.records : undefined;
  const rows = [records, data, nestedRecords].find((candidate) => Array.isArray(candidate));
  return rows ?? [];
}
|
|
1132
|
+
|
|
1133
|
+
/**
 * Render the text summary for a stream listing.
 *
 * @param {unknown} body - Response body; rows come from `extractListRows`.
 * @param {string} label - Prefix for the header line.
 * @returns {string} A header with the stream count, followed by one
 *   `formatStreamListSummary` line for each of the first
 *   `DISCOVERY_STREAM_SUMMARY_LIMIT` streams and a `more_streams=` line when
 *   streams were left out. Just the header when there are no streams.
 */
function summarizeStreamsDiscovery(body, label) {
  const streams = extractListRows(body);
  if (streams.length === 0) {
    return `${label}: 0 stream(s)`;
  }

  const overflow = streams.length - DISCOVERY_STREAM_SUMMARY_LIMIT;
  const lines = [
    ...streams.slice(0, DISCOVERY_STREAM_SUMMARY_LIMIT).map((stream) => formatStreamListSummary(stream)),
    ...(overflow > 0 ? [`more_streams=${overflow}`] : []),
  ];
  return `${label}: ${streams.length} stream(s)\n${lines.join('\n')}`;
}
|
|
1147
|
+
|
|
1148
|
+
// When the package-level schema spans many streams, the per-field flag segment
// (`fields=...`) per stream dominates the text summary and pushes it into tens
// of KB — the same token-budget problem the structured compaction solves. Field
// flags therefore appear in the text only when the document is scoped to a
// stream (the `schema(stream, connection_id?)` discovery middle step). For
// multi-stream package summaries the text lists streams + connection +
// connector_key and points the agent at `schema(stream, connection_id?)` for
// per-field capability flags. Callers can force inclusion via
// `includeFieldDetail`.
/**
 * Render the text summary for a schema document.
 *
 * Every branch that lists streams stops at `DISCOVERY_STREAM_SUMMARY_LIMIT`
 * entries and appends a `more_streams=<n>` line when entries were left out.
 * That includes the names-only fallback, which previously truncated silently.
 *
 * @param {unknown} body - Schema body, wrapped or unwrapped.
 * @param {string} label - Prefix for the header line.
 * @param {object} [options]
 * @param {boolean} [options.includeFieldDetail] - Force per-field flags on or
 *   off; when nullish they are included only if at most one stream is present.
 * @param {string} [options.connectionId] - Adds a `schema_scope` line when truthy.
 * @returns {string} The summary text.
 */
function summarizeSchemaDiscovery(body, label, { includeFieldDetail, connectionId } = {}) {
  const schema = unwrapSchemaBody(body);
  const streamRefs = extractSchemaStreamRefs(schema);
  const connectorCount = extractSchemaConnectors(schema).length || numberValue(schema?.connector_count) || 0;

  if (streamRefs.length === 0) {
    // No structured stream entries: fall back to whatever bare names exist.
    const streamNames = extractSchemaStreamNames(schema);
    if (streamNames.length === 0) {
      return `${label}: connectors=${connectorCount} streams=0`;
    }
    const nameLines = streamNames
      .slice(0, DISCOVERY_STREAM_SUMMARY_LIMIT)
      .map((name) => `stream name=${formatScalar(name)}`);
    if (streamNames.length > DISCOVERY_STREAM_SUMMARY_LIMIT) {
      // Match the other listings: make the truncation explicit.
      nameLines.push(`more_streams=${streamNames.length - DISCOVERY_STREAM_SUMMARY_LIMIT}`);
    }
    return `${label}: connectors=${connectorCount} streams=${streamNames.length}\n${nameLines.join('\n')}`;
  }

  const withFields = includeFieldDetail ?? (streamRefs.length <= 1);
  const indexLines = streamRefs.length > DISCOVERY_STREAM_SUMMARY_LIMIT
    ? formatSchemaStreamIndex(streamRefs)
    : [];
  const legendLines = withFields ? [formatFieldCapabilityLegend()] : [];
  const scopedLines = connectionId ? [`schema_scope connection_id=${formatScalar(connectionId)}`] : [];
  const lines = streamRefs
    .slice(0, DISCOVERY_STREAM_SUMMARY_LIMIT)
    .map(({ stream, connector }) => formatSchemaStreamSummary(stream, connector, { includeFieldDetail: withFields }));
  if (streamRefs.length > DISCOVERY_STREAM_SUMMARY_LIMIT) {
    lines.push(`more_streams=${streamRefs.length - DISCOVERY_STREAM_SUMMARY_LIMIT}`);
  }
  // Without field flags, tell the agent where to get them.
  const hint = withFields
    ? ''
    : '\ncall schema(stream, connection_id?) for per-field capability flags (filter/sort/expand/fields/count/aggregate)';
  return `${label}: connectors=${connectorCount} streams=${streamRefs.length}\n${[
    ...legendLines,
    ...scopedLines,
    ...indexLines,
    ...lines,
  ].join('\n')}${hint}`;
}
|
|
1194
|
+
|
|
1195
|
+
/**
 * Return the single-line legend explaining the abbreviated flag names used in
 * per-field capability output.
 *
 * @returns {string} Space-separated `abbreviation=meaning` tokens.
 */
function formatFieldCapabilityLegend() {
  const tokens = [
    'field_capability_legend',
    't=declared_type',
    'eq=exact_filter',
    'r=range_filter_ops',
    'lex=lexical_search',
    'sem=semantic_search',
    'a=aggregation_caps',
    'g=false=not_granted',
  ];
  return tokens.join(' ');
}
|
|
1198
|
+
|
|
1199
|
+
/**
 * Produce one `stream_index` line per connector key, listing the distinct
 * stream names seen for that connector in first-seen order.
 *
 * @param {Array<{stream: unknown, connector: unknown}>} streamRefs
 * @returns {string[]} One line per connector key; refs without a name are skipped.
 */
function formatSchemaStreamIndex(streamRefs) {
  // Map and Set both iterate in insertion order, which keeps the output order
  // tied to the order of `streamRefs`.
  const namesByConnector = new Map();
  for (const ref of streamRefs) {
    const key = connectorKeyFor(ref.stream, ref.connector) || 'unknown';
    const name = streamName(ref.stream);
    if (!name) continue;
    let bucket = namesByConnector.get(key);
    if (bucket === undefined) {
      bucket = new Set();
      namesByConnector.set(key, bucket);
    }
    bucket.add(name);
  }
  return Array.from(namesByConnector, ([key, bucket]) => {
    const names = [...bucket];
    const rendered = names.map((name) => formatInlineValue(name)).join('|');
    return `stream_index connector_key=${formatScalar(key)} stream_count=${names.length} streams=${rendered}`;
  });
}
|
|
1217
|
+
|
|
1218
|
+
/**
 * Locate the row array inside a list-style response.
 *
 * Checked in order: the body itself, `body.data`, `body.streams`, then
 * `body.data.streams`. The first array found is returned as-is (not copied).
 *
 * @param {unknown} body
 * @returns {Array} The located array, or a new empty array when none is found.
 */
function extractListRows(body) {
  if (Array.isArray(body)) return body;
  if (body === null || typeof body !== 'object') return [];
  const { data, streams } = body;
  const nestedStreams = data && typeof data === 'object' ? data.streams : undefined;
  const rows = [data, streams, nestedStreams].find((candidate) => Array.isArray(candidate));
  return rows ?? [];
}
|
|
1228
|
+
|
|
1229
|
+
/**
 * Return the schema document from a response that may wrap it in `data`.
 *
 * `body.data` is used only when it is a non-array object that carries a
 * `connectors`, `streams`, or `granted_connections` array, or has
 * `object === 'schema'`. Otherwise the body itself is returned.
 *
 * @param {unknown} body
 * @returns {object} The schema object, or `{}` for a non-object body.
 */
function unwrapSchemaBody(body) {
  if (!body || typeof body !== 'object') return {};
  const inner = body.data;
  const isPlainObject = Boolean(inner) && typeof inner === 'object' && !Array.isArray(inner);
  if (!isPlainObject) return body;
  const hasSchemaArray = ['connectors', 'streams', 'granted_connections'].some((key) =>
    Array.isArray(inner[key]),
  );
  return hasSchemaArray || inner.object === 'schema' ? inner : body;
}
|
|
1244
|
+
|
|
1245
|
+
/**
 * Return the object-valued entries of `schema.connectors`.
 *
 * @param {unknown} schema
 * @returns {object[]} Empty when `connectors` is missing or not an array.
 */
function extractSchemaConnectors(schema) {
  const connectors = schema?.connectors;
  if (!Array.isArray(connectors)) return [];
  return connectors.filter((item) => Boolean(item) && typeof item === 'object');
}
|
|
1248
|
+
|
|
1249
|
+
/**
 * Flatten a schema into `{ stream, connector }` pairs.
 *
 * When the schema has connectors, every entry of each connector's `streams`
 * array is paired with that connector. Otherwise the object-valued entries of
 * the top-level `schema.streams` are returned with `connector: null`.
 *
 * @param {unknown} schema
 * @returns {Array<{stream: unknown, connector: object|null}>}
 */
function extractSchemaStreamRefs(schema) {
  const connectors = extractSchemaConnectors(schema);
  const refs = [];

  if (connectors.length === 0) {
    const topLevel = Array.isArray(schema?.streams) ? schema.streams : [];
    topLevel.forEach((stream) => {
      if (stream && typeof stream === 'object') {
        refs.push({ stream, connector: null });
      }
    });
    return refs;
  }

  for (const connector of connectors) {
    if (!Array.isArray(connector.streams)) continue;
    connector.streams.forEach((stream) => {
      refs.push({ stream, connector });
    });
  }
  return refs;
}
|
|
1262
|
+
|
|
1263
|
+
/**
 * Collect the truthy stream names found in the top-level `schema.streams`.
 *
 * @param {unknown} schema
 * @returns {string[]} Names resolved by `streamName`, in array order.
 */
function extractSchemaStreamNames(schema) {
  const declared = schema?.streams;
  if (!Array.isArray(declared)) return [];
  const names = [];
  declared.forEach((stream) => {
    const name = streamName(stream);
    if (name) names.push(name);
  });
  return names;
}
|
|
1269
|
+
|
|
1270
|
+
/**
 * Render one stream-list row as a single `key=value` line.
 *
 * Always emits `stream name`, `connection_id`, `connector_key`, and
 * `display_name`; appends `record_count` only when the stream carries a finite
 * numeric `record_count`.
 *
 * @param {unknown} stream
 * @returns {string}
 */
function formatStreamListSummary(stream) {
  const source = objectValue(stream?.source);
  const connectionId = firstString(
    stream?.connection_id,
    stream?.connector_instance_id,
    source?.connection_id,
  );
  const displayName = firstString(
    stream?.display_name,
    stream?.connection_display_name,
    source?.display_name,
    stream?.connector_display_name,
  );
  const recordCount = numberValue(stream?.record_count);

  let line = `stream name=${formatScalar(streamName(stream) || 'unknown')}`;
  line += ` connection_id=${formatScalar(connectionId)}`;
  line += ` connector_key=${formatScalar(connectorKeyFor(stream, null))}`;
  line += ` display_name=${formatScalar(displayName)}`;
  if (recordCount !== null) {
    line += ` record_count=${recordCount}`;
  }
  return line;
}
|
|
1297
|
+
|
|
1298
|
+
/**
 * Render one schema stream as a single `key=value` line.
 *
 * @param {unknown} stream
 * @param {unknown} connector - Connector that owns the stream, or null.
 * @param {object} [options]
 * @param {boolean} [options.includeFieldDetail=true] - When true, appends the
 *   `fields=` segment and, unless it renders as `none`, the `aggregations=` segment.
 * @returns {string}
 */
function formatSchemaStreamSummary(stream, connector, { includeFieldDetail = true } = {}) {
  const segments = [
    `stream name=${formatScalar(streamName(stream) || 'unknown')}`,
    `connector_key=${formatScalar(connectorKeyFor(stream, connector))}`,
    `display_name=${formatScalar(displayNameFor(stream, connector))}`,
    `connections=${formatConnections(grantedConnectionsFor(stream, connector))}`,
  ];
  if (!includeFieldDetail) {
    return segments.join(' ');
  }

  const capabilities = stream?.field_capabilities;
  segments.push(`fields=${formatFieldCapabilities(capabilities)}`);
  const aggregations = formatAggregationCapabilities(capabilities);
  if (aggregations !== 'none') {
    segments.push(`aggregations=${aggregations}`);
  }
  return segments.join(' ');
}
|
|
1318
|
+
|
|
1319
|
+
/**
 * Resolve a stream's name from either a bare string or a stream object.
 *
 * @param {unknown} stream - A non-empty string is returned unchanged; otherwise
 *   `name`, `stream`, `stream_name`, `streamName` are tried in that order.
 * @returns {string|undefined}
 */
function streamName(stream) {
  const isBareName = typeof stream === 'string' && stream.length > 0;
  if (isBareName) {
    return stream;
  }
  const candidates = [stream?.name, stream?.stream, stream?.stream_name, stream?.streamName];
  return firstString(...candidates);
}
|
|
1323
|
+
|
|
1324
|
+
/**
 * Resolve the connector key for a stream, preferring stream-level values over
 * connector-level ones.
 *
 * For each owner (stream first, then connector) the lookup order is
 * `connector_key`, `connector_id`, then `source.connector_key`,
 * `source.connector_id`, `source.id`.
 *
 * @param {unknown} stream
 * @param {unknown} connector
 * @returns {string|undefined} First non-empty string candidate.
 */
function connectorKeyFor(stream, connector) {
  const candidatesFor = (owner) => {
    const source = objectValue(owner?.source);
    return [
      owner?.connector_key,
      owner?.connector_id,
      source?.connector_key,
      source?.connector_id,
      source?.id,
    ];
  };
  return firstString(...candidatesFor(stream), ...candidatesFor(connector));
}
|
|
1340
|
+
|
|
1341
|
+
/**
 * Resolve a human-readable display name for a stream.
 *
 * Stream-level names are preferred, then connector-level names; the
 * `connector_display_name` fields are the last resort.
 *
 * @param {unknown} stream
 * @param {unknown} connector
 * @returns {string|undefined} First non-empty string candidate.
 */
function displayNameFor(stream, connector) {
  const streamSource = objectValue(stream?.source);
  const connectorSource = objectValue(connector?.source);
  const candidates = [
    stream?.display_name,
    stream?.connection_display_name,
    streamSource?.display_name,
    connector?.display_name,
    connectorSource?.display_name,
    connector?.connector_display_name,
    stream?.connector_display_name,
  ];
  return firstString(...candidates);
}
|
|
1354
|
+
|
|
1355
|
+
/**
 * Determine which connections a stream is granted on.
 *
 * Resolution order:
 *   1. the stream's own non-empty `granted_connections` array,
 *   2. the connector's non-empty `granted_connections` array,
 *   3. a single synthesized entry built from the stream's connection id.
 * For (1) and (2) only object-valued entries are returned.
 *
 * @param {unknown} stream
 * @param {unknown} connector
 * @returns {object[]} Possibly empty list of connection descriptors.
 */
function grantedConnectionsFor(stream, connector) {
  const isObjectEntry = (connection) => Boolean(connection) && typeof connection === 'object';
  for (const owner of [stream, connector]) {
    const granted = owner?.granted_connections;
    if (Array.isArray(granted) && granted.length > 0) {
      return granted.filter((connection) => isObjectEntry(connection));
    }
  }

  const source = objectValue(stream?.source);
  const connectionId = firstString(stream?.connection_id, stream?.connector_instance_id, source?.connection_id);
  if (!connectionId) {
    return [];
  }
  const synthesized = {
    connection_id: connectionId,
    display_name: firstString(stream?.display_name, source?.display_name),
    connector_key: connectorKeyFor(stream, null),
  };
  return [synthesized];
}
|
|
1376
|
+
|
|
1377
|
+
/**
 * Render a connection list as `{...}|{...}` segments, capped at
 * `DISCOVERY_CONNECTION_SUMMARY_LIMIT` entries with a trailing `more:N` marker.
 *
 * @param {object[]|null|undefined} connections
 * @returns {string} `'none'` when the list is missing or empty.
 */
function formatConnections(connections) {
  if (!connections || connections.length === 0) return 'none';

  const renderOne = (connection) => {
    const id = firstString(connection?.connection_id, connection?.connector_instance_id);
    const displayName = firstString(connection?.display_name, connection?.name);
    const connectorKey = firstString(
      connection?.connector_key,
      connection?.connector_id,
      objectValue(connection?.source)?.connector_key,
    );
    const fields = [`connection_id:${formatInlineValue(id)}`];
    if (displayName) {
      fields.push(`display_name:${formatInlineValue(displayName)}`);
    }
    if (connectorKey) {
      fields.push(`connector_key:${formatInlineValue(connectorKey)}`);
    }
    return `{${fields.join(',')}}`;
  };

  const segments = connections
    .slice(0, DISCOVERY_CONNECTION_SUMMARY_LIMIT)
    .map((connection) => renderOne(connection));
  const hidden = connections.length - DISCOVERY_CONNECTION_SUMMARY_LIMIT;
  if (hidden > 0) {
    segments.push(`more:${hidden}`);
  }
  return segments.join('|');
}
|
|
1395
|
+
|
|
1396
|
+
/**
 * Render per-field capabilities as `field[flags];field[flags];...`, capped at
 * `DISCOVERY_FIELD_SUMMARY_LIMIT` fields with a trailing `more:N` marker.
 *
 * @param {unknown} fieldCapabilities - Array or object form; see `fieldCapabilityEntries`.
 * @returns {string} `'none'` when there are no entries.
 */
function formatFieldCapabilities(fieldCapabilities) {
  const entries = fieldCapabilityEntries(fieldCapabilities);
  const total = entries.length;
  if (total === 0) return 'none';

  const segments = [];
  for (const [field, capabilities] of entries.slice(0, DISCOVERY_FIELD_SUMMARY_LIMIT)) {
    segments.push(`${formatFieldName(field)}[${formatFieldCapabilityFlags(capabilities)}]`);
  }
  if (total > DISCOVERY_FIELD_SUMMARY_LIMIT) {
    segments.push(`more:${total - DISCOVERY_FIELD_SUMMARY_LIMIT}`);
  }
  return segments.join(';');
}
|
|
1408
|
+
|
|
1409
|
+
/**
 * Normalize field capabilities into `[fieldName, capabilities]` pairs.
 *
 * An array input yields one pair per entry that has a non-empty `name`,
 * `field`, or `path`; an object input yields its own enumerable entries.
 *
 * @param {unknown} fieldCapabilities
 * @returns {Array<[string, unknown]>} Empty for non-object input.
 */
function fieldCapabilityEntries(fieldCapabilities) {
  if (!fieldCapabilities || typeof fieldCapabilities !== 'object') return [];
  if (!Array.isArray(fieldCapabilities)) {
    return Object.entries(fieldCapabilities);
  }
  const pairs = [];
  fieldCapabilities.forEach((entry) => {
    const name = firstString(entry?.name, entry?.field, entry?.path);
    if (name) pairs.push([name, entry]);
  });
  return pairs;
}
|
|
1421
|
+
|
|
1422
|
+
/**
 * Render one field's capabilities as a comma-separated flag string.
 *
 * A non-empty string input, or an object with a non-empty string `flags`
 * property, is passed through unchanged. Otherwise flags are derived in this
 * order: `t=` (type), `g=false`, `eq`, `r`, `lex`, `sem`, `a=`.
 *
 * @param {unknown} capabilities
 * @returns {string} `'declared'` when no flag could be derived.
 */
function formatFieldCapabilityFlags(capabilities) {
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  if (isNonEmptyString(capabilities)) return capabilities;
  if (!capabilities || typeof capabilities !== 'object') return 'declared';
  if (isNonEmptyString(capabilities.flags)) return capabilities.flags;

  const flags = [];
  const declaredType = firstString(capabilities.type, schemaType(objectValue(capabilities.schema)));
  if (declaredType) {
    flags.push(`t=${formatInlineValue(declaredType)}`);
  }
  if (capabilities.granted === false) {
    flags.push('g=false');
  }
  addCapabilityFlag(flags, 'eq', capabilities.exact_filter);
  addRangeCapabilityFlag(flags, capabilities.range_filter);
  addCapabilityFlag(flags, 'lex', capabilities.lexical_search);
  addCapabilityFlag(flags, 'sem', capabilities.semantic_search);
  addAggregationCapabilityFlags(flags, capabilities.aggregation);

  if (flags.length === 0) return 'declared';
  return flags.join(',');
}
|
|
1440
|
+
|
|
1441
|
+
/**
 * Append a simple capability flag to `flags` (mutated in place).
 *
 * Pushes `name` when the capability is usable, or `name=unusable[:reason]`
 * when it is declared but explicitly not usable. Anything else adds nothing.
 *
 * @param {string[]} flags - Accumulator to push into.
 * @param {string} name - Flag abbreviation.
 * @param {unknown} capability - Object with `usable` / `declared` / `reason`.
 * @returns {void}
 */
function addCapabilityFlag(flags, name, capability) {
  if (!capability || typeof capability !== 'object') return;
  const { usable, declared, reason } = capability;
  if (usable === true) {
    flags.push(name);
    return;
  }
  if (declared === true && usable === false) {
    flags.push(`${name}=unusable${reasonSuffix(reason)}`);
  }
}
|
|
1449
|
+
|
|
1450
|
+
/**
 * Append the range-filter flag to `flags` (mutated in place).
 *
 * Pushes `r=<op|op|...>` when usable with a non-empty `operators` array, plain
 * `r` when usable without operators, or `r=unusable[:reason]` when declared
 * but explicitly not usable.
 *
 * @param {string[]} flags - Accumulator to push into.
 * @param {unknown} capability - Object with `usable` / `declared` / `operators` / `reason`.
 * @returns {void}
 */
function addRangeCapabilityFlag(flags, capability) {
  if (!capability || typeof capability !== 'object') return;
  const { usable, declared, operators, reason } = capability;
  if (usable === true) {
    const hasOperators = Array.isArray(operators) && operators.length > 0;
    flags.push(hasOperators ? `r=${formatInlineValue(operators.join('|'))}` : 'r');
    return;
  }
  if (declared === true && usable === false) {
    flags.push(`r=unusable${reasonSuffix(reason)}`);
  }
}
|
|
1461
|
+
|
|
1462
|
+
/**
 * Append an `a=<kind|kind|...>` flag listing the usable aggregation kinds.
 * Nothing is pushed when no kind is usable.
 *
 * @param {string[]} flags - Accumulator to push into (mutated in place).
 * @param {unknown} aggregation - Object keyed by aggregation kind; a kind
 *   counts when its value is an object with `usable === true`.
 * @returns {void}
 */
function addAggregationCapabilityFlags(flags, aggregation) {
  if (!aggregation || typeof aggregation !== 'object') return;
  const usableKinds = [];
  for (const [kind, capability] of Object.entries(aggregation)) {
    if (capability && typeof capability === 'object' && capability.usable === true) {
      usableKinds.push(kind);
    }
  }
  const ordered = orderedAggregationKinds(usableKinds);
  if (ordered.length === 0) return;
  flags.push(`a=${formatInlineValue(ordered.join('|'))}`);
}
|
|
1471
|
+
|
|
1472
|
+
// Aggregation kinds reported in summaries, in the order they are rendered.
const AGGREGATION_SUMMARY_KINDS = [
  'count_distinct',
  'group_by',
  'group_by_time',
  'sum',
  'min',
  'max',
];

// Maximum number of field names listed per aggregation kind before `more:N`.
const AGGREGATION_FIELD_SUMMARY_LIMIT = 12;
|
|
1474
|
+
|
|
1475
|
+
/**
 * Render, per aggregation kind, which fields support it:
 * `kind=field|field;kind=field|...`.
 *
 * Only kinds in `AGGREGATION_SUMMARY_KINDS` are reported, in that order, and
 * each kind lists at most `AGGREGATION_FIELD_SUMMARY_LIMIT` fields followed by
 * `|more:N` when truncated.
 *
 * @param {unknown} fieldCapabilities - Array or object form; see `fieldCapabilityEntries`.
 * @returns {string} `'none'` when no field supports any reported kind.
 */
function formatAggregationCapabilities(fieldCapabilities) {
  const entries = fieldCapabilityEntries(fieldCapabilities);
  if (entries.length === 0) return 'none';

  const fieldsByKind = new Map();
  for (const kind of AGGREGATION_SUMMARY_KINDS) {
    fieldsByKind.set(kind, []);
  }
  entries.forEach(([field, capabilities]) => {
    aggregationKindsForField(capabilities).forEach((kind) => {
      // Kinds outside the summary list have no bucket and are dropped.
      fieldsByKind.get(kind)?.push(formatFieldName(field));
    });
  });

  const segments = AGGREGATION_SUMMARY_KINDS.flatMap((kind) => {
    const fields = fieldsByKind.get(kind) || [];
    if (fields.length === 0) return [];
    const hidden = fields.length - AGGREGATION_FIELD_SUMMARY_LIMIT;
    const overflow = hidden > 0 ? `|more:${hidden}` : '';
    const listed = fields.slice(0, AGGREGATION_FIELD_SUMMARY_LIMIT).join('|');
    return [`${kind}=${listed}${overflow}`];
  });
  return segments.length > 0 ? segments.join(';') : 'none';
}
|
|
1496
|
+
|
|
1497
|
+
/**
 * List the aggregation kinds usable on one field.
 *
 * String capabilities (or a string `flags` property) are parsed as a flag
 * string; otherwise the `aggregation` object is scanned for entries with
 * `usable === true`.
 *
 * @param {unknown} capabilities
 * @returns {string[]} Kinds ordered by `orderedAggregationKinds`; empty when none.
 */
function aggregationKindsForField(capabilities) {
  if (typeof capabilities === 'string') {
    return aggregationKindsFromFlags(capabilities);
  }
  if (!capabilities || typeof capabilities !== 'object') {
    return [];
  }
  const { flags } = capabilities;
  if (typeof flags === 'string') {
    return aggregationKindsFromFlags(flags);
  }
  const aggregation = objectValue(capabilities.aggregation);
  if (!aggregation) {
    return [];
  }
  const usableKinds = [];
  for (const [kind, capability] of Object.entries(aggregation)) {
    if (capability && typeof capability === 'object' && capability.usable === true) {
      usableKinds.push(kind);
    }
  }
  return orderedAggregationKinds(usableKinds);
}
|
|
1507
|
+
|
|
1508
|
+
/**
 * Extract aggregation kinds from a rendered flag string by reading its
 * `a=<kind|kind|...>` segment.
 *
 * @param {string} flags - Comma-separated flag string.
 * @returns {string[]} Trimmed, non-empty kinds ordered by
 *   `orderedAggregationKinds`; empty when there is no `a=` segment.
 */
function aggregationKindsFromFlags(flags) {
  const match = /(?:^|,)a=([^,]+)/.exec(flags);
  if (match === null) return [];
  const kinds = [];
  for (const part of match[1].split('|')) {
    const trimmed = part.trim();
    if (trimmed) kinds.push(trimmed);
  }
  return orderedAggregationKinds(kinds);
}
|
|
1513
|
+
|
|
1514
|
+
/**
 * Order aggregation kinds for display: kinds from
 * `AGGREGATION_SUMMARY_KINDS` first (in that list's order, each at most once),
 * followed by any other kinds in their input order.
 *
 * @param {string[]} kinds
 * @returns {string[]} A new array.
 */
function orderedAggregationKinds(kinds) {
  const present = new Set(kinds);
  const known = [];
  for (const kind of AGGREGATION_SUMMARY_KINDS) {
    if (present.has(kind)) known.push(kind);
  }
  const others = kinds.filter((kind) => !AGGREGATION_SUMMARY_KINDS.includes(kind));
  return known.concat(others);
}
|
|
1521
|
+
|
|
1522
|
+
/**
 * Format an optional reason as a `:reason` suffix.
 *
 * @param {unknown} reason
 * @returns {string} `:` + reason for a non-empty string, otherwise `''`.
 */
function reasonSuffix(reason) {
  if (typeof reason !== 'string' || reason.length === 0) {
    return '';
  }
  return `:${reason}`;
}
|
|
1525
|
+
|
|
1526
|
+
/**
 * Read the `type` of a schema object as a display string.
 *
 * @param {unknown} schema
 * @returns {string|undefined} The string `type`, or the string members of an
 *   array `type` joined with `|`; `undefined` when neither yields a value.
 */
function schemaType(schema) {
  if (!schema || typeof schema !== 'object') return undefined;
  const { type } = schema;
  if (typeof type === 'string') return type;
  if (!Array.isArray(type)) return undefined;
  const joined = type.filter((item) => typeof item === 'string').join('|');
  return joined || undefined;
}
|
|
1532
|
+
|
|
1533
|
+
/**
 * Return `value` when it is a non-null, non-array object; otherwise `null`.
 *
 * @param {unknown} value
 * @returns {object|null}
 */
function objectValue(value) {
  if (!value) return null;
  if (typeof value !== 'object') return null;
  if (Array.isArray(value)) return null;
  return value;
}
|
|
1536
|
+
|
|
1537
|
+
/**
 * Return the first argument that is a non-empty string.
 *
 * @param {...unknown} values
 * @returns {string|undefined} `undefined` when no argument qualifies.
 */
function firstString(...values) {
  return values.find((candidate) => typeof candidate === 'string' && candidate.length > 0);
}
|
|
1543
|
+
|
|
1544
|
+
/**
 * Return `value` when it is a finite number; otherwise `null`.
 *
 * @param {unknown} value
 * @returns {number|null}
 */
function numberValue(value) {
  // Number.isFinite does not coerce, so non-number inputs are rejected too.
  if (typeof value === 'number' && Number.isFinite(value)) {
    return value;
  }
  return null;
}
|
|
1547
|
+
|
|
1548
|
+
/**
 * Render a value as a JSON-quoted string, or the bare word `null` when the
 * value is `null`/`undefined`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function formatScalar(value) {
  if (value === undefined || value === null) {
    return 'null';
  }
  const text = String(value);
  return JSON.stringify(text);
}
|
|
1551
|
+
|
|
1552
|
+
/**
 * Render a value as an unquoted inline token.
 *
 * Each of the characters `; , [ ] { }` becomes `_`, then every run of
 * whitespace is collapsed into a single `_`.
 *
 * @param {unknown} value
 * @returns {string} `'null'` for `null`/`undefined`.
 */
function formatInlineValue(value) {
  if (value === undefined || value === null) {
    return 'null';
  }
  const withoutDelimiters = String(value).replace(/[;,\[\]{}]/g, '_');
  return withoutDelimiters.replace(/\s+/g, '_');
}
|
|
1556
|
+
|
|
1557
|
+
/**
 * Render a field name for inline output by replacing each of the characters
 * `; , [ ] { }` with `_`. Whitespace is left untouched.
 *
 * @param {unknown} value
 * @returns {string}
 */
function formatFieldName(value) {
  const text = String(value);
  return text.replace(/[;,\[\]{}]/g, '_');
}
|
|
1560
|
+
|
|
1561
|
+
/**
 * Serialize a value with `JSON.stringify`, falling back to the JSON-quoted
 * `String(value)` when serialization throws.
 *
 * @param {unknown} value
 * @returns {string|undefined} Whatever `JSON.stringify` returns for the value
 *   when it does not throw.
 */
function stableInlineJson(value) {
  let serialized;
  try {
    serialized = JSON.stringify(value);
  } catch {
    serialized = JSON.stringify(String(value));
  }
  return serialized;
}
|
|
1568
|
+
|
|
1569
|
+
/**
 * Shorten `value` to at most `limit` UTF-16 code units, ending with `…` when
 * anything was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if the last
 * kept code unit would be a lone high surrogate it is dropped as well, so the
 * result stays well-formed (and one unit shorter than the limit).
 *
 * @param {string} value - Text to shorten.
 * @param {number} limit - Maximum length including the ellipsis; negative
 *   values are treated as 0.
 * @returns {string} `value` unchanged when it already fits; `'…'` alone when
 *   the limit is 1 or less and the text does not fit.
 */
function truncateText(value, limit) {
  const safeLimit = Math.max(0, limit);
  if (value.length <= safeLimit) return value;
  if (safeLimit <= 1) return '…';
  // Reserve one code unit for the ellipsis.
  let end = Math.floor(safeLimit - 1);
  const lastKept = value.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range; its low half sits at `end`
  // and is about to be cut off, so drop the high half too.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    end -= 1;
  }
  return `${value.slice(0, end)}…`;
}
|
|
1575
|
+
|
|
1576
|
+
// Number of hits previewed in the search text summary.
const SEARCH_TEXT_PREVIEW_LIMIT = 3;

// Maximum snippet length (UTF-16 code units) inside a text preview line.
const SEARCH_TEXT_SNIPPET_CHAR_LIMIT = 140;

// Maximum snippet length (UTF-16 code units) kept on a normalized search result.
const SEARCH_RESULT_SNIPPET_CHAR_LIMIT = 320;
|
|
1579
|
+
|
|
1580
|
+
/**
 * Build the text summary for a search response.
 *
 * The summary contains the hit count, optional `has_more` / `next_cursor` /
 * `source_mix` markers, up to `SEARCH_TEXT_PREVIEW_LIMIT` preview lines with a
 * fetch hint, and a pointer to the structured content fields.
 *
 * @param {unknown} body - Raw search response envelope.
 * @param {object[]} results - Normalized results.
 * @returns {string}
 */
function summarizeSearch(body, results) {
  const segments = [`search: ${results.length} hit(s).`];

  if (envelopeField(body, 'has_more') === true) {
    segments.push(' has_more=true.');
  }
  const nextCursor = envelopeStringField(body, 'next_cursor');
  if (nextCursor) {
    segments.push(` next_cursor=${formatScalar(nextCursor)}.`);
  }
  segments.push(formatSearchSourceMix(body));

  const previews = results
    .slice(0, SEARCH_TEXT_PREVIEW_LIMIT)
    .map((result, index) => formatSearchPreviewLine(result, index));
  if (previews.length > 0) {
    segments.push(` Top results:\n${previews.join('\n')}`);
    segments.push('\nFetch a hit with `fetch` using the shown id; include connection_id when shown.');
  }

  segments.push(' Search envelope metadata: structuredContent.data; flattened results: structuredContent.results.');
  return segments.join('');
}
|
|
1592
|
+
|
|
1593
|
+
/**
 * Render `body.meta.package.source_mix` as a ` source_mix={...}|{...}.`
 * segment, listing at most 8 entries followed by `more:N`.
 *
 * @param {unknown} body - Raw search response envelope.
 * @returns {string} `''` when there is no non-empty `source_mix` array.
 */
function formatSearchSourceMix(body) {
  const mix = body?.meta?.package?.source_mix;
  if (!Array.isArray(mix) || mix.length === 0) return '';

  const shownLimit = 8;
  const segments = [];
  for (const entry of mix.slice(0, shownLimit)) {
    const fields = [
      `connection_id:${formatInlineValue(entry?.connection_id)}`,
      `connector_key:${formatInlineValue(entry?.connector_key)}`,
      `count:${formatInlineValue(entry?.count)}`,
    ];
    if (entry?.display_name) {
      fields.push(`display_name:${formatInlineValue(entry.display_name)}`);
    }
    segments.push(`{${fields.join(',')}}`);
  }
  if (mix.length > shownLimit) {
    segments.push(`more:${mix.length - shownLimit}`);
  }
  return ` source_mix=${segments.join('|')}.`;
}
|
|
1610
|
+
|
|
1611
|
+
/**
 * Render one normalized search result as a numbered preview line.
 *
 * Identifier-like fields are emitted as inline tokens and free-text fields as
 * JSON-quoted strings; each is truncated to its own length cap. The title is
 * omitted when it merely repeats the id.
 *
 * @param {object} result - Normalized search result.
 * @param {number} index - Zero-based position; rendered one-based.
 * @returns {string}
 */
function formatSearchPreviewLine(result, index) {
  const inlineToken = (text, cap) => formatInlineValue(truncateText(text, cap));
  const quotedText = (text, cap) => formatScalar(truncateText(text, cap));

  const segments = [`${index + 1}. id=${inlineToken(result.id, 80)}`];
  if (result.connection_id) {
    segments.push(`connection_id=${inlineToken(result.connection_id, 80)}`);
  }
  if (result.connector_key) {
    segments.push(`connector_key=${inlineToken(result.connector_key, 60)}`);
  }
  if (result.stream) {
    segments.push(`stream=${inlineToken(result.stream, 60)}`);
  }
  if (result.title && result.title !== result.id) {
    segments.push(`title=${quotedText(result.title, 80)}`);
  }
  if (result.display_name) {
    segments.push(`display_name=${quotedText(result.display_name, 60)}`);
  }
  if (result.snippet) {
    segments.push(`snippet=${quotedText(result.snippet, SEARCH_TEXT_SNIPPET_CHAR_LIMIT)}`);
  }
  return segments.join(' ');
}
|
|
1621
|
+
|
|
1622
|
+
/**
 * Read a field from a response envelope, looking at the body first and then
 * at a non-array `body.data` object.
 *
 * An own property on the body wins even when its value is `undefined`.
 *
 * @param {unknown} body
 * @param {string} field
 * @returns {unknown} `undefined` when the field is not found.
 */
function envelopeField(body, field) {
  if (!body || typeof body !== 'object') return undefined;
  const ownsField = Object.prototype.hasOwnProperty.call(body, field);
  if (ownsField) {
    return body[field];
  }
  const { data } = body;
  const dataIsPlainObject = Boolean(data) && typeof data === 'object' && !Array.isArray(data);
  return dataIsPlainObject ? data[field] : undefined;
}
|
|
1630
|
+
|
|
1631
|
+
/**
 * Read an envelope field and keep it only when it is a non-empty string.
 *
 * @param {unknown} body
 * @param {string} field
 * @returns {string|null}
 */
function envelopeStringField(body, field) {
  const value = envelopeField(body, field);
  if (typeof value !== 'string' || value.length === 0) {
    return null;
  }
  return value;
}
|
|
1635
|
+
|
|
1636
|
+
/**
 * Render the `meta.count` object of an envelope as an inline token.
 *
 * The kind comes from `count.kind` / `count.type` and the value from the first
 * non-nullish of `count.value`, `count.count`, `count.total`.
 *
 * @param {unknown} body
 * @returns {string|null} `kind:value`, just the kind, just the value, or
 *   `null` when there is no count object or it carries neither.
 */
function envelopeCount(body) {
  const meta = objectValue(envelopeField(body, 'meta'));
  const count = objectValue(meta?.count);
  if (!count) return null;

  const kind = firstString(count.kind, count.type);
  const value = count.value ?? count.count ?? count.total;
  const hasValue = value !== undefined && value !== null;

  if (kind) {
    return hasValue
      ? `${formatInlineValue(kind)}:${formatInlineValue(value)}`
      : formatInlineValue(kind);
  }
  return hasValue ? formatInlineValue(value) : null;
}
|
|
1647
|
+
|
|
1648
|
+
/**
 * Convert the raw hits of a search response into flat result objects.
 *
 * Every result has `id`, `title`, and `url`; `stream`, `record_key`,
 * `connection_id`, `display_name`, `connector_key`, and `snippet` are added
 * only when a truthy value was found. Snippets are truncated to
 * `SEARCH_RESULT_SNIPPET_CHAR_LIMIT`.
 *
 * @param {unknown} body - Raw search response.
 * @returns {object[]} One normalized result per candidate hit, in order.
 */
function normalizeSearchResults(body) {
  return searchCandidatesFromBody(body).map((hit, index) => {
    const id = resultIdForHit(hit, index);
    const source = objectValue(hit?.source) || {};
    const stream = streamForHit(hit);
    const recordKey = recordKeyForHit(hit);
    const connectionId = firstString(hit?.connection_id, hit?.connector_instance_id, source.connection_id);
    const displayName = firstString(hit?.display_name, source.display_name);
    const connectorKey = firstString(hit?.connector_key, hit?.connector_id, source.connector_key, source.connector_id);
    const snippet = snippetForSearchHit(hit);

    const normalized = {
      id,
      title: titleForSearchHit(hit, id, { stream, recordKey, connectionId, displayName, connectorKey }),
      url: urlForRecord(hit, id),
    };

    // Optional fields are attached in a fixed order and only when truthy.
    const optionalFields = [
      ['stream', stream],
      ['record_key', recordKey],
      ['connection_id', connectionId],
      ['display_name', displayName],
      ['connector_key', connectorKey],
    ];
    for (const [key, value] of optionalFields) {
      if (value) normalized[key] = value;
    }
    if (snippet) {
      normalized.snippet = truncateText(snippet, SEARCH_RESULT_SNIPPET_CHAR_LIMIT);
    }
    return normalized;
  });
}
|
|
1673
|
+
|
|
1674
|
+
/**
 * Locate the hit array inside a search response.
 *
 * Checked in order: `body.results`, `body.hits`, `body.data`,
 * `body.data.results`, `body.data.data`. The first array found is returned
 * as-is (not copied).
 *
 * @param {unknown} body
 * @returns {Array} The located array, or a new empty array when none is found.
 */
function searchCandidatesFromBody(body) {
  if (!body || typeof body !== 'object') return [];
  const { results, hits, data } = body;
  const nested = data && typeof data === 'object' ? data : {};
  const candidates = [results, hits, data, nested.results, nested.data];
  const found = candidates.find((candidate) => Array.isArray(candidate));
  return found ?? [];
}
|
|
1683
|
+
|
|
1684
|
+
/**
 * Resolve the effective search limit from a caller-supplied value.
 *
 * @param {unknown} value
 * @returns {number} The value capped at 100 when it is a positive integer,
 *   otherwise the default of 25.
 */
function requestedSearchLimit(value) {
  // Number.isInteger is false for every non-number, so no separate typeof check is needed.
  const isPositiveInteger = Number.isInteger(value) && value > 0;
  return isPositiveInteger ? Math.min(value, 100) : 25;
}
|
|
1690
|
+
|
|
1691
|
+
/**
 * Return a copy of a search envelope with its hit arrays replaced by a count
 * and a pointer to `structuredContent.results`.
 *
 * Top-level `results`, `hits`, and array-valued `data` are each removed and
 * replaced by `result_count` + `results_ref`. An object-valued `data` is
 * compacted the same way through `compactSearchEnvelopeDataObject`. The input
 * is not mutated.
 *
 * @param {unknown} body - Raw search response.
 * @param {object} [options]
 * @param {number} [options.resultCount] - Count to report instead of the
 *   removed array's length.
 * @returns {unknown} Non-object input is returned unchanged.
 */
function compactSearchEnvelope(body, { resultCount } = {}) {
  if (!body || typeof body !== 'object') return body;

  const resultsRef = 'structuredContent.results';
  if (Array.isArray(body)) {
    return { object: 'list', results_ref: resultsRef, result_count: resultCount ?? body.length };
  }

  const out = { ...body };
  for (const key of ['results', 'hits', 'data']) {
    const rows = out[key];
    if (!Array.isArray(rows)) continue;
    out.result_count = resultCount ?? rows.length;
    delete out[key];
    out.results_ref = resultsRef;
  }
  // An array-valued `data` was deleted above, so this only sees object-valued data.
  if (out.data && typeof out.data === 'object') {
    out.data = compactSearchEnvelopeDataObject(out.data, { resultCount });
  }
  return out;
}
|
|
1716
|
+
|
|
1717
|
+
/**
 * Return a copy of an envelope `data` object with its `results`, `hits`, and
 * `data` arrays replaced by `result_count` + `results_ref`. The input is not
 * mutated.
 *
 * @param {object} data
 * @param {object} [options]
 * @param {number} [options.resultCount] - Count to report instead of the
 *   removed array's length.
 * @returns {object}
 */
function compactSearchEnvelopeDataObject(data, { resultCount } = {}) {
  const out = { ...data };
  for (const key of ['results', 'hits', 'data']) {
    const rows = out[key];
    if (!Array.isArray(rows)) continue;
    out.result_count = resultCount ?? rows.length;
    delete out[key];
    out.results_ref = 'structuredContent.results';
  }
  return out;
}
|
|
1736
|
+
|
|
1737
|
+
/**
 * Choose the identifier reported for a search hit.
 *
 * Preference order: an explicit `result_id` / `resultId`; then
 * `stream:recordId` when both parts are available; then the hit's `id` or
 * `url`; finally a positional `result:N` placeholder.
 *
 * @param {unknown} hit
 * @param {number} index - Zero-based position, used for the placeholder.
 * @returns {string}
 */
function resultIdForHit(hit, index) {
  // NOTE(review): `stringValue` is defined outside this section; it is assumed
  // to return a falsy value for unusable input — confirm.
  const explicitId = stringValue(hit?.result_id ?? hit?.resultId);
  if (explicitId) {
    return explicitId;
  }

  const stream = streamForHit(hit);
  const recordId = stringValue(
    hit?.id ?? hit?.record_id ?? hit?.recordId ?? hit?.record_key ?? hit?.recordKey,
  );
  if (stream && recordId) {
    return `${stream}:${recordId}`;
  }

  const looseId = stringValue(hit?.id ?? hit?.url);
  if (looseId) {
    return looseId;
  }
  return `result:${index + 1}`;
}
|
|
1750
|
+
|
|
1751
|
+
/**
 * Read the stream name of a search hit from `stream`, `stream_name`, or
 * `streamName` (first non-empty string wins).
 *
 * @param {unknown} hit
 * @returns {string|undefined}
 */
function streamForHit(hit) {
  const candidates = [hit?.stream, hit?.stream_name, hit?.streamName];
  return firstString(...candidates);
}
|
|
1754
|
+
|
|
1755
|
+
/**
 * Read the record key of a search hit from `record_key`, `recordKey`,
 * `record_id`, `recordId`, or `id` (first non-empty string wins).
 *
 * @param {unknown} hit
 * @returns {string|undefined}
 */
function recordKeyForHit(hit) {
  const candidates = [hit?.record_key, hit?.recordKey, hit?.record_id, hit?.recordId, hit?.id];
  return firstString(...candidates);
}
|
|
1758
|
+
|
|
1759
|
+
/**
 * Pick the snippet text for a search hit.
 *
 * Preference order: `snippet.text` (object-valued snippet), a string-valued
 * `snippet`, then `snippet_text`, `summary`, and `text`.
 *
 * @param {unknown} hit
 * @returns {string|undefined} First non-empty string candidate.
 */
function snippetForSearchHit(hit) {
  const rawSnippet = hit?.snippet;
  const snippetObject = objectValue(rawSnippet);
  const snippetString = typeof rawSnippet === 'string' ? rawSnippet : undefined;
  return firstString(snippetObject?.text, snippetString, hit?.snippet_text, hit?.summary, hit?.text);
}
|
|
1769
|
+
|
|
1770
|
+
/**
 * Split a `stream:record_id` identifier at its first colon.
 *
 * @param {unknown} id - Validated by `requireSafeName` before splitting.
 * @returns {{stream: string, recordId: string}}
 * @throws {Error} When the colon is missing, leading, or trailing.
 *   NOTE(review): `requireSafeName` is defined outside this section and
 *   presumably throws on invalid names too — confirm.
 */
function parseRecordResultId(id) {
  const value = requireSafeName(id, 'id');
  const separator = value.indexOf(':');
  const hasBothParts = separator > 0 && separator !== value.length - 1;
  if (!hasBothParts) {
    throw new Error('id must use stream:record_id format');
  }
  const stream = requireSafeName(value.slice(0, separator), 'stream');
  const recordId = requireSafeName(value.slice(separator + 1), 'record_id');
  return { stream, recordId };
}
|
|
1781
|
+
|
|
1782
|
+
/**
 * Convert a fetched record into the `{ id, title, text, url, metadata }`
 * document shape.
 *
 * The record id and stream are read from the record first and from its `data`
 * payload second. When the requested id has no colon and both a stream and an
 * id are known, the document id becomes `stream:id`; otherwise the requested
 * id is kept.
 *
 * @param {unknown} record - Fetched record.
 * @param {string} requestedId - Id the caller asked for.
 * @param {unknown} providerUrl - Passed through to `urlForFetchedRecord`.
 * @returns {{id: string, title: unknown, text: unknown, url: unknown, metadata: unknown}}
 */
function normalizeFetchedDocument(record, requestedId, providerUrl) {
  const payload = objectValue(record?.data);

  const recordId = stringValue(record?.id ?? record?.record_id ?? record?.recordId);
  const payloadId = stringValue(payload?.id ?? payload?.record_id ?? payload?.recordId);
  const id = recordId || payloadId || requestedId;

  const recordStream = stringValue(record?.stream ?? record?.stream_name ?? record?.streamName);
  const stream = recordStream || stringValue(payload?.stream ?? payload?.stream_name ?? payload?.streamName);

  const canQualify = stream && id && !requestedId.includes(':');
  const resultId = canQualify ? `${stream}:${id}` : requestedId;

  const title = titleForFetchedRecord(record, payload, resultId);
  const text = textForFetchedRecord(record, payload);
  const url = urlForFetchedRecord(record, payload, resultId, providerUrl);
  const metadata = metadataForRecord(record, { id: resultId, title, url });
  return { id: resultId, title, text, url, metadata };
}
|
|
1798
|
+
|
|
1799
|
+
/**
 * Choose a title for a fetched record: the payload's explicit title, then the
 * record's explicit title, then a title derived from source identity.
 *
 * @param {unknown} record
 * @param {object|null} payload - The record's `data` object, if any.
 * @param {string} fallbackId - Used when nothing else yields a title.
 * @returns {string}
 */
function titleForFetchedRecord(record, payload, fallbackId) {
  if (payload) {
    const payloadTitle = titleForRecord(payload, '');
    if (payloadTitle) return payloadTitle;
  }
  const recordTitle = titleForRecord(record, '');
  if (recordTitle) return recordTitle;
  return titleFromSourceIdentity(record, payload, fallbackId);
}
|
|
1803
|
+
|
|
1804
|
+
/**
 * Read an explicit title from a record, trying `title`, `name`, `subject`,
 * and `summary` in that order.
 *
 * @param {unknown} record
 * @param {string} fallbackId - Returned when no field yields a truthy value.
 * @returns {string}
 */
function titleForRecord(record, fallbackId) {
  // NOTE(review): `stringValue` is defined outside this section; it is assumed
  // to return a falsy value for unusable input — confirm.
  for (const key of ['title', 'name', 'subject', 'summary']) {
    const text = stringValue(record?.[key]);
    if (text) return text;
  }
  return fallbackId;
}
|
|
1813
|
+
|
|
1814
|
+
/**
 * Choose a title for a search hit.
 *
 * An explicit record title wins. Otherwise the title is assembled as
 * `label / stream / timestamp` from whichever parts are available, where the
 * label is the display name, connector key, or connection id.
 *
 * @param {unknown} record - The raw hit.
 * @param {string} fallbackId - Used when no part is available.
 * @param {object} [source] - `{ displayName, connectorKey, connectionId, stream }`.
 * @returns {string}
 */
function titleForSearchHit(record, fallbackId, source = {}) {
  const explicitTitle = titleForRecord(record, '');
  if (explicitTitle) {
    return explicitTitle;
  }
  const timestamp = titleTimestampForRecord(record);
  const label = source.displayName || source.connectorKey || source.connectionId;
  const available = [label, source.stream, timestamp].filter(Boolean);
  if (available.length === 0) {
    return fallbackId;
  }
  return available.join(' / ');
}
|
|
1822
|
+
|
|
1823
|
+
// Synthesizes "<label> / <stream> / <timestamp>" for a record that declares
// no title of its own, returning `fallbackId` when every part is missing.
//
// Candidate order is significant and is passed to firstString as-is
// (presumably it yields the first usable string — confirm against its
// definition):
//   label:  display_name, connector_key, connector_id — each tried on the
//           record and then the payload — followed by the same three keys on
//           the `source` object, then source.connection_id.
//   stream: record.stream, record.stream_name, payload.stream,
//           payload.stream_name, source.stream.
// The timestamp prefers the payload over the record.
function titleFromSourceIdentity(record, payload, fallbackId) {
  const source = objectValue(record?.source) || objectValue(payload?.source) || {};
  const identityKeys = ['display_name', 'connector_key', 'connector_id'];
  const streamKeys = ['stream', 'stream_name'];

  const label = firstString(
    ...identityKeys.flatMap((key) => [record?.[key], payload?.[key]]),
    ...identityKeys.map((key) => source[key]),
    source.connection_id,
  );
  const stream = firstString(
    ...streamKeys.map((key) => record?.[key]),
    ...streamKeys.map((key) => payload?.[key]),
    source.stream,
  );
  const timestamp = titleTimestampForRecord(payload) || titleTimestampForRecord(record);

  const segments = [label, stream, timestamp].filter((segment) => Boolean(segment));
  if (segments.length === 0) return fallbackId;
  return segments.join(' / ');
}
|
|
1842
|
+
|
|
1843
|
+
// Finds a timestamp string to show in a synthesized title.
//
// Authored-style fields are preferred over emission fields. Within each
// group the record's own fields are tried first, then the same fields on
// each nested container (`data`, `record`, `metadata`, `source`, in that
// order, skipping any that objectValue rejects). The candidates are handed
// to firstString in exactly that order.
function titleTimestampForRecord(record) {
  const authoredKeys = [
    'sent_at',
    'sentAt',
    'authored_at',
    'authoredAt',
    'created_at',
    'createdAt',
    'source_created_at',
    'sourceCreatedAt',
    'occurred_at',
    'occurredAt',
    'updated_at',
    'updatedAt',
  ];
  const emittedKeys = ['emitted_at', 'emittedAt'];

  const containers = [];
  for (const key of ['data', 'record', 'metadata', 'source']) {
    const container = objectValue(record?.[key]);
    if (container) containers.push(container);
  }

  // Top-level values first, then every container's values, container by container.
  const candidatesFor = (keys) => [
    ...keys.map((key) => record?.[key]),
    ...containers.flatMap((container) => keys.map((key) => container[key])),
  ];

  return firstString(...candidatesFor(authoredKeys)) || firstString(...candidatesFor(emittedKeys));
}
|
|
1888
|
+
|
|
1889
|
+
// Resolves the `text` of a fetched document: declared text on the payload
// (when a payload exists), else declared text on the record, else the
// bounded JSON fallback of the payload (or of the record when there is no
// payload).
function textForFetchedRecord(record, payload) {
  let documentText;
  if (payload) {
    documentText = declaredTextForRecord(payload);
  }
  if (!documentText) {
    documentText = declaredTextForRecord(record);
  }
  return documentText ? documentText : fallbackTextForRecord(payload || record);
}
|
|
1894
|
+
|
|
1895
|
+
// Upper bound, in characters, on the JSON-stringify fallback used for
// `fetch`'s `text` field. It applies to the fallback ONLY: a declared
// text-like field (`text`/`content`/`body`/`summary`) is the document text
// ChatGPT consumes and is returned verbatim and unbounded — that is the
// contract. The fallback fires only when a record declares none of those
// fields. Left uncapped, it would pretty-print an arbitrary structured record
// into `text` and turn document fetch into a second record-read path. Capping
// just the fallback keeps `fetch` document-shaped and points agents at the
// structured read tools for canonical records, while no declared text is ever
// truncated and no field an agent needs is dropped.
const FETCH_TEXT_FALLBACK_CHAR_LIMIT = 1024;

// Suffix that replaces the tail of an over-long fallback serialization.
const FETCH_TEXT_FALLBACK_POINTER =
  '… [record has no text/content/body/summary field; use query_records or fetch(fields) for structured records]';
|
|
1907
|
+
|
|
1908
|
+
// Text for a single record: its declared text-like field when one exists,
// otherwise the bounded JSON fallback.
function textForRecord(record) {
  return declaredTextForRecord(record) || fallbackTextForRecord(record);
}
|
|
1913
|
+
|
|
1914
|
+
// Returns the first non-empty string among the record's `text`, `content`,
// `body` and `summary` fields (checked in that order), or undefined when the
// record declares none of them.
function declaredTextForRecord(record) {
  for (const key of ['text', 'content', 'body', 'summary']) {
    const candidate = stringValue(record?.[key]);
    if (candidate) return candidate;
  }
  return undefined;
}
|
|
1922
|
+
|
|
1923
|
+
// Pretty-prints `record` as JSON for use as fallback document text, capped at
// `limit` characters. When the serialization is longer than `limit`, it is
// cut so that head + `pointer` fits within `limit`, and `pointer` is
// appended.
//
// @param {unknown} record   Value to serialize.
// @param {number} [limit]   Maximum length of the returned string; defaults
//                           to FETCH_TEXT_FALLBACK_CHAR_LIMIT.
// @param {string} [pointer] Suffix marking truncation; defaults to
//                           FETCH_TEXT_FALLBACK_POINTER.
// @returns {string} The serialization, possibly truncated; '' when the value
//                   has no JSON representation.
function fallbackTextForRecord(
  record,
  limit = FETCH_TEXT_FALLBACK_CHAR_LIMIT,
  pointer = FETCH_TEXT_FALLBACK_POINTER,
) {
  const serialized = JSON.stringify(record, null, 2);
  // JSON.stringify yields undefined (not a string) for undefined, functions
  // and symbols; reading `.length` on that would throw.
  if (typeof serialized !== 'string') return '';
  if (serialized.length <= limit) return serialized;

  let head = Math.max(0, limit - pointer.length);
  // Do not end the head on the first half of a surrogate pair: the cut would
  // leave a lone high surrogate in the output.
  if (head > 0) {
    const lastUnit = serialized.charCodeAt(head - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head -= 1;
  }
  return `${serialized.slice(0, head)}${pointer}`;
}
|
|
1929
|
+
|
|
1930
|
+
// Resolves the URL reported for a fetched document. URL-like fields on the
// payload are tried first, then the same fields on the record (`url`,
// `record_url`, `recordUrl`, `href`, `source_url`, `sourceUrl`, in that
// order). When none is usable the result is delegated to urlForRecord.
function urlForFetchedRecord(record, payload, fallbackId, providerUrl) {
  const urlKeys = ['url', 'record_url', 'recordUrl', 'href', 'source_url', 'sourceUrl'];
  const declaredUrl = firstString(
    ...urlKeys.map((key) => payload?.[key]),
    ...urlKeys.map((key) => record?.[key]),
  );
  if (declaredUrl) {
    return declaredUrl;
  }
  return urlForRecord(record, fallbackId, providerUrl);
}
|
|
1948
|
+
|
|
1949
|
+
// Resolves a URL for a record.
//
// 1. The first non-empty string among the record's `url`, `record_url`,
//    `recordUrl`, `href`, `source_url`, `sourceUrl` fields. Each field is
//    validated on its own, so an empty or non-string `url` does not hide a
//    usable value in a later field (a `??` chain would stop at the first
//    non-nullish value, usable or not).
// 2. Otherwise, when `providerUrl` is set and `fallbackId` parses as a record
//    result id, the resource-server record endpoint under `providerUrl`.
// 3. Otherwise a `pdpp://record/<id>` URI built from `fallbackId`.
//
// @param {object|null|undefined} record
// @param {string} fallbackId   Result id used for the synthesized URLs.
// @param {string} [providerUrl] Resource-server base URL; one trailing slash
//                               is removed before the path is appended.
// @returns {string}
function urlForRecord(record, fallbackId, providerUrl) {
  for (const key of ['url', 'record_url', 'recordUrl', 'href', 'source_url', 'sourceUrl']) {
    const candidate = stringValue(record?.[key]);
    if (candidate) return candidate;
  }
  if (providerUrl && fallbackId) {
    const recordRef = parseRecordResultIdOrNull(fallbackId);
    if (recordRef) {
      const base = providerUrl.replace(/\/$/, '');
      return `${base}/v1/streams/${encodeURIComponent(recordRef.stream)}/records/${encodeURIComponent(recordRef.recordId)}`;
    }
  }
  return `pdpp://record/${encodeURIComponent(fallbackId)}`;
}
|
|
1961
|
+
|
|
1962
|
+
// Non-throwing wrapper around parseRecordResultId: yields its result, or
// null when it throws for the given id.
function parseRecordResultIdOrNull(id) {
  let parsed = null;
  try {
    parsed = parseRecordResultId(id);
  } catch {
    // Deliberate: an id that cannot be parsed is reported as null.
  }
  return parsed;
}
|
|
1969
|
+
|
|
1970
|
+
// Builds the flat `metadata` object attached to a document result.
//
// Sources, applied in this order (later sources overwrite earlier ones only
// where noted):
//   1. every scalar entry of `record.metadata` (when it is a plain object);
//   2. identity keys from `record.data` — stream/connection/connector/display
//      names — filled in only where step 1 left the key undefined;
//   3. allow-listed top-level record fields (FETCH_METADATA_RECORD_KEYS),
//      which overwrite earlier values, except fields that merely repeat the
//      document's own id/title/url as given in `omitted`.
//
// Only scalar values (string, number, boolean, null) are ever copied. Step 2
// applies the same scalar check as steps 1 and 3 so that an object- or
// array-valued identity key in `record.data` cannot leak into the metadata.
//
// @param {object|null|undefined} record
// @param {{id?: unknown, title?: unknown, url?: unknown}} omitted
//        Values already surfaced on the document itself.
// @returns {object} A new metadata object; {} when `record` is not an object.
function metadataForRecord(record, omitted) {
  if (!record || typeof record !== 'object') {
    return {};
  }
  const metadata = {};
  if (record.metadata && typeof record.metadata === 'object' && !Array.isArray(record.metadata)) {
    for (const [key, value] of Object.entries(record.metadata)) {
      if (isDocumentMetadataValue(value)) metadata[key] = value;
    }
  }
  const payload = objectValue(record.data);
  if (payload) {
    for (const key of [
      'stream',
      'stream_name',
      'streamName',
      'connection_id',
      'connector_key',
      'connector_id',
      'display_name',
    ]) {
      const value = payload[key];
      // isDocumentMetadataValue is false for undefined, so absent keys are
      // skipped here as well.
      if (metadata[key] === undefined && isDocumentMetadataValue(value)) {
        metadata[key] = value;
      }
    }
  }
  for (const [key, value] of Object.entries(record)) {
    // Containers and document text never belong in metadata, regardless of
    // what the allow-list contains.
    if (['metadata', 'data', 'text', 'content', 'body'].includes(key)) continue;
    if (isOmittedDocumentField(key, value, omitted)) continue;
    if (!FETCH_METADATA_RECORD_KEYS.has(key)) continue;
    if (!isDocumentMetadataValue(value)) continue;
    metadata[key] = value;
  }
  return metadata;
}
|
|
2005
|
+
|
|
2006
|
+
// Reports whether a record field merely repeats a value the document result
// already exposes. Id-like keys are compared with `omitted.id`, `title` with
// `omitted.title`, and URL-like keys with `omitted.url` (strict equality);
// every other key is never considered omitted.
function isOmittedDocumentField(key, value, omitted) {
  switch (key) {
    case 'id':
    case 'record_id':
    case 'recordId':
      return value === omitted.id;
    case 'title':
      return value === omitted.title;
    case 'url':
    case 'record_url':
    case 'recordUrl':
    case 'href':
    case 'source_url':
    case 'sourceUrl':
      return value === omitted.url;
    default:
      return false;
  }
}
|
|
2012
|
+
|
|
2013
|
+
// Allow-list of top-level record fields that metadataForRecord may copy into
// a document's metadata. Both snake_case and camelCase spellings are listed
// where a field is known under either.
const FETCH_METADATA_RECORD_KEYS = new Set([
  // Record identity.
  'object', 'id', 'record_id', 'recordId',
  // Stream identity.
  'stream', 'stream_name', 'streamName',
  // Connection / connector identity.
  'connection_id', 'connector_key', 'connector_id', 'display_name',
  // Timestamps.
  'emitted_at', 'emittedAt',
  'sent_at', 'sentAt',
  'created_at', 'createdAt',
  'updated_at', 'updatedAt',
]);
|
|
2034
|
+
|
|
2035
|
+
// True for the scalar values allowed in document metadata: null, strings,
// numbers and booleans. Everything else (objects, arrays, undefined,
// functions, symbols, bigints) is rejected.
function isDocumentMetadataValue(value) {
  if (value === null) return true;
  const kind = typeof value;
  return kind === 'string' || kind === 'number' || kind === 'boolean';
}
|
|
2038
|
+
|
|
2039
|
+
// Returns `value` when it is a non-empty string; undefined for anything else
// (including the empty string).
function stringValue(value) {
  if (typeof value !== 'string') return undefined;
  if (value === '') return undefined;
  return value;
}
|
|
2042
|
+
|
|
2043
|
+
// Converts a failed resource-server response into an MCP error tool result.
//
// The error object is `response.error` when it is neither null nor
// undefined; otherwise a generic `rs_error` derived from the HTTP status.
// It is returned twice: pretty-printed as the single text content item, and
// as-is under `structuredContent` together with the provider URL, HTTP
// status and request id.
function errorToolResult(response, providerUrl) {
  const { status, requestId } = response;

  let error = response.error;
  if (error === null || error === undefined) {
    error = {
      type: 'rs_error',
      code: `http_${status}`,
      message: `Resource server returned HTTP ${status}`,
    };
  }

  const content = [{ type: 'text', text: JSON.stringify(error, null, 2) }];
  const structuredContent = {
    error,
    provider_url: providerUrl,
    http_status: status,
    request_id: requestId,
  };
  return { isError: true, content, structuredContent };
}
|
|
2065
|
+
|
|
2066
|
+
// Module-private helpers gathered under one exported namespace object.
// NOTE(review): presumably exported so tests can reach them — confirm; the
// name suggests this is not part of the supported public API.
export const __internal = {
  // Input validation and query shaping.
  requireSafeName,
  pickQuery,
  // Response → MCP tool-result converters.
  toToolResult,
  toSearchToolResult,
  toFetchToolResult,
  // Schema/stream resolution.
  resolveStreamName,
  resolveSchemaDetail,
};
|