@speakspec/astro 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +193 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +12 -0
- package/dist/middleware/index.d.ts +1 -0
- package/dist/middleware/index.js +2 -0
- package/dist/runtime/config.d.ts +46 -0
- package/dist/runtime/config.js +81 -0
- package/dist/runtime/middleware/ai-bot-detect.d.ts +2 -0
- package/dist/runtime/middleware/ai-bot-detect.js +73 -0
- package/dist/runtime/server/cache-store.d.ts +8 -0
- package/dist/runtime/server/cache-store.js +27 -0
- package/dist/runtime/server/routes/webhook.d.ts +2 -0
- package/dist/runtime/server/routes/webhook.js +90 -0
- package/dist/runtime/server/routes/well-known-aidp.d.ts +2 -0
- package/dist/runtime/server/routes/well-known-aidp.js +79 -0
- package/dist/runtime/server/routes/well-known-content.d.ts +2 -0
- package/dist/runtime/server/routes/well-known-content.js +84 -0
- package/dist/runtime/server/routes/well-known-directory.d.ts +2 -0
- package/dist/runtime/server/routes/well-known-directory.js +99 -0
- package/dist/runtime/server/utils/aidp-verify.d.ts +152 -0
- package/dist/runtime/server/utils/aidp-verify.js +332 -0
- package/dist/runtime/server/utils/bot-detect.d.ts +26 -0
- package/dist/runtime/server/utils/bot-detect.js +75 -0
- package/dist/runtime/server/utils/cache.d.ts +35 -0
- package/dist/runtime/server/utils/cache.js +80 -0
- package/dist/runtime/server/utils/content-registry.d.ts +3 -0
- package/dist/runtime/server/utils/content-registry.js +24 -0
- package/dist/runtime/server/utils/fetch-content.d.ts +14 -0
- package/dist/runtime/server/utils/fetch-content.js +53 -0
- package/dist/runtime/server/utils/fetch-directive.d.ts +21 -0
- package/dist/runtime/server/utils/fetch-directive.js +52 -0
- package/dist/runtime/server/utils/fetch-directory.d.ts +21 -0
- package/dist/runtime/server/utils/fetch-directory.js +59 -0
- package/dist/runtime/server/utils/hmac-verify.d.ts +37 -0
- package/dist/runtime/server/utils/hmac-verify.js +63 -0
- package/dist/runtime/server/utils/impression-queue.d.ts +33 -0
- package/dist/runtime/server/utils/impression-queue.js +145 -0
- package/dist/runtime/server/utils/query.d.ts +14 -0
- package/dist/runtime/server/utils/query.js +33 -0
- package/dist/runtime/version.d.ts +2 -0
- package/dist/runtime/version.js +2 -0
- package/package.json +62 -0
- package/src/components/AidpContent.astro +23 -0
- package/src/components/AidpLinks.astro +10 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Crawler "source" buckets. Logged alongside the matched label so
|
|
3
|
+
* observability dashboards can filter by trust provider without
|
|
4
|
+
* maintaining a separate mapping table.
|
|
5
|
+
*/
|
|
6
|
+
export type CrawlerSource = 'openai' | 'anthropic' | 'perplexity' | 'google' | 'commoncrawl' | 'bytedance' | 'cohere' | 'diffbot' | 'apple' | 'meta';
|
|
7
|
+
export interface CrawlerMatch {
|
|
8
|
+
/** Lowercased label identifying the matched crawler. */
|
|
9
|
+
label: string;
|
|
10
|
+
/** Trust provider / vendor bucket — useful for log-side aggregation. */
|
|
11
|
+
source: CrawlerSource;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* Returns the matched crawler label when `ua` looks like a known AI
|
|
15
|
+
* bot, or `null` otherwise. Empty / undefined input always returns
|
|
16
|
+
* null (we don't classify unknown UAs).
|
|
17
|
+
*/
|
|
18
|
+
export declare function detectAICrawler(ua: string | null | undefined): CrawlerMatch | null;
|
|
19
|
+
/** Returns true when `ua` matches any known AI crawler pattern. */
|
|
20
|
+
export declare function isAICrawler(ua: string | null | undefined): boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Returns true when `path` matches any of the `excludePaths` prefixes.
|
|
23
|
+
* Comparison is exact-prefix; trailing slashes on the configured
|
|
24
|
+
* prefix are tolerated.
|
|
25
|
+
*/
|
|
26
|
+
export declare function isExcludedPath(path: string, excludePaths?: string[]): boolean;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Pure helper: classify a User-Agent string as an AI crawler when it
|
|
2
|
+
// matches one of the known patterns. Used by the opt-in middleware
|
|
3
|
+
// at `src/runtime/middleware/ai-bot-detect.ts` to surface
|
|
4
|
+
// structured impressions for AI traffic.
|
|
5
|
+
//
|
|
6
|
+
// Patterns ordered most-specific-first to keep the match label
|
|
7
|
+
// useful: e.g. `GPTBot/1.0` returns `gptbot`, not the generic
|
|
8
|
+
// `chatgpt-user`. Patterns are matched case-insensitively.
|
|
9
|
+
//
|
|
10
|
+
// Sources for the canonical UA strings:
|
|
11
|
+
// - OpenAI: https://platform.openai.com/docs/bots
|
|
12
|
+
// - Anthropic: https://docs.anthropic.com/en/docs/about-claude/claude-bot
|
|
13
|
+
// - Google AI: https://developers.google.com/search/docs/crawling-indexing/google-extended
|
|
14
|
+
// - Perplexity: https://docs.perplexity.ai/guides/bots
|
|
15
|
+
// - Common Crawl + Bytedance / Cohere / Diffbot: industry references
|
|
16
|
+
// Signature table of known AI crawlers. Each row pairs the label and
// vendor bucket that get reported with the case-insensitive, whole-word
// regex that is tested against the User-Agent. Rows are scanned top to
// bottom and the first hit wins.
const PATTERNS = [
    // OpenAI
    { label: 'gptbot', source: 'openai', regex: /\bGPTBot\b/i },
    { label: 'chatgpt-user', source: 'openai', regex: /\bChatGPT-User\b/i },
    { label: 'oai-searchbot', source: 'openai', regex: /\bOAI-SearchBot\b/i },
    // Anthropic
    { label: 'claudebot', source: 'anthropic', regex: /\bClaudeBot\b/i },
    { label: 'claude-web', source: 'anthropic', regex: /\bClaude-Web\b/i },
    { label: 'anthropic-ai', source: 'anthropic', regex: /\bAnthropic-AI\b/i },
    // Perplexity
    { label: 'perplexitybot', source: 'perplexity', regex: /\bPerplexityBot\b/i },
    // Google AI
    { label: 'google-extended', source: 'google', regex: /\bGoogle-Extended\b/i },
    // Common Crawl (training data)
    { label: 'ccbot', source: 'commoncrawl', regex: /\bCCBot\b/i },
    // ByteDance
    { label: 'bytespider', source: 'bytedance', regex: /\bBytespider\b/i },
    // Cohere
    { label: 'cohere-ai', source: 'cohere', regex: /\bcohere-ai\b/i },
    // Diffbot (used by some LLM data pipelines)
    { label: 'diffbot', source: 'diffbot', regex: /\bDiffbot\b/i },
    // Apple
    { label: 'applebot-extended', source: 'apple', regex: /\bApplebot-Extended\b/i },
    // Meta
    { label: 'meta-externalagent', source: 'meta', regex: /\bmeta-externalagent\b/i },
];
/**
 * Classifies a User-Agent string against the `PATTERNS` table.
 *
 * @param ua Raw User-Agent header value; may be null, undefined or empty.
 * @returns `{ label, source }` of the first matching row, or `null`
 *   when `ua` is falsy or no row matches.
 */
export function detectAICrawler(ua) {
    // Missing or empty UAs are never classified.
    if (!ua) {
        return null;
    }
    const hit = PATTERNS.find((entry) => entry.regex.test(ua));
    // Return a fresh object so callers never see the regex.
    return hit ? { label: hit.label, source: hit.source } : null;
}
|
|
57
|
+
/**
 * Boolean convenience wrapper around `detectAICrawler`.
 *
 * @param ua Raw User-Agent header value; may be null, undefined or empty.
 * @returns `true` when `detectAICrawler(ua)` yields a match, else `false`.
 */
export function isAICrawler(ua) {
    const match = detectAICrawler(ua);
    return match !== null;
}
|
|
61
|
+
/**
 * Returns true when `path` falls under any of the `excludePaths` prefixes.
 *
 * Matching is segment-aware: a configured `/api` excludes `/api` and
 * `/api/users` but not `/apix`. A trailing slash on the configured
 * prefix is tolerated in both directions, so `/api/` behaves exactly
 * like `/api` (including for the bare path `/api`). Empty entries are
 * skipped.
 *
 * @param path Request path to test.
 * @param excludePaths Configured prefixes; defaults to none.
 * @returns `true` when at least one prefix covers `path`.
 */
export function isExcludedPath(path, excludePaths = []) {
    for (const raw of excludePaths) {
        if (!raw)
            continue;
        // Drop one trailing slash so "/api/" also covers the exact path
        // "/api"; a bare "/" is kept as-is so it still means "everything".
        const base = raw.length > 1 && raw.endsWith('/') ? raw.slice(0, -1) : raw;
        // The slash-terminated form keeps "/api" from matching "/apix".
        const prefix = base.endsWith('/') ? base : base + '/';
        if (path === base || path.startsWith(prefix))
            return true;
    }
    return false;
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
export interface CachedBundle<T = unknown> {
|
|
2
|
+
payload: T;
|
|
3
|
+
etag: string;
|
|
4
|
+
expiresAt: number;
|
|
5
|
+
}
|
|
6
|
+
/** Default cache time-to-live in milliseconds (the implementation sets it to five minutes). */
export declare const DEFAULT_CACHE_TTL_MS: number;
|
|
7
|
+
/** Builds a storage key of the form `${scope}:${id}`. */
export declare function cacheKey(scope: string, id: string): string;
|
|
8
|
+
/** True when `bundle` is non-null and its `expiresAt` is still later than `Date.now()`. */
export declare function isFresh<T>(bundle: CachedBundle<T> | null): boolean;
|
|
9
|
+
/**
|
|
10
|
+
* RFC 7232 §2.3.2 weak comparison. Strips any `W/` prefix on either
|
|
11
|
+
* side before comparing the opaque value. Wildcard `*` is NOT
|
|
12
|
+
* supported (treated as literal); AIDP agents send specific tags.
|
|
13
|
+
*/
|
|
14
|
+
export declare function etagMatches(inbound: string | undefined | null, current: string | undefined | null): boolean;
|
|
15
|
+
/**
|
|
16
|
+
* True when `err` looks like a fetch failure with a 4xx status.
|
|
17
|
+
* Used by route handlers to distinguish operator-action errors
|
|
18
|
+
* (bad apiKey, removed entity) from transient outages — the former
|
|
19
|
+
* surface as 502 with detail; the latter serve stale.
|
|
20
|
+
*/
|
|
21
|
+
export declare function isUpstream4xx(err: unknown): boolean;
|
|
22
|
+
export interface CacheStorage {
|
|
23
|
+
removeItem: (key: string) => Promise<unknown>;
|
|
24
|
+
getKeys: (base: string) => Promise<string[]>;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Removes the `entity:{slug}` entry, then every key that
 * `storage.getKeys` reports under the `content:{slug}:` and
 * `directory:{slug}:` prefixes.
 */
export declare function invalidateEntityCache(storage: CacheStorage, slug: string): Promise<void>;
|
|
27
|
+
/**
 * Removes the `content:{slug}:{contentId}` entry, then every key that
 * `storage.getKeys` reports under the `directory:{slug}:` prefix.
 */
export declare function invalidateContentCache(storage: CacheStorage, slug: string, contentId: string): Promise<void>;
|
|
28
|
+
/**
|
|
29
|
+
* Build a JSON Response with cache headers, short-circuiting to 304
|
|
30
|
+
* when the inbound `If-None-Match` matches the response ETag (per
|
|
31
|
+
* AIDP §8.7 + RFC 7232 §2.3.2 weak comparison via `etagMatches`).
|
|
32
|
+
*
|
|
33
|
+
* 304 responses still carry ETag + Cache-Control per RFC 7232 §4.1.
|
|
34
|
+
*/
|
|
35
|
+
export declare function respondWithCache<T>(etag: string, payload: T, cacheControl: string, inboundIfNoneMatch: string | undefined | null): Response;
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
// Pure cache helpers for the AIDP SDK's framework-agnostic route
|
|
2
|
+
// handlers (used by both `@speakspec/next` and `@speakspec/astro`).
|
|
3
|
+
//
|
|
4
|
+
// Storage IO is pluggable — these hosts don't expose a built-in
|
|
5
|
+
// storage primitive like Nitro's `useStorage`, so this module stays
|
|
6
|
+
// framework-free and accepts any object satisfying the `CacheStorage`
|
|
7
|
+
// interface. Default implementations (in-memory map, fs-backed, Redis)
|
|
8
|
+
// are exposed from `./cache-store`.
|
|
9
|
+
// Default cache TTL in milliseconds: 5 min * 60 s * 1000 ms = 300 000 ms.
export const DEFAULT_CACHE_TTL_MS = 5 * 60 * 1000;
|
|
10
|
+
/**
 * Builds the namespaced storage key for a cache entry.
 *
 * @param scope Key namespace (this file uses `entity`, `content`, `directory`).
 * @param id Identifier within the namespace.
 * @returns `scope` and `id` joined by a single colon.
 */
export function cacheKey(scope, id) {
    return ''.concat(scope, ':', id);
}
|
|
13
|
+
/**
 * Tells whether a cached bundle can still be served.
 *
 * @param bundle Cached bundle, or null/undefined when nothing is cached.
 * @returns `true` only when a bundle exists and its `expiresAt` is
 *   strictly later than the current `Date.now()`.
 */
export function isFresh(bundle) {
    if (!bundle) {
        return false;
    }
    return bundle.expiresAt > Date.now();
}
|
|
16
|
+
/**
 * RFC 7232 §2.3.2 weak comparison between an inbound `If-None-Match`
 * value and the current ETag.
 *
 * Surrounding whitespace and any `W/` weak marker are ignored on both
 * sides before the opaque values are compared. `inbound` may also be a
 * comma-separated list of quoted entity-tags, in which case a match on
 * any member counts. Wildcard `*` is NOT supported (treated as a
 * literal); AIDP agents send specific tags.
 *
 * @param inbound Value of the request's `If-None-Match` header, if any.
 * @param current ETag of the representation about to be served.
 * @returns `true` when the tags weakly match; `false` on a mismatch or
 *   when either side is missing or empty.
 */
export function etagMatches(inbound, current) {
    if (!inbound || !current)
        return false;
    // Trim BEFORE looking for the weak marker, otherwise a padded value
    // such as ` W/"abc"` keeps its `W/` and never matches.
    const norm = (e) => {
        const trimmed = e.trim();
        return trimmed.startsWith('W/') ? trimmed.slice(2).trim() : trimmed;
    };
    const target = norm(current);
    // Whole-value comparison first: also covers unquoted tags.
    if (norm(inbound) === target)
        return true;
    // If-None-Match may carry several tags (`"a", W/"b"`). Pull out each
    // quoted entity-tag so commas inside the quotes are not split on.
    const listed = inbound.match(/(?:W\/)?"[^"]*"/g);
    return listed !== null && listed.some((tag) => norm(tag) === target);
}
|
|
27
|
+
/**
 * Detects an error object that carries a 4xx HTTP status.
 *
 * The status is read from `err.response.status` when present, falling
 * back to `err.statusCode`. Route handlers use the result to tell
 * operator-action errors (bad apiKey, removed entity) apart from
 * transient outages: the former surface as 502 with detail, the
 * latter serve stale.
 *
 * @param err Anything caught from a failed upstream fetch.
 * @returns `true` only when a numeric status in [400, 500) is found.
 */
export function isUpstream4xx(err) {
    // Primitives and null cannot carry a status.
    if (typeof err !== 'object' || err === null) {
        return false;
    }
    const responseStatus = err.response?.status;
    const status = responseStatus ?? err.statusCode;
    if (typeof status !== 'number') {
        return false;
    }
    return status >= 400 && status < 500;
}
|
|
40
|
+
/**
 * Drops everything cached for one entity.
 *
 * Removes the `entity` entry for `slug`, then asks the storage for all
 * keys under the `content` and `directory` namespaces of that slug and
 * removes them one at a time, in that order.
 *
 * @param storage Object providing `removeItem(key)` and `getKeys(prefix)`.
 * @param slug Entity slug whose cache entries are purged.
 */
export async function invalidateEntityCache(storage, slug) {
    await storage.removeItem(cacheKey('entity', slug));
    const prefixes = ['content', 'directory'].map((scope) => cacheKey(scope, `${slug}:`));
    for (const prefix of prefixes) {
        const matching = await storage.getKeys(prefix);
        for (const staleKey of matching) {
            await storage.removeItem(staleKey);
        }
    }
}
|
|
49
|
+
/**
 * Drops the cache entry for one content item and every cached
 * directory key of its entity.
 *
 * @param storage Object providing `removeItem(key)` and `getKeys(prefix)`.
 * @param slug Entity slug that owns the content.
 * @param contentId Identifier of the content item being invalidated.
 */
export async function invalidateContentCache(storage, slug, contentId) {
    const contentKey = cacheKey('content', `${slug}:${contentId}`);
    await storage.removeItem(contentKey);
    // Every directory key under this slug is removed as well.
    const directoryKeys = await storage.getKeys(cacheKey('directory', `${slug}:`));
    for (const directoryKey of directoryKeys) {
        await storage.removeItem(directoryKey);
    }
}
|
|
57
|
+
/**
 * Builds the HTTP response for a cached JSON payload.
 *
 * When the inbound `If-None-Match` matches `etag` (weak comparison via
 * `etagMatches`, per AIDP §8.7 + RFC 7232 §2.3.2) the result is a
 * body-less 304; otherwise it is a 200 with the JSON-serialised payload.
 * Both variants carry `cache-control`, plus `etag` when one is supplied
 * (304 keeps them per RFC 7232 §4.1).
 *
 * @param etag Current ETag; an empty value omits the header.
 * @param payload Value serialised with `JSON.stringify` for the 200 body.
 * @param cacheControl Value of the `cache-control` header.
 * @param inboundIfNoneMatch The request's `If-None-Match`, if any.
 * @returns A `Response` with status 304 or 200.
 */
export function respondWithCache(etag, payload, cacheControl, inboundIfNoneMatch) {
    // Headers shared by the 304 and 200 variants.
    const shared = etag
        ? { 'cache-control': cacheControl, etag }
        : { 'cache-control': cacheControl };
    const notModified = etagMatches(inboundIfNoneMatch, etag);
    if (notModified) {
        return new Response(null, { status: 304, headers: shared });
    }
    const body = JSON.stringify(payload);
    return new Response(body, {
        status: 200,
        headers: { ...shared, 'content-type': 'application/json; charset=utf-8' },
    });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// In-memory map: path → content_id, populated by the SDK's content
|
|
2
|
+
// helper on SSR-render and read by the bot-detect middleware on
|
|
3
|
+
// subsequent crawler requests so impressions can be enriched with
|
|
4
|
+
// content_id.
|
|
5
|
+
//
|
|
6
|
+
// First-request limitation: the middleware fires BEFORE the page
|
|
7
|
+
// renders, so the very first hit on a path has no registered
|
|
8
|
+
// content_id. AI agents typically revisit; later hits get enriched.
|
|
9
|
+
//
|
|
10
|
+
// Module-scoped Map persists for the lifetime of the host process.
|
|
11
|
+
// In serverless cold-start scenarios the registry resets — acceptable
|
|
12
|
+
// for the analytics signal.
|
|
13
|
+
// Upper bound on remembered paths. The registry lives for the whole
// host process, so without a cap a site with many distinct paths would
// grow the Map forever. Past the cap the oldest registration is evicted.
const MAX_REGISTERED_PATHS = 10000;
// path → content_id, kept in insertion order (oldest first).
const pathToContentId = new Map();
/**
 * Remembers which content_id is rendered at `path`.
 *
 * Calls with an empty `path` or `contentId` are ignored. Registering a
 * path again overwrites its content_id and marks it as the newest
 * entry. When the registry exceeds `MAX_REGISTERED_PATHS` the oldest
 * entry is dropped.
 *
 * @param path Request path the content was rendered at.
 * @param contentId Content identifier to associate with the path.
 */
export function registerContent(path, contentId) {
    if (!path || !contentId)
        return;
    // Delete first so a re-registered path moves to the end of the
    // Map's insertion order and is the last candidate for eviction.
    pathToContentId.delete(path);
    pathToContentId.set(path, contentId);
    if (pathToContentId.size > MAX_REGISTERED_PATHS) {
        // Map iterates in insertion order, so the first key is the oldest.
        const oldest = pathToContentId.keys().next().value;
        pathToContentId.delete(oldest);
    }
}
/**
 * Looks up the content_id registered for `path`.
 *
 * @param path Request path to look up.
 * @returns The registered content_id, or `undefined` when none is known.
 */
export function lookupContentId(path) {
    return pathToContentId.get(path);
}
/** Empties the registry. */
export function clearContentRegistry() {
    pathToContentId.clear();
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export interface FetchContentOptions {
|
|
2
|
+
endpoint: string;
|
|
3
|
+
entityId: string;
|
|
4
|
+
contentId: string;
|
|
5
|
+
apiKey?: string;
|
|
6
|
+
ifNoneMatch?: string;
|
|
7
|
+
timeoutMs?: number;
|
|
8
|
+
}
|
|
9
|
+
export interface FetchContentResult {
|
|
10
|
+
payload: Record<string, unknown> | null;
|
|
11
|
+
etag: string;
|
|
12
|
+
notModified: boolean;
|
|
13
|
+
}
|
|
14
|
+
export declare function fetchContentEnvelope(opts: FetchContentOptions): Promise<FetchContentResult>;
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// Fetches a signed Content envelope (§8.7) from SpeakSpec for a single
|
|
2
|
+
// (entity, content) pair. The envelope already contains the directive
|
|
3
|
+
// overlay, body, media, links, and (when an active signing key exists
|
|
4
|
+
// upstream) a _proof. The SDK serves the result at
|
|
5
|
+
// /.well-known/aidp/content/{id}.json on the customer's own domain.
|
|
6
|
+
//
|
|
7
|
+
// SpeakSpec's public per-content envelope route is:
|
|
8
|
+
// GET {endpoint}/public/entity/{entityId}/content/{contentId}/publish.json
|
|
9
|
+
//
|
|
10
|
+
// Authentication is OPTIONAL — the upstream endpoint accepts requests
|
|
11
|
+
// without an API key, but we attach Authorization when configured so
|
|
12
|
+
// usage shows up under the customer's account in SpeakSpec analytics.
|
|
13
|
+
import { ofetch, FetchError } from 'ofetch';
|
|
14
|
+
import { SDK_USER_AGENT } from '../../version';
|
|
15
|
+
/**
 * Fetches the content envelope for one (entity, content) pair from
 * `{endpoint}/public/entity/{entityId}/content/{contentId}/publish.json`.
 *
 * The request always sends the SDK User-Agent and `Accept:
 * application/json`; `Authorization: Bearer …` and `If-None-Match` are
 * added only when `apiKey` / `ifNoneMatch` are set. No retries are made
 * and the timeout defaults to 5000 ms.
 *
 * @param opts Endpoint, ids and optional apiKey / ifNoneMatch / timeoutMs.
 * @returns `{ payload, etag, notModified }`. An upstream 304 yields
 *   `payload: null` and `notModified: true`, with the ETag taken from the
 *   304 response, else from `opts.ifNoneMatch`, else `''`.
 * @throws Re-throws any fetch failure other than an upstream 304.
 */
export async function fetchContentEnvelope(opts) {
    const base = stripTrailingSlash(opts.endpoint);
    const entitySegment = encodeURIComponent(opts.entityId);
    const contentSegment = encodeURIComponent(opts.contentId);
    const url = `${base}/public/entity/${entitySegment}/content/${contentSegment}/publish.json`;
    const headers = {
        'User-Agent': SDK_USER_AGENT,
        'Accept': 'application/json',
        ...(opts.apiKey ? { Authorization: `Bearer ${opts.apiKey}` } : {}),
        ...(opts.ifNoneMatch ? { 'If-None-Match': opts.ifNoneMatch } : {}),
    };
    let response;
    try {
        response = await ofetch.raw(url, {
            method: 'GET',
            headers,
            retry: 0,
            timeout: opts.timeoutMs ?? 5000,
        });
    }
    catch (err) {
        // The fetch client reports 304 as an error; turn it into a result.
        const upstreamNotModified = err instanceof FetchError && err.response?.status === 304;
        if (!upstreamNotModified) {
            throw err;
        }
        return {
            payload: null,
            etag: err.response.headers.get('etag') ?? opts.ifNoneMatch ?? '',
            notModified: true,
        };
    }
    return {
        payload: response._data ?? null,
        etag: response.headers.get('etag') ?? '',
        notModified: false,
    };
}
|
|
51
|
+
/** Removes a single trailing `/` from `s`, if there is one. */
function stripTrailingSlash(s) {
    if (!s.endsWith('/')) {
        return s;
    }
    return s.substring(0, s.length - 1);
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
export interface FetchOptions {
|
|
2
|
+
endpoint: string;
|
|
3
|
+
entityId: string;
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
/** Existing ETag, sent as If-None-Match for conditional fetch. */
|
|
6
|
+
ifNoneMatch?: string;
|
|
7
|
+
/** SSR-time fetch budget. Defaults to 5s — long enough to absorb
|
|
8
|
+
* typical TLS/DNS latency, short enough that a slow upstream cannot
|
|
9
|
+
* block downstream rendering past one normal request budget.
|
|
10
|
+
*/
|
|
11
|
+
timeoutMs?: number;
|
|
12
|
+
}
|
|
13
|
+
export interface FetchResult {
|
|
14
|
+
/** Parsed AIDP entity directive payload, OR null on 304. */
|
|
15
|
+
payload: Record<string, unknown> | null;
|
|
16
|
+
/** Server-issued ETag for next round-trip; empty string if missing. */
|
|
17
|
+
etag: string;
|
|
18
|
+
/** True when the upstream returned 304 — caller keeps cached payload. */
|
|
19
|
+
notModified: boolean;
|
|
20
|
+
}
|
|
21
|
+
export declare function fetchEntityDirective(opts: FetchOptions): Promise<FetchResult>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// Fetches the customer's entity directive from SpeakSpec. The SDK
|
|
2
|
+
// serves the result at /.well-known/aidp.json on the customer's own
|
|
3
|
+
// domain (per AIDP 0.3 §8.5). All fetches are SSR-time only — never
|
|
4
|
+
// build-time baked, so directive changes propagate within one cache
|
|
5
|
+
// TTL plus eventual webhook invalidation (Step 3.1.5).
|
|
6
|
+
//
|
|
7
|
+
// SpeakSpec's public entity-directive route is:
|
|
8
|
+
// GET {endpoint}/public/entity/{entityId}
|
|
9
|
+
// Authentication is OPTIONAL (the route is publicly readable); we
|
|
10
|
+
// pass an Authorization header anyway so usage shows up under the
|
|
11
|
+
// customer's API key in SpeakSpec analytics.
|
|
12
|
+
import { ofetch, FetchError } from 'ofetch';
|
|
13
|
+
import { SDK_USER_AGENT } from '../../version';
|
|
14
|
+
/**
 * Fetches the entity directive from `{endpoint}/public/entity/{entityId}`.
 *
 * The request always sends the SDK User-Agent and `Accept:
 * application/json`; `Authorization: Bearer …` and `If-None-Match` are
 * added only when `apiKey` / `ifNoneMatch` are set. No retries are made
 * and the timeout defaults to 5000 ms.
 *
 * @param opts Endpoint, entityId and optional apiKey / ifNoneMatch / timeoutMs.
 * @returns `{ payload, etag, notModified }`. An upstream 304 yields
 *   `payload: null` and `notModified: true`, with the ETag taken from the
 *   304 response, else from `opts.ifNoneMatch`, else `''`.
 * @throws Re-throws any fetch failure other than an upstream 304.
 */
export async function fetchEntityDirective(opts) {
    const base = stripTrailingSlash(opts.endpoint);
    const url = `${base}/public/entity/${encodeURIComponent(opts.entityId)}`;
    const headers = {
        'User-Agent': SDK_USER_AGENT,
        'Accept': 'application/json',
        ...(opts.apiKey ? { Authorization: `Bearer ${opts.apiKey}` } : {}),
        ...(opts.ifNoneMatch ? { 'If-None-Match': opts.ifNoneMatch } : {}),
    };
    let response;
    try {
        response = await ofetch.raw(url, {
            method: 'GET',
            headers,
            retry: 0,
            timeout: opts.timeoutMs ?? 5000,
        });
    }
    catch (err) {
        // The fetch client reports 304 as an error; turn it into a result.
        const upstreamNotModified = err instanceof FetchError && err.response?.status === 304;
        if (!upstreamNotModified) {
            throw err;
        }
        return {
            payload: null,
            etag: err.response.headers.get('etag') ?? opts.ifNoneMatch ?? '',
            notModified: true,
        };
    }
    return {
        payload: response._data ?? null,
        etag: response.headers.get('etag') ?? '',
        notModified: false,
    };
}
|
|
50
|
+
/** Removes a single trailing `/` from `s`, if there is one. */
function stripTrailingSlash(s) {
    const lastIndex = s.length - 1;
    return s[lastIndex] === '/' ? s.slice(0, lastIndex) : s;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
export interface FetchDirectoryOptions {
|
|
2
|
+
endpoint: string;
|
|
3
|
+
entityId: string;
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
page?: number;
|
|
6
|
+
pageSize?: number;
|
|
7
|
+
contentType?: string;
|
|
8
|
+
/** §8.8 optional filter: BCP 47 language tag. */
|
|
9
|
+
language?: string;
|
|
10
|
+
/** §8.8 optional filter: RFC 3339 timestamp; only items updated
|
|
11
|
+
* strictly after this point are returned. */
|
|
12
|
+
updatedSince?: string;
|
|
13
|
+
ifNoneMatch?: string;
|
|
14
|
+
timeoutMs?: number;
|
|
15
|
+
}
|
|
16
|
+
export interface FetchDirectoryResult {
|
|
17
|
+
payload: Record<string, unknown> | null;
|
|
18
|
+
etag: string;
|
|
19
|
+
notModified: boolean;
|
|
20
|
+
}
|
|
21
|
+
export declare function fetchContentDirectory(opts: FetchDirectoryOptions): Promise<FetchDirectoryResult>;
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// Fetches the paginated content directory (§8.8) from SpeakSpec.
|
|
2
|
+
// SDK serves the result at /.well-known/aidp/content/ on the
|
|
3
|
+
// customer's own domain. The directory itself is not signed (per the
|
|
4
|
+
// proposal, security boundary is per-content envelopes); this fetcher
|
|
5
|
+
// is a thin proxy with the same User-Agent / auth / 304 handling as
|
|
6
|
+
// the entity-directive and content-envelope fetchers.
|
|
7
|
+
import { ofetch, FetchError } from 'ofetch';
|
|
8
|
+
import { SDK_USER_AGENT } from '../../version';
|
|
9
|
+
/**
 * Fetches one page of the content directory from
 * `{endpoint}/public/entity/{entityId}/content/directory.json`.
 *
 * Optional filters are sent as query parameters: `page`, `page_size`
 * (only when given as numbers), `type`, `language` and `updated_since`
 * (only when non-empty). The request always sends the SDK User-Agent
 * and `Accept: application/json`; `Authorization: Bearer …` and
 * `If-None-Match` are added only when configured. No retries are made
 * and the timeout defaults to 5000 ms.
 *
 * @param opts Endpoint, entityId, optional filters and fetch settings.
 * @returns `{ payload, etag, notModified }`. An upstream 304 yields
 *   `payload: null` and `notModified: true`, with the ETag taken from the
 *   304 response, else from `opts.ifNoneMatch`, else `''`.
 * @throws Re-throws any fetch failure other than an upstream 304.
 */
export async function fetchContentDirectory(opts) {
    const base = `${stripTrailingSlash(opts.endpoint)}/public/entity/${encodeURIComponent(opts.entityId)}/content/directory.json`;
    const params = new URLSearchParams();
    if (typeof opts.page === 'number')
        params.set('page', String(opts.page));
    if (typeof opts.pageSize === 'number')
        params.set('page_size', String(opts.pageSize));
    if (opts.contentType)
        params.set('type', opts.contentType);
    if (opts.language)
        params.set('language', opts.language);
    if (opts.updatedSince)
        params.set('updated_since', opts.updatedSince);
    // Test the serialised string rather than `params.size`: that property
    // is missing on runtimes that predate it, where `undefined > 0` is
    // false and every filter above would be silently dropped.
    const query = params.toString();
    const url = query ? `${base}?${query}` : base;
    const headers = {
        'User-Agent': SDK_USER_AGENT,
        'Accept': 'application/json',
    };
    if (opts.apiKey) {
        headers.Authorization = `Bearer ${opts.apiKey}`;
    }
    if (opts.ifNoneMatch) {
        headers['If-None-Match'] = opts.ifNoneMatch;
    }
    try {
        const response = await ofetch.raw(url, {
            method: 'GET',
            headers,
            retry: 0,
            timeout: opts.timeoutMs ?? 5000,
        });
        return {
            payload: response._data ?? null,
            etag: response.headers.get('etag') ?? '',
            notModified: false,
        };
    }
    catch (err) {
        // The fetch client reports 304 as an error; turn it into a result.
        if (err instanceof FetchError && err.response?.status === 304) {
            return {
                payload: null,
                etag: err.response.headers.get('etag') ?? opts.ifNoneMatch ?? '',
                notModified: true,
            };
        }
        throw err;
    }
}
|
|
57
|
+
/** Removes a single trailing `/` from `s`, if there is one. */
function stripTrailingSlash(s) {
    const endsWithSlash = s.endsWith('/');
    if (endsWithSlash) {
        return s.slice(0, -1);
    }
    return s;
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
export interface VerifyOptions {
|
|
2
|
+
/** Pre-shared secret from `runtimeConfig.speakspec.webhookSecret`. */
|
|
3
|
+
secret: string;
|
|
4
|
+
/** `X-AIDP-Timestamp` header value (RFC 3339). */
|
|
5
|
+
timestamp: string;
|
|
6
|
+
/** Raw request body bytes (UTF-8 string). */
|
|
7
|
+
body: string;
|
|
8
|
+
/** `X-AIDP-Signature` header value (`hmac-sha256={hex}`). */
|
|
9
|
+
signature: string;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Returns the canonical signature string for (secret, timestamp, body).
|
|
13
|
+
* Used by `verifyHmacSignature` and exposed for tests / golden fixtures.
|
|
14
|
+
*/
|
|
15
|
+
export declare function computeSignature(secret: string, timestamp: string, body: string): string;
|
|
16
|
+
/**
|
|
17
|
+
* Constant-time signature comparison. Returns false on any mismatch,
|
|
18
|
+
* empty secret, or unequal length.
|
|
19
|
+
*/
|
|
20
|
+
export declare function verifyHmacSignature(opts: VerifyOptions): boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Returns true when `timestamp` parses as ISO 8601 and falls within
|
|
23
|
+
* `windowMs` of the current wall clock. Default window 5 minutes
|
|
24
|
+
* matches the spec recommendation; SDK consumers can tighten it via
|
|
25
|
+
* the optional argument if their delivery latency is consistently
|
|
26
|
+
* sub-minute.
|
|
27
|
+
*/
|
|
28
|
+
export declare function isTimestampFresh(timestamp: string, windowMs?: number): boolean;
|
|
29
|
+
/**
|
|
30
|
+
* Strip the `urn:aidp:entity:` URN prefix to recover the bare slug.
|
|
31
|
+
* If the input doesn't match the URN form, returns it unchanged. The
|
|
32
|
+
* result is validated against the canonical slug rule and rejected
|
|
33
|
+
* with a descriptive error when malformed — that way a future server
|
|
34
|
+
* change to a different URN scheme (e.g. `urn:speakspec:entity:foo`)
|
|
35
|
+
* fails loudly here instead of writing junk into the cache namespace.
|
|
36
|
+
*/
|
|
37
|
+
export declare function urnToSlug(entityId: string): string;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// HMAC verification for AIDP §8.10 cache-invalidation webhooks.
|
|
2
|
+
//
|
|
3
|
+
// Spec mandates HMAC-SHA256 over `${X-AIDP-Timestamp}\n${raw body}`,
|
|
4
|
+
// presented as the header `X-AIDP-Signature: hmac-sha256={hex}`. We
|
|
5
|
+
// recompute the signature against the SDK's configured webhookSecret
|
|
6
|
+
// and compare in constant time. Timestamp is also bounded to ±5 min
|
|
7
|
+
// (default) so a captured webhook cannot be replayed days later.
|
|
8
|
+
import { createHmac, timingSafeEqual } from 'node:crypto';
|
|
9
|
+
// Scheme tag that precedes the hex digest in the signature string.
const SIGNATURE_PREFIX = 'hmac-sha256=';
/**
 * Computes the canonical signature string for a webhook delivery.
 *
 * The HMAC-SHA256 is keyed with `secret` and taken over the timestamp,
 * a single newline, and the body; the hex digest is returned behind the
 * `hmac-sha256=` prefix. Used by `verifyHmacSignature` and exposed for
 * tests / golden fixtures.
 *
 * @param secret Pre-shared webhook secret.
 * @param timestamp Timestamp string exactly as received.
 * @param body Raw request body.
 * @returns `hmac-sha256=` followed by the hex digest.
 */
export function computeSignature(secret, timestamp, body) {
    const signedPayload = `${timestamp}\n${body}`;
    const hexDigest = createHmac('sha256', secret).update(signedPayload).digest('hex');
    return `${SIGNATURE_PREFIX}${hexDigest}`;
}
|
|
19
|
+
/**
 * Checks a presented webhook signature against the one recomputed from
 * the secret, timestamp and body.
 *
 * @param opts `secret`, `timestamp`, `body` and the presented `signature`.
 * @returns `true` only when the presented signature is byte-for-byte
 *   equal to the expected one; `false` when the secret or signature is
 *   empty, when the lengths differ, or on any mismatch.
 */
export function verifyHmacSignature(opts) {
    const { secret, signature } = opts;
    if (!secret || !signature) {
        return false;
    }
    const expectedBytes = Buffer.from(computeSignature(secret, opts.timestamp, opts.body), 'utf8');
    const presentedBytes = Buffer.from(signature, 'utf8');
    // timingSafeEqual throws on unequal lengths, so check the length first.
    return expectedBytes.length === presentedBytes.length
        && timingSafeEqual(expectedBytes, presentedBytes);
}
|
|
33
|
+
/**
 * Replay guard: tells whether `timestamp` is close enough to now.
 *
 * @param timestamp Timestamp string; it is parsed with `Date.parse`.
 * @param windowMs Allowed distance from the current wall clock in either
 *   direction, in milliseconds. Defaults to five minutes.
 * @returns `true` when the timestamp parses and lies within `windowMs`
 *   of `Date.now()`; `false` when it is unparseable or outside the window.
 */
export function isTimestampFresh(timestamp, windowMs = 5 * 60 * 1000) {
    const parsedMs = Date.parse(timestamp);
    if (Number.isNaN(parsedMs)) {
        return false;
    }
    // Absolute drift, so timestamps from the future are bounded too.
    const driftMs = Math.abs(Date.now() - parsedMs);
    return driftMs <= windowMs;
}
|
|
46
|
+
// Canonical slug shape (mirrors aidp-server's slug_validator.go):
// lowercase alphanumerics and hyphens, with no hyphen at either end.
const SLUG_RE = /^[a-z0-9](?:[a-z0-9-]*[a-z0-9])?$/;
/**
 * Recovers the bare slug from an entity id.
 *
 * A leading `urn:aidp:entity:` is removed when present; any other input
 * is used as-is. The result must satisfy the canonical slug rule, so an
 * unexpected URN scheme (e.g. `urn:speakspec:entity:foo`) fails loudly
 * here instead of writing junk into the cache namespace.
 *
 * @param entityId Entity URN or bare slug.
 * @returns The validated slug.
 * @throws Error when the resulting slug is malformed.
 */
export function urnToSlug(entityId) {
    const urnPrefix = 'urn:aidp:entity:';
    const slug = entityId.startsWith(urnPrefix)
        ? entityId.slice(urnPrefix.length)
        : entityId;
    if (SLUG_RE.test(slug)) {
        return slug;
    }
    throw new Error(`urnToSlug: "${entityId}" did not produce a valid AIDP slug (got "${slug}")`);
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
export interface ImpressionRecord {
|
|
2
|
+
msg: string;
|
|
3
|
+
crawler: string;
|
|
4
|
+
crawler_source: string;
|
|
5
|
+
path: string;
|
|
6
|
+
user_agent: string;
|
|
7
|
+
ts: string;
|
|
8
|
+
entity_id?: string;
|
|
9
|
+
content_id?: string;
|
|
10
|
+
client_ip?: string;
|
|
11
|
+
}
|
|
12
|
+
export interface QueueConfig {
|
|
13
|
+
endpoint: string;
|
|
14
|
+
apiKey: string;
|
|
15
|
+
batchSize: number;
|
|
16
|
+
flushIntervalMs: number;
|
|
17
|
+
maxQueueBytes: number;
|
|
18
|
+
onError: 'fallback-stdout' | 'silent';
|
|
19
|
+
/** Override for tests; default uses global fetch. */
|
|
20
|
+
fetcher?: typeof fetch;
|
|
21
|
+
/** Override for tests; default uses console.log. */
|
|
22
|
+
logger?: (line: string) => void;
|
|
23
|
+
}
|
|
24
|
+
export declare function configureQueue(cfg: QueueConfig): void;
|
|
25
|
+
export declare function resetQueue(): void;
|
|
26
|
+
export declare function enqueueImpression(impression: ImpressionRecord): void;
|
|
27
|
+
export declare function flushQueue(): Promise<void>;
|
|
28
|
+
export declare function _peekQueue(): {
|
|
29
|
+
count: number;
|
|
30
|
+
bytes: number;
|
|
31
|
+
consecutiveFailures: number;
|
|
32
|
+
backoffUntil: number;
|
|
33
|
+
};
|