@ozzylabs/feedradar 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/README.ja.md +31 -6
  2. package/README.md +31 -6
  3. package/dist/agents/claude-code.d.ts +12 -1
  4. package/dist/agents/claude-code.d.ts.map +1 -1
  5. package/dist/agents/claude-code.js +9 -5
  6. package/dist/agents/claude-code.js.map +1 -1
  7. package/dist/agents/codex-cli.d.ts +7 -1
  8. package/dist/agents/codex-cli.d.ts.map +1 -1
  9. package/dist/agents/codex-cli.js +9 -5
  10. package/dist/agents/codex-cli.js.map +1 -1
  11. package/dist/agents/copilot.d.ts +7 -1
  12. package/dist/agents/copilot.d.ts.map +1 -1
  13. package/dist/agents/copilot.js +9 -5
  14. package/dist/agents/copilot.js.map +1 -1
  15. package/dist/agents/gemini-cli.d.ts +7 -1
  16. package/dist/agents/gemini-cli.d.ts.map +1 -1
  17. package/dist/agents/gemini-cli.js +9 -5
  18. package/dist/agents/gemini-cli.js.map +1 -1
  19. package/dist/agents/index.d.ts +1 -1
  20. package/dist/agents/index.d.ts.map +1 -1
  21. package/dist/agents/types.d.ts +33 -0
  22. package/dist/agents/types.d.ts.map +1 -1
  23. package/dist/cli/_progress.d.ts +138 -0
  24. package/dist/cli/_progress.d.ts.map +1 -0
  25. package/dist/cli/_progress.js +176 -0
  26. package/dist/cli/_progress.js.map +1 -0
  27. package/dist/cli/doctor.d.ts +20 -0
  28. package/dist/cli/doctor.d.ts.map +1 -1
  29. package/dist/cli/doctor.js +291 -2
  30. package/dist/cli/doctor.js.map +1 -1
  31. package/dist/cli/index.d.ts.map +1 -1
  32. package/dist/cli/index.js +2 -0
  33. package/dist/cli/index.js.map +1 -1
  34. package/dist/cli/research.d.ts +18 -20
  35. package/dist/cli/research.d.ts.map +1 -1
  36. package/dist/cli/research.js +318 -203
  37. package/dist/cli/research.js.map +1 -1
  38. package/dist/cli/respawn.d.ts +53 -0
  39. package/dist/cli/respawn.d.ts.map +1 -0
  40. package/dist/cli/respawn.js +120 -0
  41. package/dist/cli/respawn.js.map +1 -0
  42. package/dist/cli/review.d.ts +7 -0
  43. package/dist/cli/review.d.ts.map +1 -1
  44. package/dist/cli/review.js +46 -1
  45. package/dist/cli/review.js.map +1 -1
  46. package/dist/cli/source.d.ts +23 -2
  47. package/dist/cli/source.d.ts.map +1 -1
  48. package/dist/cli/source.js +425 -7
  49. package/dist/cli/source.js.map +1 -1
  50. package/dist/cli/update.d.ts +7 -0
  51. package/dist/cli/update.d.ts.map +1 -1
  52. package/dist/cli/update.js +41 -1
  53. package/dist/cli/update.js.map +1 -1
  54. package/dist/cli/watch.d.ts.map +1 -1
  55. package/dist/cli/watch.js +65 -3
  56. package/dist/cli/watch.js.map +1 -1
  57. package/dist/cli/workflow/generate-combined.d.ts +100 -0
  58. package/dist/cli/workflow/generate-combined.d.ts.map +1 -0
  59. package/dist/cli/workflow/generate-combined.js +387 -0
  60. package/dist/cli/workflow/generate-combined.js.map +1 -0
  61. package/dist/cli/workflow/generate-watch.d.ts +142 -0
  62. package/dist/cli/workflow/generate-watch.d.ts.map +1 -0
  63. package/dist/cli/workflow/generate-watch.js +338 -0
  64. package/dist/cli/workflow/generate-watch.js.map +1 -0
  65. package/dist/cli/workflow.d.ts +29 -0
  66. package/dist/cli/workflow.d.ts.map +1 -0
  67. package/dist/cli/workflow.js +66 -0
  68. package/dist/cli/workflow.js.map +1 -0
  69. package/dist/core/feeds/_fetch.d.ts +103 -0
  70. package/dist/core/feeds/_fetch.d.ts.map +1 -0
  71. package/dist/core/feeds/_fetch.js +364 -0
  72. package/dist/core/feeds/_fetch.js.map +1 -0
  73. package/dist/core/feeds/_jsonpath.d.ts +57 -0
  74. package/dist/core/feeds/_jsonpath.d.ts.map +1 -0
  75. package/dist/core/feeds/_jsonpath.js +207 -0
  76. package/dist/core/feeds/_jsonpath.js.map +1 -0
  77. package/dist/core/feeds/github-api.d.ts.map +1 -1
  78. package/dist/core/feeds/github-api.js +2 -1
  79. package/dist/core/feeds/github-api.js.map +1 -1
  80. package/dist/core/feeds/html-js.d.ts +29 -0
  81. package/dist/core/feeds/html-js.d.ts.map +1 -1
  82. package/dist/core/feeds/html-js.js +86 -2
  83. package/dist/core/feeds/html-js.js.map +1 -1
  84. package/dist/core/feeds/html.d.ts.map +1 -1
  85. package/dist/core/feeds/html.js +2 -1
  86. package/dist/core/feeds/html.js.map +1 -1
  87. package/dist/core/feeds/index.d.ts +1 -1
  88. package/dist/core/feeds/index.d.ts.map +1 -1
  89. package/dist/core/feeds/index.js +4 -0
  90. package/dist/core/feeds/index.js.map +1 -1
  91. package/dist/core/feeds/json-api.d.ts +3 -0
  92. package/dist/core/feeds/json-api.d.ts.map +1 -0
  93. package/dist/core/feeds/json-api.js +723 -0
  94. package/dist/core/feeds/json-api.js.map +1 -0
  95. package/dist/core/feeds/json-feed.d.ts +11 -0
  96. package/dist/core/feeds/json-feed.d.ts.map +1 -0
  97. package/dist/core/feeds/json-feed.js +242 -0
  98. package/dist/core/feeds/json-feed.js.map +1 -0
  99. package/dist/core/feeds/npm-registry.d.ts.map +1 -1
  100. package/dist/core/feeds/npm-registry.js +2 -1
  101. package/dist/core/feeds/npm-registry.js.map +1 -1
  102. package/dist/core/feeds/rss.d.ts.map +1 -1
  103. package/dist/core/feeds/rss.js +2 -1
  104. package/dist/core/feeds/rss.js.map +1 -1
  105. package/dist/core/feeds/types.d.ts +123 -0
  106. package/dist/core/feeds/types.d.ts.map +1 -1
  107. package/dist/core/progress.d.ts +101 -0
  108. package/dist/core/progress.d.ts.map +1 -0
  109. package/dist/core/progress.js +212 -0
  110. package/dist/core/progress.js.map +1 -0
  111. package/dist/core/proxy.d.ts +87 -0
  112. package/dist/core/proxy.d.ts.map +1 -0
  113. package/dist/core/proxy.js +146 -0
  114. package/dist/core/proxy.js.map +1 -0
  115. package/dist/core/recipes.d.ts +138 -0
  116. package/dist/core/recipes.d.ts.map +1 -0
  117. package/dist/core/recipes.js +238 -0
  118. package/dist/core/recipes.js.map +1 -0
  119. package/dist/core/watcher.d.ts +61 -1
  120. package/dist/core/watcher.d.ts.map +1 -1
  121. package/dist/core/watcher.js +99 -2
  122. package/dist/core/watcher.js.map +1 -1
  123. package/dist/index.js +17 -4
  124. package/dist/index.js.map +1 -1
  125. package/dist/recipes/aws-whats-new.yaml +61 -0
  126. package/dist/recipes/dev-to.yaml +40 -0
  127. package/dist/schemas/index.d.ts +1 -0
  128. package/dist/schemas/index.d.ts.map +1 -1
  129. package/dist/schemas/index.js +1 -0
  130. package/dist/schemas/index.js.map +1 -1
  131. package/dist/schemas/recipe.d.ts +115 -0
  132. package/dist/schemas/recipe.d.ts.map +1 -0
  133. package/dist/schemas/recipe.js +54 -0
  134. package/dist/schemas/recipe.js.map +1 -0
  135. package/dist/schemas/source.d.ts +130 -0
  136. package/dist/schemas/source.d.ts.map +1 -1
  137. package/dist/schemas/source.js +130 -0
  138. package/dist/schemas/source.js.map +1 -1
  139. package/dist/templates/agents/AGENTS.md +31 -3
  140. package/dist/templates/feedradar.md +23 -8
  141. package/dist/templates/workflows/combined.template.yaml.tmpl +110 -0
  142. package/dist/templates/workflows/watch.template.yaml.tmpl +103 -0
  143. package/dist/templates/workflows/watch.yaml +5 -1
  144. package/package.json +2 -3
@@ -0,0 +1,723 @@
1
+ import { createHash } from "node:crypto";
2
+ import { ItemSchema } from "../../schemas/index.js";
3
+ import { fetchWithRetry } from "./_fetch.js";
4
+ import { selectAll, selectOne } from "./_jsonpath.js";
5
+ import { deriveItemId, deriveStableKey } from "./derive-id.js";
/**
 * User-Agent header value sent by this adapter (see `buildHeaders`).
 *
 * NOTE(review): the embedded version is `0.0.0`; it does not appear to track
 * the published package version — confirm whether that is intentional.
 */
const USER_AGENT = "feedradar/0.0.0 (+https://github.com/ozzy-labs/feedradar)";
/**
 * Prefix marking a content-hash entry (vs a real ETag) inside `state.lastEtag`.
 * Mirrors `_html-common.ts` so re-fetches without a server ETag still dedup.
 */
const CONTENT_HASH_PREFIX = "sha256:";
/**
 * Default selector chain consulted when `jsonSelectors.items` is omitted
 * (ADR-0012 §D2). Resolved against the page-0 response body.
 *
 * Frozen: this is shared module-level state, so an accidental `push` / index
 * write by any consumer would silently change behavior for every later fetch.
 */
const DEFAULT_ITEMS_PATHS = Object.freeze([
    "$.items[*]",
    "$.data[*]",
    "$.results[*]",
    "$.posts[*]",
    "$.entries[*]",
    "$[*]",
]);
/**
 * Per-field default selector chain consulted when the corresponding
 * `jsonSelectors.<field>` is omitted (#174). For each item element we walk
 * the chain in order and use the first path that yields a non-nullish value;
 * this lets recipes for "simple" APIs (dev.to, generic JSON Feed clones)
 * skip selectors entirely. Adoption is recorded once per fetch (first item)
 * and surfaced via `FeedFetchDiag.selectorAdoption` so users can audit which
 * candidate was picked.
 *
 * Frozen (table and every chain) for the same reason as `DEFAULT_ITEMS_PATHS`.
 */
const DEFAULT_FIELD_PATHS = Object.freeze({
    title: Object.freeze(["$.title", "$.name", "$.headline"]),
    link: Object.freeze(["$.url", "$.link", "$.permalink", "$.html_url"]),
    publishedAt: Object.freeze(["$.publishedAt", "$.published_at", "$.date", "$.created_at", "$.pubDate"]),
    summary: Object.freeze(["$.summary", "$.description", "$.excerpt", "$.body"]),
});
/**
 * Maximum response body size per page. ADR-0012 §D5a hardcodes this so a
 * malformed recipe cannot blow up memory / context window. The cap is
 * intentionally not user-configurable.
 */
const RESPONSE_SIZE_CAP_BYTES = 10 * 1024 * 1024; // 10 MB
/**
 * `${VAR}` env interpolation (ADR-0012 §D5c).
 *
 * - Unresolved variables cause the header to be omitted entirely (degraded
 *   fetch), so public APIs work without env wiring while authenticated APIs
 *   fail-fast with a 401/403 at runtime.
 * - A variable counts as resolved only when `env[name]` is a non-empty
 *   string. Anything else — missing, empty, or a non-string such as an
 *   inherited `Object.prototype` member reached via `${constructor}` — is
 *   treated as unresolved so it can never be stringified into a header.
 * - The returned value MUST NEVER be logged. Callers route it directly into
 *   the `headers` map passed to fetch.
 *
 * @param {string} raw  Header value template from the recipe.
 * @param {Record<string, string | undefined>} env  Variable lookup table.
 * @returns {string | undefined} The interpolated value, or `undefined` when
 *   any referenced variable is unresolved (caller drops the header).
 */
function interpolateHeaderValue(raw, env) {
    // Optimization: most headers contain no `${...}` and pass straight through.
    if (!raw.includes("${"))
        return raw;
    let resolved = "";
    let i = 0;
    while (i < raw.length) {
        const dollar = raw.indexOf("${", i);
        if (dollar === -1) {
            resolved += raw.slice(i);
            break;
        }
        resolved += raw.slice(i, dollar);
        const close = raw.indexOf("}", dollar + 2);
        if (close === -1) {
            // Malformed (no closing brace): nothing to interpolate, so keep the
            // remainder verbatim as literal text.
            resolved += raw.slice(dollar);
            break;
        }
        const name = raw.slice(dollar + 2, close);
        const value = env[name];
        if (typeof value !== "string" || value.length === 0) {
            // ADR-0012 §D5c: unresolved → drop the entire header.
            return undefined;
        }
        resolved += value;
        i = close + 1;
    }
    return resolved;
}
/**
 * Assemble the outgoing request headers for a source.
 *
 * Starts from two defaults — a JSON-preferring `accept` and the adapter's
 * `user-agent` — then layers the recipe's `http.headers` on top, keyed by
 * lower-cased name so a recipe entry replaces the matching default (e.g. a
 * site that insists on `application/vnd.api+json`).
 *
 * Each recipe value goes through `${VAR}` interpolation first; a header whose
 * interpolation comes back `undefined` is left out entirely (ADR-0012 §D5c
 * degraded-fetch policy).
 *
 * @param source  Source recipe; only `source.http?.headers` is read.
 * @param env     Variable lookup table forwarded to the interpolation step.
 * @returns Plain object mapping lower-cased header names to values.
 */
function buildHeaders(source, env) {
    const outgoing = {
        accept: "application/json, */*;q=0.5",
        "user-agent": USER_AGENT,
    };
    const declared = source.http?.headers ?? {};
    Object.keys(declared).forEach((name) => {
        const value = interpolateHeaderValue(declared[name], env);
        if (value === undefined) {
            // Unresolved env reference: omit this header.
            return;
        }
        outgoing[name.toLowerCase()] = value;
    });
    return outgoing;
}
/**
 * Compute the next URL for `type: link-header` pagination by parsing
 * `Link: <url>; rel="next", <...>; rel="prev"`. Returns `null` when no
 * `rel="next"` is present (= end of pagination).
 *
 * Parsing scans `<target>` tokens and the `; name=value` parameters that
 * follow each one, instead of splitting the header on commas. That keeps two
 * legal shapes working that a comma-split + "rel must come first" match
 * would miss:
 *
 * - targets containing a comma (`<https://api/x?ids=1,2&page=2>; rel="next"`)
 * - `rel` appearing after other parameters (`<u>; title="x"; rel="next"`)
 *
 * NOTE on SSRF: a malicious or compromised upstream could emit a `Link`
 * header pointing at `http://127.0.0.1:…` / cloud-metadata endpoints. The
 * host-allowlist defense specified in ADR-0012 §D5b lives in the shared
 * fetch wrapper (`src/core/feeds/_fetch.ts`), which sees every request URL
 * regardless of the adapter that produced it; layering the check here would
 * leave the same gap for `cursor` / `token` pagination and direct
 * `source.url`. Tracking that wrapper-level enforcement as cross-cutting
 * work outside this PR's scope.
 *
 * @param {string | null | undefined} value  Raw `Link` header value.
 * @returns {string | null} Target of the first link whose `rel` contains
 *   `next` (case-insensitive), or `null`.
 */
function parseLinkHeader(value) {
    if (!value)
        return null;
    // Fresh regex objects per call: both carry `lastIndex` state.
    const linkTarget = /<([^>]+)>/g;
    // One `; name`, `; name=token` or `; name="quoted"` parameter, anchored
    // (sticky) at the current cursor so quoted values are consumed as a unit.
    const linkParam = /\s*;\s*([^\s;,="]+)\s*(?:=\s*(?:"([^"]*)"|([^\s;,"]*)))?/y;
    let target = linkTarget.exec(value);
    while (target !== null) {
        const url = target[1];
        let cursor = linkTarget.lastIndex;
        let isNext = false;
        for (;;) {
            linkParam.lastIndex = cursor;
            const param = linkParam.exec(value);
            if (param === null)
                break;
            cursor = linkParam.lastIndex;
            if (param[1].toLowerCase() !== "rel")
                continue;
            // Some servers emit `rel="next first"` — split on whitespace.
            const rels = (param[2] ?? param[3] ?? "").toLowerCase().split(/\s+/);
            if (rels.includes("next"))
                isNext = true;
        }
        if (isNext)
            return url;
        // Resume after this link's parameters so a `<` inside a quoted
        // parameter value is not mistaken for another target.
        linkTarget.lastIndex = cursor;
        target = linkTarget.exec(value);
    }
    return null;
}
/**
 * Return `url` with query parameter `name` set to `value`, overwriting any
 * existing parameter of the same name. This is how page / offset / token /
 * pageSize values get threaded into pagination URLs without hand-editing the
 * recipe URL string.
 *
 * @param {string} url    Absolute URL to modify (parsed with `new URL`).
 * @param {string} name   Query parameter name.
 * @param {*} value       Parameter value; stringified with `String()`.
 * @returns {string} The serialized URL.
 */
function setQueryParam(url, name, value) {
    const target = new URL(url);
    const text = String(value);
    target.searchParams.set(name, text);
    return target.href;
}
/**
 * Locate the item list inside a page body.
 *
 * With an explicit `selectors.items` path, that path is evaluated and
 * returned as-is (even when it matches nothing). Without one, the default
 * chain `DEFAULT_ITEMS_PATHS` (ADR-0012 §D2) is tried in order and the first
 * path producing at least one match wins.
 *
 * @param selectors  Recipe selector block; only `selectors.items` is read.
 * @param body       Parsed JSON page body.
 * @returns `{ matches, path }` — the matched elements and the path that
 *   produced them (for debug surfaces like `source test`). When every default
 *   misses, `matches` is empty and `path` is the first default candidate.
 */
function resolveItemsList(selectors, body) {
    const explicitPath = selectors.items;
    if (explicitPath) {
        return { matches: selectAll(explicitPath, body), path: explicitPath };
    }
    let hit = null;
    // `some` stops at the first candidate that matches, so later candidates
    // are never evaluated.
    DEFAULT_ITEMS_PATHS.some((path) => {
        const found = selectAll(path, body);
        if (found.length === 0)
            return false;
        hit = { matches: found, path };
        return true;
    });
    return hit ?? { matches: [], path: DEFAULT_ITEMS_PATHS[0] };
}
/**
 * Resolve one per-item field, falling back to the field's default chain.
 *
 * When the recipe supplied `explicit`, only that path is evaluated and it is
 * reported as the adopted path regardless of what it yields. Otherwise the
 * candidates in `DEFAULT_FIELD_PATHS[field]` are tried in order and the first
 * one yielding a value other than `null` / `undefined` wins.
 *
 * The adopted path is returned alongside the value so the adapter can record
 * adoption once (first item) and surface it via `diag.selectorAdoption`,
 * letting `source test` print which candidate was adopted
 * (e.g. "title ← $.headline").
 *
 * @param field     Key into `DEFAULT_FIELD_PATHS`.
 * @param explicit  Recipe-supplied JSONPath, or a falsy value when omitted.
 * @param element   The item element to query.
 * @returns `{ value, path }`; `{ value: undefined, path: null }` when every
 *   default candidate misses.
 */
function resolveFieldWithFallback(field, explicit, element) {
    if (explicit) {
        return { value: selectOne(explicit, element), path: explicit };
    }
    const candidates = DEFAULT_FIELD_PATHS[field];
    for (let index = 0; index < candidates.length; index += 1) {
        const path = candidates[index];
        const value = selectOne(path, element);
        if (value == null)
            continue;
        return { value, path };
    }
    return { value: undefined, path: null };
}
/**
 * Resolve an item's `link` value against a base URL (#204).
 *
 * Many JSON APIs (notably AWS What's New) return the per-item link as a
 * relative path like `/about-aws/whats-new/.../` rather than a fully
 * qualified URL. Without resolution `ItemSchema`'s `z.string().url()`
 * silently drops every item.
 *
 * The caller passes `selectors.linkBase` when set, otherwise `source.url`
 * (mirroring the html adapter's `new URL(href, source.url)` behavior).
 * Absolute links come back unchanged apart from URL serialization, because
 * an absolute input makes `new URL` ignore the base.
 *
 * An input that cannot be parsed is returned verbatim rather than raising,
 * so a malformed `link` surfaces later as an ordinary `ItemSchema`
 * validation drop ("one broken record does not abort the whole page").
 *
 * @param {string} raw   Link value taken from the item.
 * @param {string | undefined} base  Base URL used for relative links.
 * @returns {string} The resolved URL, or `raw` when parsing fails.
 */
function resolveLinkUrl(raw, base) {
    if (!URL.canParse(raw, base)) {
        return raw;
    }
    return new URL(raw, base).toString();
}
/**
 * Coerce a JSON value to a trimmed, non-empty string.
 *
 * - strings are trimmed; a blank result becomes `undefined`
 * - numbers and booleans are stringified with `String()`
 * - everything else (null, undefined, objects, arrays, …) yields `undefined`
 *
 * @param {*} value
 * @returns {string | undefined}
 */
function coerceString(value) {
    switch (typeof value) {
        case "string": {
            const text = value.trim();
            return text === "" ? undefined : text;
        }
        case "number":
        case "boolean":
            return String(value);
        default:
            // Covers null (typeof "object"), undefined, objects and arrays.
            return undefined;
    }
}
/**
 * Coerce a JSON value to an ISO 8601 timestamp string.
 *
 * The value is first reduced with `coerceString`; the resulting text is then
 * parsed by the built-in `Date` parser. Anything that does not coerce to a
 * string, or that `Date` cannot parse, yields `undefined`.
 *
 * @param {*} value
 * @returns {string | undefined} `Date#toISOString()` output, or `undefined`.
 */
function coerceIsoDate(value) {
    const text = coerceString(value);
    if (text === undefined)
        return undefined;
    const millis = Date.parse(text);
    return Number.isNaN(millis) ? undefined : new Date(millis).toISOString();
}
/**
 * Turn one element matched by the items selector into a canonical `Item`.
 *
 * Returns `null` when the element has no usable link or when the assembled
 * candidate fails `ItemSchema` validation, so one broken record does not
 * abort the whole page.
 *
 * `Item.id` derivation follows ADR-0002; the inputs handed to
 * `deriveStableKey` are, in order of stability:
 *
 *   1. `selectors.publisherId` (explicit, most stable)
 *   2. the resolved link URL (canonical identifier)
 *   3. title + publishedAt (fallback hash inputs)
 *
 * `adoption` is mutated in place: for each defaultable field, the first call
 * that reaches that field stores the JSONPath that was adopted (or `null`
 * when every candidate missed). Entries that are already set are left alone,
 * so adoption reflects the first item — that is what `source test` reports.
 * Note that `publishedAt` / `summary` are only reached when the element has a
 * usable link.
 *
 * @param element    Raw item element from the page body (also stored as `raw`).
 * @param source     Source recipe; `id` and `url` are read.
 * @param selectors  Recipe selector block (may be empty).
 * @param fetchedAt  Timestamp string stamped onto the item.
 * @param adoption   Mutable per-fetch adoption record (see above).
 * @returns The validated item, or `null`.
 */
function elementToItem(element, source, selectors, fetchedAt, adoption) {
    // Resolve a defaultable field and remember, once, which path supplied it.
    const pick = (field) => {
        const outcome = resolveFieldWithFallback(field, selectors[field], element);
        if (adoption[field] === undefined) {
            adoption[field] = outcome.path;
        }
        return outcome.value;
    };
    // Optional, non-defaultable selector: evaluated only when the recipe set it.
    const pickOptional = (path) => (path ? coerceString(selectOne(path, element)) : undefined);

    const title = coerceString(pick("title")) ?? "";
    const rawLink = coerceString(pick("link"));
    if (!rawLink) {
        return null;
    }
    // Relative paths are resolved against `linkBase` (or `source.url`) so APIs
    // returning `/about-aws/whats-new/.../` still yield valid `Item.url`
    // values (#204).
    const url = resolveLinkUrl(rawLink, selectors.linkBase ?? source.url);
    const publisherId = pickOptional(selectors.publisherId);
    const publishedAt = coerceIsoDate(pick("publishedAt"));
    const summary = coerceString(pick("summary"));
    const body = pickOptional(selectors.body);
    // `selectors.tags` is recognized by the schema but only travels inside
    // `raw`: the filter pipeline (`buildHaystack`) does not structurally read
    // `Item.tags` for any adapter, so surfacing tags here would not improve
    // filtering until a future filter extension consumes them.
    const stableKey = deriveStableKey({
        publisherId,
        url,
        fallbackHashInputs: [title, publishedAt],
    });
    const candidate = {
        id: deriveItemId(title, stableKey),
        sourceId: source.id,
        title,
        url,
        fetchedAt,
        raw: element,
    };
    if (publishedAt) {
        candidate.publishedAt = publishedAt;
    }
    // Body stays inside `raw`; it is surfaced through `summary` only when no
    // summary value was found, which keeps the Item schema lean while still
    // letting recipes pull in a long description.
    const effectiveSummary = summary || body;
    if (effectiveSummary) {
        candidate.summary = effectiveSummary;
    }
    const parsed = ItemSchema.safeParse(candidate);
    if (!parsed.success) {
        return null;
    }
    return parsed.data;
}
/**
 * One iteration of pagination: issue a GET, decode the JSON, return the body
 * + the URL of the next page taken from the `Link` header (or `null`).
 *
 * Errors are thrown to the caller; the adapter wraps them with source-id
 * context before propagating to the watcher.
 *
 * Size cap: the per-page cap is expressed in bytes, so it is enforced twice —
 * first against a declared `content-length` (rejecting before the body is
 * buffered at all), then against the UTF-8 byte length of the decoded text.
 * Comparing `String.length` would count UTF-16 code units and under-count
 * multi-byte content by up to 3x.
 *
 * @param url         Page URL to request.
 * @param fetchImpl   `fetch` implementation handed to `fetchWithRetry`.
 * @param headers     Base request headers (copied, never mutated).
 * @param pagination  Pagination config; only `type` is read here.
 * @param pageIndex   Zero-based page index within this traversal.
 * @param state       `{ etag, sendConditional }` from the previous run.
 * @returns `{ body, bodyText, status, etag, linkNext }`; on a 304, `body` is
 *   `null` and `bodyText` is `""`.
 * @throws {Error} On a non-2xx status, an over-cap response, or invalid JSON
 *   (the original parse error is attached as `cause`).
 */
async function fetchPage(url, fetchImpl, headers, pagination, pageIndex, state) {
    // Forward conditional-GET headers only on page 0 — pagination URLs are
    // ephemeral and most servers will not 304 them. ETag-aware short-circuit
    // is mainly useful for the "no items have changed since last run" case.
    // We also skip conditional GET in backfill mode (caller sets
    // `sendConditional: false`) so a stale ETag from a previous normal-mode
    // run does not 304-out the requested full-history traversal.
    const requestHeaders = { ...headers };
    if (pageIndex === 0 &&
        state.sendConditional !== false &&
        state.etag &&
        !state.etag.startsWith(CONTENT_HASH_PREFIX) &&
        !("if-none-match" in requestHeaders)) {
        requestHeaders["if-none-match"] = state.etag;
    }
    const response = await fetchWithRetry(fetchImpl, url, { headers: requestHeaders });
    const etag = response.headers.get("etag");
    const linkNext = pagination.type === "link-header" ? parseLinkHeader(response.headers.get("link")) : null;
    if (response.status === 304) {
        return { body: null, bodyText: "", status: 304, etag, linkNext };
    }
    if (response.status < 200 || response.status >= 300) {
        throw new Error(`json-api adapter: HTTP ${response.status} from ${url}`);
    }
    // Cheap pre-check: if the server already declares an over-cap size, bail
    // out before pulling the whole body into memory.
    const declared = response.headers.get("content-length");
    const declaredBytes = declared === null ? Number.NaN : Number(declared);
    if (Number.isFinite(declaredBytes) && declaredBytes > RESPONSE_SIZE_CAP_BYTES) {
        throw new Error(`json-api adapter: response too large (${declaredBytes} bytes > ${RESPONSE_SIZE_CAP_BYTES} cap) from ${url}`);
    }
    const bodyText = await response.text();
    // Measure in bytes, matching the cap's unit and the error message.
    const bodyBytes = Buffer.byteLength(bodyText, "utf8");
    if (bodyBytes > RESPONSE_SIZE_CAP_BYTES) {
        throw new Error(`json-api adapter: response too large (${bodyBytes} bytes > ${RESPONSE_SIZE_CAP_BYTES} cap) from ${url}`);
    }
    let parsed;
    try {
        parsed = JSON.parse(bodyText);
    }
    catch (e) {
        throw new Error(`json-api adapter: failed to parse JSON from ${url}: ${e instanceof Error ? e.message : String(e)}`, { cause: e });
    }
    return { body: parsed, bodyText, status: response.status, etag, linkNext };
}
/**
 * Derive the URL of the following page from the pagination strategy and the
 * page that was just read. A `null` result means "stop traversing".
 *
 * Per strategy:
 *
 * - `none`        → always `null`.
 * - `link-header` → always `null` here. The next URL for this strategy comes
 *   from the response's `Link` header, which `fetchPage` reads; returning
 *   `null` keeps the loop finite when a recipe says `link-header` but no
 *   `Link` header arrived (or mixes `link-header` with an explicit `param`).
 * - `page`        → `null` on an empty page, else `start + pageCountSoFar`
 *   stamped into `param` (default `page`), plus `pageSize` when configured.
 * - `offset`      → `null` on an empty page, else
 *   `start + pageCountSoFar * limit` stamped into `param` (default
 *   `offset`), where `limit` is `pageSize` or the current page's item count.
 * - `cursor` / `token` → the value at `nextCursorPath` in the current body,
 *   stamped onto `source.url`; `null` when the path is unset or yields no
 *   usable value.
 *
 * @param source              Source recipe; `url` is the cursor/token base.
 * @param pagination          Pagination config from the recipe.
 * @param currentUrl          URL of the page just fetched.
 * @param currentBody         Parsed body of the page just fetched.
 * @param currentItemsLength  Number of items taken from that page.
 * @param pageCountSoFar      Pages consumed so far, including this one.
 */
function computeNextUrl(source, pagination, currentUrl, currentBody, currentItemsLength, pageCountSoFar) {
    const { type } = pagination;
    if (type === "none" || type === "link-header") {
        return null;
    }
    if (type === "page") {
        if (currentItemsLength === 0) {
            return null;
        }
        const pageParam = pagination.param ?? "page";
        const firstPage = pagination.start ?? 0;
        const withPage = setQueryParam(currentUrl, pageParam, firstPage + pageCountSoFar);
        if (pagination.pageSize === undefined) {
            return withPage;
        }
        return setQueryParam(withPage, pagination.pageSizeParam ?? "pageSize", pagination.pageSize);
    }
    if (type === "offset") {
        if (currentItemsLength === 0) {
            return null;
        }
        const offsetParam = pagination.param ?? "offset";
        const firstOffset = pagination.start ?? 0;
        // Without a configured page size, the size of the page just read is
        // used as the stride.
        const limit = pagination.pageSize ?? currentItemsLength;
        const withOffset = setQueryParam(currentUrl, offsetParam, firstOffset + pageCountSoFar * limit);
        if (pagination.pageSize === undefined) {
            return withOffset;
        }
        return setQueryParam(withOffset, pagination.pageSizeParam ?? "limit", limit);
    }
    if (type === "cursor" || type === "token") {
        if (!pagination.nextCursorPath) {
            return null;
        }
        const cursor = coerceString(selectOne(pagination.nextCursorPath, currentBody));
        if (!cursor) {
            return null;
        }
        const fallbackParam = type === "cursor" ? "after" : "pageToken";
        return setQueryParam(source.url, pagination.param ?? fallbackParam, cursor);
    }
}
/**
 * Build the page-0 URL for a source.
 *
 * `none`, `link-header`, `cursor` and `token` start from the recipe URL
 * exactly as written — for cursor/token strategies the recipe URL is expected
 * to already carry whatever initial cursor the site wants (typically none).
 *
 * `page` and `offset` stamp `start` (default `0`) into the position parameter
 * (default `page` / `offset`) and, when `pageSize` is configured, stamp it
 * into the size parameter (default `pageSize` / `limit`).
 *
 * @param source      Source recipe; only `url` is read.
 * @param pagination  Pagination config from the recipe.
 * @returns The first URL to request.
 */
function initialUrl(source, pagination) {
    const { type } = pagination;
    if (type === "none" || type === "link-header" || type === "cursor" || type === "token") {
        return source.url;
    }
    if (type !== "page" && type !== "offset") {
        return undefined;
    }
    const isPage = type === "page";
    const positionParam = pagination.param ?? (isPage ? "page" : "offset");
    const firstUrl = setQueryParam(source.url, positionParam, pagination.start ?? 0);
    if (pagination.pageSize === undefined) {
        return firstUrl;
    }
    const sizeParam = pagination.pageSizeParam ?? (isPage ? "pageSize" : "limit");
    return setQueryParam(firstUrl, sizeParam, pagination.pageSize);
}
/**
 * Decide how many pages a traversal may fetch at most.
 *
 * Normal mode always uses the recipe cap (`pagination.maxPages`); the loop's
 * own stop conditions are expected to end periodic ingest earlier than that.
 * Backfill mode uses the recipe cap too, but an explicit `--max-pages`
 * override can lower it — the override never raises the cap above the
 * recipe's value.
 *
 * @param pagination  Pagination config; only `maxPages` is read.
 * @param {boolean} backfill  Whether this is a backfill run.
 * @param {number | undefined} override  Optional `--max-pages` value.
 * @returns The page cap to enforce.
 */
function effectiveMaxPages(pagination, backfill, override) {
    const { maxPages } = pagination;
    const overrideApplies = backfill && override !== undefined;
    return overrideApplies ? Math.min(maxPages, override) : maxPages;
}
477
+ export const jsonApiAdapter = {
478
+ kind: "json-api",
479
+ fetch: async (source, options = {}) => {
480
+ if (!source.pagination) {
481
+ throw new Error(`json-api adapter: source '${source.id}' has no pagination config`);
482
+ }
483
+ const fetchImpl = options.fetch ?? globalThis.fetch;
484
+ if (typeof fetchImpl !== "function") {
485
+ throw new Error("json-api adapter: no fetch implementation available (Node 22+ required)");
486
+ }
487
+ const pagination = source.pagination;
488
+ // `jsonSelectors` is optional in the schema (#174). When omitted, every
489
+ // field falls back to its default chain so trivial APIs (dev.to,
490
+ // generic JSON Feed clones) work without a selector block at all.
491
+ const selectors = source.jsonSelectors ?? {};
492
+ const env = options.env ?? process.env;
493
+ const headers = buildHeaders(source, env);
494
+ const previous = options.state;
495
+ const previousSeen = new Set(previous?.lastSeenIds ?? []);
496
+ const fetchedAt = new Date().toISOString();
497
+ const backfill = options.backfill === true;
498
+ const dryRun = options.dryRun === true;
499
+ const warn = options.warn ?? (() => { });
500
+ const onPage = options.onPage;
501
+ const maxPages = effectiveMaxPages(pagination, backfill, options.maxPagesOverride);
502
+ let currentUrl = initialUrl(source, pagination);
503
+ let pageIndex = 0;
504
+ const items = [];
505
+ let lastEtag = null;
506
+ let firstBodyText = null;
507
+ let firstBody = null;
508
+ let notModified = false;
509
+ // `undefined` means "not seen yet"; once we normalize the first item we
510
+ // overwrite each entry with either the matched path (string) or `null`
511
+ // (no candidate yielded a value). The diag payload reports the final
512
+ // state at end-of-fetch.
513
+ const adoption = {
514
+ title: undefined,
515
+ link: undefined,
516
+ publishedAt: undefined,
517
+ summary: undefined,
518
+ };
519
+ let itemsPath = null;
520
+ let paginationPreview;
521
+ // Effective cap may tighten mid-traversal when `totalPath` resolves to a
522
+ // value smaller than the recipe's `maxPages` (backfill early stop).
523
+ let effectiveCap = maxPages;
524
+ // Dry-run mode short-circuits after page 0: we record the diag preview
525
+ // (next URL / Link header / nextCursor) but never fetch page 1.
526
+ if (dryRun)
527
+ effectiveCap = Math.min(effectiveCap, 1);
528
+ while (pageIndex < effectiveCap) {
529
+ const response = await fetchPage(currentUrl, fetchImpl, headers, pagination, pageIndex, {
530
+ etag: previous?.lastEtag,
531
+ // Skip conditional GET in backfill mode so a stale ETag from a
532
+ // previous normal-mode run does not 304-out a requested full-history
533
+ // traversal.
534
+ sendConditional: !backfill,
535
+ });
536
+ if (pageIndex === 0) {
537
+ firstBody = response.body;
538
+ firstBodyText = response.bodyText;
539
+ lastEtag = response.etag;
540
+ if (response.status === 304) {
541
+ notModified = true;
542
+ break;
543
+ }
544
+ }
545
+ if (response.status === 304) {
546
+ // 304 on a later page is unusual but treat as end-of-pagination.
547
+ break;
548
+ }
549
+ const itemsResult = resolveItemsList(selectors, response.body);
550
+ if (pageIndex === 0)
551
+ itemsPath = itemsResult.path;
552
+ const matches = itemsResult.matches;
553
+ const pageItems = matches
554
+ .map((m) => elementToItem(m, source, selectors, fetchedAt, adoption))
555
+ .filter((i) => i !== null);
556
+ // Surface a pagination preview for `source test` on page 0 only. We
557
+ // compute the *would-be* next URL / cursor / Link header but never
558
+ // actually fetch it in dry-run mode (#174 state-clean invariant).
559
+ if (pageIndex === 0) {
560
+ const linkHeaderNext = pagination.type === "link-header" ? response.linkNext : undefined;
561
+ let nextCursor;
562
+ if ((pagination.type === "cursor" || pagination.type === "token") &&
563
+ pagination.nextCursorPath) {
564
+ nextCursor = coerceString(selectOne(pagination.nextCursorPath, response.body)) ?? null;
565
+ }
566
+ let previewNextUrl;
567
+ if (pagination.type === "link-header") {
568
+ previewNextUrl = response.linkNext;
569
+ }
570
+ else {
571
+ previewNextUrl = computeNextUrl(source, pagination, currentUrl, response.body, pageItems.length, 1);
572
+ }
573
+ paginationPreview = {
574
+ strategy: pagination.type,
575
+ nextUrl: previewNextUrl,
576
+ ...(linkHeaderNext !== undefined ? { linkHeaderNext } : {}),
577
+ ...(nextCursor !== undefined ? { nextCursor } : {}),
578
+ };
579
+ }
580
+ // Normal-mode early stop: if this page contains an id we have already
581
+ // seen, the older pages will all be older still — stop paginating.
582
+ let hitSeen = false;
583
+ if (!backfill && previousSeen.size > 0) {
584
+ for (const item of pageItems) {
585
+ if (previousSeen.has(item.id)) {
586
+ hitSeen = true;
587
+ break;
588
+ }
589
+ }
590
+ }
591
+ items.push(...pageItems);
592
+ // Backfill-mode early stop via `totalPath`: if the recipe declared a
593
+ // total-count selector, narrow the page budget so we exit after the
594
+ // implied last page rather than walking the full `maxPages` cap. We
595
+ // only consult `totalPath` on page 0 because the value is unlikely to
596
+ // change mid-traversal and re-evaluating per page would cost an extra
597
+ // JSONPath walk for negligible benefit.
598
+ //
599
+ // Applied BEFORE the `onPage` callback below so the user-visible
600
+ // `Page N/M` denominator already reflects the narrowed cap on the
601
+ // very first page event (otherwise the spinner ratio would jump
602
+ // from `1/20` to `1/2` between page 0 and page 1, which reads as a
603
+ // bug).
604
+ if (backfill && pagination.totalPath && pageIndex === 0) {
605
+ const totalRaw = selectOne(pagination.totalPath, response.body);
606
+ const total = typeof totalRaw === "number" ? totalRaw : Number(coerceString(totalRaw));
607
+ if (Number.isFinite(total) && total > 0 && pagination.pageSize) {
608
+ const computedMax = Math.max(1, Math.ceil(total / pagination.pageSize));
609
+ if (computedMax < effectiveCap) {
610
+ effectiveCap = computedMax;
611
+ }
612
+ }
613
+ }
614
+ // Surface per-page progress to the CLI spinner / non-TTY log (#198).
615
+ // The callback is invoked before any early-exit checks below so the
616
+ // user always sees a final `Page N/N` event for the page that decided
617
+ // termination. `effectiveCap` is the denominator the loop will respect
618
+ // (recipe `maxPages`, narrowed by `totalPath` on page 0 in backfill
619
+ // mode above), so the user-visible ratio shrinks as the budget tightens.
620
+ if (onPage) {
621
+ onPage({
622
+ pageIndex,
623
+ pageTotal: effectiveCap,
624
+ items: pageItems.length,
625
+ });
626
+ }
627
+ // Stop when the page yielded zero items — protects against runaway
628
+ // pagination on broken recipes / empty trailing pages.
629
+ if (matches.length === 0)
630
+ break;
631
+ if (hitSeen)
632
+ break;
633
+ // End-of-pagination heuristic: when the recipe declared a `pageSize`
634
+ // and this page returned fewer matches than that, treat it as the last
635
+ // page. Saves one extra round-trip per source on the common "trailing
636
+ // partial page" case (page 0 of size N, …, page K returns K' < N).
637
+ // Skipped for `cursor` / `token` pagination where `nextCursor` is the
638
+ // authoritative signal — those types may legitimately return fewer
639
+ // items per page than the requested size.
640
+ if (pagination.pageSize !== undefined &&
641
+ (pagination.type === "page" || pagination.type === "offset") &&
642
+ matches.length < pagination.pageSize) {
643
+ break;
644
+ }
645
+ // Compute next URL.
646
+ let nextUrl;
647
+ if (pagination.type === "link-header") {
648
+ nextUrl = response.linkNext;
649
+ }
650
+ else {
651
+ nextUrl = computeNextUrl(source, pagination, currentUrl, response.body, pageItems.length, pageIndex + 1);
652
+ }
653
+ if (!nextUrl)
654
+ break;
655
+ currentUrl = nextUrl;
656
+ pageIndex++;
657
+ }
658
+ // Warn for default-chain fields where every candidate returned null —
659
+ // recipe authors typically want to know the API has a non-standard
660
+ // shape (e.g. `additionalFields.headline` instead of `$.title`). We
661
+ // skip the warning when the recipe explicitly declared the selector
662
+ // (the absence is then on the user, not the default chain).
663
+ for (const field of Object.keys(adoption)) {
664
+ const explicit = selectors[field];
665
+ if (!explicit && adoption[field] === null) {
666
+ warn(`json-api adapter: source '${source.id}' — default selector chain for '${field}' produced no value; consider setting jsonSelectors.${field} explicitly`);
667
+ }
668
+ }
669
+ // Build state. Prefer the server-supplied ETag; otherwise hash the page-0
670
+ // body so re-runs without a server ETag still dedup correctly (mirrors the
671
+ // html adapter's content-hash fallback).
672
+ let nextEtag = previous?.lastEtag;
673
+ if (lastEtag) {
674
+ nextEtag = lastEtag;
675
+ }
676
+ else if (firstBodyText && firstBodyText.length > 0) {
677
+ nextEtag = `${CONTENT_HASH_PREFIX}${createHash("sha256")
678
+ .update(firstBodyText)
679
+ .digest("hex")}`;
680
+ }
681
+ // Avoid unused-variable warnings while keeping `firstBody` available for
682
+ // future debug surfaces (`source test` may want to print the first page
683
+ // body when no items matched).
684
+ void firstBody;
685
+ // Compose diag payload for `source test --show-content`. The selector
686
+ // adoption map reports the JSONPath candidate that won the fallback
687
+ // chain per field (or the recipe-supplied path verbatim, or `null` when
688
+ // every candidate missed). Pagination preview surfaces the next-URL /
689
+ // Link / cursor extraction so users can spot misconfigurations without
690
+ // letting the dry-run actually walk page 1.
691
+ const selectorAdoption = {
692
+ items: itemsPath ?? null,
693
+ title: adoption.title ?? null,
694
+ link: adoption.link ?? null,
695
+ publishedAt: adoption.publishedAt ?? null,
696
+ summary: adoption.summary ?? null,
697
+ };
698
+ const diag = {
699
+ selectorAdoption,
700
+ ...(paginationPreview ? { paginationPreview } : {}),
701
+ };
702
+ if (notModified) {
703
+ return {
704
+ items: [],
705
+ notModified: true,
706
+ state: {
707
+ lastFetchedAt: fetchedAt,
708
+ lastEtag: nextEtag,
709
+ },
710
+ diag,
711
+ };
712
+ }
713
+ return {
714
+ items,
715
+ state: {
716
+ lastFetchedAt: fetchedAt,
717
+ lastEtag: nextEtag,
718
+ },
719
+ diag,
720
+ };
721
+ },
722
+ };
723
+ //# sourceMappingURL=json-api.js.map