@ozzylabs/feedradar 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli/dismiss.d.ts +2 -1
- package/dist/cli/dismiss.d.ts.map +1 -1
- package/dist/cli/dismiss.js +4 -1
- package/dist/cli/dismiss.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +7 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/items.d.ts +44 -0
- package/dist/cli/items.d.ts.map +1 -0
- package/dist/cli/items.js +288 -0
- package/dist/cli/items.js.map +1 -0
- package/dist/cli/research.d.ts +21 -0
- package/dist/cli/research.d.ts.map +1 -1
- package/dist/cli/research.js +54 -10
- package/dist/cli/research.js.map +1 -1
- package/dist/cli/review.d.ts +23 -0
- package/dist/cli/review.d.ts.map +1 -1
- package/dist/cli/review.js +293 -2
- package/dist/cli/review.js.map +1 -1
- package/dist/cli/source.d.ts.map +1 -1
- package/dist/cli/source.js +3 -0
- package/dist/cli/source.js.map +1 -1
- package/dist/cli/triage.d.ts +136 -0
- package/dist/cli/triage.d.ts.map +1 -0
- package/dist/cli/triage.js +1110 -0
- package/dist/cli/triage.js.map +1 -0
- package/dist/cli/undismiss.d.ts +30 -0
- package/dist/cli/undismiss.d.ts.map +1 -0
- package/dist/cli/undismiss.js +133 -0
- package/dist/cli/undismiss.js.map +1 -0
- package/dist/cli/watch.d.ts.map +1 -1
- package/dist/cli/watch.js +2 -0
- package/dist/cli/watch.js.map +1 -1
- package/dist/cli/workflow/generate-combined-with-triage.d.ts +115 -0
- package/dist/cli/workflow/generate-combined-with-triage.d.ts.map +1 -0
- package/dist/cli/workflow/generate-combined-with-triage.js +446 -0
- package/dist/cli/workflow/generate-combined-with-triage.js.map +1 -0
- package/dist/cli/workflow.d.ts +6 -5
- package/dist/cli/workflow.d.ts.map +1 -1
- package/dist/cli/workflow.js +13 -8
- package/dist/cli/workflow.js.map +1 -1
- package/dist/core/feeds/json-api.d.ts +26 -0
- package/dist/core/feeds/json-api.d.ts.map +1 -1
- package/dist/core/feeds/json-api.js +360 -223
- package/dist/core/feeds/json-api.js.map +1 -1
- package/dist/core/recipes.d.ts.map +1 -1
- package/dist/core/recipes.js +10 -0
- package/dist/core/recipes.js.map +1 -1
- package/dist/core/transitions.d.ts +30 -0
- package/dist/core/transitions.d.ts.map +1 -0
- package/dist/core/transitions.js +103 -0
- package/dist/core/transitions.js.map +1 -0
- package/dist/core/triage/adapter.d.ts +80 -0
- package/dist/core/triage/adapter.d.ts.map +1 -0
- package/dist/core/triage/adapter.js +128 -0
- package/dist/core/triage/adapter.js.map +1 -0
- package/dist/core/triage/index.d.ts +105 -0
- package/dist/core/triage/index.d.ts.map +1 -0
- package/dist/core/triage/index.js +246 -0
- package/dist/core/triage/index.js.map +1 -0
- package/dist/core/triage/prompt.d.ts +30 -0
- package/dist/core/triage/prompt.d.ts.map +1 -0
- package/dist/core/triage/prompt.js +157 -0
- package/dist/core/triage/prompt.js.map +1 -0
- package/dist/core/triage/response.d.ts +114 -0
- package/dist/core/triage/response.d.ts.map +1 -0
- package/dist/core/triage/response.js +188 -0
- package/dist/core/triage/response.js.map +1 -0
- package/dist/recipes/aws-whats-new.yaml +62 -7
- package/dist/recipes/dev-to.yaml +24 -0
- package/dist/schemas/item.d.ts +151 -5
- package/dist/schemas/item.d.ts.map +1 -1
- package/dist/schemas/item.js +164 -4
- package/dist/schemas/item.js.map +1 -1
- package/dist/schemas/recipe.d.ts +22 -0
- package/dist/schemas/recipe.d.ts.map +1 -1
- package/dist/schemas/recipe.js +13 -1
- package/dist/schemas/recipe.js.map +1 -1
- package/dist/schemas/source.d.ts +135 -0
- package/dist/schemas/source.d.ts.map +1 -1
- package/dist/schemas/source.js +138 -0
- package/dist/schemas/source.js.map +1 -1
- package/dist/templates/agents/AGENTS.md +36 -4
- package/dist/templates/workflows/combined-with-triage.template.yaml.tmpl +133 -0
- package/package.json +1 -1
|
@@ -474,250 +474,387 @@ function effectiveMaxPages(pagination, backfill, override) {
|
|
|
474
474
|
return Math.min(recipeCap, override);
|
|
475
475
|
return recipeCap;
|
|
476
476
|
}
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
const
|
|
498
|
-
const
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
477
|
+
/**
 * Apply a single facet value to the source URL by injecting the templated
 * query parameter.
 *
 * The first `{}` placeholder in `facet.template` is replaced with the
 * stringified value and the result is written to the `facet.param` query
 * parameter. Any existing value of that parameter is replaced (not
 * appended), so a recipe URL carrying a placeholder/default does not
 * double-up at fetch time.
 *
 * @param {string} rawUrl - Absolute URL to stamp; `new URL` throws a
 *   TypeError when it cannot be parsed.
 * @param {{ param: string, template: string }} facet - Facet spec; only
 *   `param` and `template` are read here.
 * @param {string | number} value - Facet value to inject.
 * @returns {string} The serialized URL with the facet parameter set.
 */
function applyFacetValue(rawUrl, facet, value) {
    const url = new URL(rawUrl);
    const text = String(value);
    // A replacer function inserts the value literally. Passing the value as
    // a replacement *string* would expand `$&`, `$$`, `` $` `` etc. and
    // silently corrupt facet values that contain a dollar sign.
    const substituted = facet.template.replace("{}", () => text);
    url.searchParams.set(facet.param, substituted);
    return url.toString();
}
|
|
488
|
+
/**
 * Enumerate the facet values for a single facet spec.
 *
 * - `range`: walks `[start, end]` inclusive in increments of `facet.step`
 *   (1 when `step` is omitted).
 * - anything else (`enum`): yields the entries of `facet.values` verbatim,
 *   in order (string or number).
 *
 * The schema is expected to guarantee `step > 0` and `start <= end`; the
 * step is re-checked here anyway because a zero or negative step would make
 * the loop below spin forever.
 *
 * @param {{ type: string, range?: [number, number], step?: number, values?: Iterable<string | number> }} facet
 * @yields {string | number} Each facet value in sweep order.
 * @throws {RangeError} When a range facet's step is not a finite number
 *   greater than zero (raised on first iteration, since this is a generator).
 */
function* generateFacetValues(facet) {
    if (facet.type !== "range") {
        yield* facet.values;
        return;
    }
    const [start, end] = facet.range;
    const step = facet.step ?? 1;
    if (!Number.isFinite(step) || step <= 0) {
        throw new RangeError(`json-api adapter: facet range step must be a finite number greater than 0 (got ${String(step)})`);
    }
    for (let current = start; current <= end; current += step) {
        yield current;
    }
}
|
|
506
|
+
/**
 * Build the page-0 pagination diagnostics: the *would-be* next URL plus, when
 * the strategy provides them, the raw `Link` header target or next cursor.
 * Nothing is fetched here.
 *
 * @param {object} source - Source record, forwarded to `computeNextUrl`.
 * @param {object} pagination - `source.pagination`.
 * @param {string} currentUrl - URL page 0 was requested from.
 * @param {object} response - Page-0 result from `fetchPage`.
 * @param {number} pageItemCount - Number of normalized items on page 0.
 * @returns {object} `{ strategy, nextUrl, linkHeaderNext?, nextCursor? }`.
 */
function buildPaginationPreview(source, pagination, currentUrl, response, pageItemCount) {
    const usesLinkHeader = pagination.type === "link-header";
    const linkHeaderNext = usesLinkHeader ? response.linkNext : undefined;
    let nextCursor;
    if ((pagination.type === "cursor" || pagination.type === "token") &&
        pagination.nextCursorPath) {
        nextCursor = coerceString(selectOne(pagination.nextCursorPath, response.body)) ?? null;
    }
    const nextUrl = usesLinkHeader
        ? response.linkNext
        : computeNextUrl(source, pagination, currentUrl, response.body, pageItemCount, 1);
    return {
        strategy: pagination.type,
        nextUrl,
        ...(linkHeaderNext !== undefined ? { linkHeaderNext } : {}),
        ...(nextCursor !== undefined ? { nextCursor } : {}),
    };
}

/**
 * Narrow the page budget using the total-count value at
 * `pagination.totalPath`. Returns `currentCap` unchanged unless the total is
 * a positive finite number, `pagination.pageSize` is set, and the implied
 * page count is strictly smaller than the current cap.
 *
 * @param {object} pagination - `source.pagination` (reads `totalPath`, `pageSize`).
 * @param {unknown} body - Parsed page-0 response body.
 * @param {number} currentCap - Page budget before narrowing.
 * @returns {number} The (possibly smaller) page budget.
 */
function narrowCapByTotal(pagination, body, currentCap) {
    const totalRaw = selectOne(pagination.totalPath, body);
    const total = typeof totalRaw === "number" ? totalRaw : Number(coerceString(totalRaw));
    if (!Number.isFinite(total) || total <= 0 || !pagination.pageSize) {
        return currentCap;
    }
    const computedMax = Math.max(1, Math.ceil(total / pagination.pageSize));
    return computedMax < currentCap ? computedMax : currentCap;
}

/**
 * Choose the ETag to persist: the server-supplied one when present,
 * otherwise a SHA-256 content hash of the page-0 body text, otherwise the
 * previously stored value.
 *
 * @param {string | undefined} previousEtag - ETag stored by the last run.
 * @param {string | null | undefined} responseEtag - ETag from the page-0 response.
 * @param {string | null} firstBodyText - Raw page-0 body text, if any.
 * @returns {string | undefined} ETag value for the next state.
 */
function resolveNextEtag(previousEtag, responseEtag, firstBodyText) {
    if (responseEtag) {
        return responseEtag;
    }
    if (firstBodyText && firstBodyText.length > 0) {
        return `${CONTENT_HASH_PREFIX}${createHash("sha256").update(firstBodyText).digest("hex")}`;
    }
    return previousEtag;
}

/**
 * Inner fetch — the single-axis (pagination-only) traversal of one source URL.
 *
 * Pages are requested one at a time until the page budget is exhausted or a
 * page signals the end: a 304, zero matches, an already-seen id (normal mode
 * only), a short page for `page`/`offset` pagination, or no next URL.
 *
 * Options read here: `fetch`, `env`, `state` (`lastEtag`, `lastSeenIds`),
 * `backfill`, `dryRun`, `warn`, `onPage`, `maxPagesOverride`.
 *
 * @param {object} source - Source record; must carry `pagination`.
 * @param {object} options - Fetch options (see above).
 * @returns {Promise<object>} `{ items, state: { lastFetchedAt, lastEtag }, diag }`,
 *   plus `notModified: true` (with empty `items`) when page 0 answered 304.
 * @throws {Error} When the source has no pagination config or no fetch
 *   implementation is available.
 */
async function fetchSingle(source, options) {
    if (!source.pagination) {
        throw new Error(`json-api adapter: source '${source.id}' has no pagination config`);
    }
    const fetchImpl = options.fetch ?? globalThis.fetch;
    if (typeof fetchImpl !== "function") {
        throw new Error("json-api adapter: no fetch implementation available (Node 22+ required)");
    }
    const pagination = source.pagination;
    // `jsonSelectors` is optional in the schema (#174). When omitted, every
    // field falls back to its default chain so trivial APIs work without a
    // selector block at all.
    const selectors = source.jsonSelectors ?? {};
    const headers = buildHeaders(source, options.env ?? process.env);
    const previous = options.state;
    const previousSeen = new Set(previous?.lastSeenIds ?? []);
    const fetchedAt = new Date().toISOString();
    const backfill = options.backfill === true;
    const dryRun = options.dryRun === true;
    const warn = options.warn ?? (() => { });
    const onPage = options.onPage;
    const maxPages = effectiveMaxPages(pagination, backfill, options.maxPagesOverride);
    let currentUrl = initialUrl(source, pagination);
    let pageIndex = 0;
    const items = [];
    let lastEtag = null;
    let firstBodyText = null;
    let notModified = false;
    // `undefined` means "not seen yet"; `elementToItem` overwrites each entry
    // with either the matched path (string) or `null` (no candidate yielded a
    // value). The diag payload reports the final state at end-of-fetch.
    const adoption = {
        title: undefined,
        link: undefined,
        publishedAt: undefined,
        summary: undefined,
    };
    let itemsPath = null;
    let paginationPreview;
    // The cap may tighten mid-traversal (backfill + `totalPath`). Dry-run
    // never goes past page 0: the preview is recorded but page 1 is not fetched.
    let effectiveCap = dryRun ? Math.min(maxPages, 1) : maxPages;
    while (pageIndex < effectiveCap) {
        const response = await fetchPage(currentUrl, fetchImpl, headers, pagination, pageIndex, {
            etag: previous?.lastEtag,
            // Skip conditional GET in backfill mode so a stale ETag from a
            // previous normal-mode run does not 304-out a requested
            // full-history traversal.
            sendConditional: !backfill,
        });
        const isFirstPage = pageIndex === 0;
        if (isFirstPage) {
            firstBodyText = response.bodyText;
            lastEtag = response.etag;
        }
        if (response.status === 304) {
            // Only a page-0 304 means "not modified"; on a later page it is
            // unusual and simply treated as end-of-pagination.
            if (isFirstPage) {
                notModified = true;
            }
            break;
        }
        const itemsResult = resolveItemsList(selectors, response.body);
        if (isFirstPage) {
            itemsPath = itemsResult.path;
        }
        const matches = itemsResult.matches;
        const pageItems = matches
            .map((match) => elementToItem(match, source, selectors, fetchedAt, adoption))
            .filter((item) => item !== null);
        // Surface a pagination preview for `source test` on page 0 only
        // (#174 state-clean invariant: computed, never fetched).
        if (isFirstPage) {
            paginationPreview = buildPaginationPreview(source, pagination, currentUrl, response, pageItems.length);
        }
        // Normal-mode early stop: if this page contains an id we have already
        // seen, the older pages will all be older still — stop paginating.
        const hitSeen = !backfill &&
            previousSeen.size > 0 &&
            pageItems.some((item) => previousSeen.has(item.id));
        items.push(...pageItems);
        // Backfill-mode early stop via `totalPath`, consulted on page 0 only.
        // Applied BEFORE `onPage` so the `Page N/M` denominator already
        // reflects the narrowed cap on the very first page event.
        if (backfill && pagination.totalPath && isFirstPage) {
            effectiveCap = narrowCapByTotal(pagination, response.body, effectiveCap);
        }
        // Surface per-page progress (#198). Invoked before the early-exit
        // checks below so the page that decides termination is still reported.
        if (onPage) {
            onPage({
                pageIndex,
                pageTotal: effectiveCap,
                items: pageItems.length,
            });
        }
        // Stop when the page yielded zero items — protects against runaway
        // pagination on broken recipes / empty trailing pages.
        if (matches.length === 0) {
            break;
        }
        if (hitSeen) {
            break;
        }
        // End-of-pagination heuristic: a page shorter than the declared
        // `pageSize` is the last one. Skipped for `cursor` / `token`
        // pagination, where a short page is not treated as an end signal.
        const sizeBounded = pagination.type === "page" || pagination.type === "offset";
        if (pagination.pageSize !== undefined &&
            sizeBounded &&
            matches.length < pagination.pageSize) {
            break;
        }
        const nextUrl = pagination.type === "link-header"
            ? response.linkNext
            : computeNextUrl(source, pagination, currentUrl, response.body, pageItems.length, pageIndex + 1);
        if (!nextUrl) {
            break;
        }
        currentUrl = nextUrl;
        pageIndex++;
    }
    // Warn for default-chain fields where every candidate returned null.
    // Skipped when the recipe explicitly declared the selector (the absence
    // is then on the recipe, not the default chain).
    for (const field of Object.keys(adoption)) {
        const explicit = selectors[field];
        if (!explicit && adoption[field] === null) {
            warn(`json-api adapter: source '${source.id}' — default selector chain for '${field}' produced no value; consider setting jsonSelectors.${field} explicitly`);
        }
    }
    const state = {
        lastFetchedAt: fetchedAt,
        lastEtag: resolveNextEtag(previous?.lastEtag, lastEtag, firstBodyText),
    };
    // Diag payload for `source test --show-content`: which selector path was
    // adopted per field (or `null`), plus the pagination preview when page 0
    // was actually parsed.
    const selectorAdoption = {
        items: itemsPath ?? null,
        title: adoption.title ?? null,
        link: adoption.link ?? null,
        publishedAt: adoption.publishedAt ?? null,
        summary: adoption.summary ?? null,
    };
    const diag = {
        selectorAdoption,
        ...(paginationPreview ? { paginationPreview } : {}),
    };
    if (notModified) {
        return { items: [], notModified: true, state, diag };
    }
    return { items, state, diag };
}
|
|
756
|
+
/**
 * Public adapter. Without `source.facets` it delegates straight to
 * {@link fetchSingle}. With a facet it wraps {@link fetchSingle} in an outer
 * facet sweep loop (ADR-0017). Each iteration:
 *
 *   - injects the facet value into the URL via {@link applyFacetValue}
 *   - delegates to {@link fetchSingle} with `facets: undefined` so the inner
 *     traversal sees the stamped URL but does not recurse
 *   - clears `state.lastEtag` (per-facet ETag tracking is deferred to a
 *     future ADR, so a single stored ETag must not be replayed per slice)
 *   - passes the running set of seen ids (previous `lastSeenIds` plus every
 *     item id collected so far) as the inner `state.lastSeenIds`
 *
 * Every facet value is walked in both normal and `--backfill` modes; the
 * other options are forwarded unchanged to each inner fetch. Dry-run
 * (`source test`) walks only the first facet value.
 *
 * Phase 1 limitation: a single facet entry only. Multi-facet (e.g. year ×
 * category) is rejected — see ADR-0017 §Scope.
 *
 * The sweep result carries `notModified: true` only when at least one facet
 * value was fetched, every fetch reported not-modified, and no items were
 * collected. `diag` is taken from the first facet value's fetch.
 *
 * @throws {Error} (as a rejected promise) when more than one facet is declared.
 */
export const jsonApiAdapter = {
    kind: "json-api",
    fetch: async (source, options = {}) => {
        const facetEntries = Object.entries(source.facets ?? {});
        if (facetEntries.length === 0) {
            return fetchSingle(source, options);
        }
        if (facetEntries.length > 1) {
            // Phase 1 single-facet guard. The schema accepts a record shape for
            // forward-compat, but composing two axes (year × category) needs
            // explicit ordering / dedup semantics that ADR-0017 defers.
            throw new Error(`json-api adapter: source '${source.id}' declares ${facetEntries.length} facets — multi-facet sweep is not supported in Phase 1 (ADR-0017 §Scope)`);
        }
        const [, facetSpec] = facetEntries[0];
        const dryRun = options.dryRun === true;
        const aggregatedItems = [];
        const aggregatedSeen = new Set(options.state?.lastSeenIds ?? []);
        let aggregatedDiag;
        // `notModified` is only meaningful if something was actually fetched:
        // a sweep that yields zero facet values must not claim "not modified".
        let fetchedAny = false;
        let everyFetchNotModified = true;
        const fetchedAt = new Date().toISOString();
        for (const value of generateFacetValues(facetSpec)) {
            // "Single-axis" view of the source: same id / pagination /
            // selectors, facet-stamped URL, and no facets so the inner fetch
            // does not recurse.
            const innerSource = {
                ...source,
                url: applyFacetValue(source.url, facetSpec, value),
                facets: undefined,
            };
            // Snapshot the running seen-set for the inner early-stop
            // heuristic, and drop the stored ETag so it is not replayed
            // against a different facet slice.
            const lastSeenIds = Array.from(aggregatedSeen);
            const innerState = options.state
                ? { ...options.state, lastEtag: undefined, lastSeenIds }
                : { sourceId: source.id, lastSeenIds };
            const result = await fetchSingle(innerSource, { ...options, state: innerState });
            fetchedAny = true;
            // Keep the diag from the FIRST facet value only — it is the
            // representative selector-adoption / pagination-preview surface.
            if (aggregatedDiag === undefined) {
                aggregatedDiag = result.diag;
            }
            if (!result.notModified) {
                everyFetchNotModified = false;
            }
            for (const item of result.items) {
                aggregatedItems.push(item);
                aggregatedSeen.add(item.id);
            }
            // Dry-run: walk only the first facet value so `source test` stays cheap.
            if (dryRun) {
                break;
            }
        }
        const notModified = fetchedAny && everyFetchNotModified && aggregatedItems.length === 0;
        return {
            items: aggregatedItems,
            // ADR-0017 §State: ETag disabled in facet sweep mode. Persist
            // `undefined` so the next run starts fresh.
            state: {
                lastFetchedAt: fetchedAt,
                lastEtag: undefined,
            },
            ...(notModified ? { notModified: true } : {}),
            ...(aggregatedDiag ? { diag: aggregatedDiag } : {}),
        };
    },
};
|
|
723
860
|
//# sourceMappingURL=json-api.js.map
|