niahere 0.3.5 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/programmatic-seo/SKILL.md +47 -0
- package/skills/programmatic-seo/agents/openai.yaml +4 -0
- package/skills/programmatic-seo/references/architecture.md +114 -0
- package/skills/programmatic-seo/references/content-quality.md +103 -0
- package/skills/programmatic-seo/references/nextjs-inspection.md +129 -0
- package/skills/programmatic-seo/references/sources.md +42 -0
- package/skills/programmatic-seo/references/validation.md +78 -0
- package/src/channels/slack.ts +14 -4
- package/src/core/runner.ts +15 -1
package/package.json
CHANGED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: programmatic-seo
|
|
3
|
+
description: "Plans, audits, and implements scalable programmatic SEO systems with URL inventory, intent mapping, metadata, schema, internal linking, sitemaps, ISR/static rendering, and quality gates. Use when the user mentions 'programmatic SEO,' 'pSEO,' '100k pages,' 'generate SEO pages,' 'scaled landing pages,' 'template pages,' 'SEO page factory,' 'dynamic SEO routes,' 'sitemap shards,' 'keyword cannibalization,' 'doorway pages,' or 'thin content at scale.' Does not replace general SEO audits, AI SEO citation work, or llms.txt tasks unless PSEO scale is central."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Programmatic SEO
|
|
7
|
+
|
|
8
|
+
## Core Rule
|
|
9
|
+
|
|
10
|
+
Turn PSEO requests into a gated system. A URL becomes indexable only after it has distinct intent, unique visible value, canonical consistency, crawlable links, and a measurable reason to stay live.
|
|
11
|
+
|
|
12
|
+
## First Checks
|
|
13
|
+
|
|
14
|
+
1. If `.agents/product-marketing-context.md` exists, read it before asking discovery questions.
|
|
15
|
+
2. If the task is a broad SEO audit without PSEO scale, invoke the `seo` skill instead.
|
|
16
|
+
3. If the user asks for current framework behavior, Google policy, or AI-search behavior, verify primary docs before relying on memory.
|
|
17
|
+
|
|
18
|
+
## Mode Selection
|
|
19
|
+
|
|
20
|
+
| Task | Read |
|
|
21
|
+
| --- | --- |
|
|
22
|
+
| Define page families, intent, uniqueness, doorway risk, cannibalization, hubs, or content blocks | [references/content-quality.md](references/content-quality.md) |
|
|
23
|
+
| Design the PSEO system: URL registry, statuses, metadata core, schema core, linking, sitemap model, facets | [references/architecture.md](references/architecture.md) |
|
|
24
|
+
| Inspect or optimize a Next.js app for PSEO: App Router, ISR, `generateStaticParams`, metadata, sitemaps, caching, build performance | [references/nextjs-inspection.md](references/nextjs-inspection.md) |
|
|
25
|
+
| Validate launch readiness, tests, Search Console checks, and monitoring | [references/validation.md](references/validation.md) |
|
|
26
|
+
| Ground claims in primary docs or source links | [references/sources.md](references/sources.md) |
|
|
27
|
+
|
|
28
|
+
## Output Shape
|
|
29
|
+
|
|
30
|
+
For audits, return:
|
|
31
|
+
- `Verdict`: whether the PSEO system is ready to scale.
|
|
32
|
+
- `Blockers`: issues that can harm crawl, indexation, quality, or build reliability.
|
|
33
|
+
- `Architecture`: recommended URL inventory, routing, rendering, metadata, schema, and sitemap structure.
|
|
34
|
+
- `Quality gates`: checks required before pages become indexable.
|
|
35
|
+
- `Implementation plan`: scoped code changes and verification steps.
|
|
36
|
+
|
|
37
|
+
For implementation, make the smallest code changes that establish durable guardrails. Prefer reusable SEO/data/template modules over inline page-level SEO logic.
|
|
38
|
+
|
|
39
|
+
## Hard Rules
|
|
40
|
+
|
|
41
|
+
- Scale only after a page pattern proves unique usefulness.
|
|
42
|
+
- Never create separate indexable URLs solely for plural/singular, synonym, query-fanout, or city/service keyword swaps.
|
|
43
|
+
- Never rely on `robots.txt` for deindexing or canonicalization.
|
|
44
|
+
- Never add structured data for content that is not visible to users on the page.
|
|
45
|
+
- Never include non-canonical, noindex, redirected, 404, or duplicate URLs in XML sitemaps.
|
|
46
|
+
- Never let arbitrary search, sort, filter, or facet combinations become indexable by default.
|
|
47
|
+
- Never promise that compliant pages will be indexed; treat indexation as an outcome to monitor.
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# PSEO Architecture
|
|
2
|
+
|
|
3
|
+
## Core Principle
|
|
4
|
+
|
|
5
|
+
Separate concerns:
|
|
6
|
+
|
|
7
|
+
- Data layer decides what URLs exist.
|
|
8
|
+
- SEO core generates metadata, canonicals, robots directives, and schema.
|
|
9
|
+
- Templates render visible page content from structured data.
|
|
10
|
+
- Routing resolves canonical paths and page status.
|
|
11
|
+
- Linking builds the crawl graph.
|
|
12
|
+
- Sitemaps expose only canonical, indexable URLs.
|
|
13
|
+
|
|
14
|
+
## Canonical URL Registry
|
|
15
|
+
|
|
16
|
+
Use a single registry or query layer as the source of truth:
|
|
17
|
+
|
|
18
|
+
```txt
|
|
19
|
+
pseo_pages
|
|
20
|
+
- id
|
|
21
|
+
- canonical_path
|
|
22
|
+
- slug_parts
|
|
23
|
+
- template_key
|
|
24
|
+
- entity_ids or query_hash
|
|
25
|
+
- title
|
|
26
|
+
- meta_description
|
|
27
|
+
- h1
|
|
28
|
+
- status: draft | indexable | noindex | canonical_duplicate | redirected | deleted
|
|
29
|
+
- canonical_target
|
|
30
|
+
- redirect_target
|
|
31
|
+
- last_modified_at
|
|
32
|
+
- content_hash
|
|
33
|
+
- priority_bucket
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Implementation rules:
|
|
37
|
+
|
|
38
|
+
- Page loaders fetch by canonical path.
|
|
39
|
+
- Missing or deleted pages return `404` or framework equivalent.
|
|
40
|
+
- `noindex` pages render explicit robots directives and stay out of sitemaps.
|
|
41
|
+
- Alias and legacy paths redirect to canonical paths.
|
|
42
|
+
- Internal links and sitemaps emit only canonical paths.
|
|
43
|
+
|
|
44
|
+
## Rendering Strategy
|
|
45
|
+
|
|
46
|
+
Choose rendering based on URL count, freshness, and cacheability:
|
|
47
|
+
|
|
48
|
+
- Full static generation: small, stable URL sets.
|
|
49
|
+
- ISR or equivalent incremental generation: large cacheable long-tail inventories.
|
|
50
|
+
- Dynamic rendering: real-time, personalized, auth-sensitive, or uncacheable pages.
|
|
51
|
+
- Static export: only when every route is known at build time and no incremental rendering is needed.
|
|
52
|
+
|
|
53
|
+
For Next.js-specific inspection and implementation guidance, read [nextjs-inspection.md](nextjs-inspection.md).
|
|
54
|
+
|
|
55
|
+
## Build Performance
|
|
56
|
+
|
|
57
|
+
Do:
|
|
58
|
+
|
|
59
|
+
- Pre-render only priority buckets when inventories are large.
|
|
60
|
+
- Generate fewer or zero long-tail paths in preview environments.
|
|
61
|
+
- Keep path collection cheap: one batched query, no full content fetches, no per-page network fanout.
|
|
62
|
+
- Fetch full content in the page loader, not in path generation.
|
|
63
|
+
|
|
64
|
+
Do not:
|
|
65
|
+
|
|
66
|
+
- Generate 100k+ pages in every build by default.
|
|
67
|
+
- Put expensive joins or remote calls inside path collection.
|
|
68
|
+
- Depend on dev-server behavior to validate production caching.
|
|
69
|
+
|
|
70
|
+
## Metadata And Schema Core
|
|
71
|
+
|
|
72
|
+
Create shared builders:
|
|
73
|
+
|
|
74
|
+
- `buildMetadata(page)`: title, description, canonical, robots, Open Graph, Twitter.
|
|
75
|
+
- `buildBreadcrumbs(page)`: canonical breadcrumb hierarchy.
|
|
76
|
+
- `buildSchema(page)`: JSON-LD matching visible content.
|
|
77
|
+
- `buildInternalLinks(page)`: parent, siblings, related pages, and next actions.
|
|
78
|
+
|
|
79
|
+
Schema rules:
|
|
80
|
+
|
|
81
|
+
- Use JSON-LD when possible.
|
|
82
|
+
- Mark up only content visible to users.
|
|
83
|
+
- Use the most specific relevant schema type.
|
|
84
|
+
- Keep required properties complete for the chosen rich result type.
|
|
85
|
+
- Validate representative pages with Rich Results Test or equivalent rendered inspection.
|
|
86
|
+
|
|
87
|
+
## Sitemaps
|
|
88
|
+
|
|
89
|
+
Rules:
|
|
90
|
+
|
|
91
|
+
- A sitemap file must contain no more than 50,000 URLs and be no larger than 50 MB uncompressed.
|
|
92
|
+
- Use a sitemap index for large inventories.
|
|
93
|
+
- Shard by page family, priority bucket, date, or deterministic ID range.
|
|
94
|
+
- Include only absolute canonical URLs returning 200 and marked indexable.
|
|
95
|
+
- Use accurate `lastmod` from the page source of truth.
|
|
96
|
+
- Reference the sitemap index from `robots.txt`.
|
|
97
|
+
|
|
98
|
+
For Next.js App Router, prefer `generateSitemaps()` or explicit route handlers for shards.
|
|
99
|
+
|
|
100
|
+
## Facets, Search Params, And Pagination
|
|
101
|
+
|
|
102
|
+
Facets are deny-by-default:
|
|
103
|
+
|
|
104
|
+
- Curate allowed indexable combinations.
|
|
105
|
+
- Return `404` for empty or nonsensical combinations.
|
|
106
|
+
- For variants that are useful to users but are not the canonical URL, either point `rel="canonical"` at the canonical URL or mark the variant `noindex`.
|
|
107
|
+
- Avoid arbitrary sort, order, filter, and tracking parameters in crawlable links.
|
|
108
|
+
|
|
109
|
+
Pagination:
|
|
110
|
+
|
|
111
|
+
- Give paginated content unique crawlable URLs.
|
|
112
|
+
- Link pages with normal anchors.
|
|
113
|
+
- Self-canonicalize pages that are distinct paginated archives.
|
|
114
|
+
- Do not make infinite scroll or "load more" the only discovery path.
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# PSEO Content Quality
|
|
2
|
+
|
|
3
|
+
## Intent Gate
|
|
4
|
+
|
|
5
|
+
Approve a page family only when it has:
|
|
6
|
+
|
|
7
|
+
- A target user and job-to-be-done.
|
|
8
|
+
- One primary intent cluster, not a bag of keyword variants.
|
|
9
|
+
- One canonical URL per intent cluster.
|
|
10
|
+
- A distinct reason to exist: proprietary data, useful aggregation, comparison, local/entity specificity, workflow utility, first-hand expertise, or an interactive tool.
|
|
11
|
+
- A clear relationship to a hub, siblings, and downstream actions.
|
|
12
|
+
|
|
13
|
+
Reject pages created only for plural/singular variants, synonyms, "near me" swaps, city/service shells, or AI-query fanout.
|
|
14
|
+
|
|
15
|
+
## Page Entity Model
|
|
16
|
+
|
|
17
|
+
Each generated page should be a first-class entity with fields like:
|
|
18
|
+
|
|
19
|
+
```txt
|
|
20
|
+
id
|
|
21
|
+
canonical_path
|
|
22
|
+
template_key
|
|
23
|
+
intent
|
|
24
|
+
primary_keyword
|
|
25
|
+
supporting_keywords
|
|
26
|
+
parent_hub
|
|
27
|
+
related_pages
|
|
28
|
+
schema_type
|
|
29
|
+
status
|
|
30
|
+
last_modified_at
|
|
31
|
+
content_hash
|
|
32
|
+
quality_score
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Keep page status explicit: `draft`, `indexable`, `noindex`, `canonical_duplicate`, `redirected`, or `deleted`.
|
|
36
|
+
|
|
37
|
+
## Unique Value Gate
|
|
38
|
+
|
|
39
|
+
Every indexable generated page should include at least three, and ideally five or more, page-specific content blocks chosen from:
|
|
40
|
+
|
|
41
|
+
- Direct answer to the main query.
|
|
42
|
+
- Key facts table from structured data.
|
|
43
|
+
- Comparison, ranking, availability, pricing, or alternatives.
|
|
44
|
+
- Evidence, methodology, dates, and source attribution.
|
|
45
|
+
- Real FAQs from query data, support logs, or sales calls.
|
|
46
|
+
- Examples, workflows, screenshots, calculators, filters, or next-step actions.
|
|
47
|
+
|
|
48
|
+
The template can be repeated. The value inside it cannot be mostly boilerplate.
|
|
49
|
+
|
|
50
|
+
## Duplicate And Cannibalization Gate
|
|
51
|
+
|
|
52
|
+
Before indexation:
|
|
53
|
+
|
|
54
|
+
- Compare pages inside the same template family with shingles, hashes, embeddings, or another near-duplicate check.
|
|
55
|
+
- Check keyword-to-URL ownership: one owner URL per intent.
|
|
56
|
+
- Canonicalize, merge, redirect, noindex, or delete pages that satisfy the same intent.
|
|
57
|
+
- Avoid linking with the same anchor text to multiple competing pages.
|
|
58
|
+
- Publish in batches and monitor whether Google chooses unexpected canonicals.
|
|
59
|
+
|
|
60
|
+
## Doorway Risk Gate
|
|
61
|
+
|
|
62
|
+
Reject or redesign page sets where:
|
|
63
|
+
|
|
64
|
+
- Pages funnel users to one real page instead of solving the searcher's need.
|
|
65
|
+
- Many pages target similar queries with substantially similar content.
|
|
66
|
+
- Location/service pages only swap place names, numbers, or generic claims.
|
|
67
|
+
- Pages are internal search results with no curated value.
|
|
68
|
+
- Pages are discoverable only through XML sitemaps or SEO-only link blocks.
|
|
69
|
+
- The page family is closer to search results than to a browseable hierarchy.
|
|
70
|
+
|
|
71
|
+
## Hub And Spoke Linking
|
|
72
|
+
|
|
73
|
+
Use a deliberate graph:
|
|
74
|
+
|
|
75
|
+
- Hubs target broad topics, categories, use cases, locations, or entity classes.
|
|
76
|
+
- Spokes target narrower intents and link up to hubs.
|
|
77
|
+
- Spokes link sideways to true siblings and related alternatives.
|
|
78
|
+
- Hubs link down to high-quality spokes in useful groupings.
|
|
79
|
+
- Conversion links appear only when contextually useful.
|
|
80
|
+
- Links must be crawlable `<a href>` links to canonical URLs.
|
|
81
|
+
|
|
82
|
+
## Structured Blocks
|
|
83
|
+
|
|
84
|
+
Use blocks that are easy for users and search systems to parse:
|
|
85
|
+
|
|
86
|
+
- Definition or direct answer: 40-80 words.
|
|
87
|
+
- Step-by-step process for "how to" intent.
|
|
88
|
+
- Comparison table for "vs", "best", and evaluation intent.
|
|
89
|
+
- Pros/cons for decision intent.
|
|
90
|
+
- FAQ using natural questions.
|
|
91
|
+
- Evidence block with dated sources and methodology.
|
|
92
|
+
|
|
93
|
+
For AI-search extractability, prefer concise answer blocks, tables, current facts, named sources, and clear headings. Do not create thin pages for every AI fanout query.
|
|
94
|
+
|
|
95
|
+
## Kill, Merge, Or Improve Rules
|
|
96
|
+
|
|
97
|
+
Set a review window per page family. Then act:
|
|
98
|
+
|
|
99
|
+
- Merge if two pages earn impressions for the same intent.
|
|
100
|
+
- Canonicalize if variants are useful to users but not distinct search results.
|
|
101
|
+
- Noindex if a page is useful in-product but not a search landing page.
|
|
102
|
+
- Redirect or delete if it has no durable user value.
|
|
103
|
+
- Improve if it has impressions but weak click-through or engagement and the intent remains valid.
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Next.js PSEO Inspection
|
|
2
|
+
|
|
3
|
+
Use this when auditing or optimizing a Next.js app for programmatic SEO. Keep the inspection tied to the app's actual version, router, deployment target, and caching model.
|
|
4
|
+
|
|
5
|
+
## Quick Classification
|
|
6
|
+
|
|
7
|
+
Inspect first:
|
|
8
|
+
|
|
9
|
+
- Next.js version and whether the app uses App Router, Pages Router, or both.
|
|
10
|
+
- Deployment target: Vercel, self-hosted Node, static export, edge runtime, or other.
|
|
11
|
+
- `next.config.*` for `output: "export"`, redirects, rewrites, headers, image config, experimental cache settings, and trailing slash behavior.
|
|
12
|
+
- Route inventory: `app/`, `pages/`, route handlers, dynamic segments, catch-all routes, and sitemap/robots files.
|
|
13
|
+
- Data source for slugs, canonical paths, status, redirects, `lastmod`, and template selection.
|
|
14
|
+
|
|
15
|
+
## App Router Checks
|
|
16
|
+
|
|
17
|
+
Inspect dynamic SEO routes:
|
|
18
|
+
|
|
19
|
+
- `app/[slug]/page.tsx`, `app/[...slug]/page.tsx`, or template-specific dynamic routes.
|
|
20
|
+
- `generateStaticParams()` scope and cost.
|
|
21
|
+
- `dynamicParams`, `dynamic`, `revalidate`, `fetchCache`, and runtime exports.
|
|
22
|
+
- `generateMetadata()` and whether it uses the same canonical registry as page rendering.
|
|
23
|
+
- `notFound()`, redirects, and `robots` metadata for non-indexable states.
|
|
24
|
+
|
|
25
|
+
Default for large PSEO: pre-render only priority paths and let the long tail generate through ISR or cached request-time rendering.
|
|
26
|
+
|
|
27
|
+
Pattern:
|
|
28
|
+
|
|
29
|
+
```ts
|
|
30
|
+
export const revalidate = 3600
|
|
31
|
+
|
|
32
|
+
export async function generateStaticParams() {
|
|
33
|
+
return hotPaths.map((path) => ({ slug: path.split("/") }))
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Rules:
|
|
38
|
+
|
|
39
|
+
- `generateStaticParams()` should fetch paths only, not full page bodies.
|
|
40
|
+
- Do not run one DB/API request per generated URL.
|
|
41
|
+
- Do not pre-render every long-tail URL in preview builds.
|
|
42
|
+
- Validate ISR/caching in production mode, not only `next dev`.
|
|
43
|
+
|
|
44
|
+
## Pages Router Checks
|
|
45
|
+
|
|
46
|
+
Inspect:
|
|
47
|
+
|
|
48
|
+
- `getStaticPaths`, `fallback`, `getStaticProps`, `revalidate`, and `getServerSideProps`.
|
|
49
|
+
- Whether `getStaticPaths` returns all URLs or only priority URLs.
|
|
50
|
+
- Whether fallback pages render complete indexable content once generated.
|
|
51
|
+
- Whether page data is too large for hydration or `__NEXT_DATA__`.
|
|
52
|
+
|
|
53
|
+
Prefer `fallback: "blocking"` or an equivalent long-tail strategy for large inventories when using Pages Router, unless the app has a reason to prebuild all paths.
|
|
54
|
+
|
|
55
|
+
## Metadata And Canonicals
|
|
56
|
+
|
|
57
|
+
Check that metadata is centralized:
|
|
58
|
+
|
|
59
|
+
- Title, description, canonical, robots, Open Graph, and Twitter values come from the page registry or SEO core.
|
|
60
|
+
- Canonicals are absolute, stable, and match internal links and sitemap URLs.
|
|
61
|
+
- Non-indexable statuses emit `noindex` and stay out of sitemaps.
|
|
62
|
+
- Duplicate or alias paths redirect or canonicalize consistently.
|
|
63
|
+
- `generateMetadata()` reuses the same memoized or cached loader as the page, so expensive page data is not fetched twice for one request.
|
|
64
|
+
|
|
65
|
+
## Structured Data
|
|
66
|
+
|
|
67
|
+
Inspect JSON-LD generation:
|
|
68
|
+
|
|
69
|
+
- It is emitted server-side in rendered HTML.
|
|
70
|
+
- It describes visible content on the page.
|
|
71
|
+
- It uses the right type for the template: `Article`, `FAQPage`, `BreadcrumbList`, `Product`, `ItemList`, `LocalBusiness`, or another specific type.
|
|
72
|
+
- Breadcrumb schema matches visible breadcrumbs and canonical URLs.
|
|
73
|
+
- FAQ schema only marks up visible FAQs.
|
|
74
|
+
|
|
75
|
+
## Sitemaps And Robots
|
|
76
|
+
|
|
77
|
+
Inspect:
|
|
78
|
+
|
|
79
|
+
- `app/sitemap.ts`, nested `sitemap.ts`, route handlers, or generated XML files.
|
|
80
|
+
- `generateSitemaps()` if URL count exceeds one sitemap file.
|
|
81
|
+
- URL count per shard and uncompressed size.
|
|
82
|
+
- Absolute canonical URLs only.
|
|
83
|
+
- Accurate `lastModified` from source data.
|
|
84
|
+
- No `noindex`, redirect, duplicate, 404, draft, or parameter URLs.
|
|
85
|
+
- `app/robots.ts` or `public/robots.txt` references the sitemap index.
|
|
86
|
+
|
|
87
|
+
For large inventories, shard by template, priority bucket, updated date, or deterministic ID range.
|
|
88
|
+
|
|
89
|
+
## Links, Facets, And Pagination
|
|
90
|
+
|
|
91
|
+
Inspect rendered links:
|
|
92
|
+
|
|
93
|
+
- Hubs, breadcrumbs, related pages, siblings, and next actions use normal `<a href>` links.
|
|
94
|
+
- Internal links point to canonical URLs.
|
|
95
|
+
- Search/filter/sort params are not crawlable by default.
|
|
96
|
+
- Empty or nonsensical facet combinations return `404` or are blocked before indexation.
|
|
97
|
+
- Paginated archives have unique crawlable URLs and self-canonicals when they represent distinct pages.
|
|
98
|
+
|
|
99
|
+
## Build And Runtime Performance
|
|
100
|
+
|
|
101
|
+
Inspect build output and logs:
|
|
102
|
+
|
|
103
|
+
- Number of generated static pages.
|
|
104
|
+
- Time spent inside path generation.
|
|
105
|
+
- Remote API or DB fanout during build.
|
|
106
|
+
- Static generation timeout warnings.
|
|
107
|
+
- Preview build behavior versus production build behavior.
|
|
108
|
+
- Cache invalidation strategy: `revalidatePath`, `revalidateTag`, cache tags, or deploy-based rebuilds.
|
|
109
|
+
- Self-hosted deployments: whether ISR/cache storage is shared across instances.
|
|
110
|
+
|
|
111
|
+
Recommended guardrails:
|
|
112
|
+
|
|
113
|
+
- Cap pre-rendered PSEO paths by environment.
|
|
114
|
+
- Make path generation a single batched query.
|
|
115
|
+
- Cache shared template data separately from per-page data.
|
|
116
|
+
- Use on-demand revalidation for CMS or data updates where available.
|
|
117
|
+
- Add tests that sitemap queries exclude non-indexable statuses.
|
|
118
|
+
|
|
119
|
+
## Output For A Next.js PSEO Audit
|
|
120
|
+
|
|
121
|
+
Return:
|
|
122
|
+
|
|
123
|
+
- `Router and rendering`: current setup and scaling risk.
|
|
124
|
+
- `URL source of truth`: where canonical path/status data lives.
|
|
125
|
+
- `Metadata/schema`: centralization and correctness gaps.
|
|
126
|
+
- `Sitemaps/robots`: shard and inclusion issues.
|
|
127
|
+
- `Crawl graph`: internal link and facet risks.
|
|
128
|
+
- `Build/runtime`: slow-build and cache risks.
|
|
129
|
+
- `Fix plan`: smallest code changes and verification commands.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# PSEO Source Notes
|
|
2
|
+
|
|
3
|
+
Prefer current primary sources when making implementation or policy decisions. Useful starting points:
|
|
4
|
+
|
|
5
|
+
## Google Search Central
|
|
6
|
+
|
|
7
|
+
- Search Essentials: https://developers.google.com/search/docs/essentials
|
|
8
|
+
- Spam policies, including scaled content and doorway abuse: https://developers.google.com/search/docs/essentials/spam-policies
|
|
9
|
+
- Helpful, reliable, people-first content: https://developers.google.com/search/docs/fundamentals/creating-helpful-content
|
|
10
|
+
- AI-generated content guidance: https://developers.google.com/search/docs/fundamentals/using-gen-ai-content
|
|
11
|
+
- Optimizing for generative AI features on Google Search: https://developers.google.com/search/docs/fundamentals/ai-optimization-guide
|
|
12
|
+
- Crawl budget guide: https://developers.google.com/crawling/docs/crawl-budget
|
|
13
|
+
- Canonicalization: https://developers.google.com/search/docs/crawling-indexing/consolidate-duplicate-urls
|
|
14
|
+
- Crawlable links: https://developers.google.com/search/docs/crawling-indexing/links-crawlable
|
|
15
|
+
- Sitemaps overview and large sitemap guidance: https://developers.google.com/search/docs/crawling-indexing/sitemaps/overview
|
|
16
|
+
- Structured data guidelines: https://developers.google.com/search/docs/appearance/structured-data/sd-policies
|
|
17
|
+
- Faceted navigation crawl guidance: https://developers.google.com/crawling/docs/faceted-navigation
|
|
18
|
+
|
|
19
|
+
## Sitemaps
|
|
20
|
+
|
|
21
|
+
- Sitemap protocol: https://www.sitemaps.org/protocol.html
|
|
22
|
+
- Sitemap FAQ: https://www.sitemaps.org/faq.html
|
|
23
|
+
|
|
24
|
+
## Next.js
|
|
25
|
+
|
|
26
|
+
- ISR guide: https://nextjs.org/docs/app/guides/incremental-static-regeneration
|
|
27
|
+
- `generateStaticParams`: https://nextjs.org/docs/app/api-reference/functions/generate-static-params
|
|
28
|
+
- `generateSitemaps`: https://nextjs.org/docs/app/api-reference/functions/generate-sitemaps
|
|
29
|
+
- Metadata and `generateMetadata`: https://nextjs.org/docs/app/api-reference/functions/generate-metadata
|
|
30
|
+
- Robots file conventions: https://nextjs.org/docs/app/api-reference/file-conventions/metadata/robots
|
|
31
|
+
- Sitemap file conventions: https://nextjs.org/docs/app/api-reference/file-conventions/metadata/sitemap
|
|
32
|
+
- Static exports: https://nextjs.org/docs/app/guides/static-exports
|
|
33
|
+
- Caching: https://nextjs.org/docs/app/getting-started/caching
|
|
34
|
+
- Static generation timeout: https://nextjs.org/docs/messages/static-page-generation-timeout
|
|
35
|
+
|
|
36
|
+
## Research And Industry Context
|
|
37
|
+
|
|
38
|
+
- Generative Engine Optimization paper: https://arxiv.org/pdf/2311.09735
|
|
39
|
+
- Ahrefs programmatic SEO guide: https://ahrefs.com/blog/programmatic-seo/
|
|
40
|
+
- Semrush programmatic SEO guide: https://www.semrush.com/blog/programmatic-seo/
|
|
41
|
+
|
|
42
|
+
Treat third-party SEO research as directional evidence, not policy. Prefer Google docs for policy and official framework docs for implementation constraints.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# PSEO Validation
|
|
2
|
+
|
|
3
|
+
## Pre-Launch Checklist
|
|
4
|
+
|
|
5
|
+
For each page family, verify:
|
|
6
|
+
|
|
7
|
+
- A clear target user, intent, and unique value source exists.
|
|
8
|
+
- Each indexable URL has a canonical path and status.
|
|
9
|
+
- Titles, H1s, descriptions, canonicals, robots directives, and schema are generated centrally.
|
|
10
|
+
- Content has page-specific data, not mostly boilerplate.
|
|
11
|
+
- Near-duplicate and cannibalization checks pass.
|
|
12
|
+
- Doorway-page risk has been explicitly reviewed.
|
|
13
|
+
- Internal links use crawlable anchors and canonical URLs.
|
|
14
|
+
- Breadcrumbs reflect the site hierarchy.
|
|
15
|
+
- Sitemaps include only canonical, indexable, 200-status URLs.
|
|
16
|
+
- Facets/search params cannot create infinite crawl spaces.
|
|
17
|
+
- Representative pages pass rendered metadata and schema inspection.
|
|
18
|
+
|
|
19
|
+
## Codebase Audit Checklist
|
|
20
|
+
|
|
21
|
+
Inspect:
|
|
22
|
+
|
|
23
|
+
- Route structure and dynamic SEO routes.
|
|
24
|
+
- URL source of truth: database, CMS, files, generated JSON, or ad hoc slug code.
|
|
25
|
+
- Page statuses and canonical/redirect/noindex handling.
|
|
26
|
+
- Metadata generation and duplicate title/description risks.
|
|
27
|
+
- JSON-LD generation and whether schema content is visible.
|
|
28
|
+
- Sitemap generation, shard size, absolute URLs, and `lastmod`.
|
|
29
|
+
- `robots.txt` sitemap references and disallow rules.
|
|
30
|
+
- Internal links to non-indexable, redirected, duplicate, or parameterized URLs.
|
|
31
|
+
- Build logs: static path counts, timeouts, remote API fanout, and preview behavior.
|
|
32
|
+
- Search/filter/facet/pagination URL behavior.
|
|
33
|
+
|
|
34
|
+
## Mechanical Tests
|
|
35
|
+
|
|
36
|
+
Add or run tests where practical:
|
|
37
|
+
|
|
38
|
+
- URL registry returns only valid indexable URLs for sitemap queries.
|
|
39
|
+
- Sitemap shards stay below URL and size limits.
|
|
40
|
+
- Non-indexable, deleted, duplicate, and redirected pages never appear in sitemaps.
|
|
41
|
+
- Metadata builder emits absolute canonicals.
|
|
42
|
+
- Schema builder omits invisible or unavailable content.
|
|
43
|
+
- Duplicate pages resolve through canonical/noindex/redirect behavior.
|
|
44
|
+
- Empty or invalid facet combinations return `404`.
|
|
45
|
+
- Preview builds do not pre-render the full long-tail inventory.
|
|
46
|
+
|
|
47
|
+
## Manual Spot Checks
|
|
48
|
+
|
|
49
|
+
Sample pages from every template and priority bucket:
|
|
50
|
+
|
|
51
|
+
- View rendered HTML head for title, description, canonical, robots, OG, Twitter, and JSON-LD.
|
|
52
|
+
- Confirm the canonical URL returns the same page and is linked internally.
|
|
53
|
+
- Confirm the page answers its primary intent without forcing a click to another page.
|
|
54
|
+
- Confirm visible page-specific facts match structured data.
|
|
55
|
+
- Confirm related links are useful and not just exact-match SEO blocks.
|
|
56
|
+
|
|
57
|
+
## Launch Strategy
|
|
58
|
+
|
|
59
|
+
- Publish in batches by page family or priority bucket.
|
|
60
|
+
- Submit sitemap indexes after the first batch is live.
|
|
61
|
+
- Watch Search Console for indexed count, discovered-not-indexed, crawled-not-indexed, duplicate without user-selected canonical, and unexpected canonical selection.
|
|
62
|
+
- Compare query-to-URL mapping after each batch.
|
|
63
|
+
- Delay the next batch if Google is ignoring, canonicalizing away, or clustering pages unexpectedly.
|
|
64
|
+
|
|
65
|
+
## Monitoring
|
|
66
|
+
|
|
67
|
+
Track:
|
|
68
|
+
|
|
69
|
+
- Indexed / submitted ratio by sitemap shard.
|
|
70
|
+
- Crawl stats and server errors.
|
|
71
|
+
- Unexpected canonical selection.
|
|
72
|
+
- Query cannibalization.
|
|
73
|
+
- Organic clicks, qualified conversions, and engagement by page family.
|
|
74
|
+
- Stale data and `lastmod` accuracy.
|
|
75
|
+
- Pages with impressions but poor CTR.
|
|
76
|
+
- Pages with no impressions after the review window.
|
|
77
|
+
|
|
78
|
+
Use monitoring to prune. More pages are not better if they dilute crawl demand, create cannibalization, or fail to serve users.
|
package/src/channels/slack.ts
CHANGED
|
@@ -430,10 +430,20 @@ class SlackChannel implements Channel {
|
|
|
430
430
|
);
|
|
431
431
|
|
|
432
432
|
const reply = result.trim();
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
433
|
+
const cleaned = cleanSentinel(reply);
|
|
434
|
+
|
|
435
|
+
// [NO_REPLY] anywhere in the reply suppresses the send. If it appeared
|
|
436
|
+
// alongside real content the model got confused — warn so we can spot it.
|
|
437
|
+
if (!reply || cleaned.includes("[NO_REPLY]")) {
|
|
438
|
+
const exact = !reply || cleaned === "[NO_REPLY]";
|
|
439
|
+
if (exact) {
|
|
440
|
+
log.info({ channel: msg.channel, key }, "slack: agent chose not to reply");
|
|
441
|
+
} else {
|
|
442
|
+
log.warn(
|
|
443
|
+
{ channel: msg.channel, key, reply },
|
|
444
|
+
"slack: [NO_REPLY] sentinel mixed with content; suppressing send",
|
|
445
|
+
);
|
|
446
|
+
}
|
|
437
447
|
if (messageId) await Message.updateDeliveryStatus(messageId, "sent").catch(() => {});
|
|
438
448
|
return;
|
|
439
449
|
}
|
package/src/core/runner.ts
CHANGED
|
@@ -56,10 +56,24 @@ async function runJobWithCodex(fullPrompt: string, cwd: string, model: string):
|
|
|
56
56
|
args.splice(3, 0, "-m", model);
|
|
57
57
|
}
|
|
58
58
|
|
|
59
|
+
const CODEX_EXCLUDED = new Set([
|
|
60
|
+
"ANTHROPIC_API_KEY",
|
|
61
|
+
"OPENAI_API_KEY",
|
|
62
|
+
"GEMINI_API_KEY",
|
|
63
|
+
"SLACK_BOT_TOKEN",
|
|
64
|
+
"SLACK_APP_TOKEN",
|
|
65
|
+
"TELEGRAM_BOT_TOKEN",
|
|
66
|
+
"TWILIO_AUTH_TOKEN",
|
|
67
|
+
"DATABASE_URL",
|
|
68
|
+
]);
|
|
69
|
+
const codexEnv = Object.fromEntries(
|
|
70
|
+
Object.entries(process.env).filter(([k]) => !CODEX_EXCLUDED.has(k))
|
|
71
|
+
);
|
|
72
|
+
|
|
59
73
|
const proc = Bun.spawn(args, {
|
|
60
74
|
stdout: "pipe",
|
|
61
75
|
stderr: "pipe",
|
|
62
|
-
env:
|
|
76
|
+
env: codexEnv,
|
|
63
77
|
});
|
|
64
78
|
|
|
65
79
|
const stdout = await new Response(proc.stdout).text();
|