hubspot-cms-sync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +52 -0
- package/bin/hubspot-cms-sync.mjs +115 -0
- package/docs/CONFIGURATION.md +83 -0
- package/docs/GITHUB_ACTIONS.md +70 -0
- package/docs/MIGRATION_PLAN.md +361 -0
- package/docs/PLAN_REVIEW.md +42 -0
- package/docs/SKILL_DISTRIBUTION.md +79 -0
- package/examples/github-actions/ci.yml +56 -0
- package/examples/github-actions/preview.yml +71 -0
- package/examples/github-actions/publish.yml +82 -0
- package/examples/hubspot-cms-sync.config.mjs +45 -0
- package/examples/site.manifest.json +19 -0
- package/package.json +41 -0
- package/skill/SKILL.md +54 -0
- package/skill/references/commands.md +54 -0
- package/skill/references/config.md +25 -0
- package/skill/references/failures.md +58 -0
- package/skill/references/github-actions.md +56 -0
- package/skill/references/screenshots-and-fidelity.md +33 -0
- package/src/adapters/assets.mjs +576 -0
- package/src/adapters/blog.mjs +921 -0
- package/src/adapters/content.mjs +213 -0
- package/src/adapters/forms.mjs +569 -0
- package/src/adapters/pages.mjs +463 -0
- package/src/adapters/theme.mjs +503 -0
- package/src/config.mjs +113 -0
- package/src/corpus-scan.mjs +248 -0
- package/src/cta-inventory.mjs +352 -0
- package/src/index.mjs +3 -0
- package/src/lib/canonical.mjs +234 -0
- package/src/lib/hub.mjs +197 -0
- package/src/lib/orchestrate.mjs +141 -0
- package/src/lib/refs.mjs +398 -0
- package/src/lib/sync-state.mjs +86 -0
- package/src/manifest.mjs +353 -0
- package/src/preflight.mjs +385 -0
- package/src/pull.mjs +99 -0
- package/src/push.mjs +354 -0
- package/src/republish.mjs +102 -0
|
@@ -0,0 +1,921 @@
|
|
|
1
|
+
// sync/adapters/blog.mjs — blog adapter for the bidirectional sync framework.
|
|
2
|
+
//
|
|
3
|
+
// Refactors sync/blog-sync.mjs into the adapter interface (pull/push), wired to
|
|
4
|
+
// the Stage 1 foundation (sync/lib/hub.mjs, canonical.mjs, refs.mjs). It owns the
|
|
5
|
+
// blog container, authors, tags and posts of ONE account.
|
|
6
|
+
//
|
|
7
|
+
// What changed vs sync/blog-sync.mjs (codex findings #5/#6/#7):
|
|
8
|
+
//
|
|
9
|
+
// #6 Identity by blogSlug, NOT blogName / blogs[0]. Pull records each post's
|
|
10
|
+
// blogSlug, authorSlug and tagSlugs (slug-keyed, portable). Push selects the
|
|
11
|
+
// EXACT container by slug via hub.resolveBlogBySlug (which matches by slug,
|
|
12
|
+
// never objects[0]), and FAILS on an ambiguous / missing container — the stale
|
|
13
|
+
// "Old" blog (slug blog-old-pages) can never win.
|
|
14
|
+
//
|
|
15
|
+
// #5 URL-rewrite query-string bug. The old rewriteUrls() replaced the bare URL
|
|
16
|
+
// first, so `orig?width=...` lost its prefix and the query-variant regex no
|
|
17
|
+
// longer matched. Here canonicalize-on-pull (rawUrlToToken) rewrites the
|
|
18
|
+
// `orig?query` form BEFORE the bare `orig`, so query-string variants collapse
|
|
19
|
+
// onto the same @asset token. Unit-tested.
|
|
20
|
+
//
|
|
21
|
+
// #7 Two-phase publish-date. Scheduling a post requires a FUTURE publishDate, but
|
|
22
|
+
// that clobbers the real 2017–2026 date. So push (when publishing) schedules a
|
|
23
|
+
// near-future date, polls the LIVE post until it goes PUBLISHED, then PATCHes
|
|
24
|
+
// the ORIGINAL publishDate back to preserve chronology. publishPost() is the
|
|
25
|
+
// pure-ish driver; the schedule/poll/patch hub calls are injected so it is
|
|
26
|
+
// unit-testable without the network.
|
|
27
|
+
//
|
|
28
|
+
// Assets. Post bodies embed hosted image URLs on legacy hosts (cdn2.hubspot.net,
|
|
29
|
+
// *.hubspotusercontent*, googleusercontent, theseventhsense.com). Pull rewrites
|
|
30
|
+
// each KNOWN asset URL (from content/blog/assets/manifest.json: originalUrl ->
|
|
31
|
+
// localFile) into a logical @asset:<localFile> token and registers the asset key
|
|
32
|
+
// in the refs registry. Push re-hosts each asset to the TARGET File Manager and
|
|
33
|
+
// resolves @asset tokens to the target's hosted URLs (refs.resolve hard-fails on
|
|
34
|
+
// any unmapped asset). Canonical post JSON therefore NEVER carries a hosted URL.
|
|
35
|
+
//
|
|
36
|
+
// Adapter contract:
|
|
37
|
+
// pull(acct, { contentDir, registry }) -> { pulled, notes }
|
|
38
|
+
// push(acct, { contentDir, registry }) -> { pushed, notes }
|
|
39
|
+
// PRODUCTION 529456 is READ-ONLY; this adapter never hardcodes a portal — the
|
|
40
|
+
// orchestrator passes `acct`, and push writes only to whatever acct it is given.
|
|
41
|
+
|
|
42
|
+
import {
|
|
43
|
+
readFileSync,
|
|
44
|
+
writeFileSync,
|
|
45
|
+
mkdirSync,
|
|
46
|
+
readdirSync,
|
|
47
|
+
existsSync,
|
|
48
|
+
} from 'node:fs';
|
|
49
|
+
import { join, resolve as resolvePath, basename, extname } from 'node:path';
|
|
50
|
+
import { createHash } from 'node:crypto';
|
|
51
|
+
|
|
52
|
+
import { hub, getAll } from '../lib/hub.mjs';
|
|
53
|
+
import { stableStringify } from '../lib/canonical.mjs';
|
|
54
|
+
import { resolve as resolveRefs, canonicalize as canonicalizeRefs } from '../lib/refs.mjs';
|
|
55
|
+
import { resolveCtaEmbeds, loadInventory } from '../cta-inventory.mjs';
|
|
56
|
+
|
|
57
|
+
// Origin of the HubSpot REST API.
const API = 'https://api.hubapi.com';

// Adapter identifier.
export const name = 'blog';

// Post bodies carry @asset tokens. push() in this file does not upload assets
// itself; it expects registry.assets to have been filled by the `assets` adapter,
// which is why that adapter is declared as a dependency here. Known CTA embeds are
// rewritten to plain links during pull (see canonicalizeField), so no CTA adapter
// is listed.
export const dependsOn = ['assets'];

// ── layout ──────────────────────────────────────────────────────────────────
// contentDir is the repo root content dir; the blog lives under content/blog.
const BLOG_SUBDIR = 'blog';
const POSTS_SUBDIR = 'posts';
const ASSETS_SUBDIR = 'assets';
const CONTAINER_FILE = 'container.json';
|
|
71
|
+
|
|
72
|
+
/**
 * Absolute path of the blog content directory: `<contentDir>/blog`.
 *
 * @param {string} contentDir - content root; resolved against the current working directory.
 * @returns {string} absolute path of the blog sub-directory.
 */
function blogDir(contentDir) {
  const absoluteRoot = resolvePath(contentDir);
  return join(absoluteRoot, BLOG_SUBDIR);
}
|
|
75
|
+
|
|
76
|
+
// ── asset URL helpers ─────────────────────────────────────────────────────────
|
|
77
|
+
|
|
78
|
+
// Hosts that carry per-account / legacy imagery we want to canonicalize away.
const IMG_HOST = /(hubfs|hubspotusercontent|cdn\d*\.hubspot\.net|theseventhsense\.com|googleusercontent)/i;

/**
 * Deterministic local filename for an original asset URL (mirrors blog-sync.mjs so
 * an existing assets/manifest.json keeps working). Same input -> same name.
 *
 * The name is `<first 10 hex chars of sha1(url)>-<sanitized basename>`:
 *   - the query string is dropped before taking the basename;
 *   - the basename is percent-decoded, every character outside `[\w.-]` becomes `_`,
 *     and only the last 60 characters are kept;
 *   - `.img` is appended when the result has no extension.
 *
 * @param {string} url - original asset URL (coerced with String()).
 * @returns {string} the local file name.
 */
export function localAssetName(url) {
  // Coerce once so the hash and the basename are derived from the same text.
  const raw = String(url);
  const digest = createHash('sha1').update(raw).digest('hex').slice(0, 10);

  const tail = basename(raw.split('?')[0]);
  let decoded = tail;
  try {
    decoded = decodeURIComponent(tail);
  } catch (err) {
    // A stray `%` (e.g. `100%.png`) is not a valid escape sequence; keep the
    // undecoded basename instead of aborting. The `%` is sanitized to `_` below.
    if (!(err instanceof URIError)) throw err;
  }

  let base = decoded.replace(/[^\w.\-]/g, '_').slice(-60);
  if (!extname(base)) base += '.img';
  return `${digest}-${base}`;
}
|
|
91
|
+
|
|
92
|
+
// Escape a string for safe embedding in a RegExp.
function reEscape(s) {
  return String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * rawUrlToToken(text, assetMap) -> text with raw asset URLs replaced by tokens.
 *
 * assetMap: { originalUrl -> localFileName }. For each entry BOTH the query-string
 * variant (`orig?...`) AND the bare `orig` are replaced with `@asset:<localFile>`.
 *
 * Ordering rules (both matter):
 *   - Per entry, the `orig?query` form is rewritten FIRST. Doing the bare replace
 *     first would turn `orig?width=80` into `token?width=80` and leave a dangling
 *     query that the query regex can no longer match (codex #5).
 *   - Across entries, LONGER original URLs are processed first, so a URL that is a
 *     prefix of another (`…/a.png` vs `…/a.png.webp`) cannot rewrite part of the
 *     longer one regardless of the manifest's key order.
 *
 * The token is inserted literally: `$`-sequences in a file name are never treated
 * as replacement patterns.
 *
 * Pure: no I/O. Returns the input unchanged when text is empty/non-string.
 *
 * @param {string} text - HTML / URL text to rewrite.
 * @param {Object<string, string>} [assetMap] - originalUrl -> localFileName.
 * @returns {string} rewritten text (or the original value when not a non-empty string).
 */
export function rawUrlToToken(text, assetMap) {
  if (typeof text !== 'string' || text.length === 0) return text;

  const entries = Object.entries(assetMap || {})
    .filter(([orig, file]) => orig && file)
    // Longest URL first; Array.prototype.sort is stable, so equal lengths keep
    // their manifest order.
    .sort(([a], [b]) => b.length - a.length);

  let out = text;
  for (const [orig, file] of entries) {
    const token = `@asset:${file}`;
    // 1. query-string variant FIRST: orig?<query> (up to a quote/space/paren).
    //    A function replacer keeps `$&`, `$1`, … in the file name literal.
    const withQuery = new RegExp(reEscape(orig) + `\\?[^"'\\s)]*`, 'g');
    out = out.replace(withQuery, () => token);
    // 2. then the bare URL.
    out = out.split(orig).join(token);
  }
  return out;
}
|
|
123
|
+
|
|
124
|
+
/**
 * canonicalizeField(text, assetMap, registry, ctaCtx) -> portable text.
 *
 * Runs one field (post body, summary, featured image URL) through the passes below,
 * in this exact order:
 *
 *   1. CTA embeds (only when `ctaCtx.inventory` is set): resolveCtaEmbeds rewrites
 *      the text. If the text changed, `ctaCtx.resolved` is incremented by one for
 *      this field; every unresolved id and note it reports is added to the
 *      `ctaCtx.unresolved` / `ctaCtx.notes` sets. This runs before any asset/ref
 *      pass so CTA markup is handled before canonicalizeRefs sees it.
 *   2. Manifest pass: rawUrlToToken swaps URLs known to `assetMap` for
 *      `@asset:<localFile>` tokens.
 *   3. When no `registry` is given, stop here.
 *   4. stripHubfsQuery drops `?<query>` from hosted hubfs URLs so width variants
 *      and the bare URL end up on the same token.
 *   5. canonicalizeRefs folds whatever per-account references are left into
 *      logical tokens (codex #4: URLs from a re-hosted account are not manifest
 *      keys, so the manifest pass alone would leave them literal).
 *
 * Empty or non-string input is returned untouched.
 *
 * @param {string} text - field content.
 * @param {Object<string, string>} assetMap - originalUrl -> localFileName.
 * @param {object} [registry] - refs registry handed to canonicalizeRefs.
 * @param {object} [ctaCtx] - accumulator { inventory, unresolved: Set, notes: Set, resolved }.
 * @returns {string} canonicalized text.
 */
export function canonicalizeField(text, assetMap, registry, ctaCtx) {
  const hasContent = typeof text === 'string' && text.length > 0;
  if (!hasContent) return text;

  let working = text;

  // Pass 1 — CTA embeds, tracked through the caller's accumulator.
  if (ctaCtx && ctaCtx.inventory) {
    const outcome = resolveCtaEmbeds(working, ctaCtx.inventory);
    if (outcome.text !== working) {
      ctaCtx.resolved = (ctaCtx.resolved || 0) + 1;
    }
    working = outcome.text;
    for (const guid of outcome.unresolved) {
      ctaCtx.unresolved.add(guid);
    }
    for (const note of outcome.notes) {
      ctaCtx.notes.add(note);
    }
  }

  // Pass 2 — URLs the manifest knows about.
  const tokenized = rawUrlToToken(working, assetMap);
  if (!registry) return tokenized;

  // Passes 4 + 5 — collapse hubfs query variants, then fold the remaining refs.
  const withoutQueries = stripHubfsQuery(tokenized);
  return canonicalizeRefs(withoutQueries, registry);
}
|
|
180
|
+
|
|
181
|
+
// Matches a hosted `…/hubfs/<portal>/<tail>?<query>` URL: group 1 is everything up
// to (not including) the `?`; the query runs to the next quote, whitespace or `)`.
// The portal segment must be at least five digits.
const HUBFS_WITH_QUERY = /(https?:\/\/[a-z0-9.-]+\/hubfs\/\d{5,}\/[^"'\\\s)?]+)\?[^"'\s)]*/gi;

/**
 * Remove the query string from every hosted hubfs URL in `text`, so a query variant
 * and its bare URL canonicalize onto the same @asset token. URLs that do not match
 * the hubfs shape keep their query. Empty / non-string input is returned as-is.
 *
 * @param {string} text
 * @returns {string}
 */
export function stripHubfsQuery(text) {
  const hasContent = typeof text === 'string' && text.length > 0;
  return hasContent
    ? text.replace(HUBFS_WITH_QUERY, (_whole, bareUrl) => bareUrl)
    : text;
}
|
|
190
|
+
|
|
191
|
+
// ── container ──────────────────────────────────────────────────────────────────
|
|
192
|
+
|
|
193
|
+
/**
 * List the account's blog containers (content groups) through the legacy v2
 * endpoint, which reports id + slug. Requests a single page of up to 100.
 *
 * @param {object} acct - account descriptor, passed through to `hubFn`.
 * @param {Function} [hubFn=hub] - `(acct, method, path) -> Promise<{ ok, status, json }>`;
 *   injectable so the function can be exercised without the network.
 * @returns {Promise<Array<{id: string, name, slug, url, itemTemplatePath,
 *   listingTemplatePath, listingPageId}>>}
 * @throws {Error} when the response is not ok; the message carries the HTTP status
 *   and the best available detail from the body.
 */
async function listBlogs(acct, hubFn = hub) {
  const { ok, status, json } = await hubFn(acct, 'GET', '/content/api/v2/blogs?limit=100');
  if (!ok) {
    // JSON.stringify(undefined) is undefined, so guard before slicing — otherwise
    // a body-less error response surfaces as a TypeError instead of the status.
    const rawBody = JSON.stringify(json);
    const msg =
      json?.message ||
      json?.category ||
      (typeof rawBody === 'string' ? rawBody.slice(0, 200) : '(no response body)');
    throw new Error(`GET /content/api/v2/blogs -> ${status}: ${msg}`);
  }
  return (json.objects || []).map((b) => ({
    id: String(b.id),
    name: b.name,
    slug: b.slug,
    url: b.absolute_url,
    itemTemplatePath: b.item_template_path,
    listingTemplatePath: b.listing_template_path,
    listingPageId: b.listing_page_id,
  }));
}
|
|
210
|
+
|
|
211
|
+
// ── PULL ─────────────────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
/**
 * pull(acct, { contentDir, registry }) -> { pulled, notes }
 *
 * Reads the account's blog containers, authors, tags and posts and writes canonical
 * files under <contentDir>/blog:
 *   - one container file per live container (file name from containerFileFor(slug));
 *   - one `<postFileFor(slug)>.json` per post under `posts/`.
 *
 * Containers whose slug is `blog-old-pages`, or whose name carries an "| Old"
 * marker, are treated as stale (codex #6): no container file is written for them
 * and their posts are skipped. Posts without a slug are skipped too.
 *
 * featuredImage, postBody and postSummary go through canonicalizeField, so asset
 * URLs become @asset tokens and CTA embeds are resolved where the inventory allows.
 * Every local file name in the asset manifest is registered in `registry.assets`
 * (existing values are kept), and `registry.__rev_assets` is deleted.
 *
 * @param {object} acct - source account (portalId and name are read here).
 * @param {{contentDir: string, registry: object}} ctx
 * @returns {Promise<{pulled: number, notes: string[]}>}
 * @throws {Error} when no live container exists, or listBlogs / getAll fail.
 */
export async function pull(acct, { contentDir, registry }) {
  const notes = [];
  const dir = blogDir(contentDir);
  const postsOut = join(dir, POSTS_SUBDIR);
  mkdirSync(postsOut, { recursive: true });

  const byId = (rows) => new Map(rows.map((row) => [String(row.id), row]));

  const blogs = await listBlogs(acct);
  // Stale "Old" blog (codex #6): slug blog-old-pages, or an "| Old" marker in the name.
  const isStale = (b) => b.slug === 'blog-old-pages' || /\|\s*old\b/i.test(b.name || '');
  const liveBlogs = blogs.filter((b) => !isStale(b));
  if (!liveBlogs.length) {
    throw new Error(`No live blog container found for portal ${acct.portalId} (all containers look stale/Old).`);
  }
  const liveSlugs = new Set(liveBlogs.map((b) => b.slug));
  // Indexed over ALL containers so a post can be traced to a stale one and skipped.
  const blogById = byId(blogs);

  const authors = await getAll(acct, '/cms/v3/blogs/authors');
  const tags = await getAll(acct, '/cms/v3/blogs/tags');
  const posts = await getAll(acct, '/cms/v3/blogs/posts');

  const authorById = byId(authors);
  const tagById = byId(tags);

  // One canonical config file per live container, keyed by slug.
  for (const { slug, name: blogName, itemTemplatePath, listingTemplatePath } of liveBlogs) {
    const container = {
      slug,
      name: blogName,
      itemTemplatePath: itemTemplatePath || '',
      listingTemplatePath: listingTemplatePath || '',
      // Always stored as 0 so diffs stay clean and push re-clears the override
      // (SYNC-NOTES §4: a non-zero listingPageId masks the template).
      listingPageId: 0,
    };
    writeFileSync(join(dir, containerFileFor(slug)), stableStringify(container));
  }

  // originalUrl -> localFile from the committed manifest, so the same bytes map to
  // the same @asset key on every account.
  const assetMap = loadAssetManifest(dir);
  // Per-pull CTA accumulator; canonicalizeField fills it while rewriting fields.
  const ctaCtx = {
    inventory: loadInventory(acct.portalId),
    unresolved: new Set(),
    notes: new Set(),
    resolved: 0,
  };
  // Make every manifest key known to the registry without overwriting a value
  // that is already there.
  for (const file of Object.values(assetMap)) {
    registry.assets[file] ??= true;
  }
  delete registry.__rev_assets;

  let pulled = 0;
  for (const p of posts) {
    const container = blogById.get(String(p.contentGroupId));
    const inLiveBlog = Boolean(container) && liveSlugs.has(container.slug);
    if (!inLiveBlog || !p.slug) continue;

    const author = authorById.get(String(p.blogAuthorId));
    const authorLabel = author?.displayName || author?.fullName;
    const postTags = (p.tagIds || [])
      .map((tagId) => tagById.get(String(tagId)))
      .filter(Boolean);

    // NOTE(review): tagSlugs is sorted while tagNames keeps tagIds order — confirm
    // nothing downstream pairs the two arrays by index.
    const portable = {
      slug: p.slug,
      blogSlug: container.slug,
      name: p.name,
      htmlTitle: p.htmlTitle || p.name,
      state: p.state,
      authorSlug: author?.slug || slugifyName(authorLabel) || null,
      authorName: authorLabel || null,
      tagSlugs: postTags
        .map((t) => t.slug || slugifyName(t.name))
        .filter(Boolean)
        .sort(),
      tagNames: postTags.map((t) => t.name).filter(Boolean),
      metaDescription: p.metaDescription || '',
      featuredImage: canonicalizeField(canonUrl(p.featuredImage), assetMap, registry, ctaCtx),
      featuredImageAltText: p.featuredImageAltText || '',
      useFeaturedImage: p.useFeaturedImage ?? false,
      postBody: canonicalizeField(p.postBody || '', assetMap, registry, ctaCtx),
      postSummary: canonicalizeField(p.postSummary || '', assetMap, registry, ctaCtx),
      // Kept verbatim: publishDate is the canonical chronology source (codex #7),
      // i.e. content, not a volatile timestamp.
      publishDate: p.publishDate || null,
    };

    const target = join(postsOut, `${postFileFor(p.slug)}.json`);
    writeFileSync(target, stableStringify(portable));
    pulled += 1;
  }

  // Report every CTA that could not be resolved — loud, never silent.
  notes.push(...ctaCtx.notes);
  if (ctaCtx.unresolved.size > 0) {
    notes.push(
      `⚠ ${ctaCtx.unresolved.size} CTA(s) preserved as raw embed HTML (unknown / still-tracked): ` +
        `${[...ctaCtx.unresolved].sort().join(', ')}. Run \`node sync/cta-inventory.mjs ${acct.name}\` ` +
        `to resolve them before pushing (the push preflight will fail-closed on any surviving @cta token).`,
    );
  }

  notes.push(
    `containers: ${liveBlogs.length} | authors: ${authors.length} | tags: ${tags.length} | posts: ${pulled} | CTA blocks resolved: ${ctaCtx.resolved || 0} | CTAs preserved: ${ctaCtx.unresolved.size}`,
  );
  return { pulled, notes };
}
|
|
342
|
+
|
|
343
|
+
// Normalizes a featured-image value to a string: any falsy input becomes '' and
// anything else is coerced with String(). The URL itself — including a `?width=`
// query — is passed through unchanged.
function canonUrl(u) {
  return u ? String(u) : '';
}
|
|
349
|
+
|
|
350
|
+
// ── PUSH ─────────────────────────────────────────────────────────────────────
|
|
351
|
+
|
|
352
|
+
/**
 * push(acct, { contentDir, registry, publish, limit, dryRun, hubFn, now, sleep })
 *   -> { pushed, notes }
 *
 * Reads the canonical post files under <contentDir>/blog/posts (sorted by file
 * name, optionally truncated to `limit`) and creates-or-updates each post on the
 * target account, matching existing posts by slug.
 *
 * Per post:
 *   - the container id is looked up by the post's blogSlug (once per slug); a
 *     missing container fails that post with the "create the blog first" message.
 *     Outside dry runs, the first lookup for a slug also applies the committed
 *     container config;
 *   - author and tag ids come from ensureAuthor / ensureTag;
 *   - postBody, postSummary and featuredImage go through refs.resolve with
 *     `registry`. This function uploads no assets itself — registry.assets is
 *     expected to be filled already (the `assets` adapter is a declared dependency);
 *   - existing slug -> PATCH, otherwise POST;
 *   - with `publish`, publishPost runs the schedule/poll/patch sequence.
 *
 * After the loop (publish, non-dry-run only) the function waits until the latest
 * scheduled publish time plus SCHEDULE_SETTLE_MS, then re-applies every recorded
 * publishDate through restoreCanonicalDate, so a late-firing schedule cannot
 * overwrite a restored date (codex #7).
 *
 * A failing post is recorded as a `✖` note and does not stop the loop; outside dry
 * runs any failure makes the function throw at the end (codex #9).
 *
 * @param {object} acct - target account.
 * @param {object} [opts]
 * @param {string} opts.contentDir - content root.
 * @param {object} opts.registry - refs registry used to resolve tokens.
 * @param {boolean} [opts.publish=false] - publish instead of leaving drafts.
 * @param {number} [opts.limit] - only push the first N post files.
 * @param {boolean} [opts.dryRun=false] - report would-create/update, skip post writes.
 * @param {Function} [opts.hubFn=hub] - injectable HTTP function.
 * @param {Function} [opts.now] - injectable clock (epoch ms).
 * @param {Function} [opts.sleep=defaultSleep] - injectable sleep.
 * @returns {Promise<{pushed: number, notes: string[]}>} pushed = created + updated.
 * @throws {Error} when the posts directory is missing, or any post failed.
 */
export async function push(
  acct,
  {
    contentDir,
    registry,
    publish = false,
    limit,
    dryRun = false,
    hubFn = hub,
    now = () => Date.now(),
    sleep = defaultSleep,
  } = {},
) {
  const notes = [];
  const dir = blogDir(contentDir);
  const postsDir = join(dir, POSTS_SUBDIR);
  if (!existsSync(postsDir)) {
    throw new Error(`No posts at ${postsDir} — run blog.pull first.`);
  }

  const jsonFiles = readdirSync(postsDir)
    .filter((f) => f.endsWith('.json'))
    .sort();
  const files = limit ? jsonFiles.slice(0, limit) : jsonFiles;
  const posts = files.map((f) => JSON.parse(readFileSync(join(postsDir, f), 'utf8')));

  // blogSlug -> container id; each slug is resolved (and configured) only once.
  const containerCache = new Map();
  const containerIdFor = async (blogSlug) => {
    if (containerCache.has(blogSlug)) return containerCache.get(blogSlug);

    const blog = await resolveBlogObjBySlugVia(hubFn, acct, blogSlug);
    if (!blog) {
      throw new Error(
        `No blog container with slug "${blogSlug}" on portal ${acct.portalId}. ` +
          `Creating a blog is UI-gated — create it once in Settings → Website → Blog ` +
          `(SYNC-NOTES §4), then re-run.`,
      );
    }
    const containerId = String(blog.id);
    // Cached BEFORE the config is applied, so a config failure is not retried for
    // the remaining posts of the same container.
    containerCache.set(blogSlug, containerId);
    if (!dryRun) {
      // Apply item/listing template paths from the committed container file and
      // clear the listing_page_id override (SYNC-NOTES §4).
      const configNote = await applyContainerConfig(hubFn, acct, dir, blogSlug, blog);
      if (configNote) notes.push(configNote);
    }
    return containerId;
  };

  const authorCache = await nameIndex(hubFn, acct, '/cms/v3/blogs/authors', ['slug', 'displayName']);
  const tagCache = await nameIndex(hubFn, acct, '/cms/v3/blogs/tags', ['slug', 'name']);
  const remotePosts = await getAllVia(hubFn, acct, '/cms/v3/blogs/posts');
  const existing = new Map(remotePosts.map((remote) => [remote.slug, String(remote.id)]));

  const tally = { created: 0, updated: 0, published: 0, failed: 0 };
  // Dates to re-apply in the FINAL pass, once every schedule has fired.
  const toRestore = [];
  // Latest epoch-ms at which any post is scheduled to auto-publish.
  let latestScheduleMs = 0;

  for (const p of posts) {
    try {
      const contentGroupId = await containerIdFor(p.blogSlug);
      // NOTE(review): ensureAuthor / ensureTag run before the dryRun check below —
      // confirm they do not create records on the target during a dry run.
      const blogAuthorId = await ensureAuthor(acct, p, authorCache, hubFn);
      const tagIds = [];
      for (const pair of postTagPairs(p)) {
        tagIds.push(await ensureTag(acct, pair, tagCache, hubFn));
      }

      const body = {
        contentGroupId,
        name: p.name,
        htmlTitle: p.htmlTitle || p.name,
        slug: p.slug,
        // Logical tokens -> THIS account's values; refs.resolve throws on any
        // unmapped token, which fails this post loudly.
        postBody: resolveRefs(p.postBody || '', registry),
        postSummary: resolveRefs(p.postSummary || '', registry),
        metaDescription: p.metaDescription || '',
        featuredImage: resolveRefs(p.featuredImage || '', registry),
        featuredImageAltText: p.featuredImageAltText || '',
        useFeaturedImage: p.useFeaturedImage ?? false,
        blogAuthorId,
        tagIds,
        // Always send the canonical date so a re-push restores real chronology
        // instead of keeping the time left behind by an earlier schedule.
        publishDate: p.publishDate || undefined,
        state: publish ? 'PUBLISHED' : 'DRAFT',
      };

      const alreadyThere = existing.has(p.slug);
      if (dryRun) {
        notes.push(`would ${alreadyThere ? 'update' : 'create'}: ${p.slug}`);
        continue;
      }

      let id;
      if (alreadyThere) {
        id = existing.get(p.slug);
        await hubOk(hubFn, acct, 'PATCH', `/cms/v3/blogs/posts/${id}`, body);
        tally.updated += 1;
      } else {
        const createdPost = await hubOk(hubFn, acct, 'POST', '/cms/v3/blogs/posts', body);
        id = String(createdPost.id);
        existing.set(p.slug, id);
        tally.created += 1;
      }

      if (publish) {
        const outcome = await publishPost(acct, id, p.publishDate, { hubFn, now, sleep });
        if (outcome.scheduledMs > latestScheduleMs) latestScheduleMs = outcome.scheduledMs;
        if (p.publishDate) {
          toRestore.push({ id, slug: p.slug, publishDate: p.publishDate });
        }
        tally.published += 1;
      }
    } catch (e) {
      tally.failed += 1;
      notes.push(`✖ ${p.slug}: ${e.message}`);
    }
  }

  // FINAL date-restore pass (codex #7). It must start only after the LATEST
  // scheduled publish (plus a settle margin) has passed; otherwise a schedule that
  // fires late overwrites the date just restored.
  let restored = 0;
  if (!dryRun && publish && toRestore.length) {
    await waitUntil(latestScheduleMs + SCHEDULE_SETTLE_MS, { now, sleep });
    for (const entry of toRestore) {
      const confirmed = await restoreCanonicalDate(acct, entry.id, entry.publishDate, { hubFn, sleep });
      if (confirmed) {
        restored += 1;
      } else {
        notes.push(`⚠ date-restore unconfirmed for ${entry.slug} (verify chronology)`);
      }
    }
  }

  const { created, updated, published, failed } = tally;
  notes.push(
    `created ${created} | updated ${updated} | published ${published} | restored ${restored} | failed ${failed}`,
  );

  // codex #9: a per-post failure must not look like a clean push. Writes already
  // made stay in place, but the run as a whole is reported as failed. Dry runs
  // never throw here.
  if (!dryRun && failed > 0) {
    const offenders = notes.filter((n) => n.startsWith('✖')).join('\n ');
    throw new Error(
      `blog.push: ${failed} post(s) failed — push did NOT complete cleanly ` +
        `(no silent data loss):\n ${offenders}`,
    );
  }

  return { pushed: created + updated, notes };
}
|
|
539
|
+
|
|
540
|
+
// ── two-phase publish (codex #7) ───────────────────────────────────────────────
|
|
541
|
+
|
|
542
|
+
/**
 * publishPost(acct, id, originalPublishDate, { hubFn, now, pollMs, maxPolls, sleep })
 *   -> { id, publishDate, scheduledMs }
 *
 * Two-phase publish driver (codex #7):
 *   1. SCHEDULE — POST /cms/v3/blogs/posts/schedule with a publishDate 90 seconds
 *      after `now()`, formatted by toIso.
 *   2. POLL — GET the post up to `maxPolls` times, sleeping `pollMs` after every
 *      attempt that does not report PUBLISHED (read from `state`, falling back to
 *      `currentState`). A non-ok response counts as "not yet".
 *   3. RESTORE — when an original publish date was supplied, PATCH it back
 *      (normalized by normalizeDate) so the scheduled time does not replace the
 *      real chronology.
 *
 * All side effects go through the injected hubFn / sleep / now.
 *
 * @param {object} acct - target account.
 * @param {string|number} id - post id.
 * @param {string|null|undefined} originalPublishDate - canonical date to restore.
 * @param {object} [opts]
 * @returns {Promise<{id: string, publishDate: string, scheduledMs: number}>}
 *   publishDate is the normalized original date, or the scheduled time when no
 *   original date was given; scheduledMs lets the caller wait past the schedule.
 * @throws {Error} when the post never reports PUBLISHED within maxPolls attempts,
 *   or when a hubOk call fails.
 */
export async function publishPost(
  acct,
  id,
  originalPublishDate,
  { hubFn = hub, now = () => Date.now(), pollMs = 5000, maxPolls = 40, sleep = defaultSleep } = {},
) {
  const postId = String(id);
  const postPath = `/cms/v3/blogs/posts/${id}`;

  // Phase 1 — schedule a near-future publish.
  const scheduledMs = now() + 90_000;
  const future = toIso(scheduledMs);
  await hubOk(hubFn, acct, 'POST', '/cms/v3/blogs/posts/schedule', {
    id: postId,
    publishDate: future,
  });

  // Phase 2 — poll until the post reports PUBLISHED or the attempts run out.
  let live = false;
  let attempts = 0;
  while (!live && attempts < maxPolls) {
    attempts += 1;
    const { ok, json } = await hubFn(acct, 'GET', postPath);
    const state = ok ? json?.state || json?.currentState : null;
    if (state === 'PUBLISHED') {
      live = true;
    } else {
      await sleep(pollMs);
    }
  }
  if (!live) {
    throw new Error(`post ${id} did not reach PUBLISHED after scheduling (timeout).`);
  }

  // Phase 3 — put the real publish date back.
  if (originalPublishDate) {
    await hubOk(hubFn, acct, 'PATCH', postPath, {
      publishDate: normalizeDate(originalPublishDate),
    });
  }

  const publishDate = originalPublishDate ? normalizeDate(originalPublishDate) : future;
  return { id: postId, publishDate, scheduledMs };
}
|
|
599
|
+
|
|
600
|
+
/**
 * restoreCanonicalDate(acct, id, originalPublishDate, { hubFn, sleep, tries, settleMs })
 *   -> boolean
 *
 * FINAL-pass date restore (codex #7). PATCHes the canonical publishDate, then reads
 * the post back and checks that it reports the same calendar day (first 10 chars of
 * the date string). If it does not — e.g. a late scheduled publish clobbered it —
 * the PATCH + read-back is retried up to `tries` times, `settleMs` apart.
 * Run AFTER every post's schedule has fired so there is nothing left to clobber.
 *
 * @param {object} acct - account handle, passed through to hubFn untouched.
 * @param {string|number} id - post id.
 * @param {string|number|null|undefined} originalPublishDate - canonical date; a falsy
 *   value means there is nothing to restore and the function returns true at once.
 * @param {object} [options]
 * @param {Function} [options.hubFn] - (acct, method, path, body) => { ok, status, json }.
 * @param {Function} [options.sleep] - (ms) => Promise.
 * @param {number} [options.tries] - maximum PATCH + verify rounds.
 * @param {number} [options.settleMs] - pause between two rounds.
 * @returns {Promise<boolean>} true once the date is confirmed, false if it never sticks.
 * @throws {Error} when a PATCH call is not ok.
 */
export async function restoreCanonicalDate(
  acct,
  id,
  originalPublishDate,
  { hubFn = hub, sleep = defaultSleep, tries = 4, settleMs = 3000 } = {},
) {
  if (!originalPublishDate) return true;
  const want = normalizeDate(originalPublishDate);
  const wantDay = String(want).slice(0, 10);
  for (let attempt = 0; attempt < tries; attempt++) {
    // Settle only BETWEEN rounds: after the last failed round we report false
    // immediately rather than sleeping for nothing.
    if (attempt > 0) await sleep(settleMs);
    await hubOk(hubFn, acct, 'PATCH', `/cms/v3/blogs/posts/${id}`, { publishDate: want });
    const { ok, json } = await hubFn(acct, 'GET', `/cms/v3/blogs/posts/${id}`);
    if (ok && String(json?.publishDate).slice(0, 10) === wantDay) return true;
  }
  return false;
}
|
|
626
|
+
|
|
627
|
+
// Real-timer sleep used when no mock `sleep` is injected: the returned promise
// resolves (with no value) once setTimeout fires after `ms` milliseconds.
function defaultSleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
630
|
+
|
|
631
|
+
// Extra settle margin (ms) added on top of the latest scheduled publish before the
|
|
632
|
+
// final date-restore pass runs, so the schedule has DEFINITELY fired (and can no
|
|
633
|
+
// longer clobber the restored date) — codex #7.
|
|
634
|
+
const SCHEDULE_SETTLE_MS = 10 * 1000; // 10 s of slack past the latest scheduled publish
|
|
635
|
+
|
|
636
|
+
/**
 * waitUntil(targetMs, { now, sleep, stepMs }) — block (via the injectable sleep)
 * until the clock (injectable now) is at/after targetMs. Used to hold the final
 * date-restore pass back until every scheduled publish has fired.
 *
 * Returns immediately when the target is already in the past (e.g. nothing was
 * scheduled). Each individual sleep is capped at `stepMs`, so a test's mock `now`
 * that jumps forward is re-checked often and terminates promptly.
 *
 * @param {number} targetMs - epoch-ms to wait for.
 * @param {object} [options]
 * @param {Function} [options.now] - returns the current epoch-ms.
 * @param {Function} [options.sleep] - (ms) => Promise.
 * @param {number} [options.stepMs=5000] - upper bound for a single sleep; a
 *   non-positive value falls back to 5000 so the loop cannot spin on sleep(0).
 * @returns {Promise<void>}
 */
export async function waitUntil(
  targetMs,
  { now = () => Date.now(), sleep = defaultSleep, stepMs = 5000 } = {},
) {
  const step = stepMs > 0 ? stepMs : 5000;
  let remaining = targetMs - now();
  while (remaining > 0) {
    // Never sleep past the target, and never longer than one step at a time.
    await sleep(Math.min(remaining, step));
    remaining = targetMs - now();
  }
}
|
|
651
|
+
|
|
652
|
+
// Render an epoch-ms timestamp as a UTC ISO-8601 string whose millisecond field is
// forced to `.000Z`, so every date variant collapses to one canonical spelling.
function toIso(ms) {
  const iso = new Date(ms).toISOString();
  // toISOString() always ends in ".sssZ": drop "sssZ" and append zeroed millis.
  return `${iso.slice(0, -4)}000Z`;
}
|
|
656
|
+
// Normalize a publish date to the canonical `.000Z` ISO form.
//   - null/undefined are returned untouched;
//   - numbers are taken as epoch-ms, anything else goes through Date.parse;
//   - input that does not denote a representable date is returned UNCHANGED.
// "Not representable" covers NaN as well as ±Infinity and numbers outside the Date
// range: those are not NaN themselves but still make toISOString() throw a
// RangeError, so validity is checked on the Date rather than on the raw number.
function normalizeDate(d) {
  if (d == null) return d;
  const t = typeof d === 'number' ? d : Date.parse(d);
  if (Number.isNaN(new Date(t).getTime())) return d;
  return toIso(t);
}
|
|
662
|
+
|
|
663
|
+
// ── assets: re-host to the target File Manager, populate registry asset map ──────
|
|
664
|
+
|
|
665
|
+
// Read the asset manifest that sits next to the blog content.
// Assets live in the unified content/assets/ tree (sibling of content/blog), keyed
// by content-hash+slug; `dir` is content/blog, hence ../assets/manifest.json.
// Always returns a plain object: a missing file, unreadable/invalid JSON, or JSON
// that is not an object (null, array, scalar) all yield {} so callers can safely
// run Object.entries() on the result.
function loadAssetManifest(dir) {
  const f = join(dir, '..', 'assets', 'manifest.json');
  if (!existsSync(f)) return {};
  try {
    const parsed = JSON.parse(readFileSync(f, 'utf8'));
    const isPlainMap = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainMap ? parsed : {};
  } catch {
    // Best-effort by design: a broken manifest means "no assets known".
    return {};
  }
}
|
|
676
|
+
|
|
677
|
+
// Push a single local file into the target File Manager and hand back its hosted
// URL. Re-uploads converge instead of piling up "-1" duplicates thanks to
// overwrite:true combined with the EXACT_FOLDER duplicate scope (codex #4).
// Throws when the Files API answers with a non-2xx status.
async function uploadAsset(acct, path, fileName) {
  const bytes = readFileSync(path);
  const uploadOptions = {
    access: 'PUBLIC_INDEXABLE',
    overwrite: true,
    duplicateValidationStrategy: 'NONE',
    duplicateValidationScope: 'EXACT_FOLDER',
  };

  const payload = new FormData();
  payload.append('file', new Blob([bytes]), fileName);
  payload.append('fileName', fileName);
  payload.append('folderPath', '/blog-migrated');
  payload.append('options', JSON.stringify(uploadOptions));

  const response = await fetch(`${API}/files/v3/files`, {
    method: 'POST',
    headers: { Authorization: `Bearer ${acct.key}` },
    body: payload,
  });
  // A body that is not JSON degrades to {} so the status check below still reports.
  const parsed = await response.json().catch(() => ({}));
  if (!response.ok) {
    throw new Error(`upload ${fileName} -> ${response.status}: ${parsed.message || ''}`);
  }
  return parsed.url || parsed.objects?.[0]?.url;
}
|
|
703
|
+
|
|
704
|
+
/**
 * rehostAssets(acct, dir, registry) — upload every manifest asset to THIS account's
 * File Manager and record the hosted URL under registry.assets[localFile]. The
 * gitignored .sync-state/<portalId>.rehosted.json cache is consulted/updated so a
 * re-push reuses prior uploads (no duplicates). After this runs, refs.resolve can
 * turn @asset:<localFile> into the target's hosted URL.
 *
 * The cache is strictly an optimization, so it is handled best-effort on both ends:
 *   - an unreadable or corrupt cache file is ignored (start from an empty cache)
 *     instead of aborting the push;
 *   - the cache is written back in a `finally`, so uploads that succeeded before a
 *     later upload threw are still remembered for the next run.
 *
 * @param {object} acct - target account; `portalId` selects the cache file.
 * @param {string} dir - blog content directory.
 * @param {object} registry - mutated in place; `registry.assets` receives the URLs.
 * @returns {Promise<object>} the same registry object.
 * @throws whatever uploadAsset throws for a failed upload.
 */
async function rehostAssets(acct, dir, registry) {
  const manifest = loadAssetManifest(dir);
  const adir = join(dir, ASSETS_SUBDIR);
  const entries = Object.entries(manifest); // [originalUrl, localFile]
  if (entries.length === 0) return registry;

  // gitignored per-portal rehost cache (localFile -> hosted URL).
  const cachePath = stateRehostPath(acct.portalId);
  let cache = {};
  try {
    if (existsSync(cachePath)) {
      const parsed = JSON.parse(readFileSync(cachePath, 'utf8'));
      const isPlainMap = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      if (isPlainMap) cache = parsed;
    }
  } catch {
    /* non-fatal: a broken cache only costs re-uploads */
    cache = {};
  }

  try {
    for (const [, file] of entries) {
      if (!file) continue;
      // Already resolved in the registry — nothing to do.
      if (registry.assets[file] && typeof registry.assets[file] === 'string') continue;
      if (cache[file]) {
        registry.assets[file] = cache[file];
        continue;
      }
      const localPath = join(adir, file);
      if (!existsSync(localPath)) continue; // dead/unrecovered asset — left for resolve() to flag
      const url = await uploadAsset(acct, localPath, file);
      cache[file] = url;
      registry.assets[file] = url;
    }
  } finally {
    try {
      mkdirSync(resolvePath('.sync-state'), { recursive: true });
      writeFileSync(cachePath, JSON.stringify(cache, null, 2));
    } catch {
      /* non-fatal: the .sync-state rehost cache is only an upload optimization */
    }
  }

  // Drop the registry's `__rev_assets` entry after a completed pass
  // (presumably a derived reverse index that must be rebuilt — confirm in refs).
  delete registry.__rev_assets;
  return registry;
}
|
|
743
|
+
|
|
744
|
+
// Path of the gitignored per-portal rehost cache:
// <resolved .sync-state dir>/<portalId>.rehosted.json.
function stateRehostPath(portalId) {
  const stateDir = resolvePath('.sync-state');
  const cacheFile = `${portalId}.rehosted.json`;
  return join(stateDir, cacheFile);
}
|
|
748
|
+
|
|
749
|
+
// ── authors / tags upsert (by slug, then display name) ──────────────────────────
|
|
750
|
+
|
|
751
|
+
// Fetch every item under `path` and build a Map from lower-cased field value to
// item id (as a string), with one entry for EACH of the requested `keys` fields.
// Null/undefined/empty field values are skipped; on a collision the later item
// (or later key) overwrites the earlier entry.
async function nameIndex(hubFn, acct, path, keys) {
  const records = await getAllVia(hubFn, acct, path);
  const lookup = new Map();
  for (const record of records) {
    for (const field of keys) {
      const value = record[field];
      if (value == null || value === '') continue;
      lookup.set(String(value).toLowerCase(), String(record.id));
    }
  }
  return lookup;
}
|
|
763
|
+
|
|
764
|
+
// Resolve the author id for a post, creating the author when it is unknown.
// Lookup order is slug first, then display name (both lower-cased) against the
// shared `cache` Map. On a miss the author is POSTed and the new id is cached under
// every key the post provided. Returns null when the post names no author at all.
async function ensureAuthor(acct, post, cache, hubFn) {
  const { authorSlug: slug, authorName: display } = post;
  if (!slug && !display) return null;

  const cacheKeys = [slug, display].filter(Boolean).map((key) => String(key).toLowerCase());
  const hit = cacheKeys.find((key) => cache.has(key));
  if (hit !== undefined) return cache.get(hit);

  const label = display || slug;
  const created = await hubOk(hubFn, acct, 'POST', '/cms/v3/blogs/authors', {
    displayName: label,
    fullName: label,
    slug: slug || undefined,
  });
  const id = String(created.id);
  for (const key of cacheKeys) cache.set(key, id);
  return id;
}
|
|
781
|
+
|
|
782
|
+
// Yield {slug, name} pairs for a post's tags, aligning tagSlugs with tagNames.
//
// tagSlugs is sorted on pull while tagNames is in source order, so index i of one
// list is NOT guaranteed to describe index i of the other. When both lists have the
// same length we therefore:
//   1. align by content — each name is slugified and matched to its slug — as long
//      as that mapping is unambiguous (distinct slugs, distinct slugified names,
//      every slug matched);
//   2. otherwise fall back to positional pairing (e.g. hand-edited slugs that are
//      not derivable from the names).
// With slugs only, the slug doubles as the name; with names only, the slug is
// derived from the name.
function postTagPairs(post) {
  const slugs = post.tagSlugs || [];
  const names = post.tagNames || [];

  if (slugs.length && slugs.length === names.length) {
    const nameBySlug = new Map();
    for (const name of names) {
      const derived = slugifyName(name);
      if (derived && !nameBySlug.has(derived)) nameBySlug.set(derived, name);
    }
    const unambiguous =
      nameBySlug.size === names.length &&
      new Set(slugs).size === slugs.length &&
      slugs.every((slug) => nameBySlug.has(slug));
    if (unambiguous) {
      return slugs.map((slug) => ({ slug, name: nameBySlug.get(slug) }));
    }
    return slugs.map((slug, i) => ({ slug, name: names[i] }));
  }

  if (slugs.length) return slugs.map((slug) => ({ slug, name: slug }));
  return names.map((name) => ({ slug: slugifyName(name), name }));
}
|
|
794
|
+
|
|
795
|
+
// Resolve the tag id for one {slug, name} pair, creating the tag when it is unknown.
// The shared `cache` Map is probed by slug first, then by name (both lower-cased).
// On a miss the tag is POSTed and its id is cached under every key that was given.
async function ensureTag(acct, { slug, name }, cache, hubFn) {
  const cacheKeys = [];
  if (slug) cacheKeys.push(String(slug).toLowerCase());
  if (name) cacheKeys.push(String(name).toLowerCase());

  for (const key of cacheKeys) {
    if (cache.has(key)) return cache.get(key);
  }

  const created = await hubOk(hubFn, acct, 'POST', '/cms/v3/blogs/tags', {
    name: name || slug,
    slug: slug || undefined,
  });
  const id = String(created.id);
  cacheKeys.forEach((key) => cache.set(key, id));
  return id;
}
|
|
808
|
+
|
|
809
|
+
// ── small helpers ──────────────────────────────────────────────────────────────
|
|
810
|
+
|
|
811
|
+
// Call hubFn(acct, method, path, body) — which resolves to { ok, status, json } —
// and return `json` on success. On a non-ok response throw
// `<METHOD> <path> -> <status>: <detail>`, where detail is the response's message,
// else its category, else the first 200 chars of the serialized body.
// A failed response may carry no body at all: JSON.stringify(undefined) yields
// undefined (not a string), so that case is mapped to a placeholder instead of
// letting a TypeError hide the real HTTP error.
async function hubOk(hubFn, acct, method, path, body) {
  const { ok, status, json } = await hubFn(acct, method, path, body);
  if (ok) return json;
  const serialized = JSON.stringify(json) ?? '(no response body)';
  const detail = json?.message || json?.category || serialized.slice(0, 200);
  throw new Error(`${method} ${path} -> ${status}: ${detail}`);
}
|
|
820
|
+
|
|
821
|
+
// Collect every result of a cursor-paginated GET via the injected hubFn (so push is
// fully mockable; mirrors hub.getAll). Requests pages of 100 and follows
// `paging.next.after` until the API stops returning a cursor.
// The cursor is an opaque token, so it is URL-encoded before being placed in the
// query string (characters such as "+", "/" or "=" would otherwise be
// misinterpreted). Throws (via hubOk) on the first non-ok page.
async function getAllVia(hubFn, acct, path) {
  const out = [];
  const sep = path.includes('?') ? '&' : '?';
  let after;
  do {
    const cursor = after ? `&after=${encodeURIComponent(after)}` : '';
    const json = await hubOk(hubFn, acct, 'GET', `${path}${sep}limit=100${cursor}`);
    out.push(...(json.results || []));
    after = json.paging?.next?.after;
  } while (after);
  return out;
}
|
|
834
|
+
|
|
835
|
+
// Look up the FULL legacy blog object (id + item_template_path +
// listing_template_path + listing_page_id) for a slug, so the caller can both
// target the container AND diff its template config. The match is an exact string
// comparison on slug (a null/undefined slug on either side counts as ''), never
// "first in the list" — the stale "Old" blog cannot win (codex #6).
// Reads a single page of up to 100 blogs; resolves to null when no slug matches.
async function resolveBlogObjBySlugVia(hubFn, acct, slug) {
  const listing = await hubOk(hubFn, acct, 'GET', '/content/api/v2/blogs?limit=100');
  const wanted = slug == null ? '' : String(slug);
  const blogs = listing.objects || [];
  const match = blogs.find((blog) => String(blog.slug ?? '') === wanted);
  return match ?? null;
}
|
|
847
|
+
|
|
848
|
+
/**
 * applyContainerConfig(hubFn, acct, dir, blogSlug, liveBlog) -> note|null
 *
 * BLOG THEME fix: make the live blog container use the item/listing templates
 * recorded in the committed container.json (the seventh-sense-theme blog
 * templates), and reset listing_page_id to 0 — a non-zero override masks
 * listing_template_path (SYNC-NOTES §4; the re-PUT also busts the edge cache).
 *
 * Returns null without touching the API when:
 *   - there is no committed container config for this slug, or
 *   - the config carries neither template path, or
 *   - the live blog already matches both paths AND has listing_page_id === 0
 *     (idempotent: no churn).
 * Otherwise PUTs /content/api/v2/blogs/{id} and returns a note naming the change.
 */
async function applyContainerConfig(hubFn, acct, dir, blogSlug, liveBlog) {
  const canon = loadContainerConfig(dir, blogSlug);
  if (!canon) return null;

  const desired = {
    item: canon.itemTemplatePath || '',
    listing: canon.listingTemplatePath || '',
  };
  // Nothing to enforce when the canon names no template at all.
  if (!desired.item && !desired.listing) return null;

  const current = {
    item: liveBlog.item_template_path || '',
    listing: liveBlog.listing_template_path || '',
    listingPageId: Number(liveBlog.listing_page_id || 0),
  };
  const drifted =
    current.item !== desired.item ||
    current.listing !== desired.listing ||
    current.listingPageId !== 0;
  if (!drifted) return null;

  await hubOk(hubFn, acct, 'PUT', `/content/api/v2/blogs/${liveBlog.id}`, {
    item_template_path: desired.item,
    listing_template_path: desired.listing,
    // Zero clears the listing-page override so /blog renders listing_template_path.
    listing_page_id: 0,
  });
  return `blog theme: set container "${blogSlug}" templates → item=${desired.item} listing=${desired.listing} (listing_page_id cleared)`;
}
|
|
886
|
+
|
|
887
|
+
// Read and parse the committed container config for a blog slug (camelCase canon
// fields). The file name comes from containerFileFor(). Yields null when the file
// is absent or cannot be read/parsed.
function loadContainerConfig(dir, blogSlug) {
  const file = join(dir, containerFileFor(blogSlug));
  if (existsSync(file)) {
    try {
      return JSON.parse(readFileSync(file, 'utf8'));
    } catch {
      /* unreadable or invalid JSON — reported as "no config" below */
    }
  }
  return null;
}
|
|
898
|
+
|
|
899
|
+
// Derive a URL-style slug from a display name: lower-case it, keep the runs of
// [a-z0-9] and join them with single hyphens (no leading/trailing hyphen).
// A falsy name yields null; a name without any [a-z0-9] character yields ''.
function slugifyName(name) {
  if (!name) return null;
  const lowered = String(name).toLowerCase().trim();
  const words = lowered.split(/[^a-z0-9]+/).filter((word) => word !== '');
  return words.join('-');
}
|
|
907
|
+
|
|
908
|
+
// Post slug -> filename stem: every "/" becomes "__" ('blog/x' -> 'blog__x').
// Kept local to the blog adapter (mirror of canonical.slugToFile) so the adapter
// stays self-contained.
export function postFileFor(slug) {
  const segments = String(slug).split('/');
  return segments.join('__');
}
|
|
913
|
+
|
|
914
|
+
// Container slug -> config file name. The primary blog (slug "blog", or no slug at
// all) maps to CONTAINER_FILE; any other container gets "container.<slug>.json",
// with "/" in the slug flattened to "__".
export function containerFileFor(slug) {
  const isPrimary = !slug || slug === 'blog';
  if (isPrimary) return CONTAINER_FILE;
  const stem = String(slug).split('/').join('__');
  return `container.${stem}.json`;
}
|
|
920
|
+
|
|
921
|
+
// Default export: the adapter object (identity, pull/push entry points and the
// publish/date helpers) bundled under one binding.
export default {
  name,
  dependsOn,
  pull,
  push,
  publishPost,
  restoreCanonicalDate,
  waitUntil,
  rawUrlToToken,
  canonicalizeField,
  localAssetName,
};
|