hubspot-cms-sync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +52 -0
  3. package/bin/hubspot-cms-sync.mjs +115 -0
  4. package/docs/CONFIGURATION.md +83 -0
  5. package/docs/GITHUB_ACTIONS.md +70 -0
  6. package/docs/MIGRATION_PLAN.md +361 -0
  7. package/docs/PLAN_REVIEW.md +42 -0
  8. package/docs/SKILL_DISTRIBUTION.md +79 -0
  9. package/examples/github-actions/ci.yml +56 -0
  10. package/examples/github-actions/preview.yml +71 -0
  11. package/examples/github-actions/publish.yml +82 -0
  12. package/examples/hubspot-cms-sync.config.mjs +45 -0
  13. package/examples/site.manifest.json +19 -0
  14. package/package.json +41 -0
  15. package/skill/SKILL.md +54 -0
  16. package/skill/references/commands.md +54 -0
  17. package/skill/references/config.md +25 -0
  18. package/skill/references/failures.md +58 -0
  19. package/skill/references/github-actions.md +56 -0
  20. package/skill/references/screenshots-and-fidelity.md +33 -0
  21. package/src/adapters/assets.mjs +576 -0
  22. package/src/adapters/blog.mjs +921 -0
  23. package/src/adapters/content.mjs +213 -0
  24. package/src/adapters/forms.mjs +569 -0
  25. package/src/adapters/pages.mjs +463 -0
  26. package/src/adapters/theme.mjs +503 -0
  27. package/src/config.mjs +113 -0
  28. package/src/corpus-scan.mjs +248 -0
  29. package/src/cta-inventory.mjs +352 -0
  30. package/src/index.mjs +3 -0
  31. package/src/lib/canonical.mjs +234 -0
  32. package/src/lib/hub.mjs +197 -0
  33. package/src/lib/orchestrate.mjs +141 -0
  34. package/src/lib/refs.mjs +398 -0
  35. package/src/lib/sync-state.mjs +86 -0
  36. package/src/manifest.mjs +353 -0
  37. package/src/preflight.mjs +385 -0
  38. package/src/pull.mjs +99 -0
  39. package/src/push.mjs +354 -0
  40. package/src/republish.mjs +102 -0
@@ -0,0 +1,398 @@
1
+ // sync/lib/refs.mjs — per-account REFERENCE extraction + logical canonicalization.
2
+ //
3
+ // THE CRUX (codex findings #1, #2): HubSpot content embeds per-account ids — form
4
+ // GUIDs, CTA GUIDs, `hbspt.cta.load(<portal>,'<guid>')`, CTA embed HTML, hosted
5
+ // hubfs/asset URLs, generic `guid` fields, and bare portal ids (prod 529456 / dev
6
+ // 246389711). None of these are portable. The canonical store committed to git must
7
+ // hold LOGICAL refs (`@form:contact`, `@cta:book-demo`, `@asset:Sucess.jpg`,
8
+ // `@portal`, `@menu:main`); push RESOLVES them to the TARGET account's ids and
9
+ // HARD-FAILS if any logical ref has no target mapping.
10
+ //
11
+ // Composition with canonical.mjs (canon.mjs): canon owns JSON/HTML *shape*
12
+ // normalization (stable key order, entity/whitespace, null/empty policy, publishDate
13
+ // coercion). refs owns *identity* portability. On PULL the pipeline is
14
+ // `canon.normalize(raw)` then `canonicalize(str, sourceRegistry)` — shape first, then
15
+ // strip per-account ids to logical tokens — and the result is what gets committed. On
16
+ // PUSH it is the inverse: `resolve(str, targetRegistry)` injects the target portal's
17
+ // ids, then the bytes are uploaded. Because both layers are pure string/JSON
18
+ // transforms with no I/O, they unit-test without network. A Registry is loaded/saved
19
+ // per account by the orchestrator (e.g. `.sync-state/<portalId>.registry.json`, gitignored)
20
+ // and is the single rawId<->logicalKey lookup for that account.
21
+ //
22
+ // Pure module: no fs, no fetch, no globals. Everything here is a pure function.
23
+
24
// ---------------------------------------------------------------------------
// Portal ids confirmed in-repo. They drive recognition of BARE portal ids (see
// REF_PATTERNS.portalId below). Not a write-allowlist.
// ---------------------------------------------------------------------------
export const KNOWN_PORTALS = ['529456', '246389711'];

// Regex source for a GUID: lowercase hex, 8-4-4-4-12.
const GUID = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}';

// ---------------------------------------------------------------------------
// REF_PATTERNS — one regex per reference SHAPE, keyed by `kind`. Every pattern
// carries the global flag so all occurrences can be enumerated or replaced.
//
// Ordering note for canonicalize: `hubfsUrl` has to run BEFORE `portalId`,
// because a hubfs URL embeds a portal-id segment that should fold into the
// single `@asset:<path>` token instead of being tokenized on its own.
// ---------------------------------------------------------------------------
export const REF_PATTERNS = {
  // `"form_id": "<guid>"` field (any whitespace around the colon).
  // Group 1 = the GUID.
  formGuid: new RegExp(`"form_id"\\s*:\\s*"(${GUID})"`, 'g'),

  // hbspt.cta.load(<portal>, '<guid>' ... — single or double quotes, optional
  // whitespace around the arguments. Group 1 = portal, group 2 = CTA GUID.
  ctaLoad: new RegExp(`hbspt\\.cta\\.load\\(\\s*(\\d{5,})\\s*,\\s*['"](${GUID})['"]`, 'g'),

  // The remaining CTA GUID shapes: {{cta('guid')}} / {{ cta("guid") }}, a
  // "guid" field, /cta/redirect|default/<portal>/<guid>, ?pg=/&pg=<guid>,
  // hs-cta[-wrapper|-img|-ie-element|-node]-<guid>, data-hs-img-pg="<guid>".
  // Group 1 = the GUID. (ctaLoad is separate because it also carries a portal.)
  ctaGuid: new RegExp(
    `(?:\\{\\{\\s*cta\\(\\s*['"]|"guid"\\s*:\\s*"|/cta/(?:redirect|default)/\\d{5,}/|[?&]pg=|hs-cta(?:-wrapper|-img|-ie-element|-node)?-|data-hs-img-pg="|hs-cta-)(${GUID})`,
    'g',
  ),

  // Hosted asset URL on any host, in one of these path shapes:
  //   /hub/<portal>/hubfs/<tail>        -> portal in group 1
  //   /hs-fs/hubfs/[<portal>/]<tail>    -> portal (optional) in group 2
  //   /hubfs/<portal>/<tail>            -> portal in group 3
  // Group 4 = the path tail (the portable key). At most one of groups 1-3 is
  // set; all three are undefined for the portal-less /hs-fs/hubfs/<tail> shape.
  // Example hosts: cdn2.hubspot.net, *.hubspotusercontent*, www.theseventhsense.com.
  hubfsUrl: new RegExp(
    `https?://[a-z0-9.-]+/(?:hub/(\\d{5,})/hubfs|hs-fs/hubfs(?:/(\\d{5,}))?|hubfs/(\\d{5,}))/([^"'\\\\\\s),]+)`,
    'g',
  ),

  // Image URL on lhN.googleusercontent.com. Group 1 = the opaque path tail,
  // which canonicalize folds into `@asset:googleusercontent/<blob>`.
  googleUserContentUrl: new RegExp(
    `https?://lh[0-9]+\\.googleusercontent\\.com/([^"'\\\\\\s),]+)`,
    'g',
  ),

  // Numeric menu id in a "menu_id" / "menuId" / "menuid" style field, quoted or
  // not. Group 1 = the id.
  menuId: new RegExp(`"menu_?[iI]d"\\s*:\\s*"?(\\d{5,})"?`, 'g'),

  // A known portal id standing on its own. Word-bounded so it never matches
  // inside a longer number. Group 1 = the portal id.
  portalId: new RegExp(`\\b(${KNOWN_PORTALS.join('|')})\\b`, 'g'),
};
91
+
92
// Logical token grammar: `@<kind>:<key>` for form/cta/asset/menu, plus the bare
// `@portal` sentinel. Each builder returns the token text for a key.
const TOKEN = {
  form(key) {
    return `@form:${key}`;
  },
  cta(key) {
    return `@cta:${key}`;
  },
  asset(key) {
    return `@asset:${key}`;
  },
  menu(key) {
    return `@menu:${key}`;
  },
  portal() {
    return '@portal';
  },
};

// Recognizes every token shape emitted above. Groups 1+2 = kind and key for
// form/cta/menu (key limited to letters, digits, `_`, `-`); group 3 = the asset
// key, which runs until whitespace, a quote, a backslash, `)` or `,`.
const TOKEN_RE = /@(form|cta|menu):([A-Za-z0-9_-]+)|@asset:([^\s"'\\),]+)|@portal\b/g;
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Registry — per-account map of logicalKey<->rawId, one sub-map per namespace.
107
+ // `forms`/`ctas`/`menus`: { logicalKey: rawGuidOrId }.
108
+ // `assets`: { logicalKey(=pathTail): true } — assets are keyed by their own path,
109
+ // so no id table is needed; presence is the mapping.
110
+ // `portalId`: the account's numeric portal id (for `@portal` resolution).
111
+ // We also build reverse indexes lazily for canonicalize().
112
+ // ---------------------------------------------------------------------------
113
+
114
/**
 * Build a registry with no mappings.
 *
 * @param {string|number|null} [portalId=null] Portal id to seed; stored as a
 *   string, or as null when null/undefined.
 * @returns {{portalId: string|null, forms: object, ctas: object, menus: object, assets: object}}
 */
export function emptyRegistry(portalId = null) {
  const seededPortal = portalId == null ? null : String(portalId);
  return { portalId: seededPortal, forms: {}, ctas: {}, menus: {}, assets: {} };
}
118
+
119
/**
 * Copy a loaded registry object into the canonical registry shape.
 *
 * Only `portalId` and the four namespace maps are carried over; a namespace is
 * copied only when it is a truthy object, otherwise it stays empty.
 *
 * @param {object} [obj={}] Parsed registry data.
 * @returns {object} A fresh registry (see emptyRegistry) holding shallow copies.
 */
export function loadRegistry(obj = {}) {
  const registry = emptyRegistry(obj.portalId ?? null);
  ['forms', 'ctas', 'menus', 'assets'].forEach((ns) => {
    const section = obj[ns];
    if (section && typeof section === 'object') {
      Object.assign(registry[ns], section);
    }
  });
  return registry;
}
127
+
128
/**
 * Produce a plain serializable snapshot of a registry.
 *
 * Only `portalId` and the four namespace maps are emitted (as shallow copies),
 * so anything else attached to the registry object is left out.
 *
 * @param {object} reg Registry to snapshot.
 * @returns {{portalId: string|null, forms: object, ctas: object, menus: object, assets: object}}
 */
export function saveRegistry(reg) {
  const { portalId, forms, ctas, menus, assets } = reg;
  return {
    portalId: portalId == null ? null : String(portalId),
    forms: { ...forms },
    ctas: { ...ctas },
    menus: { ...menus },
    assets: { ...assets },
  };
}
138
+
139
// Which registry namespace each reference kind is stored under.
const NS_FOR_KIND = Object.fromEntries([
  ['formGuid', 'forms'],
  ['ctaGuid', 'ctas'],
  ['ctaLoad', 'ctas'],
  ['menuId', 'menus'],
  ['hubfsUrl', 'assets'],
  ['googleUserContentUrl', 'assets'],
]);
147
+
148
// Reduce a hubfsUrl match (array of [whole, g1, g2, g3, g4]) to { portal, tail }.
// Groups 1-3 are the per-path-shape portal captures and group 4 is the path
// tail; `portal` is the first truthy one of the three, so it stays undefined
// when the URL carried no portal segment.
function hubfsParts(m) {
  const [, viaHub, viaHsFs, viaHubfs, tail] = m;
  return { portal: viaHub || viaHsFs || viaHubfs, tail };
}
156
+
157
// Return the rawId -> logicalKey lookup for namespace `ns`, building it on first
// use and caching it on the registry under `__rev_<ns>`. The cache property is
// non-enumerable (so it is skipped by spreads/Object.keys) and configurable (so
// it can be deleted to invalidate it).
function reverseIndex(reg, ns) {
  const cacheKey = `__rev_${ns}`;
  const cached = reg[cacheKey];
  if (cached) return cached;

  const byRawId = Object.create(null);
  Object.entries(reg[ns] || {}).forEach(([logical, raw]) => {
    byRawId[String(raw)] = logical;
  });
  Object.defineProperty(reg, cacheKey, { value: byRawId, enumerable: false, configurable: true });
  return byRawId;
}
167
+
168
// ---------------------------------------------------------------------------
// extractRefs(str) -> [{ kind, rawId, match }]
// Lists every reference occurrence, one pattern at a time, in this order:
// formGuid, ctaLoad, ctaGuid, hubfsUrl, googleUserContentUrl, menuId, portalId.
//   kind  = pattern name ('portalId' is also emitted for the portal carried by
//           a ctaLoad call or a hubfs URL)
//   rawId = GUID / numeric id / asset path tail / portal id
//   match = the matched substring (for embedded portals: the portal id itself)
// Non-string or empty input yields [].
// ---------------------------------------------------------------------------
export function extractRefs(str) {
  if (typeof str !== 'string' || str.length === 0) return [];

  const refs = [];
  const add = (kind, rawId, match) => {
    refs.push({ kind, rawId, match });
  };
  const scan = (kind, visit) => {
    for (const hit of str.matchAll(REF_PATTERNS[kind])) visit(hit);
  };

  scan('formGuid', (hit) => add('formGuid', hit[1], hit[0]));

  // One call carries both a CTA guid and the portal argument.
  scan('ctaLoad', (hit) => {
    add('ctaLoad', hit[2], hit[0]);
    add('portalId', hit[1], hit[1]);
  });

  scan('ctaGuid', (hit) => add('ctaGuid', hit[1], hit[0]));

  // The path tail identifies the asset; the embedded portal (if any) is
  // reported as its own entry.
  scan('hubfsUrl', (hit) => {
    const { portal, tail } = hubfsParts(hit);
    add('hubfsUrl', tail, hit[0]);
    if (portal) add('portalId', portal, portal);
  });

  scan('googleUserContentUrl', (hit) => {
    add('googleUserContentUrl', `googleusercontent/${hit[1]}`, hit[0]);
  });

  scan('menuId', (hit) => add('menuId', hit[1], hit[0]));

  // Finally, every standalone known portal id anywhere in the text.
  scan('portalId', (hit) => add('portalId', hit[1], hit[0]));

  return refs;
}
212
+
213
// ---------------------------------------------------------------------------
// toLogical(kind, rawId, registry) -> logical token string
//   'portalId'                          -> `@portal` (registry not consulted)
//   'hubfsUrl' / 'googleUserContentUrl' -> `@asset:<rawId>` (the rawId is
//                                          already the portable path)
//   form / cta / menu kinds             -> `@form|@cta|@menu:<logicalKey>`,
//                                          looked up via the registry's
//                                          rawId -> logicalKey reverse index
// Throws on an unknown kind, and when a form/cta/menu rawId has no logical key
// in the registry (it must be registered with registerRef first).
// ---------------------------------------------------------------------------
export function toLogical(kind, rawId, registry) {
  switch (kind) {
    case 'portalId':
      return TOKEN.portal();
    case 'hubfsUrl':
    case 'googleUserContentUrl':
      return TOKEN.asset(String(rawId));
    default:
      break;
  }

  const ns = NS_FOR_KIND[kind];
  if (!ns) throw new Error(`toLogical: unknown kind ${kind}`);

  const logical = reverseIndex(registry, ns)[String(rawId)];
  if (logical == null) {
    throw new Error(`toLogical: no logical key for ${kind} ${rawId} in registry (call registerRef on pull first)`);
  }

  // Namespace -> token builder name; anything that is not forms/ctas is a menu.
  let builder = 'menu';
  if (ns === 'forms') builder = 'form';
  else if (ns === 'ctas') builder = 'cta';
  return TOKEN[builder](logical);
}
234
+
235
// ---------------------------------------------------------------------------
// registerRef(reg, kind, rawId, logicalKey?) — pull-time helper: make sure a
// rawId has a logical key in the registry and return that key.
//   'portalId'     -> records reg.portalId if it is still unset; returns null.
//   asset kinds    -> records reg.assets[rawId] = true; returns the rawId (the
//                     path is its own key).
//   form/cta/menu  -> returns the existing key when the rawId is already
//                     mapped; otherwise stores it under `logicalKey` when one
//                     is given, else under a freshly minted key.
// A minted key never reuses a key that is already taken: two ids sharing their
// first 8 characters used to mint the same key, and the second registration
// silently overwrote the first mapping. A caller-supplied `logicalKey` is
// stored as-is (re-pointing that key if it already existed).
// Throws on an unknown kind.
// ---------------------------------------------------------------------------
export function registerRef(reg, kind, rawId, logicalKey = null) {
  const raw = String(rawId);

  if (kind === 'portalId') {
    if (reg.portalId == null) reg.portalId = raw;
    return null;
  }
  if (kind === 'hubfsUrl' || kind === 'googleUserContentUrl') {
    reg.assets[raw] = true;
    delete reg.__rev_assets; // drop the memoized reverse index, it is stale now
    return raw;
  }

  const ns = NS_FOR_KIND[kind];
  if (!ns) throw new Error(`registerRef: unknown kind ${kind}`);

  const existing = reverseIndex(reg, ns)[raw];
  if (existing != null) return existing;

  const key = logicalKey || mintUniqueKey(reg[ns], ns, raw);
  reg[ns][key] = raw;
  delete reg[`__rev_${ns}`]; // drop the memoized reverse index, it is stale now
  return key;
}

// Deterministic fallback logical key when the caller has no human-friendly name:
// the singular namespace name plus the first `length` characters of the id with
// dashes removed (forms->form-xxxx, ctas->cta-xxxx, menus->menu-xxxx).
function mintKey(ns, rawId, length = 8) {
  const short = String(rawId).replace(/-/g, '').slice(0, length);
  return `${ns.slice(0, -1)}-${short}`;
}

// Mint a key for `rawId` that is not yet present in `table`. Starts with the
// usual 8-character key and, on a clash, lengthens the id prefix one character
// at a time. If even the full id is taken (only possible when a caller-supplied
// key happens to look like a minted one), a numeric suffix is appended.
function mintUniqueKey(table, ns, rawId) {
  const isTaken = (candidate) => Object.prototype.hasOwnProperty.call(table, candidate);
  const fullLength = Math.max(8, String(rawId).replace(/-/g, '').length);

  for (let length = 8; length <= fullLength; length += 1) {
    const candidate = mintKey(ns, rawId, length);
    if (!isTaken(candidate)) return candidate;
  }

  const base = mintKey(ns, rawId, fullLength);
  let suffix = 2;
  while (isTaken(`${base}-${suffix}`)) suffix += 1;
  return `${base}-${suffix}`;
}
265
+
266
// ---------------------------------------------------------------------------
// canonicalize(str, registry) -> portable str with raw refs replaced by tokens.
// Non-string or empty input is returned unchanged.
// PRECEDENCE (critical for reversibility):
//   1.  hubfsUrl             → @asset:<path>  (consumes the portal segment)
//   1b. googleUserContentUrl → @asset:googleusercontent/<blob>
//   2.  ctaLoad              → hbspt.cta.load(@portal,'@cta:key'
//   3.  formGuid             → the GUID inside the form_id field → @form:key
//   4.  ctaGuid              → @cta:key  (all remaining cta-guid shapes)
//   5.  menuId               → @menu:key
//   6.  portalId             → @portal   (any remaining bare portal id)
// Every ref is passed through registerRef, so unseen refs are registered on
// the fly (the registry is mutated).
//
// Steps 2-5 swap ONLY the raw id inside the match, so the surrounding text
// (quotes, whitespace) is kept exactly as it was. Step 6 skips anything that
// is already a logical token, so a portal id that happens to appear inside an
// `@asset:` path (e.g. `report-529456.png`) is left intact.
// ---------------------------------------------------------------------------
export function canonicalize(str, registry) {
  if (typeof str !== 'string' || str.length === 0) return str;
  let s = str;

  // Replace the first occurrence of `raw` within `whole`. A function replacer is
  // used so `$` sequences in the token are never read as replacement patterns.
  const swap = (whole, raw, token) => whole.replace(raw, () => token);

  // 1. hosted asset URLs -> @asset:<pathTail> (host + portal collapse into the token).
  s = s.replace(REF_PATTERNS.hubfsUrl, (...args) => {
    const { tail } = hubfsParts(args.slice(0, 5)); // [whole, g1, g2, g3, g4]
    return TOKEN.asset(registerRef(registry, 'hubfsUrl', tail));
  });

  // 1b. foreign googleusercontent image URLs -> @asset:googleusercontent/<blob>
  s = s.replace(REF_PATTERNS.googleUserContentUrl, (_whole, blob) =>
    TOKEN.asset(registerRef(registry, 'googleUserContentUrl', `googleusercontent/${blob}`)));

  // 2. hbspt.cta.load(<portal>,'<guid>' -> hbspt.cta.load(@portal,'@cta:key'
  s = s.replace(REF_PATTERNS.ctaLoad, (whole, portal, guid) => {
    registerRef(registry, 'portalId', portal);
    const key = registerRef(registry, 'ctaGuid', guid);
    return swap(swap(whole, portal, TOKEN.portal()), guid, TOKEN.cta(key));
  });

  // 3. form_id field -> swap just the guid, keeping the field's own spacing
  s = s.replace(REF_PATTERNS.formGuid, (whole, guid) =>
    swap(whole, guid, TOKEN.form(registerRef(registry, 'formGuid', guid))));

  // 4. all remaining cta-guid shapes -> swap just the guid for @cta:key in place
  s = s.replace(REF_PATTERNS.ctaGuid, (whole, guid) =>
    swap(whole, guid, TOKEN.cta(registerRef(registry, 'ctaGuid', guid))));

  // 5. menu ids
  s = s.replace(REF_PATTERNS.menuId, (whole, id) =>
    swap(whole, id, TOKEN.menu(registerRef(registry, 'menuId', id))));

  // 6. any remaining bare portal id. Logical tokens are matched first (and put
  //    back untouched) so portal-looking digits inside them are never rewritten.
  //    Built per call from the two existing patterns; the named group makes the
  //    lookup independent of how many groups those patterns contain.
  const tokenOrPortal = new RegExp(
    `${TOKEN_RE.source}|(?<rawPortal>${REF_PATTERNS.portalId.source})`,
    'g',
  );
  s = s.replace(tokenOrPortal, (...args) => {
    const { rawPortal } = args[args.length - 1]; // named groups come last
    if (rawPortal === undefined) return args[0]; // an existing token: keep as-is
    registerRef(registry, 'portalId', rawPortal);
    return TOKEN.portal();
  });

  return s;
}
332
+
333
// ---------------------------------------------------------------------------
// resolve(str, targetRegistry) -> str with logical tokens replaced by the TARGET
// account's ids. Non-string or empty input is returned unchanged.
//   @portal          -> targetRegistry.portalId
//   @form/@cta/@menu -> the raw id stored under that key in forms/ctas/menus
//   @asset:<path>    -> the stored entry when it is a string (a concrete URL);
//                       when the entry is any other non-null value (e.g.
//                       `true`) the token is left in place for the caller.
// THROWS (push must hard-fail) with a sorted, de-duplicated list of every token
// that has no target mapping.
//
// Lookups only consider the registry maps' OWN properties, so a key such as
// `constructor` or `toString` cannot be satisfied by an inherited
// Object.prototype member and slip past the hard-fail.
// ---------------------------------------------------------------------------
export function resolve(str, targetRegistry) {
  if (typeof str !== 'string' || str.length === 0) return str;
  const missing = [];

  // Own-property read; undefined when the table is absent or lacks the key.
  const ownEntry = (table, key) =>
    (table && Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);

  const out = str.replace(TOKEN_RE, (token, kind, key, assetKey) => {
    if (token === '@portal') {
      if (targetRegistry.portalId == null) {
        missing.push('@portal');
        return token;
      }
      return String(targetRegistry.portalId);
    }

    if (assetKey != null) {
      const entry = ownEntry(targetRegistry.assets, assetKey);
      if (entry == null) {
        missing.push(`@asset:${assetKey}`);
        return token;
      }
      // A string entry is a concrete URL; anything else only marks the path as
      // known, so the token stays for the caller to expand.
      return typeof entry === 'string' ? entry : token;
    }

    const ns = kind === 'form' ? 'forms' : kind === 'cta' ? 'ctas' : 'menus';
    const raw = ownEntry(targetRegistry[ns], key);
    if (raw == null) {
      missing.push(`@${kind}:${key}`);
      return token;
    }
    return String(raw);
  });

  if (missing.length) {
    const uniq = [...new Set(missing)].sort();
    throw new Error(
      `resolve: ${uniq.length} logical ref(s) have no mapping in target portal ` +
        `${targetRegistry.portalId ?? '(unknown)'} — push must not proceed: ${uniq.join(', ')}`,
    );
  }
  return out;
}
383
+
384
// ---------------------------------------------------------------------------
// listLogicalTokens(str) -> [{ kind, key, token }] — pure inspection helper.
// Returns one entry per logical token found in `str`, in order of appearance:
//   @portal          -> { kind: 'portal', key: null, token: '@portal' }
//   @asset:<path>    -> { kind: 'asset', key: <path>, token }
//   @form/@cta/@menu -> { kind, key, token }
// Non-string input yields [].
// ---------------------------------------------------------------------------
export function listLogicalTokens(str) {
  if (typeof str !== 'string') return [];
  return Array.from(str.matchAll(TOKEN_RE), ([token, kind, key, assetKey]) => {
    if (token === '@portal') return { kind: 'portal', key: null, token: '@portal' };
    if (assetKey != null) return { kind: 'asset', key: assetKey, token };
    return { kind, key, token };
  });
}
@@ -0,0 +1,86 @@
1
+ // sync/lib/sync-state.mjs — per-account registry persistence + content tree root.
2
+ //
3
+ // The Registry (refs.mjs) is PER ACCOUNT and lives in the GITIGNORED .sync-state/
4
+ // directory at the repo root, one file per portal:
5
+ //
6
+ // .sync-state/<portalId>.registry.json
7
+ //
8
+ // It holds the logical-key <-> per-account-id mapping (forms/ctas/menus GUIDs, asset
9
+ // paths, the portal id) that PULL auto-registers and PUSH resolves. It is never
10
+ // committed (see .gitignore `.sync-state/`), because it is account-specific identity,
11
+ // not portable canonical content.
12
+ //
13
+ // This module owns the load/init + save of that file (composing refs.emptyRegistry /
14
+ // loadRegistry / saveRegistry with canonical.stableStringify for diff-stable bytes)
15
+ // and exposes the canonical content/ tree root the adapters write into.
16
+
17
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, renameSync } from 'node:fs';
18
+ import { join } from 'node:path';
19
+
20
+ import { emptyRegistry, loadRegistry, saveRegistry } from './refs.mjs';
21
+ import { stableStringify } from './canonical.mjs';
22
+ import { loadConfigSyncFallback } from '../config.mjs';
23
+
24
// Default config used when a caller does not pass `cfg`; simply forwards to
// loadConfigSyncFallback() from ../config.mjs.
function fallbackConfig() {
  const cfg = loadConfigSyncFallback();
  return cfg;
}
27
+
28
/**
 * Directory of the canonical content tree.
 *
 * Uses `cfg.contentDirPath` when it is set; otherwise joins `cfg.root` (or the
 * current working directory) with `cfg.contentDir` (or 'content').
 *
 * @param {object} [cfg] Config; defaults to fallbackConfig().
 * @returns {string}
 */
export function contentDir(cfg = fallbackConfig()) {
  if (cfg.contentDirPath) return cfg.contentDirPath;
  const root = cfg.root || process.cwd();
  const folder = cfg.contentDir || 'content';
  return join(root, folder);
}
32
+
33
/**
 * Directory holding the per-account state files.
 *
 * Uses `cfg.syncStateDirPath` when it is set; otherwise joins `cfg.root` (or
 * the current working directory) with `cfg.syncStateDir` (or '.sync-state').
 *
 * @param {object} [cfg] Config; defaults to fallbackConfig().
 * @returns {string}
 */
export function syncStateDir(cfg = fallbackConfig()) {
  if (cfg.syncStateDirPath) return cfg.syncStateDirPath;
  const root = cfg.root || process.cwd();
  const folder = cfg.syncStateDir || '.sync-state';
  return join(root, folder);
}
37
+
38
/**
 * Path of one portal's registry file: `<syncStateDir>/<portalId>.registry.json`.
 *
 * @param {string|number} portalId Portal whose registry file is wanted.
 * @param {object} [cfg] Config; defaults to fallbackConfig().
 * @returns {string}
 */
export function registryPath(portalId, cfg = fallbackConfig()) {
  const fileName = `${String(portalId)}.registry.json`;
  return join(syncStateDir(cfg), fileName);
}
42
+
43
/**
 * loadAccountRegistry(portalId) -> Registry
 *
 * Reads the portal's registry file (see registryPath) when it exists, otherwise
 * starts from an empty registry. Either way the returned registry's `portalId`
 * is forced to the given portal, overriding whatever the file contained.
 *
 * @param {string|number} portalId Account to load the registry for.
 * @param {object} [cfg] Config; defaults to fallbackConfig().
 * @returns {object} The registry, with `portalId` set to `String(portalId)`.
 * @throws {Error} "Corrupt registry <file>: <reason>" when the file exists but
 *   cannot be read, parsed or normalized; the underlying error is attached as
 *   `cause` so its type and stack are not lost.
 */
export function loadAccountRegistry(portalId, cfg = fallbackConfig()) {
  const pid = String(portalId);
  const file = registryPath(pid, cfg);
  let reg;
  if (existsSync(file)) {
    try {
      reg = loadRegistry(JSON.parse(readFileSync(file, 'utf8')));
    } catch (e) {
      throw new Error(`Corrupt registry ${file}: ${e.message}`, { cause: e });
    }
  } else {
    reg = emptyRegistry(pid);
  }
  // The stored portal id may be absent or differ; the account being operated
  // on always wins.
  reg.portalId = pid;
  return reg;
}
68
+
69
/**
 * persistAccountRegistry(portalId, registry) -> void
 *
 * Writes the registry to the portal's registry file (see registryPath),
 * creating the state directory first if needed. The bytes are
 * stableStringify(saveRegistry(registry)).
 *
 * The data goes to a `<file>.tmp-<pid>` sibling first and is then renamed over
 * the real file, so a crash mid-write never leaves a half-written registry at
 * the final path.
 *
 * @param {string|number} portalId Account the registry belongs to.
 * @param {object} registry Registry to persist.
 * @param {object} [cfg] Config; defaults to fallbackConfig().
 */
export function persistAccountRegistry(portalId, registry, cfg = fallbackConfig()) {
  mkdirSync(syncStateDir(cfg), { recursive: true });

  const target = registryPath(portalId, cfg);
  const staging = `${target}.tmp-${process.pid}`;
  const bytes = stableStringify(saveRegistry(registry));

  writeFileSync(staging, bytes);
  renameSync(staging, target);
}