hubspot-cms-sync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +52 -0
- package/bin/hubspot-cms-sync.mjs +115 -0
- package/docs/CONFIGURATION.md +83 -0
- package/docs/GITHUB_ACTIONS.md +70 -0
- package/docs/MIGRATION_PLAN.md +361 -0
- package/docs/PLAN_REVIEW.md +42 -0
- package/docs/SKILL_DISTRIBUTION.md +79 -0
- package/examples/github-actions/ci.yml +56 -0
- package/examples/github-actions/preview.yml +71 -0
- package/examples/github-actions/publish.yml +82 -0
- package/examples/hubspot-cms-sync.config.mjs +45 -0
- package/examples/site.manifest.json +19 -0
- package/package.json +41 -0
- package/skill/SKILL.md +54 -0
- package/skill/references/commands.md +54 -0
- package/skill/references/config.md +25 -0
- package/skill/references/failures.md +58 -0
- package/skill/references/github-actions.md +56 -0
- package/skill/references/screenshots-and-fidelity.md +33 -0
- package/src/adapters/assets.mjs +576 -0
- package/src/adapters/blog.mjs +921 -0
- package/src/adapters/content.mjs +213 -0
- package/src/adapters/forms.mjs +569 -0
- package/src/adapters/pages.mjs +463 -0
- package/src/adapters/theme.mjs +503 -0
- package/src/config.mjs +113 -0
- package/src/corpus-scan.mjs +248 -0
- package/src/cta-inventory.mjs +352 -0
- package/src/index.mjs +3 -0
- package/src/lib/canonical.mjs +234 -0
- package/src/lib/hub.mjs +197 -0
- package/src/lib/orchestrate.mjs +141 -0
- package/src/lib/refs.mjs +398 -0
- package/src/lib/sync-state.mjs +86 -0
- package/src/manifest.mjs +353 -0
- package/src/preflight.mjs +385 -0
- package/src/pull.mjs +99 -0
- package/src/push.mjs +354 -0
- package/src/republish.mjs +102 -0
package/src/lib/refs.mjs
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
// sync/lib/refs.mjs — per-account REFERENCE extraction + logical canonicalization.
|
|
2
|
+
//
|
|
3
|
+
// THE CRUX (codex findings #1, #2): HubSpot content embeds per-account ids — form
|
|
4
|
+
// GUIDs, CTA GUIDs, `hbspt.cta.load(<portal>,'<guid>')`, CTA embed HTML, hosted
|
|
5
|
+
// hubfs/asset URLs, generic `guid` fields, and bare portal ids (prod 529456 / dev
|
|
6
|
+
// 246389711). None of these are portable. The canonical store committed to git must
|
|
7
|
+
// hold LOGICAL refs (`@form:contact`, `@cta:book-demo`, `@asset:Sucess.jpg`,
|
|
8
|
+
// `@portal`, `@menu:main`); push RESOLVES them to the TARGET account's ids and
|
|
9
|
+
// HARD-FAILS if any logical ref has no target mapping.
|
|
10
|
+
//
|
|
11
|
+
// Composition with canonical.mjs (canon.mjs): canon owns JSON/HTML *shape*
|
|
12
|
+
// normalization (stable key order, entity/whitespace, null/empty policy, publishDate
|
|
13
|
+
// coercion). refs owns *identity* portability. On PULL the pipeline is
|
|
14
|
+
// `canon.normalize(raw)` then `canonicalize(str, sourceRegistry)` — shape first, then
|
|
15
|
+
// strip per-account ids to logical tokens — and the result is what gets committed. On
|
|
16
|
+
// PUSH it is the inverse: `resolve(str, targetRegistry)` injects the target portal's
|
|
17
|
+
// ids, then the bytes are uploaded. Because both layers are pure string/JSON
|
|
18
|
+
// transforms with no I/O, they unit-test without network. A Registry is loaded/saved
|
|
19
|
+
// per account by the orchestrator (e.g. `.sync-state/<portalId>.registry.json`, gitignored)
|
|
20
|
+
// and is the single rawId<->logicalKey lookup for that account.
|
|
21
|
+
//
|
|
22
|
+
// Pure module: no fs, no fetch, no globals. Everything here is a pure function.
|
|
23
|
+
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Known portal ids confirmed in-repo. Used to recognise BARE portal ids and to
|
|
26
|
+
// validate that a remapped portal is plausible. Not a write-allowlist.
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
export const KNOWN_PORTALS = ['529456', '246389711'];

// A GUID as HubSpot emits it (lowercase hex, 8-4-4-4-12). Kept as a pattern
// *string* (not a RegExp) so it can be spliced into the RegExp sources below.
const GUID = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}';

// ---------------------------------------------------------------------------
// REF_PATTERNS — one regex per reference SHAPE. Each is global so `extractRefs`
// can enumerate every occurrence. `kind` names the logical namespace it feeds.
//
// IMPORTANT ordering note for canonicalize/resolve: `hubfsUrl` must be applied
// BEFORE `portalId`, because a hubfs URL contains a portal-id segment that we want
// folded into the single `@asset:<path>` token rather than separately tokenized.
// ---------------------------------------------------------------------------
export const REF_PATTERNS = {
  // A `"form_id": "<guid>"` JSON field (any whitespace around the colon).
  // Capture group 1 = the GUID.
  formGuid: new RegExp(`"form_id"\\s*:\\s*"(${GUID})"`, 'g'),

  // hbspt.cta.load(<portal>, '<guid>', {...}) — carries BOTH a portal id and a CTA
  // guid. Group 1 = portal, group 2 = guid. The quote may be single OR double, and
  // arbitrary whitespace may surround the args. The match stops after the closing
  // quote of the guid; any trailing arguments are not part of it.
  ctaLoad: new RegExp(`hbspt\\.cta\\.load\\(\\s*(\\d{5,})\\s*,\\s*['"](${GUID})['"]`, 'g'),

  // Every other place a CTA guid appears: {{cta('guid')}} / {{ cta("guid") }} (single
  // OR double quote, arbitrary whitespace inside the call), the "guid" body field,
  // cta/(redirect|default)/<portal>/<guid>, pg=<guid>, hs-cta[-wrapper|-img|
  // -ie-element|-node]-<guid> ids, data-hs-img-pg="<guid>".
  // Group 1 = the GUID. (ctaLoad is handled separately for its portal arg.)
  // The plain `hs-cta-<guid>` shape is covered by the optional-suffix branch, so no
  // separate alternative is needed for it.
  ctaGuid: new RegExp(
    `(?:\\{\\{\\s*cta\\(\\s*['"]|"guid"\\s*:\\s*"|/cta/(?:redirect|default)/\\d{5,}/|[?&]pg=|hs-cta(?:-wrapper|-img|-ie-element|-node)?-|data-hs-img-pg=")(${GUID})`,
    'g',
  ),

  // Hosted asset URL on any HubSpot file host. THREE path shapes occur in the corpus:
  //   1. /hubfs/<portal>/<tail>          (cdn2.hubspot.net, *.hubspotusercontent*)
  //   2. /hub/<portal>/hubfs/<tail>      (legacy File-Manager host path)
  //   3. /hs-fs/hubfs/<tail>             (theseventhsense.com — NO portal segment)
  //      and the portal-bearing variant /hs-fs/hubfs/<portal>/<tail>.
  // Capture groups (at most one of the portal groups is set per match):
  //   group 1 = portal of the /hub/<portal>/hubfs/ shape
  //   group 2 = portal of the /hs-fs/hubfs/<portal>/ shape (undefined when absent)
  //   group 3 = portal of the /hubfs/<portal>/ shape
  //   group 4 = the path tail (the stable, portable key; portal-agnostic). It runs
  //             until the first quote, backslash, whitespace, `)` or `,`, so any
  //             query string is part of the tail.
  // Use hubfsParts() to collapse a match into { portal, tail }.
  // Hosts seen in corpus: cdn2.hubspot.net, <portal>.fs1.hubspotusercontent-naN.net,
  // f.hubspotusercontent00.net, fs.hubspotusercontent00.net, www.theseventhsense.com.
  hubfsUrl: new RegExp(
    `https?://[a-z0-9.-]+/(?:hub/(\\d{5,})/hubfs|hs-fs/hubfs(?:/(\\d{5,}))?|hubfs/(\\d{5,}))/([^"'\\\\\\s),]+)`,
    'g',
  ),

  // Foreign image hosts that legacy blog bodies still embed (Google Docs paste-ins).
  // These carry no portal but the opaque path IS a stable per-image identity, so we
  // fold the WHOLE URL to a portable `@asset:googleusercontent/<blob>` token. Group 1
  // = the opaque path tail. Hosts: lhN.googleusercontent.com.
  googleUserContentUrl: new RegExp(
    `https?://lh[0-9]+\\.googleusercontent\\.com/([^"'\\\\\\s),]+)`,
    'g',
  ),

  // A native/simple menu id (numeric, 5+ digits, optionally quoted) under a
  // "menu_id" / "menuid" / "menuId" / "menu_Id" key. Group 1 = id. Defensive:
  // confirmed shape in HubSpot menu modules though this corpus's simple_menu modules
  // are link-based.
  menuId: new RegExp(`"menu_?[iI]d"\\s*:\\s*"?(\\d{5,})"?`, 'g'),

  // A BARE portal id anywhere else (after assets/ctas have been consumed). Group 1 =
  // portal. Only ids listed in KNOWN_PORTALS are recognised. Word-bounded so it
  // doesn't bite into a longer number.
  portalId: new RegExp(`\\b(${KNOWN_PORTALS.join('|')})\\b`, 'g'),
};
|
|
91
|
+
|
|
92
|
+
// Logical token grammar. A token is `@<kind>:<key>` or the bare `@portal` sentinel.
|
|
93
|
+
// Keys are slug-safe; `@asset:` keys keep their path (slashes allowed).
|
|
94
|
+
const TOKEN = {
  /** `@form:<key>` */
  form(key) {
    return `@form:${key}`;
  },
  /** `@cta:<key>` */
  cta(key) {
    return `@cta:${key}`;
  },
  /** `@asset:<key>` — the key is a path and is emitted verbatim. */
  asset(key) {
    return `@asset:${key}`;
  },
  /** `@menu:<key>` */
  menu(key) {
    return `@menu:${key}`;
  },
  /** The bare portal sentinel; it carries no key. */
  portal() {
    return `@portal`;
  },
};

// Recognises every logical token shape emitted above, so resolve() and
// listLogicalTokens() can scan for them. Capture groups:
//   1 = kind (form|cta|menu), 2 = its slug key ([A-Za-z0-9_-]+),
//   3 = asset key (runs until whitespace, quote, backslash, `)` or `,`).
// A bare `@portal` match sets none of the groups.
const TOKEN_RE = /@(form|cta|menu):([A-Za-z0-9_-]+)|@asset:([^\s"'\\),]+)|@portal\b/g;
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Registry — per-account map of logicalKey<->rawId, one sub-map per namespace.
|
|
107
|
+
// `forms`/`ctas`/`menus`: { logicalKey: rawGuidOrId }.
|
|
108
|
+
// `assets`: { logicalKey(=pathTail): true } — assets are keyed by their own path,
|
|
109
|
+
// so no id table is needed; presence is the mapping.
|
|
110
|
+
// `portalId`: the account's numeric portal id (for `@portal` resolution).
|
|
111
|
+
// We also build reverse indexes lazily for canonicalize().
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
/**
 * Build a blank registry: no forms, CTAs, menus or assets registered yet.
 *
 * @param {string|number|null} [portalId=null] Account portal id; stored as a string,
 *   or as `null` when omitted/nullish.
 * @returns {{portalId: string|null, forms: object, ctas: object, menus: object, assets: object}}
 */
export function emptyRegistry(portalId = null) {
  const normalizedPortal = portalId == null ? null : String(portalId);
  return {
    portalId: normalizedPortal,
    forms: {},
    ctas: {},
    menus: {},
    assets: {},
  };
}
|
|
118
|
+
|
|
119
|
+
/**
 * Copy a loaded (e.g. JSON-parsed) registry object into a fresh registry of the
 * canonical shape. Only the four namespace maps and `portalId` are carried over;
 * a namespace that is missing or not an object is left empty.
 *
 * @param {object} [obj={}] Previously saved registry data.
 * @returns {object} A new registry; the input object is not modified.
 */
export function loadRegistry(obj = {}) {
  const registry = emptyRegistry(obj.portalId ?? null);
  ['forms', 'ctas', 'menus', 'assets'].forEach((namespace) => {
    const stored = obj[namespace];
    const isObjectLike = Boolean(stored) && typeof stored === 'object';
    if (!isObjectLike) return;
    Object.assign(registry[namespace], stored);
  });
  return registry;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Produce a plain snapshot of a registry suitable for serialization (composes with
 * canon.stableStringify). Each namespace map is shallow-copied, so only its own
 * enumerable entries are kept, and `portalId` is stringified (or `null`).
 *
 * @param {object} reg Registry to snapshot.
 * @returns {{portalId: string|null, forms: object, ctas: object, menus: object, assets: object}}
 */
export function saveRegistry(reg) {
  const { portalId, forms, ctas, menus, assets } = reg;
  return {
    portalId: portalId == null ? null : String(portalId),
    forms: { ...forms },
    ctas: { ...ctas },
    menus: { ...menus },
    assets: { ...assets },
  };
}
|
|
138
|
+
|
|
139
|
+
// Pattern kind (a REF_PATTERNS name) -> registry namespace that stores its ids.
// `portalId` is intentionally absent: the portal lives in `registry.portalId`.
const NS_FOR_KIND = {
  // forms
  formGuid: 'forms',

  // CTAs — both CTA shapes share one namespace
  ctaGuid: 'ctas',
  ctaLoad: 'ctas',

  // menus
  menuId: 'menus',

  // assets — HubSpot-hosted and foreign-hosted URLs
  hubfsUrl: 'assets',
  googleUserContentUrl: 'assets',
};
|
|
147
|
+
|
|
148
|
+
// Collapse a hubfsUrl match into a stable { portal, tail } pair.
// The pattern has one portal capture group per path shape (indexes 1-3) followed by
// the tail (index 4); whichever portal group is set wins. `portal` is undefined for
// the portal-less /hs-fs/hubfs/<tail> shape.
function hubfsParts(m) {
  const [, hubPortal, hsFsPortal, hubfsPortal, tail] = m;
  return { portal: hubPortal || hsFsPortal || hubfsPortal, tail };
}
|
|
156
|
+
|
|
157
|
+
// Return the rawId -> logicalKey lookup for one namespace of `reg`, building it on
// first use and memoizing it on the registry under `__rev_<ns>`. Callers that add
// entries to the namespace must delete that property to force a rebuild.
function reverseIndex(reg, ns) {
  const memoProp = `__rev_${ns}`;
  const cached = reg[memoProp];
  if (cached) return cached;

  // Prototype-less map, so raw ids can never collide with inherited members.
  const byRawId = Object.create(null);
  Object.entries(reg[ns] || {}).forEach(([logicalKey, rawId]) => {
    byRawId[String(rawId)] = logicalKey;
  });

  // non-enumerable so it doesn't leak into saveRegistry / stableStringify;
  // configurable so the cache can be dropped with `delete`.
  Object.defineProperty(reg, memoProp, { configurable: true, enumerable: false, value: byRawId });
  return byRawId;
}
|
|
167
|
+
|
|
168
|
+
// ---------------------------------------------------------------------------
|
|
169
|
+
// extractRefs(str) -> [{ kind, rawId, match }]
|
|
170
|
+
// Enumerates EVERY reference occurrence across all shapes. `kind` is the pattern
|
|
171
|
+
// name; `rawId` is the per-account id (guid / portal / asset path); `match` is the
|
|
172
|
+
// full matched substring (useful for callers that want to locate/replace in place).
|
|
173
|
+
// Order of kinds mirrors the canonicalize precedence (asset before bare portal).
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
export function extractRefs(str) {
  if (typeof str !== 'string' || str.length === 0) return [];

  const refs = [];
  const record = (kind, rawId, match) => {
    refs.push({ kind, rawId, match });
  };
  // Run one named pattern over the whole input, handing each match to `visit`.
  const scan = (patternName, visit) => {
    for (const hit of str.matchAll(REF_PATTERNS[patternName])) visit(hit);
  };

  // form_id fields
  scan('formGuid', ([whole, guid]) => record('formGuid', guid, whole));

  // hbspt.cta.load(...) — contributes the CTA guid AND its portal argument
  scan('ctaLoad', ([whole, portal, guid]) => {
    record('ctaLoad', guid, whole);
    record('portalId', portal, portal);
  });

  // every other CTA-guid shape
  scan('ctaGuid', ([whole, guid]) => record('ctaGuid', guid, whole));

  // hosted asset URLs — rawId is the portal-agnostic path tail; the portal embedded
  // in the URL (if any) is reported as its own entry
  scan('hubfsUrl', (hit) => {
    const { portal, tail } = hubfsParts(hit);
    record('hubfsUrl', tail, hit[0]);
    if (portal) record('portalId', portal, portal);
  });

  // foreign googleusercontent images, keyed by their opaque path tail
  scan('googleUserContentUrl', ([whole, blob]) => {
    record('googleUserContentUrl', `googleusercontent/${blob}`, whole);
  });

  // menu ids
  scan('menuId', ([whole, id]) => record('menuId', id, whole));

  // bare portal ids anywhere in the text. This scan is independent of the ones
  // above, so a known portal inside a hubfs URL or cta.load call is reported again.
  scan('portalId', ([whole, portal]) => record('portalId', portal, whole));

  return refs;
}
|
|
212
|
+
|
|
213
|
+
// ---------------------------------------------------------------------------
|
|
214
|
+
// toLogical(kind, rawId, registry) -> logical token string
|
|
215
|
+
// Maps a raw per-account id to its portable logical token using the registry's
|
|
216
|
+
// rawId->logicalKey reverse index. For assets the rawId IS the path tail, which is
|
|
217
|
+
// already portable, so the registry only needs to record it (auto-registered).
|
|
218
|
+
// Throws if a registry mapping is required but missing (forms/ctas/menus): pull-time
|
|
219
|
+
// auto-registration is the caller's job via registerRef(); a hard miss here means a
|
|
220
|
+
// caller asked to logicalize an unregistered id.
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
export function toLogical(kind, rawId, registry) {
  // Kinds that need no registry lookup: the portal sentinel carries no key, and an
  // asset's raw id (its path tail) is used directly as the token key.
  switch (kind) {
    case 'portalId':
      return TOKEN.portal();
    case 'hubfsUrl':
    case 'googleUserContentUrl':
      return TOKEN.asset(String(rawId));
    default:
      break;
  }

  const namespace = NS_FOR_KIND[kind];
  if (!namespace) throw new Error(`toLogical: unknown kind ${kind}`);

  const logicalKey = reverseIndex(registry, namespace)[String(rawId)];
  if (logicalKey == null) {
    throw new Error(`toLogical: no logical key for ${kind} ${rawId} in registry (call registerRef on pull first)`);
  }

  // Registry namespace -> token builder name; anything that is not forms/ctas is
  // a menu at this point.
  let tokenKind = 'menu';
  if (namespace === 'forms') tokenKind = 'form';
  else if (namespace === 'ctas') tokenKind = 'cta';
  return TOKEN[tokenKind](logicalKey);
}
|
|
234
|
+
|
|
235
|
+
// ---------------------------------------------------------------------------
|
|
236
|
+
// registerRef — pull-time helper: ensure a rawId has a logical key in the registry,
|
|
237
|
+
// minting a deterministic key if absent. Returns the logical key. Assets register by
|
|
238
|
+
// their path tail. This is what makes canonicalize() succeed on first pull.
|
|
239
|
+
// ---------------------------------------------------------------------------
|
|
240
|
+
/**
 * Pull-time helper: make sure `rawId` has a logical key in `reg`, minting one if
 * needed, and return that key.
 *
 * - `portalId`: records the portal on the registry only if none is set yet; returns
 *   `null` (the portal token has no key).
 * - `hubfsUrl` / `googleUserContentUrl`: the raw id (path tail) is its own key; it is
 *   marked present in `reg.assets` and returned.
 * - forms / CTAs / menus: returns the existing key when the raw id is already
 *   registered; otherwise stores it under `logicalKey` if given, else under a minted
 *   key that is guaranteed not to collide with a key already in the namespace.
 *
 * An explicit `logicalKey` is taken as-is: if that key already exists it is
 * re-pointed at `rawId`.
 *
 * @param {object} reg Registry to update (mutated in place).
 * @param {string} kind Pattern kind (`formGuid`, `ctaGuid`, `ctaLoad`, `menuId`,
 *   `hubfsUrl`, `googleUserContentUrl`, `portalId`).
 * @param {string|number} rawId Per-account id, or the asset path tail.
 * @param {string|null} [logicalKey=null] Preferred human-friendly key.
 * @returns {string|null} The logical key, or `null` for `portalId`.
 * @throws {Error} If `kind` is unknown, or no unused key can be minted.
 */
export function registerRef(reg, kind, rawId, logicalKey = null) {
  const raw = String(rawId);

  if (kind === 'portalId') {
    if (reg.portalId == null) reg.portalId = raw;
    return null;
  }

  if (kind === 'hubfsUrl' || kind === 'googleUserContentUrl') {
    reg.assets[raw] = true;
    delete reg.__rev_assets; // drop the memoized reverse index
    return raw;
  }

  const ns = NS_FOR_KIND[kind];
  if (!ns) throw new Error(`registerRef: unknown kind ${kind}`);

  const rev = reverseIndex(reg, ns);
  if (rev[raw] != null) return rev[raw];

  const key = logicalKey || mintKey(ns, raw, reg[ns]);
  reg[ns][key] = raw;
  delete reg[`__rev_${ns}`]; // namespace changed: drop the memoized reverse index
  return key;
}

// Deterministic fallback logical key when the caller has no human-friendly name yet:
// `<singular-namespace>-<first 8 chars of the id, hyphens removed>`
// (forms->form-xxxx, ctas->cta-xxxx, menus->menu-xxxx).
//
// Two different ids can share their first 8 characters. Writing both under the same
// key would silently overwrite the first mapping, so when the short key is already
// taken by another id in `taken` the prefix is lengthened one character at a time
// until it is unused.
function mintKey(ns, rawId, taken = {}) {
  const raw = String(rawId);
  const compact = raw.replace(/-/g, '');
  const prefix = ns.slice(0, -1);
  const isFree = (key) => !Object.prototype.hasOwnProperty.call(taken, key) || String(taken[key]) === raw;

  let length = 8;
  let key = `${prefix}-${compact.slice(0, length)}`;
  while (!isFree(key) && length < compact.length) {
    length += 1;
    key = `${prefix}-${compact.slice(0, length)}`;
  }
  if (!isFree(key)) {
    throw new Error(`registerRef: cannot mint a unique ${prefix} key for ${raw}; pass an explicit logicalKey`);
  }
  return key;
}
|
|
265
|
+
|
|
266
|
+
// ---------------------------------------------------------------------------
|
|
267
|
+
// canonicalize(str, registry) -> portable str with raw refs replaced by tokens.
|
|
268
|
+
// PRECEDENCE (critical for reversibility):
|
|
269
|
+
// 1. hubfsUrl → @asset:<path> (consumes the portal segment inside the URL)
|
|
270
|
+
// 2. ctaLoad → hbspt.cta.load(@portal,'@cta:key', (portal + guid together)
|
|
271
|
+
// 3. formGuid → "form_id": "@form:key"
|
|
272
|
+
// 4. ctaGuid → @cta:key (all remaining cta-guid shapes)
|
|
273
|
+
// 5. menuId → @menu:key
|
|
274
|
+
// 6. portalId → @portal (any remaining bare portal id)
|
|
275
|
+
// Auto-registers any ref it has not seen so first pull is self-bootstrapping.
|
|
276
|
+
// ---------------------------------------------------------------------------
|
|
277
|
+
// Matches a keyed logical token that an earlier pass has already emitted. Used with
// String.prototype.split: the single capture group makes the tokens land at the odd
// indexes of the result and the surrounding text at the even indexes.
const EMITTED_TOKEN = /(@(?:form|cta|menu):[A-Za-z0-9_-]+|@asset:[^\s"'\\),]+)/;

// Apply `pattern`/`replacer` to `text`, but never inside an already-emitted token.
// Without this, a later pass can rewrite part of a token's key — e.g. a known portal
// id inside an asset path turns `@asset:blog/529456/a.png` into
// `@asset:blog/@portal/a.png`, which no longer matches the key registered for it.
function replaceOutsideTokens(text, pattern, replacer) {
  return text
    .split(EMITTED_TOKEN)
    .map((segment, index) => (index % 2 === 1 ? segment : segment.replace(pattern, replacer)))
    .join('');
}

/**
 * Replace every raw per-account reference in `str` with its logical token,
 * registering refs in `registry` as they are encountered. Passes run in the
 * precedence order documented above (assets, cta.load, form_id, other CTA guids,
 * menu ids, bare portal ids). Each pass rewrites only the id itself and leaves the
 * rest of the matched text untouched.
 *
 * @param {string} str Content to canonicalize; non-strings and '' are returned as-is.
 * @param {object} registry Source-account registry (mutated by registration).
 * @returns {string} The content with raw ids replaced by logical tokens.
 */
export function canonicalize(str, registry) {
  if (typeof str !== 'string' || str.length === 0) return str;
  let s = str;

  // 1. hosted asset URLs -> @asset:<pathTail> (host + portal collapse into the token).
  //    All three HubSpot path shapes (/hubfs/<portal>/, /hub/<portal>/hubfs/,
  //    /hs-fs/hubfs/[<portal>/]) fold to the same portal-agnostic tail.
  s = s.replace(REF_PATTERNS.hubfsUrl, (...args) => {
    const { tail } = hubfsParts(args.slice(0, 5)); // [whole, g1, g2, g3, g4]
    return TOKEN.asset(registerRef(registry, 'hubfsUrl', tail));
  });

  // 1b. foreign googleusercontent image URLs -> @asset:googleusercontent/<blob>
  s = s.replace(REF_PATTERNS.googleUserContentUrl, (_whole, blob) =>
    TOKEN.asset(registerRef(registry, 'googleUserContentUrl', `googleusercontent/${blob}`)));

  // Function replacers are used for the in-place swaps below so that a `$` in a
  // token can never be interpreted as a replacement pattern.

  // 2. hbspt.cta.load(<portal>,'<guid>' -> hbspt.cta.load(@portal,'@cta:key'
  s = replaceOutsideTokens(s, REF_PATTERNS.ctaLoad, (whole, portal, guid) => {
    registerRef(registry, 'portalId', portal);
    const ctaToken = TOKEN.cta(registerRef(registry, 'ctaGuid', guid));
    return whole
      .replace(portal, () => TOKEN.portal())
      .replace(guid, () => ctaToken);
  });

  // 3. form_id field — swap only the guid so the original spacing around the colon
  //    survives and a later resolve() can reproduce the input byte-for-byte.
  s = replaceOutsideTokens(s, REF_PATTERNS.formGuid, (whole, guid) => {
    const formToken = TOKEN.form(registerRef(registry, 'formGuid', guid));
    return whole.replace(guid, () => formToken);
  });

  // 4. all remaining cta-guid shapes -> swap just the guid for @cta:key in place
  s = replaceOutsideTokens(s, REF_PATTERNS.ctaGuid, (whole, guid) => {
    const ctaToken = TOKEN.cta(registerRef(registry, 'ctaGuid', guid));
    return whole.replace(guid, () => ctaToken);
  });

  // 5. menu ids
  s = replaceOutsideTokens(s, REF_PATTERNS.menuId, (whole, id) => {
    const menuToken = TOKEN.menu(registerRef(registry, 'menuId', id));
    return whole.replace(id, () => menuToken);
  });

  // 6. any remaining bare portal id
  s = replaceOutsideTokens(s, REF_PATTERNS.portalId, (portal) => {
    registerRef(registry, 'portalId', portal);
    return TOKEN.portal();
  });

  return s;
}
|
|
332
|
+
|
|
333
|
+
// ---------------------------------------------------------------------------
|
|
334
|
+
// resolve(str, targetRegistry) -> str with logical tokens replaced by the TARGET
|
|
335
|
+
// account's ids. THROWS (push must hard-fail) listing every logical token that has
|
|
336
|
+
// no target mapping. This is the inverse of canonicalize() and the round-trip
|
|
337
|
+
// guarantee: canonicalize(x, src) then resolve(.., tgt) reproduces x byte-for-byte
|
|
338
|
+
// when src and tgt carry the same rawIds for the same logical keys.
|
|
339
|
+
// ---------------------------------------------------------------------------
|
|
340
|
+
/**
 * Replace every logical token in `str` with the TARGET account's id.
 *
 * - `@portal` becomes `targetRegistry.portalId`.
 * - `@form:` / `@cta:` / `@menu:` tokens become the raw id stored under their key.
 * - `@asset:<path>` becomes the registered URL when the registry entry is a string;
 *   when the entry is any other non-null value (e.g. `true`) the token is left in
 *   place for the caller to expand.
 *
 * @param {string} str Canonical content; non-strings and '' are returned as-is.
 * @param {object} targetRegistry Registry of the account being pushed to.
 * @returns {string} Content with tokens replaced.
 * @throws {Error} Listing (sorted, de-duplicated) every token with no target mapping.
 */
export function resolve(str, targetRegistry) {
  if (typeof str !== 'string' || str.length === 0) return str;
  const missing = [];

  // Read OWN entries only. A plain-object table inherits members such as
  // `constructor` and `toString`; a token like `@form:constructor` must be reported
  // as unmapped rather than resolved to an inherited function.
  const lookup = (table, key) =>
    (table != null && Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);

  const out = str.replace(TOKEN_RE, (token, kind, key, assetKey) => {
    if (token === '@portal') {
      if (targetRegistry.portalId == null) {
        missing.push('@portal');
        return token;
      }
      return String(targetRegistry.portalId);
    }

    if (assetKey != null) {
      // @asset:<pathTail> -> the target's hosted URL for that path. The target
      // registry's assets map records the path; an unregistered path is a miss.
      const entry = lookup(targetRegistry.assets, assetKey);
      if (entry == null) {
        missing.push(`@asset:${assetKey}`);
        return token;
      }
      // entry may be `true` (path known, URL built by caller) or a concrete URL string.
      return typeof entry === 'string' ? entry : token;
    }

    const ns = kind === 'form' ? 'forms' : kind === 'cta' ? 'ctas' : 'menus';
    const raw = lookup(targetRegistry[ns], key);
    if (raw == null) {
      missing.push(`@${kind}:${key}`);
      return token;
    }
    return String(raw);
  });

  if (missing.length) {
    const uniq = [...new Set(missing)].sort();
    throw new Error(
      `resolve: ${uniq.length} logical ref(s) have no mapping in target portal ` +
        `${targetRegistry.portalId ?? '(unknown)'} — push must not proceed: ${uniq.join(', ')}`,
    );
  }
  return out;
}
|
|
383
|
+
|
|
384
|
+
// ---------------------------------------------------------------------------
|
|
385
|
+
// listLogicalTokens(str) -> [{ kind, key, token }] — pure inspection helper used by
|
|
386
|
+
// corpus tests (assert no raw portal ids/GUIDs survive) and by push preflight to
|
|
387
|
+
// pre-validate mappings before any network write.
|
|
388
|
+
// ---------------------------------------------------------------------------
|
|
389
|
+
export function listLogicalTokens(str) {
  if (typeof str !== 'string') return [];

  // One descriptor per token occurrence, in document order.
  return Array.from(str.matchAll(TOKEN_RE), ([token, kind, key, assetKey]) => {
    if (token === '@portal') return { kind: 'portal', key: null, token: '@portal' };
    if (assetKey != null) return { kind: 'asset', key: assetKey, token };
    return { kind, key, token };
  });
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
// sync/lib/sync-state.mjs — per-account registry persistence + content tree root.
|
|
2
|
+
//
|
|
3
|
+
// The Registry (refs.mjs) is PER ACCOUNT and lives in the GITIGNORED .sync-state/
|
|
4
|
+
// directory at the repo root, one file per portal:
|
|
5
|
+
//
|
|
6
|
+
// .sync-state/<portalId>.registry.json
|
|
7
|
+
//
|
|
8
|
+
// It holds the logical-key <-> per-account-id mapping (forms/ctas/menus GUIDs, asset
|
|
9
|
+
// paths, the portal id) that PULL auto-registers and PUSH resolves. It is never
|
|
10
|
+
// committed (see .gitignore `.sync-state/`), because it is account-specific identity,
|
|
11
|
+
// not portable canonical content.
|
|
12
|
+
//
|
|
13
|
+
// This module owns the load/init + save of that file (composing refs.emptyRegistry /
|
|
14
|
+
// loadRegistry / saveRegistry with canonical.stableStringify for diff-stable bytes)
|
|
15
|
+
// and exposes the canonical content/ tree root the adapters write into.
|
|
16
|
+
|
|
17
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, renameSync } from 'node:fs';
|
|
18
|
+
import { join } from 'node:path';
|
|
19
|
+
|
|
20
|
+
import { emptyRegistry, loadRegistry, saveRegistry } from './refs.mjs';
|
|
21
|
+
import { stableStringify } from './canonical.mjs';
|
|
22
|
+
import { loadConfigSyncFallback } from '../config.mjs';
|
|
23
|
+
|
|
24
|
+
// Default-argument provider for the helpers below: whatever configuration
// loadConfigSyncFallback() yields when the caller passes no `cfg`.
function fallbackConfig() {
  const cfg = loadConfigSyncFallback();
  return cfg;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Resolve the canonical content tree directory.
 *
 * `cfg.contentDirPath`, when set, is returned as-is. Otherwise the directory name
 * (`cfg.contentDir`, default `content`) is joined onto `cfg.root` (default: the
 * current working directory).
 *
 * @param {object} [cfg] Configuration; defaults to the fallback configuration.
 * @returns {string} Path of the content directory.
 */
export function contentDir(cfg = fallbackConfig()) {
  if (cfg.contentDirPath) return cfg.contentDirPath;
  const base = cfg.root || process.cwd();
  const folder = cfg.contentDir || 'content';
  return join(base, folder);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Resolve the per-account state directory (gitignored `.sync-state/` by default).
 *
 * `cfg.syncStateDirPath`, when set, is returned as-is. Otherwise the directory name
 * (`cfg.syncStateDir`, default `.sync-state`) is joined onto `cfg.root` (default:
 * the current working directory).
 *
 * @param {object} [cfg] Configuration; defaults to the fallback configuration.
 * @returns {string} Path of the state directory.
 */
export function syncStateDir(cfg = fallbackConfig()) {
  if (cfg.syncStateDirPath) return cfg.syncStateDirPath;
  const base = cfg.root || process.cwd();
  const folder = cfg.syncStateDir || '.sync-state';
  return join(base, folder);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Location of one portal's registry file: `<syncStateDir>/<portalId>.registry.json`.
 *
 * @param {string|number} portalId Portal whose registry file is wanted.
 * @param {object} [cfg] Configuration; defaults to the fallback configuration.
 * @returns {string} Path of the registry file.
 */
export function registryPath(portalId, cfg = fallbackConfig()) {
  const stateDir = syncStateDir(cfg);
  const fileName = `${String(portalId)}.registry.json`;
  return join(stateDir, fileName);
}
|
|
42
|
+
|
|
43
|
+
/**
 * loadAccountRegistry(portalId) -> Registry
 *
 * Load the portal's registry file if it exists, else initialize an empty registry
 * (so @portal resolves even on a first push). The returned registry's `portalId` is
 * always forced to the given portal, overriding whatever the file recorded.
 *
 * @param {string|number} portalId Account to load the registry for.
 * @param {object} [cfg] Configuration; defaults to the fallback configuration.
 * @returns {object} The account's registry.
 * @throws {Error} "Corrupt registry <file>: ..." when the file exists but cannot be
 *   read, parsed or normalized; the underlying error is attached as `cause`.
 */
export function loadAccountRegistry(portalId, cfg = fallbackConfig()) {
  const pid = String(portalId);
  const file = registryPath(pid, cfg);
  let reg;
  if (existsSync(file)) {
    try {
      reg = loadRegistry(JSON.parse(readFileSync(file, 'utf8')));
    } catch (e) {
      // Keep the original error (and its stack) reachable for debugging.
      throw new Error(`Corrupt registry ${file}: ${e.message}`, { cause: e });
    }
  } else {
    reg = emptyRegistry(pid);
  }
  // The registry MUST carry this account's portal id (it may be absent in an
  // older/empty file). Force it to the account we're operating on.
  reg.portalId = pid;
  return reg;
}
|
|
68
|
+
|
|
69
|
+
/**
 * persistAccountRegistry(portalId, registry) -> void
 *
 * Write the registry to the portal's registry file, creating the state directory
 * (and any missing parents) first. The bytes are
 * `stableStringify(saveRegistry(registry))`.
 *
 * @param {string|number} portalId Account the registry belongs to (names the file).
 * @param {object} registry Registry to persist.
 * @param {object} [cfg] Configuration; defaults to the fallback configuration.
 * @returns {void}
 */
export function persistAccountRegistry(portalId, registry, cfg = fallbackConfig()) {
  mkdirSync(syncStateDir(cfg), { recursive: true });

  // Stage the new content in a pid-suffixed sibling file and rename it over the
  // target, so the live file is only ever replaced by a completely written one.
  const target = registryPath(portalId, cfg);
  const scratch = `${target}.tmp-${process.pid}`;
  const payload = stableStringify(saveRegistry(registry));
  writeFileSync(scratch, payload);
  renameSync(scratch, target);
}
|