hubspot-cms-sync 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +52 -0
  3. package/bin/hubspot-cms-sync.mjs +115 -0
  4. package/docs/CONFIGURATION.md +83 -0
  5. package/docs/GITHUB_ACTIONS.md +70 -0
  6. package/docs/MIGRATION_PLAN.md +361 -0
  7. package/docs/PLAN_REVIEW.md +42 -0
  8. package/docs/SKILL_DISTRIBUTION.md +79 -0
  9. package/examples/github-actions/ci.yml +56 -0
  10. package/examples/github-actions/preview.yml +71 -0
  11. package/examples/github-actions/publish.yml +82 -0
  12. package/examples/hubspot-cms-sync.config.mjs +45 -0
  13. package/examples/site.manifest.json +19 -0
  14. package/package.json +41 -0
  15. package/skill/SKILL.md +54 -0
  16. package/skill/references/commands.md +54 -0
  17. package/skill/references/config.md +25 -0
  18. package/skill/references/failures.md +58 -0
  19. package/skill/references/github-actions.md +56 -0
  20. package/skill/references/screenshots-and-fidelity.md +33 -0
  21. package/src/adapters/assets.mjs +576 -0
  22. package/src/adapters/blog.mjs +921 -0
  23. package/src/adapters/content.mjs +213 -0
  24. package/src/adapters/forms.mjs +569 -0
  25. package/src/adapters/pages.mjs +463 -0
  26. package/src/adapters/theme.mjs +503 -0
  27. package/src/config.mjs +113 -0
  28. package/src/corpus-scan.mjs +248 -0
  29. package/src/cta-inventory.mjs +352 -0
  30. package/src/index.mjs +3 -0
  31. package/src/lib/canonical.mjs +234 -0
  32. package/src/lib/hub.mjs +197 -0
  33. package/src/lib/orchestrate.mjs +141 -0
  34. package/src/lib/refs.mjs +398 -0
  35. package/src/lib/sync-state.mjs +86 -0
  36. package/src/manifest.mjs +353 -0
  37. package/src/preflight.mjs +385 -0
  38. package/src/pull.mjs +99 -0
  39. package/src/push.mjs +354 -0
  40. package/src/republish.mjs +102 -0
@@ -0,0 +1,248 @@
1
+ #!/usr/bin/env node
2
+ // scripts/corpus-scan.mjs — CORPUS SCAN: guards the committed canonical content tree
3
+ // against NON-PORTABLE values that would break a push into a fresh HubSpot account.
4
+ //
5
+ // Stage 1-3 (sync/lib/refs.mjs) canonicalize per-account ids into LOGICAL tokens
6
+ // (@form / @cta / @asset / @menu / @portal) on pull, and resolve() injects the target
7
+ // account's ids on push — HARD-FAILING if a token has no target mapping. That round-trip
8
+ // only holds if the committed tree contains tokens, not raw ids. This scanner is the
9
+ // invariant check: it walks content/** (the canonical store) plus templates/, modules/
10
+ // and js/ (which embed refs in hand-authored HubL/JS) and FLAGS every forbidden literal:
11
+ //
12
+ // - literal portal ids 529456 (prod) / 246389711 (dev)
13
+ // - raw form GUIDs "form_id": "<guid>"
14
+ // - hosted asset URLs https://…/hubfs/<portal>/… + *.hubspotusercontent*
15
+ // - hbspt.cta.load(<portal>,…) untokenized CTA loader
16
+ // - {{cta('<guid>')}} untokenized CTA shortcode
17
+ // - bare CTA GUIDs "guid":"<guid>", cta/redirect/…, pg=<guid>, hs-cta-<guid>
18
+ // - page / blog / module numeric ids "id": <bigint>, contentId, etc.
19
+ //
20
+ // A line is CLEAN if the only refs on it are @logical tokens. The scan is PURE (string +
21
+ // fs walk, no API) and exported as `scan(dir)` so the node:test suite can drive it over
22
+ // deterministic fixtures rather than the live (still-dirty) tree.
23
+ //
24
+ // node scripts/corpus-scan.mjs [dir...] # default dirs: content templates modules js
25
+ // exit 0 = clean, 1 = forbidden values found
26
+ //
27
+ // NOTE: today's content/ still holds ~145 raw junk pages; running the CLI documents that
28
+ // debt. The TEST uses fixtures (test/integration/corpus.test.mjs) so it stays deterministic.
29
+
30
+ import { readdirSync, statSync, readFileSync } from 'node:fs';
31
+ import { join, relative, extname } from 'node:path';
32
+
33
+ import { KNOWN_PORTALS } from './lib/refs.mjs';
34
+
35
// Regex source for a lowercase-hex GUID in 8-4-4-4-12 form (no anchors, no flags).
const GUID = [8, 4, 4, 4, 12].map((width) => `[0-9a-f]{${width}}`).join('-');
// Alternation of every known portal id, ready to embed in a regex group.
const PORTALS = KNOWN_PORTALS.join('|'); // 529456|246389711

// Roots scanned when the caller passes none.
export const DEFAULT_DIRS = ['content', 'templates', 'modules', 'js'];
// Only files with one of these extensions are read by walk().
const SCAN_EXT = new Set(['.json', '.html', '.js', '.mjs', '.css', '.hubl', '.txt', '.md']);
40
+
41
+ // ---------------------------------------------------------------------------
42
+ // RULES — each has an id, a human label, and a `find(line)` that yields the
43
+ // offending substring(s). Rules describe NON-PORTABLE shapes; anything that is
44
+ // already an `@logical` token is, by construction, not matched by these.
45
+ //
46
+ // Ordering matters only for which rule "claims" a match in the report; a single
47
+ // bad substring may satisfy several rules, which is fine — we de-dupe per line by
48
+ // the matched text so a hosted-URL hit isn't double-counted as a bare-portal hit.
49
+ // ---------------------------------------------------------------------------
50
// Each row is [id, label, regex source, regex flags]; rows are turned into
// { id, label, re } objects below. Row order is the rule priority used by scanText().
export const RULES = [
  [
    'hosted-asset-url',
    'hosted hubfs/hubspotusercontent asset URL (use @asset:<path>)',
    // Any HubSpot file host path shape: /hubfs/<portal>/, /hub/<portal>/hubfs/,
    // /hs-fs/hubfs/[<portal>/] (the portal segment is optional there), OR any
    // hubspotusercontent host. The match is the whole URL up to a delimiter.
    `https?://[a-z0-9.-]+/(?:hub/\\d{5,}/hubfs|hs-fs/hubfs(?:/\\d{5,})?|hubfs/\\d{5,})/[^"'\\\\\\s),]+` +
      `|https?://[a-z0-9.-]*hubspotusercontent[a-z0-9.-]*/[^"'\\\\\\s),]+`,
    'gi',
  ],
  [
    'googleusercontent-url',
    'foreign googleusercontent image URL (use @asset:googleusercontent/<blob>)',
    `https?://lh[0-9]+\\.googleusercontent\\.com/[^"'\\\\\\s),]+`,
    'gi',
  ],
  [
    'cta-load',
    'hbspt.cta.load(<portal>,…) (use hbspt.cta.load(@portal,\'@cta:key\'…))',
    `hbspt\\.cta\\.load\\(\\s*\\d{5,}\\s*,\\s*['"]${GUID}['"]`,
    'gi',
  ],
  [
    'cta-shortcode',
    "untokenized {{cta('<guid>')}} / {{ cta(\"<guid>\") }} (use {{cta('@cta:key')}})",
    `\\{\\{\\s*cta\\(\\s*['"]${GUID}['"]`,
    'gi',
  ],
  [
    'form-guid',
    'raw form GUID in "form_id" (use @form:key)',
    `"form_id"\\s*:\\s*"${GUID}"`,
    'gi',
  ],
  [
    'cta-guid',
    'bare CTA GUID (use @cta:key)',
    `(?:"guid"\\s*:\\s*"|/cta/(?:redirect|default)/\\d{5,}/|[?&]pg=|hs-cta(?:-wrapper|-img|-ie-element|-node)?-|data-hs-img-pg=")${GUID}`,
    'gi',
  ],
  [
    'portal-id',
    'literal portal id (use @portal)',
    `\\b(?:${PORTALS})\\b`,
    'g',
  ],
  [
    'numeric-content-id',
    'numeric page/blog/module id (canonical store must key by slug/path, not id)',
    // An id-bearing JSON key followed by a run of 8+ digits, optionally quoted.
    // The trailing negative lookahead rejects a value that continues with another
    // digit or a dash, so a UUID whose first group happens to be all-decimal
    // (`"id": "85836571-317e-..."`) is not reported as a numeric id.
    `"(?:id|contentId|content_id|pageId|page_id|blogId|blog_id|moduleId|module_id|parentId|parent_id|portalId|portal_id|formId|menuId|themeId|groupId)"\\s*:\\s*"?(\\d{8,})"?(?![\\d-])`,
    'g',
  ],
].map(([id, label, source, flags]) => ({ id, label, re: new RegExp(source, flags) }));
112
+
113
+ // `@logical` token grammar (mirrors refs.mjs TOKEN_RE). A line carrying ONLY these
114
+ // for its identity is portable and must NOT be flagged.
115
const TOKEN_RE = /@(?:form|cta|menu):[A-Za-z0-9_-]+|@asset:[^\s"'\\)]+|@portal\b/g;

// ---------------------------------------------------------------------------
// scanText(text, file) -> [{ file, line, rule, label, match }]
//
// Runs every rule in RULES over each line of `text` (split on '\n'; `line` in the
// result is 1-based). No I/O.
//
//   text  string to scan; a non-string or empty value yields [].
//   file  label copied into each finding (defaults to '<text>').
//
// Filtering, applied per line:
//   * a match lying wholly inside an `@logical` token span is ignored;
//   * a `portal-id` match lying wholly inside the SPAN of a match an earlier rule
//     produced (e.g. the portal segment of a hosted URL) is ignored. This is
//     positional: a bare portal id elsewhere on the same line is still reported,
//     even when its digits also occur inside that URL;
//   * identical match text is reported once per line, attributed to the first
//     rule (RULES order) that produced it.
// ---------------------------------------------------------------------------
export function scanText(text, file = '<text>') {
  if (typeof text !== 'string' || text.length === 0) return [];

  const findings = [];
  const within = (spans, start, end) => spans.some(([s, e]) => start >= s && end <= e);

  text.split('\n').forEach((line, idx) => {
    if (line.length === 0) return;

    // Spans covered by an @logical token; anything inside them is portable.
    const tokenSpans = Array.from(line.matchAll(TOKEN_RE), (t) => [t.index, t.index + t[0].length]);
    const claimedSpans = []; // [start, end) of every non-token match seen so far on this line
    const claimedText = new Set(); // match text already reported on this line

    for (const rule of RULES) {
      for (const m of line.matchAll(rule.re)) {
        const start = m.index;
        const end = start + m[0].length;
        if (within(tokenSpans, start, end)) continue;
        // Suppress by position, not by text, so only the portal id that is
        // actually part of a longer claimed match is dropped.
        if (rule.id === 'portal-id' && within(claimedSpans, start, end)) continue;
        // Record the span even for a de-duplicated repeat so a portal id inside
        // the repeat is still recognised as covered.
        claimedSpans.push([start, end]);
        if (claimedText.has(m[0])) continue;
        claimedText.add(m[0]);
        findings.push({ file, line: idx + 1, rule: rule.id, label: rule.label, match: m[0] });
      }
    }
  });

  return findings;
}
156
+
157
+ // ---------------------------------------------------------------------------
158
+ // walk(dir) -> [absolute file paths] of scannable files. Skips dot-dirs,
159
+ // node_modules, and the gitignored .sync-state. Pure-ish (fs reads only).
160
+ // ---------------------------------------------------------------------------
161
// ---------------------------------------------------------------------------
// walk(dir) -> [file paths]
//
// Recursively lists the scannable files under `dir`. Each returned path is `dir`
// joined with the entry names, so it is absolute only when `dir` is.
//
//   * A directory that cannot be listed (missing, unreadable) yields [].
//   * Entries whose name starts with '.' and `node_modules` are skipped.
//   * Entries that cannot be stat'ed are skipped.
//   * Only files whose extension is in SCAN_EXT are returned.
//
// Entries are visited in sorted name order so the result does not depend on the
// order readdirSync happens to return them in.
// ---------------------------------------------------------------------------
export function walk(dir) {
  let entries;
  try {
    entries = readdirSync(dir);
  } catch {
    return []; // missing dir is not an error — caller passes a default set
  }

  const found = [];
  for (const name of [...entries].sort()) {
    if (name.startsWith('.') || name === 'node_modules') continue;
    const full = join(dir, name);
    let st;
    try {
      st = statSync(full);
    } catch {
      continue;
    }
    if (st.isDirectory()) {
      // Loop instead of push(...spread) so a very large subtree cannot exceed
      // the engine's argument-count limit.
      for (const nested of walk(full)) found.push(nested);
    } else if (SCAN_EXT.has(extname(name))) {
      found.push(full);
    }
  }
  return found;
}
184
+
185
+ // ---------------------------------------------------------------------------
186
+ // scan(dir | [dirs]) -> { findings, files, scanned }
187
+ // Walks the given root(s), scans each file, returns all findings with paths
188
+ // relative to the first root for stable, portable output. PURE wrt API (no network).
189
+ // ---------------------------------------------------------------------------
190
// ---------------------------------------------------------------------------
// scan(dir | [dirs]) -> { findings, files, scanned }
//
//   findings  every scanText() hit across all roots
//   files     distinct `file` labels that have at least one finding
//   scanned   number of files walk() returned (files that then fail to read are
//             skipped but still counted)
//
// A file is labelled by its path relative to the FIRST root; when that relative
// path would start with '..' the path walk() returned is used instead.
// ---------------------------------------------------------------------------
export function scan(dirs = DEFAULT_DIRS) {
  const roots = Array.isArray(dirs) ? dirs : [dirs];
  const [base] = roots;
  const visited = roots.flatMap((root) => walk(root));

  const findings = [];
  for (const path of visited) {
    let text;
    try {
      text = readFileSync(path, 'utf8');
    } catch {
      continue;
    }
    const rel = relative(base, path) || path;
    const shown = rel.startsWith('..') ? path : rel;
    findings.push(...scanText(text, shown));
  }

  const flagged = new Set(findings.map((hit) => hit.file));
  return { findings, files: [...flagged], scanned: visited.length };
}
210
+
211
+ // ---------------------------------------------------------------------------
212
+ // CLI
213
+ // ---------------------------------------------------------------------------
214
// CLI body: scans `argv` (or DEFAULT_DIRS when empty) and returns the exit code,
// 0 when nothing was found and 1 otherwise. The clean summary goes to stdout;
// the failure report (per-file hits, most hits first, at most 50 lines per file,
// then per-rule totals) goes to stderr.
function main(argv) {
  const { findings, scanned } = scan(argv.length ? argv : DEFAULT_DIRS);
  if (findings.length === 0) {
    console.log(`corpus-scan: clean — ${scanned} file(s) scanned, 0 non-portable values.`);
    return 0;
  }

  // Bucket hits by file and tally them by rule in a single pass.
  const byFile = new Map();
  const byRule = new Map();
  for (const hit of findings) {
    const bucket = byFile.get(hit.file);
    if (bucket) bucket.push(hit);
    else byFile.set(hit.file, [hit]);
    byRule.set(hit.rule, (byRule.get(hit.rule) || 0) + 1);
  }

  console.error(`corpus-scan: FAIL — ${findings.length} non-portable value(s) in ${byFile.size} file(s) (${scanned} scanned).\n`);

  const filesByHitCount = [...byFile].sort(([, a], [, b]) => b.length - a.length);
  for (const [file, hits] of filesByHitCount) {
    console.error(`${file} (${hits.length})`);
    hits.slice(0, 50).forEach((h) => {
      console.error(` L${h.line} ${h.rule}: ${h.match}`);
    });
    if (hits.length > 50) console.error(` …and ${hits.length - 50} more`);
  }

  console.error('\nby rule:');
  const rulesByCount = [...byRule].sort(([, a], [, b]) => b - a);
  for (const [rule, n] of rulesByCount) {
    console.error(` ${rule}: ${n}`);
  }
  return 1;
}
244
+
245
+ // ESM entry guard.
246
// Run the CLI only when this module is the script Node was started with.
// NOTE(review): this is a literal string comparison against `file://` + argv[1];
// it presumably misses Windows paths and paths with percent-encoded characters —
// `url.pathToFileURL(process.argv[1]).href` would be the robust form. Confirm.
const invokedAsScript = import.meta.url === `file://${process.argv[1]}`;
if (invokedAsScript) {
  process.exit(main(process.argv.slice(2)));
}
@@ -0,0 +1,352 @@
1
+ // sync/cta-inventory.mjs — READ-ONLY legacy-CTA inventory + resolution helpers.
2
+ //
3
+ // WHY (codex #3/#5, gap-closure "REVISED approach"): legacy HubSpot CTAs are NOT
4
+ // portable. There is no working v3 CTA CRUD API (every documented list endpoint —
5
+ // /cms/v3/cta, /content/api/v2/cta(-buttons), /calls-to-action/v2/buttons, … —
6
+ // 404s on a real portal; the legacy CTA editor is sunset). The ONLY CTAs in this
7
+ // corpus live in legacy blog/landing-page bodies as the classic embed block:
8
+ //
9
+ // <!--HubSpot Call-to-Action Code --><span class="hs-cta-wrapper" id="hs-cta-wrapper-<GUID>">
10
+ // <span class="hs-cta-node hs-cta-<GUID>" id="hs-cta-<GUID>">
11
+ // <!--[if lte IE 8]><div id="hs-cta-ie-element"></div><![endif]-->
12
+ // <a href="https://cta-redirect.hubspot.com/cta/redirect/<PORTAL>/<GUID>" [target=…] >
13
+ // <img class="hs-cta-img" id="hs-cta-img-<GUID>" src="https://no-cache.hubspot.com/cta/default/<PORTAL>/<GUID>.png" alt="<NAME>"/>
14
+ // </a>
15
+ // </span>
16
+ // <script src="https://js.hscta.net/cta/current.js"></script>
17
+ // <script>hbspt.cta.load(<PORTAL>, '<GUID>', {});</script>
18
+ // </span><!-- end HubSpot Call-to-Action Code -->
19
+ //
20
+ // Those carry a per-account portal id + a CTA GUID — neither is portable, and
21
+ // canonicalize() would turn them into `@cta:<guid-prefix>` tokens that NO adapter
22
+ // can resolve (the push preflight then fails-closed). Blind link-conversion would
23
+ // be silent fidelity loss (codex #5: analytics / redirect / styling).
24
+ //
25
+ // REVISED approach: build a one-time inventory mapping each CTA GUID to its
26
+ // { destinationHref, renderedHtml, name, tracked } by RESOLVING the public
27
+ // cta-redirect interstitial (the same URL the embed's own fallback <a> points at).
28
+ // That interstitial is an HTML page whose body contains
29
+ // var redirectUrl = "<final destination>";
30
+ // so we extract the real destination href without any private API. The blog
31
+ // canonicalizer (blog.mjs) then rewrites each embed to a styled, portable
32
+ // <a class="btn" href="<destination>">…</a> — NO @cta token, NO per-account GUID.
33
+ //
34
+ // PRODUCTION 529456 is READ-ONLY. This tool only READS (an outbound GET to the
35
+ // public cta-redirect host + GETs against the account); it NEVER writes to any
36
+ // account. Output is cached to the gitignored .sync-state/<portal>.cta-inventory.json.
37
+ //
38
+ // Usage: node sync/cta-inventory.mjs <account> [--content content] [--refresh]
39
+ //
40
+ // Pure helpers (no I/O — unit-testable without network):
41
+ // ctaGuidsInText(text) -> [guid, …] (every CTA guid shape in a string)
42
+ // extractRedirectUrl(html) -> destination href | null (from the interstitial)
43
+ // ctaNameFromEmbed(html, guid) -> alt-text name | null
44
+ // resolveCtaEmbeds(text, inv) -> { text, unresolved:[guid…], notes:[…] }
45
+
46
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, readdirSync, statSync } from 'node:fs';
47
+ import { join, resolve as resolvePath } from 'node:path';
48
+
49
+ import { account as realAccount } from './lib/hub.mjs';
50
+
51
// Regex source for a lowercase-hex GUID in 8-4-4-4-12 form.
const GUID = [8, 4, 4, 4, 12].map((width) => `[0-9a-f]{${width}}`).join('-');
const CTA_REDIRECT_HOST = 'https://cta-redirect.hubspot.com';

// ── pure: enumerate every CTA GUID shape in a string ────────────────────────────

// One global regex per place a legacy CTA GUID shows up in an embed; capture
// group 1 is always the GUID. In order: the redirect <a href>, the default image
// URL, the hbspt.cta.load() call, the wrapper/node/img element ids, the
// data-hs-img-pg attribute, and the {{cta('guid')}} HubL shortcode.
const GUID_SHAPES = [
  `cta/redirect/\\d{5,}/(${GUID})`,
  `cta/default/\\d{5,}/(${GUID})`,
  `hbspt\\.cta\\.load\\(\\s*\\d{5,}\\s*,\\s*['"](${GUID})['"]`,
  `hs-cta(?:-wrapper|-node|-img|-ie-element)?-(${GUID})`,
  `data-hs-img-pg=["'](${GUID})["']`,
  `\\{\\{\\s*cta\\(\\s*['"](${GUID})['"]`,
].map((source) => new RegExp(source, 'g'));
67
+
68
// Returns every CTA GUID matched by any GUID_SHAPES pattern in `text`, lowercased,
// de-duplicated, and ordered by where each GUID FIRST appears in the text
// (not grouped by which pattern matched it). A non-string or empty `text`
// yields [].
export function ctaGuidsInText(text) {
  if (typeof text !== 'string' || text.length === 0) return [];

  const hits = [];
  for (const re of GUID_SHAPES) {
    for (const m of text.matchAll(re)) {
      // Position of the captured GUID itself, not of the surrounding shape, so
      // shapes with different prefix lengths still compare by where the GUID sits.
      hits.push({ at: m.index + m[0].indexOf(m[1]), guid: m[1].toLowerCase() });
    }
  }
  hits.sort((a, b) => a.at - b.at);

  // A Set keeps first-insertion order, which is now first-occurrence order.
  return [...new Set(hits.map((h) => h.guid))];
}
83
+
84
+ // ── pure: extract the destination href from a cta-redirect interstitial ──────────
85
+
86
+ // The interstitial sets `var redirectUrl = "<dest>";` (and uses window.location).
87
+ // Fall back to a plain meta-refresh / Location-style URL if the JS var is absent.
88
// Pulls the destination URL out of an interstitial page, trying in order:
//   1. a `var redirectUrl = "<dest>"` assignment,
//   2. a <meta http-equiv="refresh" … url=<dest>> tag,
//   3. a `window.location[.href] = "<dest>"` assignment.
// The first capture found is passed through decodeHtml(). Returns null for a
// non-string/empty input or when none of the shapes match.
export function extractRedirectUrl(html) {
  if (typeof html !== 'string' || html.length === 0) return null;

  const shapes = [
    /var\s+redirectUrl\s*=\s*["']([^"']+)["']/,
    /<meta[^>]+http-equiv=["']refresh["'][^>]+url=([^"'>\s]+)/i,
    /window\.location(?:\.href)?\s*=\s*["']([^"']+)["']/,
  ];
  for (const shape of shapes) {
    const hit = html.match(shape);
    if (hit) return decodeHtml(hit[1]);
  }
  return null;
}
98
+
99
// Decodes the four entities this module expects in an extracted URL:
// &amp; → &, &quot; → ", &#47; and &#x2F; (hex digits case-insensitive) → /.
// Decoding is a SINGLE pass, so text that was escaped twice is unescaped once
// (`&amp;quot;` → `&quot;`) rather than collapsing all the way to `"`.
// The argument is coerced with String().
function decodeHtml(s) {
  const entities = { '&amp;': '&', '&quot;': '"', '&#47;': '/', '&#x2f;': '/' };
  return String(s).replace(/&(?:amp|quot|#47|#[xX]2[fF]);/g, (entity) => entities[entity.toLowerCase()]);
}
106
+
107
+ // ── pure: best-effort human name from the embed (the image alt text) ─────────────
108
+
109
// Best-effort human name for a CTA: the `alt` text of its image in `html`.
//
//   html  markup to search; a non-string yields null.
//   guid  CTA GUID; when it is a non-empty string, the alt of the element carrying
//         `hs-cta-img-<guid>` is preferred.
//
// Falls back to the first non-empty alt found after any `hs-cta-img` in the same
// tag. Returns the alt text exactly as written in the markup (not entity-decoded),
// or null when there is none.
//
// The alt value must be closed by the same quote character that opened it, so an
// apostrophe inside a double-quoted alt (or vice versa) does not cut the name short.
export function ctaNameFromEmbed(html, guid) {
  if (typeof html !== 'string') return null;

  // Group 1 = opening quote, group 2 = value up to the matching closing quote.
  const altValue = (quantifier) => `\\balt=(["'])((?:(?!\\1)[\\s\\S])${quantifier})\\1`;

  if (typeof guid === 'string' && guid.length > 0) {
    // Escape regex metacharacters so the guid is matched literally.
    const literalGuid = guid.replace(/[.*+?^${}()|[\]\\-]/g, '\\$&');
    const exact = html.match(new RegExp(`hs-cta-img-${literalGuid}[^>]*?${altValue('*')}`, 'i'));
    if (exact && exact[2]) return exact[2];
  }

  // Fallback: the first hs-cta-img with a non-empty alt, whichever guid it belongs to.
  const any = html.match(new RegExp(`hs-cta-img[^>]*?${altValue('+')}`, 'i'));
  return any ? any[2] : null;
}
122
+
123
+ // ── pure: rewrite CTA embeds in a body to portable styled <a> links ──────────────
124
+ //
125
+ // resolveCtaEmbeds(text, inventory) -> { text, unresolved, notes }
126
+ //
127
+ // inventory: { [guid]: { destinationHref, name?, tracked? } }
128
+ //
129
+ // For each whole CTA embed block we find, look up its guid in the inventory:
130
+ // • known + has destinationHref + NOT still-tracked → replace the WHOLE block with
131
+ // <a class="btn" href="<dest>"[ target=_blank]>…label…</a> (fully portable).
132
+ // • unknown guid, or flagged still-tracked, or no destination → PRESERVE the raw
133
+ // embed HTML verbatim and record a LOUD note + the guid in `unresolved` (never
134
+ // silently dropped — the operator/preflight must see it).
135
+ //
136
+ // Idempotent: a body with no embed block is returned unchanged; an already-resolved
137
+ // <a class="btn"> is left alone (it carries no hs-cta markup to match).
138
// One whole legacy embed: from the opening "HubSpot Call-to-Action Code" comment
// to the matching "end …" comment, non-greedy so adjacent embeds stay separate.
const CTA_BLOCK_RE =
  /<!--\s*HubSpot Call-to-Action Code\s*-->[\s\S]*?<!--\s*end HubSpot Call-to-Action Code\s*-->/gi;

// resolveCtaEmbeds(text, inventory) -> { text, unresolved, notes }
//
//   text       body to rewrite; a non-string or empty value is returned as-is.
//   inventory  { [guid]: { destinationHref, name?, tracked? } }
//
// Each embed block is identified by the first guid ctaGuidsInText() finds in it:
//   * inventory entry with a destinationHref and `tracked !== true` → the block is
//     replaced by buildResolvedLink(); the label is the embed's alt text, else the
//     entry name, else 'Learn more'; target=_blank is carried over if present.
//   * otherwise the block is kept verbatim, a note is recorded, and (when a guid
//     was found) the guid is appended to `unresolved`.
//
// NOTE(review): the alt text is taken from the markup as written and then escaped
// again by buildResolvedLink(), so an alt that already contains entities looks
// like it would be double-escaped — confirm.
export function resolveCtaEmbeds(text, inventory = {}) {
  const notes = [];
  const unresolved = [];
  if (typeof text !== 'string' || text.length === 0) return { text, unresolved, notes };

  // Keep the embed untouched, recording why (and which guid, when known).
  const keepRaw = (block, note, guid) => {
    if (guid) unresolved.push(guid);
    notes.push(note);
    return block;
  };

  const rewritten = text.replace(CTA_BLOCK_RE, (block) => {
    const [guid] = ctaGuidsInText(block);
    if (!guid) {
      return keepRaw(block, '⚠ CTA embed found with no recognizable GUID — preserved raw HTML.');
    }

    const entry = inventory[guid];
    if (!(entry && entry.destinationHref)) {
      return keepRaw(
        block,
        `⚠ CTA ${guid} not in inventory (or no destination) — preserved raw embed HTML. ` +
          `Run \`node sync/cta-inventory.mjs <account>\` to resolve it.`,
        guid,
      );
    }
    if (entry.tracked === true) {
      return keepRaw(
        block,
        `⚠ CTA ${guid} ("${entry.name || '?'}") is flagged STILL-TRACKED — preserved raw ` +
          `embed HTML to avoid losing analytics/redirect behavior (codex #5). Resolve manually.`,
        guid,
      );
    }

    const label = ctaNameFromEmbed(block, guid) || entry.name || 'Learn more';
    const targetBlank = /target=["']?_blank/i.test(block);
    return buildResolvedLink(entry.destinationHref, label, { targetBlank });
  });

  return { text: rewritten, unresolved, notes };
}
177
+
178
+ // Build the portable replacement anchor. A styled button link, no per-account ids.
179
// Builds the replacement anchor for a resolved CTA: `<a class="btn cta-btn" …>`
// with the href attribute-escaped and the label text-escaped. When `targetBlank`
// is true, `target="_blank" rel="noopener"` is appended.
export function buildResolvedLink(href, label, { targetBlank = false } = {}) {
  const attrs = ['class="btn cta-btn"', `href="${escapeAttr(href)}"`];
  if (targetBlank) attrs.push('target="_blank"', 'rel="noopener"');
  return `<a ${attrs.join(' ')}>${escapeText(label)}</a>`;
}
185
+
186
// Escapes a value for use inside a double-quoted HTML attribute: & " < >.
function escapeAttr(s) {
  const replacements = { '&': '&amp;', '"': '&quot;', '<': '&lt;', '>': '&gt;' };
  return String(s).replace(/[&"<>]/g, (ch) => replacements[ch]);
}
189
// Escapes a value for use as HTML text content: & < > (quotes are left alone).
function escapeText(s) {
  const replacements = { '&': '&amp;', '<': '&lt;', '>': '&gt;' };
  return String(s).replace(/[&<>]/g, (ch) => replacements[ch]);
}
192
+
193
+ // ── inventory I/O (cache under the gitignored .sync-state) ───────────────────────
194
+
195
// Location of the cached inventory for a portal:
// <cwd>/.sync-state/<portalId>.cta-inventory.json
export function inventoryPath(portalId) {
  const stateDir = resolvePath('.sync-state');
  const fileName = `${portalId}.cta-inventory.json`;
  return join(stateDir, fileName);
}
198
+
199
// Loads the cached inventory for `portalId` from inventoryPath(portalId).
//
// Returns {} when the file does not exist, cannot be read, is not valid JSON, or
// holds JSON that is not a plain object (e.g. `null` or an array). Callers index
// the result by guid, so anything else would fail later with a less obvious error.
export function loadInventory(portalId) {
  const p = inventoryPath(portalId);
  if (!existsSync(p)) return {};

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(p, 'utf8'));
  } catch {
    // Best effort: an unreadable or corrupt cache is treated as empty.
    return {};
  }

  const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
  return isPlainObject ? parsed : {};
}
208
+
209
// Writes `inv` to inventoryPath(portalId) as 2-space-indented JSON with a trailing
// newline, creating .sync-state first if needed. Keys are emitted in sorted order
// so the file content does not depend on insertion order.
function saveInventory(portalId, inv) {
  mkdirSync(resolvePath('.sync-state'), { recursive: true });
  const ordered = Object.fromEntries(
    Object.keys(inv)
      .sort()
      .map((key) => [key, inv[key]]),
  );
  const json = JSON.stringify(ordered, null, 2);
  writeFileSync(inventoryPath(portalId), json + '\n');
}
215
+
216
+ // ── scan committed content for CTA guids ─────────────────────────────────────────
217
+
218
+ // Recursively collect every CTA GUID referenced under a content dir. Defaults to
219
+ // the blog (CTAs are blog/landing-page-content-only per codex #3/#5) but accepts
220
+ // any subtree so the operator can widen the scan.
221
// Collects every CTA GUID that ctaGuidsInText() finds in the `.json` files under
// <contentDir>/<sub>, recursing into subdirectories. Returns the distinct GUIDs
// sorted; returns [] when the root does not exist.
export function scanContentForCtaGuids(contentDir, sub = '') {
  const root = join(resolvePath(contentDir), sub);
  if (!existsSync(root)) return [];

  const guids = new Set();
  // Explicit work list instead of recursion; visiting order does not matter
  // because the result is a sorted set.
  const pending = [root];
  while (pending.length > 0) {
    const dir = pending.pop();
    for (const name of readdirSync(dir)) {
      const full = join(dir, name);
      if (statSync(full).isDirectory()) {
        pending.push(full);
        continue;
      }
      if (!name.endsWith('.json')) continue;
      for (const guid of ctaGuidsInText(readFileSync(full, 'utf8'))) guids.add(guid);
    }
  }
  return [...guids].sort();
}
239
+
240
+ // ── resolution against the public cta-redirect interstitial ──────────────────────
241
+
242
+ // Resolve one CTA GUID to { destinationHref, name, renderedHtml, tracked, status }.
243
+ // READ-ONLY: a single outbound GET to the PUBLIC cta-redirect host (no account key,
244
+ // no write). `tracked` is true when we could not extract a destination (the CTA may
245
+ // still be live/tracked and must be preserved, not link-converted).
246
// Resolves one CTA GUID with a single GET (redirects not followed) to
// <CTA_REDIRECT_HOST>/cta/redirect/<portalId>/<guid>.
//
//   fetchFn  fetch-compatible function; defaults to the global fetch.
//
// Resolves to { destinationHref, name, renderedHtml, tracked, status[, error] }:
//   * an http(s) `location` response header → that URL, tracked: false;
//   * otherwise the body is read and extractRedirectUrl() supplies the
//     destination; `tracked` is true exactly when none could be extracted;
//   * if the request or body read throws → destinationHref null, tracked true,
//     plus `error` with the thrown message.
// `renderedHtml` always holds the request URL, and `status` is the response
// status (0 if no response was received).
export async function resolveCta(portalId, guid, { fetchFn = fetch } = {}) {
  const redirectUrl = `${CTA_REDIRECT_HOST}/cta/redirect/${portalId}/${guid}`;
  let status = 0;
  let body = '';

  try {
    const response = await fetchFn(redirectUrl, { redirect: 'manual' });
    status = response.status;

    // A Location header pointing at http(s) is already the destination.
    const location = response.headers?.get?.('location');
    const hasHttpLocation = Boolean(location) && /^https?:/i.test(location);
    if (hasHttpLocation) {
      return {
        destinationHref: location,
        name: null,
        renderedHtml: redirectUrl,
        tracked: false,
        status,
      };
    }
    body = await response.text();
  } catch (e) {
    return {
      destinationHref: null,
      name: null,
      renderedHtml: redirectUrl,
      tracked: true,
      status,
      error: e.message,
    };
  }

  const destinationHref = extractRedirectUrl(body);
  // No destination means the caller must keep the raw embed, so flag it tracked.
  const tracked = destinationHref == null;
  return {
    destinationHref,
    name: ctaNameFromEmbed(body, guid),
    renderedHtml: redirectUrl,
    tracked,
    status,
  };
}
279
+
280
+ // ── CLI ──────────────────────────────────────────────────────────────────────────
281
+
282
// Builds (or extends) the CTA inventory for the named account and saves it.
//
//   name        account name, passed to `account()`.
//   contentDir  content root to scan (default 'content').
//   sub         subtree under contentDir (default 'blog').
//   refresh     when true, start from an empty inventory and re-resolve everything.
//   account     account lookup (default: the real one from lib/hub.mjs).
//   resolveFn   (portalId, guid) -> resolution result (default: resolveCta).
//   log         line logger (default: console.log).
//
// GUIDs that already have a destinationHref in the loaded inventory are skipped;
// the rest are resolved one at a time. Returns the inventory object that was saved.
export async function buildInventory(
  name,
  { contentDir = 'content', sub = 'blog', refresh = false, account = realAccount, resolveFn = resolveCta, log = console.log } = {},
) {
  const acct = account(name);
  const guids = scanContentForCtaGuids(contentDir, sub);
  log(`cta-inventory: account "${acct.name}" (portal ${acct.portalId}) — ${guids.length} CTA guid(s) found under ${join(contentDir, sub)}`);

  const inv = refresh ? {} : loadInventory(acct.portalId);
  const tally = { resolved: 0, stillTracked: 0 };
  const alreadyResolved = (guid) => Boolean(inv[guid] && inv[guid].destinationHref) && !refresh;

  for (const guid of guids) {
    if (alreadyResolved(guid)) continue;

    const result = await resolveFn(acct.portalId, guid);
    inv[guid] = {
      destinationHref: result.destinationHref || null,
      name: result.name || null,
      renderedHtml: result.renderedHtml || null,
      tracked: result.tracked === true,
    };

    if (result.destinationHref) {
      tally.resolved += 1;
      const suffix = result.name ? ` (${result.name})` : '';
      log(` ✓ ${guid} -> ${result.destinationHref}${suffix}`);
    } else {
      tally.stillTracked += 1;
      log(` ⚠ ${guid} -> UNRESOLVED (status ${result.status}) — preserved as still-tracked/unknown`);
    }
  }

  saveInventory(acct.portalId, inv);
  log(`cta-inventory: ${tally.resolved} resolved, ${tally.stillTracked} still-tracked/unknown, ${Object.keys(inv).length} total -> ${inventoryPath(acct.portalId)}`);
  return inv;
}
314
+
315
// CLI entry: `<account> [--content <dir>] [--sub <subdir>] [--refresh]`.
//
// `argv` is the full process.argv (the first two entries are dropped). The account
// name is the first argument that neither starts with `--` nor is the value of a
// value-taking flag, so flags may appear before or after it
// (`--content site prod` selects account `prod`, not `site`).
// Prints usage and exits with status 2 when no account name is given.
async function main(argv) {
  const args = argv.slice(2);
  // Flags whose following argument is their value, not a positional.
  const valueFlags = new Set(['--content', '--sub']);
  const name = args.find((arg, i) => !arg.startsWith('--') && !valueFlags.has(args[i - 1]));
  if (!name) {
    console.error('Usage: node sync/cta-inventory.mjs <account> [--content <dir>] [--sub <subdir>] [--refresh]');
    process.exit(2);
  }
  const contentDir = optVal(args, '--content') || 'content';
  // `??` rather than `||`: an explicit empty --sub is kept instead of falling back to 'blog'.
  const sub = optVal(args, '--sub') ?? 'blog';
  const refresh = args.includes('--refresh');
  await buildInventory(name, { contentDir, sub, refresh });
}
327
+
328
// Returns the argument that follows `flag` in `args`, or undefined when the flag
// is absent or is the last argument.
function optVal(args, flag) {
  const at = args.indexOf(flag);
  if (at < 0) return undefined;
  return args[at + 1];
}
332
+
333
+ // Run as CLI only when invoked directly.
334
// Run the CLI only when this module is the script Node was started with; a
// rejected main() is reported on stderr and exits with status 1.
// NOTE(review): this is a literal string comparison against `file://` + argv[1];
// it presumably misses Windows paths and paths with percent-encoded characters —
// `url.pathToFileURL(process.argv[1]).href` would be the robust form. Confirm.
const invokedAsScript = import.meta.url === `file://${process.argv[1]}`;
if (invokedAsScript) {
  main(process.argv).catch((e) => {
    console.error(e.message || e);
    process.exit(1);
  });
}
340
+
341
+ export default {
342
+ ctaGuidsInText,
343
+ extractRedirectUrl,
344
+ ctaNameFromEmbed,
345
+ resolveCtaEmbeds,
346
+ buildResolvedLink,
347
+ loadInventory,
348
+ inventoryPath,
349
+ scanContentForCtaGuids,
350
+ resolveCta,
351
+ buildInventory,
352
+ };
package/src/index.mjs ADDED
@@ -0,0 +1,3 @@
1
+ export { loadConfig } from './config.mjs';
2
+ export { pull } from './pull.mjs';
3
+ export { push, preflightRefs } from './push.mjs';