hubspot-cms-sync 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +52 -0
- package/bin/hubspot-cms-sync.mjs +115 -0
- package/docs/CONFIGURATION.md +83 -0
- package/docs/GITHUB_ACTIONS.md +70 -0
- package/docs/MIGRATION_PLAN.md +361 -0
- package/docs/PLAN_REVIEW.md +42 -0
- package/docs/SKILL_DISTRIBUTION.md +79 -0
- package/examples/github-actions/ci.yml +56 -0
- package/examples/github-actions/preview.yml +71 -0
- package/examples/github-actions/publish.yml +82 -0
- package/examples/hubspot-cms-sync.config.mjs +45 -0
- package/examples/site.manifest.json +19 -0
- package/package.json +41 -0
- package/skill/SKILL.md +54 -0
- package/skill/references/commands.md +54 -0
- package/skill/references/config.md +25 -0
- package/skill/references/failures.md +58 -0
- package/skill/references/github-actions.md +56 -0
- package/skill/references/screenshots-and-fidelity.md +33 -0
- package/src/adapters/assets.mjs +576 -0
- package/src/adapters/blog.mjs +921 -0
- package/src/adapters/content.mjs +213 -0
- package/src/adapters/forms.mjs +569 -0
- package/src/adapters/pages.mjs +463 -0
- package/src/adapters/theme.mjs +503 -0
- package/src/config.mjs +113 -0
- package/src/corpus-scan.mjs +248 -0
- package/src/cta-inventory.mjs +352 -0
- package/src/index.mjs +3 -0
- package/src/lib/canonical.mjs +234 -0
- package/src/lib/hub.mjs +197 -0
- package/src/lib/orchestrate.mjs +141 -0
- package/src/lib/refs.mjs +398 -0
- package/src/lib/sync-state.mjs +86 -0
- package/src/manifest.mjs +353 -0
- package/src/preflight.mjs +385 -0
- package/src/pull.mjs +99 -0
- package/src/push.mjs +354 -0
- package/src/republish.mjs +102 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Screenshots And Fidelity Reference
|
|
2
|
+
|
|
3
|
+
Use the consuming repo's configured verification commands before inventing new
|
|
4
|
+
checks. The config usually exposes them under `verification.commands`.
|
|
5
|
+
|
|
6
|
+
## Before Capture
|
|
7
|
+
|
|
8
|
+
1. Confirm the preview or production base URL from the configured env var.
|
|
9
|
+
2. Confirm the target has been published or republished.
|
|
10
|
+
3. Run link and form checks when the repo provides them.
|
|
11
|
+
|
|
12
|
+
## Screenshot Workflow
|
|
13
|
+
|
|
14
|
+
Use Playwright or the repo's chosen browser test runner.
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# first run the consuming repo's configured verification commands, then e.g.:
|
|
18
|
+
npx playwright test verify/fidelity.spec.mjs
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Compare screenshots against the repo's accepted baselines. If baselines need to
|
|
22
|
+
change, keep that diff separate from content sync changes when possible.
|
|
23
|
+
|
|
24
|
+
## Reporting
|
|
25
|
+
|
|
26
|
+
Report:
|
|
27
|
+
|
|
28
|
+
- target name and base URL
|
|
29
|
+
- pages checked
|
|
30
|
+
- failed selectors, links, forms, or screenshot names
|
|
31
|
+
- artifact paths
|
|
32
|
+
- checks skipped because credentials, base URLs, or browser dependencies were
|
|
33
|
+
unavailable
|
|
@@ -0,0 +1,576 @@
|
|
|
1
|
+
// src/adapters/assets.mjs — File Manager image sync for pages + blog.
|
|
2
|
+
//
|
|
3
|
+
// CODEX FINDING #4 (the contract this adapter exists to enforce):
|
|
4
|
+
// Canonical content committed to git stores REPO ASSET PATHS / logical
|
|
5
|
+
// `@asset:<path>` keys, NEVER hosted URLs. The per-account
|
|
6
|
+
// portal -> hostedURL map is volatile state living in
|
|
7
|
+
// `.sync-state/<portalId>.rehosted.json` (gitignored), NOT committed.
|
|
8
|
+
//
|
|
9
|
+
// HOW THE @asset KEY IS DEFINED (must agree with src/lib/refs.mjs):
|
|
10
|
+
// refs.mjs collapses any `…/hubfs/<portal>/<pathTail>` URL into the single
|
|
11
|
+
// token `@asset:<pathTail>` (the portal + host are discarded — they are
|
|
12
|
+
// per-account). That `<pathTail>` (e.g. `Sucess.jpg`,
|
|
13
|
+
// `Stock%20images/Double%20exposure.jpeg`) is at once:
|
|
14
|
+
// • the logical registry key (registry.assets[<pathTail>])
|
|
15
|
+
// • the repo path under content/assets/<pathTail> (bytes committed)
|
|
16
|
+
// We keep the tail BYTE-FOR-BYTE (including any %20) so the on-disk path, the
|
|
17
|
+
// registry key, and the `@asset:` token are the same string and round-trip.
|
|
18
|
+
//
|
|
19
|
+
// PULL (read source acct -> write canonical bytes + register source URLs):
|
|
20
|
+
// 1. scan canonical content (pages/*.json, pages/*.widgets.json, blog/**)
|
|
21
|
+
// for `@asset:<path>` tokens — these were produced by refs.canonicalize.
|
|
22
|
+
// 2. for each path, find a downloadable URL ON THE SOURCE ACCOUNT
|
|
23
|
+
// (File Manager search by name, hubfs reconstruction fallback) and
|
|
24
|
+
// download the bytes to content/assets/<path> (COMMIT these bytes).
|
|
25
|
+
// 3. record source-URL -> @asset in the registry (registry.assets[path] =
|
|
26
|
+
// sourceURL) and mirror it to .sync-state/<portalId>.rehosted.json.
|
|
27
|
+
//
|
|
28
|
+
// PUSH (read committed bytes -> upload to target -> register target URLs):
|
|
29
|
+
// for each content/assets/<path>, upload to the TARGET File Manager with
|
|
30
|
+
// OVERWRITE (codex #4: the legacy overwrite:false made duplicates), then
|
|
31
|
+
// record @asset -> target hosted URL in registry.assets[path] so the
|
|
32
|
+
// content / blog / theme adapters can resolve() their `@asset:` tokens to a
|
|
33
|
+
// concrete URL. dependsOn: [] — assets POPULATE the registry, depend on no
|
|
34
|
+
// other adapter.
|
|
35
|
+
//
|
|
36
|
+
// READ-ONLY PROD (529456): this adapter never hardcodes a portal; the
|
|
37
|
+
// orchestrator passes `acct`. push() writes to whatever `acct` it is given;
|
|
38
|
+
// the orchestrator is responsible for never passing prod to a push.
|
|
39
|
+
|
|
40
|
+
import {
|
|
41
|
+
readFileSync,
|
|
42
|
+
writeFileSync,
|
|
43
|
+
renameSync,
|
|
44
|
+
mkdirSync,
|
|
45
|
+
existsSync,
|
|
46
|
+
readdirSync,
|
|
47
|
+
statSync,
|
|
48
|
+
} from 'node:fs';
|
|
49
|
+
import { join, dirname, resolve as pathResolve } from 'node:path';
|
|
50
|
+
import { homedir } from 'node:os';
|
|
51
|
+
import { fileURLToPath } from 'node:url';
|
|
52
|
+
|
|
53
|
+
import { hub } from '../lib/hub.mjs';
|
|
54
|
+
import { stableStringify } from '../lib/canonical.mjs';
|
|
55
|
+
|
|
56
|
+
// Base URL for the direct `fetch` calls in this module (the Files upload endpoint).
const API = 'https://api.hubapi.com';
|
|
57
|
+
|
|
58
|
+
// Identifier of this adapter; also exposed as `name` on the default export.
export const name = 'assets';
|
|
59
|
+
// Assets POPULATE the registry (logical -> hosted url) for everyone else.
|
|
60
|
+
// Nothing has to run before assets, so this is empty.
|
|
61
|
+
// Names of adapters that must complete before this one — intentionally none.
export const dependsOn = [];
|
|
62
|
+
|
|
63
|
+
// Folder under the target File Manager that re-hosted assets live in. A single
|
|
64
|
+
// flat-ish namespace keeps overwrite-by-path deterministic across runs.
|
|
65
|
+
// Root File Manager folder for uploads; uploadTarget() appends any sub-folders
// taken from the asset key.
const TARGET_FOLDER = '/synced-assets';
|
|
66
|
+
|
|
67
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
68
|
+
// PURE: path <-> logical mapping. `@asset:<path>` <-> content/assets/<path>.
|
|
69
|
+
// Exported for unit testing (no network).
|
|
70
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
71
|
+
|
|
72
|
+
// Matches every `@asset:<pathTail>` token in a string. Capture group 1 is the
// tail, which runs until whitespace, a quote (" or '), a backslash, or `)`.
const ASSET_TOKEN_RE = /@asset:([^\s"'\\)]+)/g;
|
|
73
|
+
|
|
74
|
+
/**
 * Convert an `@asset:` token into its path tail.
 *
 *   assetTokenToPath('@asset:Sucess.jpg') -> 'Sucess.jpg'
 *
 * A string that is already a bare path tail is returned unchanged, so the
 * function is idempotent.
 *
 * @param {*} token - candidate token or bare path tail.
 * @returns {string|null} the path tail, or null when `token` is not a
 *   non-empty string, or starts with `@` without being a well-formed
 *   `@asset:` token.
 */
export function assetTokenToPath(token) {
  if (typeof token !== 'string' || token === '') {
    return null;
  }
  const parsed = /^@asset:([^\s"'\\)]+)$/.exec(token);
  if (parsed !== null) {
    return parsed[1];
  }
  // Anything else that begins with `@` is some other (or malformed) token;
  // every remaining string is treated as a bare path tail.
  return token.startsWith('@') ? null : token;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Build the logical token for a path tail.
 *
 *   pathToAssetToken('Sucess.jpg') -> '@asset:Sucess.jpg'
 *
 * @param {string} path - path tail, used verbatim.
 * @returns {string} the `@asset:`-prefixed token.
 */
export function pathToAssetToken(path) {
  const prefix = '@asset:';
  return prefix.concat(path);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Location of an asset's bytes in the assets adapter's tree:
 * `<contentDir>/assets/<pathTail>`.
 *
 * The tail is appended as-is (no decoding), so each `/` in it becomes a real
 * sub-directory and the result lines up with the `@asset:` token and the
 * registry key.
 *
 * @param {string} contentDir - root of the canonical content tree.
 * @param {string} path - asset path tail.
 * @returns {string} joined file path.
 */
export function assetRepoPath(contentDir, path) {
  const assetsRoot = join(contentDir, 'assets');
  return join(assetsRoot, path);
}
|
|
101
|
+
|
|
102
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
103
|
+
// ASSET-SCHEME UNIFICATION (codex #6).
|
|
104
|
+
//
|
|
105
|
+
// Two adapters emit `@asset:<key>` tokens with DIFFERENT committed-bytes trees:
|
|
106
|
+
// • the assets adapter: key = the hubfs path tail (e.g. `Sucess.jpg`),
|
|
107
|
+
// bytes committed at content/assets/<key>.
|
|
108
|
+
// • the blog adapter: key = a sha1-prefixed manifest filename
|
|
109
|
+
// (e.g. `4e7bf9bad5-Inbox.png`), bytes committed at
|
|
110
|
+
// content/blog/assets/<key> (blog.rehostAssets uploads these itself).
|
|
111
|
+
//
|
|
112
|
+
// A blog-manifest `@asset` is therefore a legitimate, satisfiable ref whose
|
|
113
|
+
// bytes live OUTSIDE content/assets/. The single source of truth for "where can
|
|
114
|
+
// an @asset key's committed bytes live" is `assetRepoCandidates` below; both the
|
|
115
|
+
// assets adapter and the push preflight consult it, so the two schemes resolve and
|
|
116
|
+
// preflight identically. We RECOGNIZE both trees rather than migrate blog bytes:
|
|
117
|
+
// migrating would have to rewrite the manifest keys + blog.pull tokenization + move
|
|
118
|
+
// 51 committed files (and re-key registry.assets), all of which the blog adapter
|
|
119
|
+
// owns. Recognition is purely additive and keeps each adapter's bytes where it
|
|
120
|
+
// already commits them. (See docs note in the unification report.)
|
|
121
|
+
//
|
|
122
|
+
// The blog tree's name is centralized here so the preflight need not hard-code it.
|
|
123
|
+
export const BLOG_ASSETS_REL = ['blog', 'assets'];

/**
 * Location of an asset's bytes in the blog adapter's manifest tree:
 * `<contentDir>/blog/assets/<key>`.
 *
 * @param {string} contentDir - root of the canonical content tree.
 * @param {string} path - asset key (for the blog tree, a manifest filename).
 * @returns {string} joined file path.
 */
export function blogAssetRepoPath(contentDir, path) {
  const segments = [contentDir, ...BLOG_ASSETS_REL, path];
  return join(...segments);
}
|
|
132
|
+
|
|
133
|
+
/**
 * Every place an `@asset` key's committed bytes may live, in lookup order:
 *   1. content/assets/<key>       (assets adapter — hubfs tail)
 *   2. content/blog/assets/<key>  (blog adapter — manifest filename)
 *
 * Pure: builds paths only and never touches the filesystem. Callers probe
 * each candidate themselves.
 *
 * @param {string} contentDir - root of the canonical content tree.
 * @param {string} path - asset key.
 * @returns {string[]} the two candidate paths, assets tree first.
 */
export function assetRepoCandidates(contentDir, path) {
  const inAssetsTree = assetRepoPath(contentDir, path);
  const inBlogTree = blogAssetRepoPath(contentDir, path);
  return [inAssetsTree, inBlogTree];
}
|
|
143
|
+
|
|
144
|
+
/**
 * resolveAssetBytesPath(contentDir, '<key>', existsFn) -> the first candidate
 * path (assets tree, then blog tree) whose bytes are committed, or null when
 * the key has committed bytes in NEITHER tree.
 *
 * This is the one function that answers "are the bytes here?" for both
 * @asset schemes; the assets adapter's pull/push call it, and `existsFn` is
 * injectable so a caller can probe a fake filesystem instead of the real one.
 *
 * A key containing a `..` path segment always resolves to null: path.join
 * would normalise it to a location OUTSIDE both byte trees, and a caller that
 * trusts the result (push reads the file and uploads it publicly) must never
 * be handed such a path.
 *
 * @param {string} contentDir - root of the canonical content tree.
 * @param {string} path - asset key (`@asset:` tail).
 * @param {(file: string) => boolean} [existsFn=existsSync] - existence probe.
 * @returns {string|null} path of the committed bytes, or null.
 */
export function resolveAssetBytesPath(contentDir, path, existsFn = existsSync) {
  // The token regex only excludes whitespace, quotes, backslash and `)`, so
  // `../` can appear in a key taken from content. Treat it as unresolved.
  if (String(path).split('/').includes('..')) return null;
  for (const cand of assetRepoCandidates(contentDir, path)) {
    if (existsFn(cand)) return cand;
  }
  return null;
}
|
|
158
|
+
|
|
159
|
+
/**
 * List the distinct path tails named by `@asset:` tokens in a string, in
 * order of first appearance. Pure.
 *
 * @param {*} str - canonical text to scan.
 * @returns {string[]} unique tails; empty when `str` is not a non-empty string.
 */
export function extractAssetPaths(str) {
  if (typeof str !== 'string' || str === '') {
    return [];
  }
  const tails = Array.from(str.matchAll(ASSET_TOKEN_RE), (match) => match[1]);
  // A Set keeps first-seen order while dropping repeats.
  return [...new Set(tails)];
}
|
|
169
|
+
|
|
170
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
171
|
+
// PURE: File Manager upload options. The codex #4 fix lives here — OVERWRITE.
|
|
172
|
+
// Exported so a unit test can assert overwrite:true without any network.
|
|
173
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
/**
 * uploadOptions() -> the `options` object posted to /files/v3/files.
 *
 * - overwrite:true (codex #4): the legacy overwrite:false created a new
 *   duplicate file on every push, so pull->push->pull never converged.
 * - PUBLIC_INDEXABLE access so pages/blog can hotlink the result.
 * - EXACT_FOLDER scope so an overwrite targets the same path deterministically.
 *
 * Takes no arguments and returns a fresh object on every call.
 *
 * @returns {{access: string, overwrite: boolean, duplicateValidationStrategy: string, duplicateValidationScope: string}}
 */
export function uploadOptions() {
  const options = {
    access: 'PUBLIC_INDEXABLE',
    overwrite: true,
    duplicateValidationStrategy: 'NONE',
    duplicateValidationScope: 'EXACT_FOLDER',
  };
  return options;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Work out the File Manager `fileName` and `folderPath` for an asset key.
 *
 * The key is split on `/`; the last segment becomes the file name and the
 * rest become sub-folders under TARGET_FOLDER, so overwrite-by-path stays
 * stable. Each segment is URL-DECODED first: the @asset key keeps its
 * encoding (`%20`) so token, registry key and on-disk path all match, but
 * File Manager REJECTS `%` (and #?&;*^!$|) in folder/file names. A nested key
 * such as `Google%20Drive%20Integration/x.jpg` therefore uploads to folder
 * `/synced-assets/Google Drive Integration`. The hosted URL re-encodes the
 * space, so resolve() still maps the encoded token to the served URL.
 *
 * @param {string} path - asset key; coerced with String().
 * @returns {{fileName: string, folderPath: string}}
 */
export function uploadTarget(path) {
  // A malformed escape (e.g. a lone `%`) makes decodeURIComponent throw; keep
  // that segment verbatim instead.
  const decodeSegment = (segment) => {
    try {
      return decodeURIComponent(segment);
    } catch {
      return segment;
    }
  };
  const decoded = String(path).split('/').map(decodeSegment);
  const fileName = decoded[decoded.length - 1];
  const subFolders = decoded.slice(0, -1).join('/');
  const folderPath = subFolders === '' ? TARGET_FOLDER : `${TARGET_FOLDER}/${subFolders}`;
  return { fileName, folderPath };
}
|
|
215
|
+
|
|
216
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
217
|
+
// .sync-state/<portalId>.rehosted.json — per-account, gitignored URL cache.
|
|
218
|
+
// Maps `<pathTail> -> hostedURL` for THIS account (source URLs after pull,
|
|
219
|
+
// target URLs after push). NOT committed.
|
|
220
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
221
|
+
|
|
222
|
+
/**
 * Absolute path of the `.sync-state` directory: two levels above the
 * directory that contains this module.
 *
 * @returns {string}
 */
function syncStateDir() {
  const thisFile = fileURLToPath(import.meta.url);
  const moduleDir = dirname(thisFile);
  return pathResolve(moduleDir, '..', '..', '.sync-state');
}
|
|
226
|
+
|
|
227
|
+
/**
 * Path of the per-account URL cache: `<syncStateDir>/<portalId>.rehosted.json`.
 *
 * @param {string|number} portalId - account id, interpolated into the file name.
 * @returns {string}
 */
function rehostedPath(portalId) {
  const cacheFileName = `${portalId}.rehosted.json`;
  return join(syncStateDir(), cacheFileName);
}
|
|
230
|
+
|
|
231
|
+
/**
 * Load the `<pathTail> -> hostedURL` cache for one account.
 *
 * Best-effort by design: a missing or unparseable cache file yields `{}`.
 * A file that parses to something other than a plain object (`null`, an
 * array, a number, …) is also treated as empty — callers index into and
 * assign onto the result, which would throw on `null` or misbehave on an
 * array.
 *
 * @param {string|number} portalId - account whose cache to read.
 * @returns {Object<string, *>} the cached map, or `{}`.
 */
export function loadRehosted(portalId) {
  const file = rehostedPath(portalId);
  if (!existsSync(file)) return {};
  try {
    const parsed = JSON.parse(readFileSync(file, 'utf8'));
    const isPlainMap = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainMap ? parsed : {};
  } catch {
    // Unreadable or corrupt cache: behave as if nothing was cached.
    return {};
  }
}
|
|
240
|
+
|
|
241
|
+
/**
 * Persist the `<pathTail> -> hostedURL` cache for one account.
 *
 * The write is atomic: the map is serialized to a per-pid temp file next to
 * the target and then renamed into place. A direct writeFileSync could be
 * observed or interrupted mid-write, leaving an empty/truncated cache that the
 * NEXT push would read as "nothing is rehosted" and answer by re-uploading
 * everything. rename(2) is atomic on the same filesystem, so the live file is
 * always either the previous complete version or the new complete version.
 *
 * @param {string|number} portalId - account whose cache to write.
 * @param {Object} map - cache contents, serialized with stableStringify.
 * @returns {void}
 */
export function saveRehosted(portalId, map) {
  mkdirSync(syncStateDir(), { recursive: true });
  const target = rehostedPath(portalId);
  const staging = `${target}.tmp-${process.pid}`;
  writeFileSync(staging, stableStringify(map));
  renameSync(staging, target);
}
|
|
257
|
+
|
|
258
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
259
|
+
// Scan the committed canonical tree for `@asset:` references.
|
|
260
|
+
// Sources: content/pages/*.json (+ *.widgets.json), content/blog/** *.json.
|
|
261
|
+
// theme/templates are ALSO @asset carriers, but the assets they reference are
|
|
262
|
+
// likewise tokenized; reading every *.json under contentDir covers pages+blog,
|
|
263
|
+
// and the optional `extraDirs` lets the orchestrator widen the scan.
|
|
264
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
265
|
+
|
|
266
|
+
/**
 * Recursively collect the paths of every `*.json` file under `dir`.
 *
 * @param {string} dir - directory to walk; a non-existent one adds nothing.
 * @param {string[]} acc - array the matching paths are pushed onto.
 * @returns {string[]} the same `acc` array.
 */
function walkJson(dir, acc) {
  if (existsSync(dir)) {
    const entries = readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      const entryPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        walkJson(entryPath, acc);
      } else if (entry.isFile() && entry.name.endsWith('.json')) {
        acc.push(entryPath);
      }
    }
  }
  return acc;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Gather the distinct `@asset:` path tails referenced by the canonical
 * content tree.
 *
 * Every `*.json` file under `<contentDir>/pages`, `<contentDir>/landing-pages`
 * and `<contentDir>/blog` is read (in that order) and scanned for tokens. A
 * file that cannot be read is skipped rather than failing the scan.
 *
 * @param {string} contentDir - root of the canonical content tree.
 * @returns {string[]} unique tails, in order of first appearance.
 */
export function collectReferencedAssetPaths(contentDir) {
  const scannedDirs = ['pages', 'landing-pages', 'blog'];
  const jsonFiles = scannedDirs.flatMap((sub) => walkJson(join(contentDir, sub), []));
  const seen = new Set();
  for (const file of jsonFiles) {
    let text;
    try {
      text = readFileSync(file, 'utf8');
    } catch {
      continue;
    }
    extractAssetPaths(text).forEach((tail) => seen.add(tail));
  }
  return [...seen];
}
|
|
298
|
+
|
|
299
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
300
|
+
// Source-URL resolution (PULL). Given a path tail, find a URL on the SOURCE
|
|
301
|
+
// account we can actually download. Order:
|
|
302
|
+
// 1. an existing .sync-state rehosted entry (already known this account),
|
|
303
|
+
// 2. File Manager search by file name (recovers dead legacy CDN URLs — the
|
|
304
|
+
// blog-sync.mjs fileManagerUrl trick),
|
|
305
|
+
// 3. reconstruct the canonical hubfs URL from the account's portal id.
|
|
306
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
307
|
+
|
|
308
|
+
/**
 * Look an asset up in the account's File Manager and return its URL.
 *
 * Searches `/files/v3/files/search` by the bare file-name stem (last path
 * segment, URL-decoded, extension removed; matches blog-sync.mjs behaviour)
 * with a limit of 5 results. A result whose `name.extension` equals the file
 * name case-insensitively is preferred.
 *
 * NOTE(review): when no result matches exactly, the FIRST search result is
 * used even though its name differs — confirm this fallback is intended.
 *
 * @param {object} acct - account handle passed through to `hub`.
 * @param {string} path - asset path tail.
 * @returns {Promise<string|null>} the file's URL, or null when the request
 *   is not ok or no result carries a URL.
 * @throws {URIError} when the last path segment has a malformed escape.
 */
async function fileManagerUrl(acct, path) {
  const fileName = decodeURIComponent(path.split('/').pop());
  const stem = fileName.replace(/\.[^.]+$/, '');
  const query = `/files/v3/files/search?name=${encodeURIComponent(stem)}&limit=5`;
  const response = await hub(acct, 'GET', query);
  if (!response.ok) return null;

  const candidates = response.json.results || [];
  const wanted = fileName.toLowerCase();
  const exact = candidates.find((file) => `${file.name}.${file.extension}`.toLowerCase() === wanted);
  const chosen = exact || candidates[0];
  return chosen?.url || null;
}
|
|
324
|
+
|
|
325
|
+
/**
 * Rebuild the hubfs URL for a path tail on the canonical legacy CDN host.
 * The tail is appended verbatim. Dead legacy URLs are recovered separately
 * through the File Manager lookup.
 *
 * @param {string|number} portalId - account (portal) id.
 * @param {string} path - asset path tail.
 * @returns {string} `https://cdn2.hubspot.net/hubfs/<portalId>/<path>`.
 */
function reconstructHubfsUrl(portalId, path) {
  const legacyHost = 'https://cdn2.hubspot.net';
  return `${legacyHost}/hubfs/${portalId}/${path}`;
}
|
|
329
|
+
|
|
330
|
+
/**
 * Download a URL and return its body as a Buffer.
 *
 * Raw characters (spaces, non-ASCII) are percent-encoded before the request,
 * but escapes that are ALREADY present are left alone. Plain encodeURI also
 * escapes a literal `%`, which would turn an asset key kept byte-for-byte as
 * `Stock%20images/x.jpeg` into `Stock%2520images/x.jpeg` and request a file
 * that does not exist.
 *
 * @param {string} url - absolute URL to fetch.
 * @returns {Promise<Buffer>} the response body.
 * @throws {Error} `HTTP <status>` when the response is not ok; fetch errors
 *   propagate unchanged.
 */
async function downloadBytes(url) {
  // encodeURI rewrites every `%` as `%25`; restore the ones that introduced a
  // well-formed two-hex-digit escape in the input.
  const requestUrl = encodeURI(url).replace(/%25([0-9A-Fa-f]{2})/g, '%$1');
  const res = await fetch(requestUrl);
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return Buffer.from(await res.arrayBuffer());
}
|
|
335
|
+
|
|
336
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
337
|
+
// Target upload (PUSH). Uploads bytes with OVERWRITE; returns the hosted URL.
|
|
338
|
+
// Network-injectable `doFetch` for unit testing the option payload.
|
|
339
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
340
|
+
|
|
341
|
+
/**
 * Upload an asset's bytes to the account's File Manager with OVERWRITE and
 * return the hosted URL.
 *
 * Transient throttling (429) and server errors (5xx) are retried with
 * exponential backoff, up to 5 attempts in total. A bulk push reliably trips
 * the Files API rate limit, and without backoff a single transient 429 would
 * fail the whole push. Any other non-ok status fails immediately.
 *
 * @param {{key: string}} acct - account; `key` is sent as the Bearer token.
 * @param {Buffer|Uint8Array} buf - file bytes.
 * @param {string} path - asset key; mapped to file name + folder by uploadTarget.
 * @param {Function} [doFetch=fetch] - fetch implementation (injectable for tests).
 * @returns {Promise<string|null>} hosted URL from the response (`url`, else
 *   `objects[0].url`), or null when the response carries neither.
 * @throws {Error} `upload <fileName> -> <status>: <message>` once the upload
 *   has definitively failed; errors thrown by `doFetch` propagate unchanged.
 */
export async function uploadAsset(acct, buf, path, doFetch = fetch) {
  const { fileName, folderPath } = uploadTarget(path);
  const MAX_ATTEMPTS = 5;
  let res;
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    // FormData is single-use, so rebuild it for each attempt.
    const form = new FormData();
    form.append('file', new Blob([buf]), fileName);
    form.append('fileName', fileName);
    form.append('folderPath', folderPath);
    form.append('options', JSON.stringify(uploadOptions()));
    res = await doFetch(`${API}/files/v3/files`, {
      method: 'POST',
      headers: { Authorization: `Bearer ${acct.key}` },
      body: form,
    });
    if (res.ok) {
      const j = await res.json();
      return j.url || j.objects?.[0]?.url || null;
    }
    if (res.status !== 429 && res.status < 500) break; // non-retryable client error
    // No further attempt follows the last one, so sleeping here would only
    // delay the error by another 9.6s.
    if (attempt === MAX_ATTEMPTS - 1) break;
    await new Promise((r) => setTimeout(r, 600 * 2 ** attempt)); // 0.6s,1.2s,2.4s,4.8s
  }
  // The error body is best-effort: it may be missing, non-JSON, or JSON null.
  const j = await res.json().catch(() => ({}));
  throw new Error(`upload ${fileName} -> ${res.status}: ${j?.message || ''}`);
}
|
|
369
|
+
|
|
370
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
371
|
+
// pull(acct, { contentDir, registry }) -> { pulled, notes }
|
|
372
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
373
|
+
|
|
374
|
+
/**
 * PULL: make sure every `@asset:` key referenced by the canonical content has
 * committed bytes, and record the URL each key is served from on `acct`.
 *
 * For each referenced key:
 *   1. resolve a source URL — cached rehosted entry, else File Manager search,
 *      else the reconstructed hubfs URL;
 *   2. if bytes are already committed in EITHER tree (content/assets or
 *      content/blog/assets) count it as reused and never re-download;
 *      otherwise download to content/assets/<key>, with one File Manager
 *      recovery attempt when the first URL is dead;
 *   3. record key -> source URL in `registry.assets` and the rehosted cache.
 *
 * Download failures are reported in `notes` and do not throw.
 *
 * @param {{portalId: (string|number)}} acct - source account.
 * @param {{contentDir: string, registry: object}} ctx - content root and the
 *   registry to populate; `registry.assets` is created when absent.
 * @returns {Promise<{pulled: number, notes: string[]}>} `pulled` counts newly
 *   downloaded files; `notes[0]` is the summary line.
 */
export async function pull(acct, { contentDir, registry }) {
  const notes = [];
  const paths = collectReferencedAssetPaths(contentDir);
  const rehosted = loadRehosted(acct.portalId);
  // push() tolerates a registry without an `assets` map; writing to it below
  // must not throw here either.
  if (registry.assets == null) registry.assets = {};
  let downloaded = 0;
  let reused = 0;
  let failed = 0;

  for (const path of paths) {
    // Keys come from content text and may contain `..` segments. path.join
    // would normalise those to a location outside content/assets, so bytes
    // fetched from the network could be written anywhere. Refuse such keys.
    if (path.split('/').includes('..')) {
      failed++;
      notes.push(`download failed: @asset:${path} (unsafe ".." path segment)`);
      continue;
    }

    // New downloads land in content/assets/<path>, but bytes already committed
    // in either tree count as present. (codex #6.)
    const repoFile = assetRepoPath(contentDir, path);
    const committedFile = resolveAssetBytesPath(contentDir, path);

    // Resolve a downloadable source URL for this account.
    let sourceUrl = rehosted[path] || null;
    if (!sourceUrl) {
      try {
        sourceUrl = await fileManagerUrl(acct, path);
      } catch {
        sourceUrl = null;
      }
    }
    if (!sourceUrl) sourceUrl = reconstructHubfsUrl(acct.portalId, path);

    if (committedFile) {
      // Bytes already committed: just (re)register the source URL.
      reused++;
    } else {
      let buf = null;
      try {
        buf = await downloadBytes(sourceUrl);
      } catch {
        // last-ditch File Manager recovery for a dead reconstructed URL
        try {
          const alt = await fileManagerUrl(acct, path);
          if (alt && alt !== sourceUrl) {
            buf = await downloadBytes(alt);
            sourceUrl = alt;
          }
        } catch {
          /* fall through to failure */
        }
      }
      if (!buf) {
        failed++;
        notes.push(`download failed: @asset:${path}`);
        continue;
      }
      mkdirSync(dirname(repoFile), { recursive: true });
      writeFileSync(repoFile, buf);
      downloaded++;
    }

    // Record source URL -> @asset for this account (registry + state cache).
    registry.assets[path] = sourceUrl;
    rehosted[path] = sourceUrl;
  }

  saveRehosted(acct.portalId, rehosted);
  notes.unshift(
    `assets pull: ${paths.length} referenced | downloaded ${downloaded} | reused ${reused} | failed ${failed}`,
  );
  return { pulled: downloaded, notes };
}
|
|
442
|
+
|
|
443
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
444
|
+
// push(acct, { contentDir, registry }) -> { pushed, notes }
|
|
445
|
+
// ───────────────────────────────────────────────────────────────────────────
|
|
446
|
+
|
|
447
|
+
/**
 * PUSH: upload committed asset bytes to the target account and record
 * `@asset` key -> hosted URL so other adapters can resolve their tokens.
 *
 * Works over the union of keys referenced by content and files present under
 * content/assets. For each key: locate committed bytes in either tree, reuse
 * a URL already known for this account (unless $ASSET_FORCE is set), else
 * upload with overwrite.
 *
 * @param {{portalId: (string|number)}} acct - target account.
 * @param {{contentDir: string, registry: object}} ctx - content root and the
 *   registry to populate; `registry.assets` is created when absent.
 * @returns {Promise<{pushed: number, notes: string[]}>} `pushed` counts
 *   uploads performed this run; `notes[0]` is the summary line.
 * @throws {Error} when a REFERENCED key has no committed bytes (data-loss
 *   guard), or when any read/upload failed. The rehosted cache is saved
 *   before either error is thrown.
 */
export async function push(acct, { contentDir, registry }) {
  const notes = [];
  const assetsDir = join(contentDir, 'assets');
  // Union of referenced paths and bytes-on-disk: upload anything we have a file
  // for, so content/blog/theme can resolve every @asset they reference.
  const referenced = new Set(collectReferencedAssetPaths(contentDir));
  const onDisk = new Set(listAssetFiles(assetsDir));
  const paths = [...new Set([...referenced, ...onDisk])];

  // The seeding loop below already tolerated a missing `registry.assets`, but
  // the writes further down did not; create the map once so both agree.
  if (registry.assets == null) registry.assets = {};

  // The rehosted cache is the primary reuse source, but it is gitignored
  // volatile state that can be lost, truncated, or never written. The registry
  // holds the SAME mapping, so seed the cache from any concrete http(s) URL
  // already recorded there. A missing cache then still yields reuse instead of
  // a full re-upload. Non-URL registry values (e.g. `true`) are not reusable
  // hosted URLs and are skipped.
  const rehosted = loadRehosted(acct.portalId);
  for (const [k, v] of Object.entries(registry.assets)) {
    if (rehosted[k] == null && typeof v === 'string' && /^https?:\/\//.test(v)) {
      rehosted[k] = v;
    }
  }
  let uploaded = 0;
  let reused = 0;
  let missing = 0;
  let failed = 0;
  // Referenced @asset tokens whose bytes are NOT committed. These are fatal:
  // pushing past them would leave a consumer either hard-failing later or
  // silently resolving to a stale URL from a prior run. Collect every one so
  // the abort error names them all, then throw after the loop.
  const missingReferenced = [];

  for (const path of paths) {
    // Bytes may live in EITHER tree: content/assets/<path> or
    // content/blog/assets/<path>. (codex #6 unification.) The blog adapter also
    // rehosts its manifest assets, but overwrite-by-path makes a double upload
    // idempotent.
    const repoFile = resolveAssetBytesPath(contentDir, path);
    if (!repoFile) {
      missing++;
      notes.push(`missing bytes for @asset:${path} (run pull)`);
      if (referenced.has(path)) missingReferenced.push(path);
      continue;
    }
    // Already hosted on THIS account: reuse the URL instead of re-uploading,
    // which is wasteful and trips the Files API rate limit on bulk runs.
    // (Set $ASSET_FORCE=1 to force a re-upload.)
    if (rehosted[path] && !process.env.ASSET_FORCE) {
      registry.assets[path] = rehosted[path];
      reused++;
      continue;
    }
    let buf;
    try {
      buf = readFileSync(repoFile);
    } catch (e) {
      failed++;
      notes.push(`read failed @asset:${path}: ${e.message}`);
      continue;
    }
    let url;
    try {
      url = await uploadAsset(acct, buf, path);
    } catch (e) {
      failed++;
      notes.push(`upload failed @asset:${path}: ${e.message}`);
      continue;
    }
    // uploadAsset returns null when a successful response carries no URL.
    // Registering that would hand consumers an unresolvable entry while the
    // summary reported a successful upload, so count it as a failure.
    if (!url) {
      failed++;
      notes.push(`upload failed @asset:${path}: response contained no hosted URL`);
      continue;
    }
    // @asset -> target hosted URL, so resolve() in content/blog/theme works.
    registry.assets[path] = url;
    rehosted[path] = url;
    uploaded++;
  }

  // Persist any URLs we DID resolve this run before aborting, so a re-run after
  // the missing bytes are committed reuses them and stays idempotent.
  saveRehosted(acct.portalId, rehosted);
  notes.unshift(
    `assets push: ${paths.length} asset(s) | uploaded ${uploaded} | reused ${reused} | missing-bytes ${missing} | failed ${failed}`,
  );

  // DATA-LOSS GUARD: a referenced @asset with no committed bytes aborts the
  // whole push. Names every offender so the operator can `pull` once and
  // re-push.
  if (missingReferenced.length > 0) {
    throw new Error(
      `assets push: ${missingReferenced.length} referenced @asset(s) missing committed bytes — run \`pull\` first: ` +
        missingReferenced.map((p) => `@asset:${p}`).join(', '),
    );
  }
  // A read or upload that actually failed is likewise fatal — don't let a
  // consumer resolve a token we never uploaded.
  if (failed > 0) {
    throw new Error(`assets push: ${failed} asset upload(s) failed — see notes`);
  }

  return { pushed: uploaded, notes };
}
|
|
557
|
+
|
|
558
|
+
/**
 * List every file under `assetsDir` as a path tail relative to it, always
 * with '/' separators, so each entry matches the `@asset:<tail>` / registry
 * key form.
 *
 * Any entry named `manifest.json` is skipped at every depth (legacy sidecar,
 * not an asset). Empty files are listed like any other file.
 *
 * @param {string} assetsDir - directory to walk (e.g. `<contentDir>/assets`).
 * @returns {string[]} relative tails; empty when `assetsDir` does not exist.
 */
export function listAssetFiles(assetsDir) {
  if (!existsSync(assetsDir)) return [];
  const out = [];
  const walk = (dir, prefix) => {
    for (const ent of readdirSync(dir, { withFileTypes: true })) {
      if (ent.name === 'manifest.json') continue; // legacy sidecar, not an asset
      const full = join(dir, ent.name);
      // Build the tail with '/' explicitly rather than the platform separator.
      const rel = prefix ? `${prefix}/${ent.name}` : ent.name;
      if (ent.isDirectory()) walk(full, rel);
      // The Dirent already says whether this is a regular file. The former
      // `statSync(full).size >= 0` test was always true and only added a stat
      // call per file.
      else if (ent.isFile()) out.push(rel);
    }
  };
  walk(assetsDir, '');
  return out;
}
|
|
575
|
+
|
|
576
|
+
// Default export: the adapter's surface (name, dependsOn, pull, push) as one object.
export default { name, dependsOn, pull, push };
|