@writechoice/mint-cli 0.0.17 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +19 -0
- package/package.json +1 -1
- package/src/commands/metadata.js +357 -0
- package/src/utils/config.js +24 -0
package/bin/cli.js
CHANGED
|
@@ -189,6 +189,25 @@ fix
|
|
|
189
189
|
await fixImages(mergedOptions);
|
|
190
190
|
});
|
|
191
191
|
|
|
192
|
+
// Metadata command
//
// NOTE: --concurrency deliberately has no Commander-level default. With a
// default, options.concurrency is always set, so mergeMetadataConfig() would
// never fall back to `metadata.concurrency` from the config file. The final
// fallback (15) is applied when the options are merged.
program
  .command("metadata [baseUrl]")
  .description("Fetch meta tags from live pages and write them into MDX frontmatter")
  .option("-f, --file <path>", "Process a single MDX file")
  .option("-d, --dir <path>", "Process MDX files in a specific directory")
  .option("-c, --concurrency <number>", "Number of parallel HTTP requests (default: 15)")
  .option("--dry-run", "Preview changes without writing files")
  .option("--quiet", "Suppress terminal output")
  .action(async (baseUrl, options) => {
    // Modules are imported lazily, inside the action.
    const { loadConfig, mergeMetadataConfig } = await import("../src/utils/config.js");
    const { runMetadata } = await import("../src/commands/metadata.js");

    const config = loadConfig();
    // The positional baseUrl is folded into the options so it is merged with
    // the config file like every other setting.
    const mergedOptions = mergeMetadataConfig({ ...options, baseUrl }, config);
    // Per-file progress lines are shown unless --quiet was requested.
    mergedOptions.verbose = !mergedOptions.quiet;
    await runMetadata(mergedOptions);
  });
|
|
210
|
+
|
|
192
211
|
// Config command
|
|
193
212
|
program
|
|
194
213
|
.command("config")
|
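package/package.json
CHANGED
[the package.json hunk (+1 -1) is missing from this extract]
package/src/commands/metadata.js
[file header missing from this extract; the hunk below is the newly added file, +357 -0]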
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metadata Command
|
|
3
|
+
*
|
|
4
|
+
* Fetches meta tags from live documentation pages and writes them into
|
|
5
|
+
* the frontmatter of the corresponding MDX source files.
|
|
6
|
+
*
|
|
7
|
+
* URL mapping:
|
|
8
|
+
* baseUrl + "/" + relative-path-from-scan-dir (without .mdx)
|
|
9
|
+
*
|
|
10
|
+
* Example:
|
|
11
|
+
* baseUrl = https://docs.example.com
|
|
12
|
+
* file = docs/api/reference.mdx
|
|
13
|
+
* scan dir = docs/
|
|
14
|
+
* → URL = https://docs.example.com/api/reference
|
|
15
|
+
*
|
|
16
|
+
* Existing frontmatter keys are updated (overwritten).
|
|
17
|
+
* Missing keys are appended at the end of the frontmatter block.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { existsSync, readdirSync, statSync, readFileSync, writeFileSync } from "fs";
|
|
21
|
+
import { join, relative, resolve } from "path";
|
|
22
|
+
import chalk from "chalk";
|
|
23
|
+
|
|
24
|
+
// Directory names that the MDX file scan never descends into.
const EXCLUDED_DIRS = [
  "node_modules",
  ".git",
];

// Meta tags fetched when the caller does not supply its own tag list.
export const DEFAULT_META_TAGS = [
  // Open Graph
  "og:title", "og:description", "og:image", "og:url",
  // Twitter cards
  "twitter:title", "twitter:description", "twitter:image",
];
|
|
35
|
+
|
|
36
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
37
|
+
// HTML helpers
|
|
38
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
// Named character references decoded in attribute values. Deliberately small:
// these are the ones that commonly appear in meta tag content.
const HTML_NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Decodes named (`&amp;`) and numeric (`&#39;`, `&#x27;`) character references.
 * Works in a single pass, so `&amp;lt;` becomes `&lt;` and is not decoded twice.
 * Unknown or invalid references are left untouched.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (whole, ref) => {
    if (ref[0] !== "#") {
      return HTML_NAMED_ENTITIES.get(ref) ?? whole;
    }
    const isHex = ref[1] === "x" || ref[1] === "X";
    const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
    return isValid ? String.fromCodePoint(codePoint) : whole;
  });
}

/**
 * Parses an HTML attribute string into a name→value object.
 *
 * - Attribute names are lower-cased (HTML attribute names are
 *   case-insensitive), so `CONTENT="x"` is returned under `content`.
 * - Values may be double-quoted, single-quoted, or unquoted, with optional
 *   whitespace around `=`.
 * - Character references in values are decoded.
 * - Attributes without a value (e.g. `itemscope`) are ignored.
 * - If a name occurs more than once, the last occurrence wins.
 *
 * @param {string} attrStr - Raw text between the tag name and the closing `>`.
 * @returns {Object<string, string>} Attribute name → decoded value.
 */
function parseHtmlAttributes(attrStr) {
  const attrs = {};
  // Group 1: name; groups 2/3/4: double-quoted, single-quoted, unquoted value.
  const attrRe = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/g;
  for (const match of attrStr.matchAll(attrRe)) {
    const rawValue = match[2] ?? match[3] ?? match[4];
    attrs[match[1].toLowerCase()] = decodeHtmlEntities(rawValue);
  }
  return attrs;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Extracts the requested meta tag values from an HTML string.
 *
 * Each `<meta …>` element is identified by its `property`, `name`, or
 * `itemprop` attribute (checked in that order). A tag is returned only when
 * that identifier is in `tags` and its `content` is non-empty after trimming.
 * When the same tag appears more than once, the last occurrence wins.
 *
 * @param {string} html - HTML document text.
 * @param {string[]} tags - Meta tag identifiers to collect, e.g. "og:title".
 * @returns {Object<string, string>} e.g. { "og:title": "...", ... }
 */
function extractMetaTags(html, tags) {
  const wanted = new Set(tags);
  const results = {};
  // Quoted attribute values are consumed as a unit so that a ">" inside a
  // value (content="A > B") does not end the tag early.
  const metaRe = /<meta\s+((?:[^>"']|"[^"]*"|'[^']*')*)>/gi;

  for (const [, attrText] of html.matchAll(metaRe)) {
    const attrs = parseHtmlAttributes(attrText);
    const tagName = attrs.property || attrs.name || attrs.itemprop;
    const content = attrs.content?.trim();
    if (tagName && content && wanted.has(tagName)) {
      results[tagName] = content;
    }
  }

  return results;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Downloads `url` and pulls the requested meta tags out of the response body.
 *
 * Failures are reported through the `error` field rather than thrown: a
 * non-2xx response yields "HTTP <status>", and an exception raised while
 * fetching or reading the body yields that exception's message. The request
 * is sent with a desktop-browser User-Agent and a 30 second timeout.
 *
 * @param {string} url - Page to fetch.
 * @param {string[]} tags - Meta tag identifiers to extract.
 * @returns {Promise<{error: string|null, tags: Object}>}
 */
async function fetchMetaTags(url, tags) {
  const failed = (reason) => ({ error: reason, tags: {} });

  try {
    const requestHeaders = {
      "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    };
    const response = await fetch(url, {
      headers: requestHeaders,
      signal: AbortSignal.timeout(30_000),
    });

    if (response.ok) {
      const pageHtml = await response.text();
      return { error: null, tags: extractMetaTags(pageHtml, tags) };
    }

    return failed(`HTTP ${response.status}`);
  } catch (err) {
    return failed(err.message);
  }
}
|
|
96
|
+
|
|
97
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
98
|
+
// Concurrency
|
|
99
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
100
|
+
|
|
101
|
+
/**
 * Runs an array of async task factories with a maximum concurrency.
 *
 * Results are returned in the same order as `tasks`, regardless of the order
 * in which the tasks finish. If a task rejects, the returned promise rejects
 * with that error.
 *
 * @param {Array<() => Promise<*>>} tasks - Functions that start a task when called.
 * @param {number} concurrency - Maximum number of tasks in flight. Values that
 *   are not a number >= 1 (0, negatives, NaN) are treated as 1 so that the
 *   tasks still run instead of being silently skipped.
 * @returns {Promise<Array<*>>} One result per task, by task index.
 */
async function runConcurrent(tasks, concurrency) {
  const results = new Array(tasks.length);

  const requested = Math.floor(Number(concurrency));
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : requested;

  // Shared cursor: each worker claims the next unclaimed index. Claiming is
  // synchronous, so two workers can never take the same task.
  let nextIndex = 0;

  async function worker() {
    while (nextIndex < tasks.length) {
      const idx = nextIndex;
      nextIndex += 1;
      results[idx] = await tasks[idx]();
    }
  }

  const workerCount = Math.min(limit, tasks.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
118
|
+
|
|
119
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
120
|
+
// URL construction
|
|
121
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
122
|
+
|
|
123
|
+
/**
 * Maps an MDX file to the URL of its published page:
 * baseUrl + "/" + path of the file relative to `scanDir`, minus ".mdx".
 *
 * Backslashes in the relative path are converted to forward slashes, and one
 * trailing slash on `baseUrl` is dropped before joining.
 *
 * @param {string} filePath - Path of the MDX file.
 * @param {string} scanDir - Directory the relative path is computed from.
 * @param {string} baseUrl - Site root, with or without a trailing slash.
 * @returns {string} The page URL.
 */
function fileToUrl(filePath, scanDir, baseUrl) {
  const relativePath = relative(scanDir, filePath);
  const withoutExtension = relativePath.replace(/\.mdx$/, "");
  const urlPath = withoutExtension.split("\\").join("/");
  const trimmedBase = baseUrl.endsWith("/") ? baseUrl.slice(0, -1) : baseUrl;
  return `${trimmedBase}/${urlPath}`;
}
|
|
129
|
+
|
|
130
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
131
|
+
// Frontmatter helpers
|
|
132
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
133
|
+
|
|
134
|
+
// Matches a frontmatter block at the very start of a file. Capture group 1 is
// the text between the opening and closing `---` lines; LF and CRLF both match.
const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/;

/**
 * Backslash-escapes regular-expression metacharacters so that `str` can be
 * embedded in a RegExp source and match literally.
 *
 * @param {string} str
 * @returns {string}
 */
function escapeRe(str) {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (ch) => `\\${ch}`);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Formats a string value for YAML output.
 * Always produces a quoted scalar to avoid YAML interpretation issues.
 *
 * Quoting strategy:
 *  1. Plain double quotes when the text has no `"`, no backslash and no
 *     control characters (a backslash would start an escape sequence inside a
 *     double-quoted YAML scalar).
 *  2. Single quotes when the text has no `'` and no control characters
 *     (single-quoted scalars have no escapes, so backslashes and `"` are
 *     literal).
 *  3. Otherwise a fully escaped double-quoted scalar via JSON.stringify,
 *     which escapes `"`, backslashes, newlines and other control characters
 *     so the value stays on one line.
 *
 * @param {string} str - Value to format.
 * @returns {string} The quoted YAML scalar.
 */
function yamlValue(str) {
  const hasControlChar = /[\u0000-\u001f\u007f]/.test(str);

  if (!hasControlChar) {
    if (!str.includes('"') && !str.includes("\\")) return `"${str}"`;
    if (!str.includes("'")) return `'${str}'`;
  }

  return JSON.stringify(str);
}
|
|
150
|
+
|
|
151
|
+
/**
 * Applies meta data to the MDX file content.
 * Updates existing frontmatter keys, appends missing ones.
 *
 * - Content without a leading frontmatter block is returned unchanged with
 *   `skipped: true`.
 * - A key is matched bare, double-quoted or single-quoted. When it is
 *   replaced, indented lines directly below it (continuation of a multi-line
 *   value) are replaced with it.
 * - A key whose existing line already equals the new line is left alone and
 *   is NOT listed in `updated`, so unchanged files report zero changes.
 * - The line ending of the existing frontmatter (LF or CRLF) is reused.
 *
 * @param {string} content - Full MDX file text.
 * @param {Object<string, string>} metaData - Tag name → value to write.
 * @returns {{newContent: string, updated: string[], added: string[], skipped: boolean}}
 */
function applyMetaToContent(content, metaData) {
  const fmMatch = FRONTMATTER_RE.exec(content);
  if (!fmMatch) {
    return { newContent: content, updated: [], added: [], skipped: true };
  }

  let fmText = fmMatch[1];
  const body = content.slice(fmMatch[0].length);
  const eol = fmMatch[0].includes("\r\n") ? "\r\n" : "\n";

  const updated = [];
  const added = [];

  for (const [key, value] of Object.entries(metaData)) {
    // Keys containing colons must be quoted in YAML
    const yamlKey = key.includes(":") ? `"${key}"` : key;
    const newLine = `${yamlKey}: ${yamlValue(value)}`;

    // Match the existing key in any quoting variant, plus any indented
    // continuation lines that belong to its value.
    const keyEsc = escapeRe(key);
    const existingRe = new RegExp(
      `^(?:${keyEsc}|"${keyEsc}"|'${keyEsc}')\\s*:.*(?:\\r?\\n[ \\t]+.*)*$`,
      "m"
    );

    const existing = existingRe.exec(fmText);
    if (existing === null) {
      fmText = fmText === "" ? newLine : `${fmText}${eol}${newLine}`;
      added.push(key);
      continue;
    }

    if (existing[0] === newLine) {
      continue; // already up to date
    }

    // Splice by index rather than String.replace(re, newLine): a replacement
    // string would interpret "$&", "$$", etc. inside the value.
    const before = fmText.slice(0, existing.index);
    const after = fmText.slice(existing.index + existing[0].length);
    fmText = `${before}${newLine}${after}`;
    updated.push(key);
  }

  if (updated.length === 0 && added.length === 0) {
    return { newContent: content, updated, added, skipped: false };
  }

  const newContent = `---${eol}${fmText}${eol}---${eol}${body}`;
  return { newContent, updated, added, skipped: false };
}
|
|
193
|
+
|
|
194
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
195
|
+
// File discovery
|
|
196
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
197
|
+
|
|
198
|
+
/**
 * Collects the MDX files to process.
 *
 * - With `file`: returns that single path (resolved against `repoRoot`) if it
 *   exists, otherwise an empty list. The extension is not checked.
 * - Otherwise: recursively walks `directory` (resolved against `repoRoot`), or
 *   `repoRoot` itself, and returns every `*.mdx` file found, sorted.
 *
 * Directories named in EXCLUDED_DIRS are not entered. A directory or entry
 * that cannot be read is reported on stderr and skipped; the rest of the walk
 * continues.
 *
 * @param {string} repoRoot - Base directory for resolving relative paths.
 * @param {string|null} [directory] - Directory to scan instead of repoRoot.
 * @param {string|null} [file] - Single file to process.
 * @returns {string[]} Absolute file paths.
 */
function findMdxFiles(repoRoot, directory = null, file = null) {
  if (file) {
    const fullPath = resolve(repoRoot, file);
    return existsSync(fullPath) ? [fullPath] : [];
  }

  const startDir = directory ? resolve(repoRoot, directory) : repoRoot;
  const mdxFiles = [];

  function walkDirectory(dir) {
    // Split on both separators so the check also works for Windows paths.
    const dirName = dir.split(/[\\/]/).pop();
    if (EXCLUDED_DIRS.includes(dirName)) return;

    let entries;
    try {
      entries = readdirSync(dir);
    } catch (error) {
      console.error(`Error reading directory ${dir}: ${error.message}`);
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dir, entry);

      // stat per entry: one unreadable entry (e.g. a broken symlink) must not
      // hide the remaining entries of this directory.
      let stat;
      try {
        stat = statSync(fullPath);
      } catch (error) {
        console.error(`Error reading ${fullPath}: ${error.message}`);
        continue;
      }

      if (stat.isDirectory()) {
        walkDirectory(fullPath);
      } else if (stat.isFile() && entry.endsWith(".mdx")) {
        mdxFiles.push(fullPath);
      }
    }
  }

  if (existsSync(startDir)) walkDirectory(startDir);

  return mdxFiles.sort();
}
|
|
233
|
+
|
|
234
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
235
|
+
// Main export
|
|
236
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
237
|
+
|
|
238
|
+
/**
 * Entry point of the `metadata` command.
 *
 * For every MDX file found, fetches the corresponding live page, extracts the
 * configured meta tags and writes them into the file's frontmatter (unless
 * `dryRun` is set). Exits the process with code 1 when no base URL is given
 * or no MDX file is found.
 *
 * A failure on one file (fetch error, or an error while reading or writing the
 * file) is counted and reported, and the remaining files are still processed.
 *
 * @param {Object} options
 * @param {string} options.baseUrl - Root URL of the published site.
 * @param {string|null} [options.file] - Process only this file.
 * @param {string|null} [options.dir] - Scan this directory; also the root for URL mapping.
 * @param {number} [options.concurrency] - Parallel requests (falls back to 15).
 * @param {string[]|null} [options.tags] - Tags to fetch (falls back to DEFAULT_META_TAGS).
 * @param {boolean} [options.dryRun] - Report changes without writing files.
 * @param {boolean} [options.quiet] - Suppress the banner, error lines and summary.
 * @param {boolean} [options.verbose] - Print one line per processed file.
 * @returns {Promise<void>}
 */
export async function runMetadata(options) {
  const repoRoot = process.cwd();

  if (!options.quiet) {
    console.log(chalk.bold("\n🏷️ Metadata Fetcher\n"));
  }

  if (!options.baseUrl) {
    // The base URL is a positional argument of the command, not a flag.
    console.error(
      chalk.red("✗ No base URL provided. Pass it as the first argument or set 'source' in config.json.")
    );
    process.exit(1);
  }

  const tags = options.tags || DEFAULT_META_TAGS;
  const concurrency = options.concurrency || 15;
  const scanDir = options.dir ? resolve(repoRoot, options.dir) : repoRoot;

  const files = findMdxFiles(repoRoot, options.dir, options.file);

  if (files.length === 0) {
    console.error(chalk.red("✗ No MDX files found."));
    process.exit(1);
  }

  if (!options.quiet) {
    console.log(`Base URL : ${options.baseUrl}`);
    console.log(`Tags : ${tags.join(", ")}`);
    console.log(`Files : ${files.length} MDX file(s)`);
    console.log(`Concurrency: ${concurrency}\n`);
    if (options.dryRun) {
      console.log(chalk.yellow("Dry run — no files will be written\n"));
    }
  }

  let errors = 0; // pages that could not be fetched
  let fileErrors = 0; // files that could not be read or written
  const changed = [];

  const tasks = files.map((filePath) => async () => {
    const url = fileToUrl(filePath, scanDir, options.baseUrl);
    const relPath = relative(repoRoot, filePath);

    const { error, tags: metaData } = await fetchMetaTags(url, tags);

    if (error) {
      if (!options.quiet) {
        console.log(`${chalk.red("✗")} ${chalk.cyan(relPath)} — ${error}`);
      }
      errors++;
      return;
    }

    if (Object.keys(metaData).length === 0) {
      if (options.verbose) {
        console.log(`${chalk.gray("–")} ${chalk.cyan(relPath)} — no meta tags found`);
      }
      return;
    }

    // File I/O is guarded so that one unreadable or unwritable file does not
    // reject the whole run while other workers are still in flight.
    try {
      const content = readFileSync(filePath, "utf-8");
      const { newContent, updated, added, skipped: noFm } = applyMetaToContent(content, metaData);

      if (noFm) {
        if (options.verbose) {
          console.log(`${chalk.gray("–")} ${chalk.cyan(relPath)} — no frontmatter, skipped`);
        }
        return;
      }

      if (updated.length + added.length === 0) {
        if (options.verbose) {
          console.log(`${chalk.gray("–")} ${chalk.cyan(relPath)} — already up to date`);
        }
        return;
      }

      if (!options.dryRun) {
        writeFileSync(filePath, newContent, "utf-8");
      }

      changed.push({ relPath, updated, added });
      if (options.verbose) {
        const parts = [];
        if (updated.length) parts.push(`updated: ${updated.join(", ")}`);
        if (added.length) parts.push(`added: ${added.join(", ")}`);
        console.log(`${chalk.green("✓")} ${chalk.cyan(relPath)} — ${parts.join(" | ")}`);
      }
    } catch (err) {
      if (!options.quiet) {
        console.log(`${chalk.red("✗")} ${chalk.cyan(relPath)} — ${err.message}`);
      }
      fileErrors++;
    }
  });

  await runConcurrent(tasks, concurrency);

  // Summary
  if (options.quiet) return;

  if (changed.length > 0) {
    const verb = options.dryRun ? "Would update" : "Updated";
    console.log(chalk.green(`\n✓ ${verb} ${changed.length} file(s)`));

    // Per-file details were already printed in verbose mode.
    if (!options.verbose) {
      for (const { relPath, updated, added } of changed) {
        const parts = [];
        if (updated.length) parts.push(`updated: ${updated.length}`);
        if (added.length) parts.push(`added: ${added.length}`);
        console.log(` ${chalk.cyan(relPath)} — ${parts.join(" | ")} tag(s)`);
      }
    }
  } else {
    console.log(chalk.yellow("⚠️ No files needed updating."));
  }

  if (errors > 0) {
    console.log(chalk.yellow(`\n⚠️ ${errors} file(s) had fetch errors.`));
  }
  if (fileErrors > 0) {
    console.log(chalk.yellow(`\n⚠️ ${fileErrors} file(s) could not be read or written.`));
  }
}
|
package/src/utils/config.js
CHANGED
|
@@ -198,6 +198,30 @@ export function mergeH1Config(options, config) {
|
|
|
198
198
|
};
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
+
/**
 * Merges config file with CLI options for the metadata command.
 * CLI options take precedence over the config file.
 *
 * Sources per field:
 *  - baseUrl:     options.baseUrl, else top-level `source` in the config
 *  - file, dir:   options, else `metadata.file` / `metadata.dir`
 *  - concurrency: options, else `metadata.concurrency`, else 15. A value that
 *                 does not parse to an integer >= 1 (e.g. "abc", "0", "-2") is
 *                 ignored and the next source is used.
 *  - tags:        `metadata.tags` only (null means "use the defaults")
 *  - dryRun:      options, else `metadata["dry-run"]`, else false
 *  - quiet:       options, else `metadata.quiet`, else false
 *
 * @param {Object} options - CLI options
 * @param {Object|null} config - Loaded config object
 * @returns {Object} Merged options
 */
export function mergeMetadataConfig(options, config) {
  const metaConfig = config?.metadata || {};

  // Returns the value as a positive integer, or null when it is missing or invalid.
  const toPositiveInt = (raw) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isInteger(parsed) && parsed >= 1 ? parsed : null;
  };

  const concurrency =
    toPositiveInt(options.concurrency) ?? toPositiveInt(metaConfig.concurrency) ?? 15;

  return {
    baseUrl: options.baseUrl || config?.source || null,
    file: options.file || metaConfig.file || null,
    dir: options.dir || metaConfig.dir || null,
    concurrency,
    tags: metaConfig.tags || null, // null means use defaults
    dryRun: options.dryRun !== undefined ? options.dryRun : (metaConfig["dry-run"] ?? false),
    quiet: options.quiet !== undefined ? options.quiet : (metaConfig.quiet ?? false),
  };
}
|
|
224
|
+
|
|
201
225
|
/**
|
|
202
226
|
* Validates that required fields are present
|
|
203
227
|
* @param {string|undefined} baseUrl - Base URL
|