@larkiny/astro-github-loader 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1016 @@
1
+ import { existsSync, promises as fs } from "node:fs";
2
+ import { fileURLToPath, pathToFileURL } from "node:url";
3
+ import path, { join, dirname, basename, extname } from "node:path";
4
+ import picomatch from "picomatch";
5
+ import { globalLinkTransform, generateAutoLinkMappings } from "./github.link-transform.js";
6
+ import { getLatestCommitInfo, loadImportState, createConfigId } from "./github.dryrun.js";
7
+ import { INVALID_SERVICE_RESPONSE, INVALID_STRING_ERROR, INVALID_URL_ERROR, } from "./github.constants.js";
8
/**
 * Generates a unique identifier from a file path by removing the extension.
 *
 * Only a real extension on the final path segment is stripped, so dots in
 * directory names (e.g. "docs.v2/guide") and leading-dot files
 * (e.g. ".gitignore", "docs/.gitignore") are left untouched.
 *
 * @param filePath - The file path to generate ID from
 * @return {string} The generated identifier as a string with extension removed
 * @internal
 */
export function generateId(filePath) {
    const lastSlashIndex = filePath.lastIndexOf('/');
    const lastDotIndex = filePath.lastIndexOf('.');
    // Strip only when the dot belongs to the basename and is not its first
    // character; a bare lastIndexOf('.') would truncate at dots inside
    // directory names or hidden-file prefixes.
    if (lastDotIndex > lastSlashIndex + 1) {
        return filePath.substring(0, lastDotIndex);
    }
    return filePath;
}
23
/**
 * Applies path mapping logic to get the final filename for a file.
 *
 * Two mapping shapes are supported in `pathMappings`:
 * - **File mapping**: exact file path key (e.g. 'docs/README.md' -> 'docs/overview.md')
 * - **Folder mapping**: key ending with a trailing slash (e.g. 'docs/capabilities/' -> 'docs/')
 *
 * A mapping value may be a plain string target or an object with a `target`
 * property. When nothing matches, the file's basename is returned unchanged.
 *
 * @param filePath - Original source file path
 * @param matchedPattern - The pattern that matched this file
 * @param options - Import options containing path mappings
 * @returns Final filename after applying path mapping logic
 * @internal
 */
export function applyRename(filePath, matchedPattern, options) {
    const includes = options?.includes;
    const patternIsValid = Boolean(includes) && Boolean(matchedPattern) &&
        matchedPattern.index < includes.length;
    const mappings = patternIsValid ? includes[matchedPattern.index].pathMappings : undefined;
    if (mappings) {
        // Exact file match wins (backwards-compatible behavior).
        const exactMapping = mappings[filePath];
        if (exactMapping) {
            return typeof exactMapping === 'string' ? exactMapping : exactMapping.target;
        }
        // Otherwise try folder-to-folder mappings (keys with a trailing '/').
        for (const [sourceFolder, mappingValue] of Object.entries(mappings)) {
            if (!sourceFolder.endsWith('/') || !filePath.startsWith(sourceFolder)) {
                continue;
            }
            const targetFolder = typeof mappingValue === 'string' ? mappingValue : mappingValue.target;
            const remainder = filePath.slice(sourceFolder.length);
            // Repo paths are always forward-slash, so join with POSIX semantics.
            return path.posix.join(targetFolder, remainder);
        }
    }
    // No mapping applies: keep the original basename.
    return basename(filePath);
}
60
/**
 * Generates a local file path based on the matched pattern and file path.
 *
 * The pattern's literal prefix (everything before the first glob wildcard)
 * is stripped from the file path so only the path relative to the pattern
 * root remains, then any pathMappings rename (via applyRename) is folded in,
 * and the result is joined under the pattern's basePath.
 *
 * @param filePath - The original file path from the repository
 * @param matchedPattern - The pattern that matched this file (or null if no includes specified)
 * @param options - Import options containing includes patterns for path mapping lookups
 * @return {string} The local file path where this content should be stored
 * @throws {Error} When no matched pattern is provided (includes are required)
 * @internal
 */
export function generatePath(filePath, matchedPattern, options) {
    if (matchedPattern) {
        // Extract the directory part from the pattern (before any glob wildcards).
        const pattern = matchedPattern.pattern;
        const beforeGlob = pattern.split(/[*?{]/)[0];
        // Remove the pattern prefix from the file path to get the relative path.
        let relativePath = filePath;
        if (beforeGlob && filePath.startsWith(beforeGlob)) {
            relativePath = filePath.substring(beforeGlob.length);
            // Remove leading slash if present.
            if (relativePath.startsWith('/')) {
                relativePath = relativePath.substring(1);
            }
        }
        // If no relative path remains (file path equalled the prefix), fall
        // back to just the filename.
        if (!relativePath) {
            relativePath = basename(filePath);
        }
        // Apply path mapping logic (exact-file or folder-to-folder renames).
        const finalFilename = applyRename(filePath, matchedPattern, options);
        // Always apply path mapping if applyRename returned something different
        // from the original basename OR if there are pathMappings configured
        // (since empty-string mappings might return the same basename).
        const hasPathMappings = options?.includes?.[matchedPattern.index]?.pathMappings &&
            Object.keys(options.includes[matchedPattern.index].pathMappings).length > 0;
        if (finalFilename !== basename(filePath) || hasPathMappings) {
            // Check if applyRename returned a full path (contains path
            // separators) or just a filename.
            if (finalFilename.includes('/') || finalFilename.includes('\\')) {
                // applyRename returned a full relative path - strip the pattern
                // prefix again so the mapped path stays relative to the pattern
                // context (mirrors the prefix-stripping above).
                const beforeGlob = pattern.split(/[*?{]/)[0];
                if (beforeGlob && finalFilename.startsWith(beforeGlob)) {
                    relativePath = finalFilename.substring(beforeGlob.length);
                    // Remove leading slash if present.
                    if (relativePath.startsWith('/')) {
                        relativePath = relativePath.substring(1);
                    }
                }
                else {
                    // Mapped path lives outside the pattern prefix: use it verbatim.
                    relativePath = finalFilename;
                }
            }
            else {
                // applyRename returned just a filename. Use it directly; this
                // handles cases where pathMappings flatten directory structures.
                relativePath = finalFilename;
            }
        }
        return join(matchedPattern.basePath, relativePath);
    }
    // Should not happen since callers always resolve includes first.
    throw new Error("No matched pattern provided - includes are required");
}
121
/**
 * Synchronizes a file by ensuring the target directory exists and then
 * writing the specified content to the file at the given path.
 *
 * @param {string} path - The path of the file to synchronize, including its directory and filename.
 * @param {string} content - The content to write into the file.
 * @return {Promise<void>} - A promise that resolves when the file has been successfully written.
 * @internal
 */
export async function syncFile(path, content) {
    // dirname() handles both '/' and platform separators, unlike the previous
    // manual lastIndexOf('/') slice which broke on Windows-style paths.
    const dir = dirname(path);
    // mkdir with { recursive: true } is idempotent, so no existsSync pre-check
    // is needed (which also removes a check-then-act race).
    await fs.mkdir(dir, { recursive: true });
    // Write the file to the filesystem and store.
    await fs.writeFile(path, content, "utf-8");
}
138
/**
 * Default asset patterns for common image and media file types.
 * Lower-case extensions (including the leading dot) compared against
 * extname() results by detectAssets().
 * @internal
 */
const DEFAULT_ASSET_PATTERNS = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp'];
143
/**
 * Checks if a file path should be included and returns the matching pattern.
 *
 * When no include patterns are configured, every file is included with a
 * null matchedPattern. Otherwise the first pattern (in declaration order)
 * whose glob matches the path wins.
 *
 * @param filePath - The file path to check (relative to the repository root)
 * @param options - Import options containing includes patterns
 * @returns Object with include status and matched pattern, or a not-included result
 * @internal
 */
export function shouldIncludeFile(filePath, options) {
    const { includes } = options;
    // No include patterns configured: everything is included.
    if (!includes?.length) {
        return { included: true, matchedPattern: null };
    }
    // First matching pattern wins; index is preserved for later lookups.
    for (const [index, entry] of includes.entries()) {
        const isMatch = picomatch(entry.pattern);
        if (!isMatch(filePath)) {
            continue;
        }
        return {
            included: true,
            matchedPattern: {
                pattern: entry.pattern,
                basePath: entry.basePath,
                index,
            },
        };
    }
    // No patterns matched.
    return { included: false, matchedPattern: null };
}
174
/**
 * Detects asset references in markdown content using regex patterns.
 *
 * Scans for markdown image syntax `![alt](path)` and HTML `<img src="...">`
 * tags, keeping only non-absolute references (no '://') whose extension is in
 * the allowed list. Results are de-duplicated, preserving first-seen order.
 *
 * @param content - The markdown content to parse
 * @param assetPatterns - File extensions to treat as assets
 * @returns Array of detected asset paths
 * @internal
 */
export function detectAssets(content, assetPatterns = DEFAULT_ASSET_PATTERNS) {
    const allowedExtensions = assetPatterns.map((ext) => ext.toLowerCase());
    const found = new Set();
    // Shared filter: local-looking references with an allowed extension.
    const collect = (regex) => {
        for (const match of content.matchAll(regex)) {
            const candidate = match[1];
            const looksLocal = candidate.startsWith('./') ||
                candidate.startsWith('../') ||
                !candidate.includes('://');
            if (looksLocal && allowedExtensions.includes(extname(candidate).toLowerCase())) {
                found.add(candidate);
            }
        }
    };
    // Markdown images: ![alt](path)
    collect(/!\[[^\]]*\]\(([^)]+)\)/g);
    // HTML img tags: <img src="path">
    collect(/<img[^>]+src\s*=\s*["']([^"']+)["'][^>]*>/gi);
    return [...found];
}
210
/**
 * Downloads an asset from GitHub and saves it locally.
 *
 * Resolves the asset through the GitHub contents API, fetches its
 * download_url, and writes the bytes to localPath (creating parent
 * directories as needed). A 404 from the API is rewrapped as a clearer
 * "asset not found" error; anything else is rethrown unchanged.
 *
 * @param octokit - GitHub API client
 * @param owner - Repository owner
 * @param repo - Repository name
 * @param ref - Git reference
 * @param assetPath - Path to the asset in the repository
 * @param localPath - Local path where the asset should be saved
 * @param signal - Abort signal for cancellation
 * @returns Promise that resolves when the asset is downloaded
 * @internal
 */
export async function downloadAsset(octokit, owner, repo, ref, assetPath, localPath, signal) {
    try {
        const contentResponse = await octokit.rest.repos.getContent({
            owner,
            repo,
            path: assetPath,
            ref,
            request: { signal },
        });
        const data = contentResponse.data;
        // Directories come back as arrays; only single files with a
        // download_url can be fetched.
        const isDownloadableFile = !Array.isArray(data) && data.type === 'file' && Boolean(data.download_url);
        if (!isDownloadableFile) {
            throw new Error(`Asset ${assetPath} is not a valid file (type: ${data.type}, downloadUrl: ${data.download_url})`);
        }
        const download = await fetch(data.download_url, { signal });
        if (!download.ok) {
            throw new Error(`Failed to download asset: ${download.status} ${download.statusText}`);
        }
        const bytes = new Uint8Array(await download.arrayBuffer());
        const targetDir = dirname(localPath);
        if (!existsSync(targetDir)) {
            await fs.mkdir(targetDir, { recursive: true });
        }
        await fs.writeFile(localPath, bytes);
    }
    catch (error) {
        if (error.status === 404) {
            throw new Error(`Asset not found: ${assetPath}`);
        }
        throw error;
    }
}
252
/**
 * Transforms asset references in markdown content to use local paths.
 *
 * For each (originalPath -> newPath) entry, rewrites both markdown image
 * syntax `![alt](originalPath)` and HTML `<img src="originalPath">`
 * occurrences, preserving surrounding alt text and attributes.
 *
 * @param content - The markdown content to transform
 * @param assetMap - Map of original asset paths to new local paths
 * @returns Transformed content with updated asset references
 * @internal
 */
export function transformAssetReferences(content, assetMap) {
    let result = content;
    // Escape regex metacharacters so the literal path can be embedded in a pattern.
    const escapeForRegex = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    for (const [originalPath, newPath] of assetMap) {
        const escaped = escapeForRegex(originalPath);
        const markdownImage = new RegExp(`(!)\\[([^\\]]*)\\]\\(\\s*${escaped}\\s*\\)`, 'g');
        const htmlImage = new RegExp(`(<img[^>]+src\\s*=\\s*["'])${escaped}(["'][^>]*>)`, 'gi');
        result = result
            .replace(markdownImage, `$1[$2](${newPath})`)
            .replace(htmlImage, `$1${newPath}$2`);
    }
    return result;
}
271
/**
 * Escapes special regex characters in a string so it can be embedded in a
 * RegExp pattern as a literal.
 * @internal
 */
function escapeRegExp(string) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    return string.replace(metaCharacters, '\\$&');
}
278
/**
 * Processes assets in markdown content by detecting, downloading, and
 * transforming references.
 *
 * Skips entirely (returning the content unchanged) unless both assetsPath
 * and assetsBaseUrl are configured. Individual asset failures are logged
 * and skipped rather than failing the whole file.
 *
 * @param content - The markdown content to process
 * @param filePath - Repository path of the markdown file (used to resolve relative asset paths)
 * @param options - Configuration options including asset settings
 * @param octokit - GitHub API client
 * @param logger - Logger with verbose/debug/warn/logAssetProcessing methods
 * @param signal - Abort signal for cancellation
 * @returns Promise resolving to { content, assetsDownloaded, assetsCached }
 * @internal
 */
async function processAssets(content, filePath, options, octokit, logger, signal) {
    const { owner, repo, ref = 'main', assetsPath, assetsBaseUrl, assetPatterns } = options;
    logger.verbose(`🖼️ Processing assets for ${filePath}`);
    logger.debug(` assetsPath: ${assetsPath}`);
    logger.debug(` assetsBaseUrl: ${assetsBaseUrl}`);
    // Both settings are required; otherwise asset handling is disabled.
    if (!assetsPath || !assetsBaseUrl) {
        logger.verbose(` ⏭️ Skipping asset processing - missing assetsPath or assetsBaseUrl`);
        return { content, assetsDownloaded: 0, assetsCached: 0 };
    }
    // Detect assets in the content.
    const detectedAssets = detectAssets(content, assetPatterns);
    logger.verbose(` 📸 Detected ${detectedAssets.length} assets`);
    if (detectedAssets.length > 0) {
        logger.debug(` Assets: ${detectedAssets.join(', ')}`);
    }
    if (detectedAssets.length === 0) {
        return { content, assetsDownloaded: 0, assetsCached: 0 };
    }
    const assetMap = new Map();
    let assetsDownloaded = 0;
    let assetsCached = 0;
    // Process each detected asset concurrently. The counter increments are
    // safe because JS callbacks interleave on a single thread.
    await Promise.all(detectedAssets.map(async (assetPath) => {
        logger.logAssetProcessing("Processing", assetPath);
        try {
            // Resolve the asset path relative to the current markdown file.
            const resolvedAssetPath = resolveAssetPath(filePath, assetPath);
            logger.debug(` 🔗 Resolved path: ${resolvedAssetPath}`);
            // Generate unique filename to avoid conflicts.
            // NOTE(review): the filename embeds Date.now(), so it changes on
            // every run — the existsSync "cache" below will essentially never
            // hit, and repeated imports accumulate files. A content-hash-based
            // name would make caching effective; confirm intent before changing.
            const originalFilename = basename(assetPath);
            const ext = extname(originalFilename);
            const nameWithoutExt = basename(originalFilename, ext);
            const uniqueFilename = `${nameWithoutExt}-${Date.now()}${ext}`;
            const localPath = join(assetsPath, uniqueFilename);
            logger.debug(` 💾 Local path: ${localPath}`);
            // Check if asset already exists (simple cache check).
            if (existsSync(localPath)) {
                logger.logAssetProcessing("Cached", assetPath);
                assetsCached++;
            }
            else {
                // Download the asset.
                logger.logAssetProcessing("Downloading", assetPath, `from ${owner}/${repo}@${ref}:${resolvedAssetPath}`);
                await downloadAsset(octokit, owner, repo, ref, resolvedAssetPath, localPath, signal);
                logger.logAssetProcessing("Downloaded", assetPath);
                assetsDownloaded++;
            }
            // Generate URL for the transformed reference.
            // NOTE(review): the /\/+/g collapse assumes assetsBaseUrl is a
            // root-relative path; an absolute "https://..." base would be
            // mangled to "https:/..." — verify against expected config.
            const assetUrl = `${assetsBaseUrl}/${uniqueFilename}`.replace(/\/+/g, '/');
            logger.debug(` 🔄 Transform: ${assetPath} -> ${assetUrl}`);
            // Map the transformation.
            assetMap.set(assetPath, assetUrl);
        }
        catch (error) {
            // Best-effort: a failed asset is logged and left untransformed.
            logger.warn(` ❌ Failed to process asset ${assetPath}: ${error}`);
        }
    }));
    logger.verbose(` 🗺️ Processed ${assetMap.size} assets: ${assetsDownloaded} downloaded, ${assetsCached} cached`);
    // Transform the content with new asset references.
    const transformedContent = transformAssetReferences(content, assetMap);
    return { content: transformedContent, assetsDownloaded, assetsCached };
}
349
/**
 * Resolves an asset path relative to a base path.
 *
 * The result is a GitHub repository path (always forward-slash separated),
 * so resolution uses POSIX path semantics — the platform join()/dirname()
 * would emit backslashes on Windows and break the contents-API lookup.
 * This matches applyRename, which already uses path.posix.join.
 *
 * @param basePath - Repository path of the referencing markdown file
 * @param assetPath - Asset reference as written in the markdown ('./x', '../x', or repo-root relative)
 * @returns Repository path of the asset
 * @internal
 */
function resolveAssetPath(basePath, assetPath) {
    const baseDir = path.posix.dirname(basePath);
    if (assetPath.startsWith('./')) {
        return path.posix.join(baseDir, assetPath.slice(2));
    }
    if (assetPath.startsWith('../')) {
        // posix.join normalizes the '..' segments against baseDir.
        return path.posix.join(baseDir, assetPath);
    }
    // Not explicitly relative: treat as already repo-root relative.
    return assetPath;
}
362
/**
 * Synchronizes an entry by fetching its contents, validating its metadata,
 * and storing or rendering it as needed.
 *
 * Pipeline: validate URL -> conditional fetch (ETag via meta headers) ->
 * asset processing -> content transforms (global, then pattern-specific) ->
 * digest-based change detection -> write to disk if missing -> parse and
 * store in the Astro data store (rendered, deferred, or plain).
 *
 * @param {LoaderContext} context - The loader context containing the required utilities, metadata, and configuration.
 * @param {Object} urls - Object containing URL data.
 * @param {string | URL | null} urls.url - The URL of the entry to fetch. Throws an error if null or invalid.
 * @param {string} urls.editUrl - The URL for editing the entry.
 * @param {string} filePath - Repository path of the file being synchronized.
 * @param {RootOptions} options - Configuration settings for processing the entry such as file paths and custom options.
 * @param {any} octokit - GitHub API client for downloading assets.
 * @param {RequestInit} [init] - Optional parameter for customizing the fetch request.
 * @return {Promise<void>} Resolves when the entry has been successfully processed and stored. Throws errors if invalid URL, missing configuration, or other issues occur.
 * @internal
 */
export async function syncEntry(context, { url, editUrl }, filePath, options, octokit, init = {}) {
    // Exit on null or if the URL is invalid.
    if (url === null || (typeof url !== "string" && !(url instanceof URL))) {
        throw new TypeError(INVALID_URL_ERROR);
    }
    // Normalize string URLs to URL objects.
    if (typeof url === "string")
        url = new URL(url);
    const { meta, store, generateDigest, entryTypes, logger, parseData, config } = context;
    // Looks up the Astro entry type (e.g. markdown handler) by file extension.
    function configForFile(file) {
        const ext = file.split(".").at(-1);
        if (!ext) {
            logger.warn(`No extension found for ${file}`);
            return;
        }
        return entryTypes?.get(`.${ext}`);
    }
    // Custom ID, TODO: Allow custom id generators.
    let id = generateId(filePath);
    // NOTE(review): getHeaders/syncHeaders are not among the visible imports —
    // presumably defined elsewhere in this module; they appear to round-trip
    // conditional-request headers (ETag and the like) through `meta`. Confirm.
    init.headers = getHeaders({
        init: init.headers,
        meta,
        id,
    });
    let res = await fetch(url, init);
    if (res.status === 304) {
        // Not modified upstream — but only skip if the local file actually exists.
        const includeResult = shouldIncludeFile(filePath, options);
        const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
        const fileUrl = pathToFileURL(relativePath);
        if (existsSync(fileURLToPath(fileUrl))) {
            logger.info(`Skipping ${id} as it has not changed`);
            return;
        }
        else {
            logger.info(`File ${id} missing locally, re-fetching despite 304`);
            // File is missing locally: retry without the conditional headers so
            // the server returns a full response instead of another 304.
            const freshInit = { ...init };
            freshInit.headers = new Headers(init.headers);
            freshInit.headers.delete('If-None-Match');
            freshInit.headers.delete('If-Modified-Since');
            res = await fetch(url, freshInit);
            if (!res.ok)
                throw new Error(res.statusText);
        }
    }
    if (!res.ok)
        throw new Error(res.statusText);
    let contents = await res.text();
    const entryType = configForFile(filePath || "tmp.md");
    if (!entryType)
        throw new Error("No entry type found");
    // Process assets FIRST if configuration is provided - before content
    // transforms. This ensures asset detection works with original markdown
    // links before they get transformed.
    if (options.assetsPath && options.assetsBaseUrl) {
        try {
            // Adapt Astro's logger to the verbose/debug/warn/logAssetProcessing
            // interface that processAssets expects.
            const dummyLogger = {
                verbose: (msg) => logger.info(msg),
                debug: (msg) => logger.debug(msg),
                warn: (msg) => logger.warn(msg),
                logAssetProcessing: (action, path, details) => {
                    const msg = details ? `Asset ${action}: ${path} - ${details}` : `Asset ${action}: ${path}`;
                    logger.info(msg);
                }
            };
            const assetResult = await processAssets(contents, filePath, options, octokit, dummyLogger, init.signal || undefined);
            contents = assetResult.content;
        }
        catch (error) {
            // Asset failures are non-fatal; the entry is stored with original links.
            logger.warn(`Asset processing failed for ${id}: ${error.message}`);
        }
    }
    // Apply content transforms if provided - both global and pattern-specific.
    // This runs after asset processing so transforms work with processed content.
    const includeResultForTransforms = shouldIncludeFile(filePath, options);
    const transformsToApply = [];
    // Global transforms run first...
    if (options.transforms && options.transforms.length > 0) {
        transformsToApply.push(...options.transforms);
    }
    // ...then pattern-specific transforms for the matched include pattern.
    if (includeResultForTransforms.included && includeResultForTransforms.matchedPattern && options.includes) {
        const matchedInclude = options.includes[includeResultForTransforms.matchedPattern.index];
        if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
            transformsToApply.push(...matchedInclude.transforms);
        }
    }
    if (transformsToApply.length > 0) {
        const transformContext = {
            id,
            path: filePath,
            options,
            matchedPattern: includeResultForTransforms.included ? includeResultForTransforms.matchedPattern : undefined,
        };
        for (const transform of transformsToApply) {
            try {
                contents = transform(contents, transformContext);
            }
            catch (error) {
                // A failing transform is skipped; prior transforms' output is kept.
                logger.warn(`Transform failed for ${id}: ${error}`);
            }
        }
    }
    const includeResult = shouldIncludeFile(filePath, options);
    const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
    const fileUrl = pathToFileURL(relativePath);
    const { body, data } = await entryType.getEntryInfo({
        contents,
        fileUrl: fileUrl,
    });
    const existingEntry = store.get(id);
    const digest = generateDigest(contents);
    // Unchanged content already present in the store: nothing to do.
    if (existingEntry &&
        existingEntry.digest === digest &&
        existingEntry.filePath) {
        return;
    }
    // Write file to path (only if it does not already exist on disk).
    if (!existsSync(fileURLToPath(fileUrl))) {
        logger.verbose(`Writing ${id} to ${fileUrl}`);
        await syncFile(fileURLToPath(fileUrl), contents);
    }
    const parsedData = await parseData({
        id,
        data,
        filePath: fileUrl.toString(),
    });
    if (entryType.getRenderFunction) {
        // Entry type supports eager rendering: render now and store the result.
        logger.verbose(`Rendering ${id}`);
        const render = await entryType.getRenderFunction(config);
        let rendered = undefined;
        try {
            rendered = await render?.({
                id,
                data,
                body,
                filePath: fileUrl.toString(),
                digest,
            });
        }
        catch (error) {
            // Render errors are logged; the entry is still stored (rendered stays undefined).
            logger.error(`Error rendering ${id}: ${error.message}`);
        }
        store.set({
            id,
            data: parsedData,
            body,
            filePath: relativePath,
            digest,
            rendered,
        });
    }
    else if ("contentModuleTypes" in entryType) {
        // Content-module entry types defer rendering to build time.
        store.set({
            id,
            data: parsedData,
            body,
            filePath: relativePath,
            digest,
            deferredRender: true,
        });
    }
    else {
        store.set({ id, data: parsedData, body, filePath: relativePath, digest });
    }
    // Persist response caching headers into meta for the next conditional fetch.
    syncHeaders({
        headers: res.headers,
        meta,
        id,
    });
}
547
+ /**
548
+ * Converts a given GitHub repository path into a collection entry by fetching the content
549
+ * from the GitHub repository using the provided Octokit instance and options.
550
+ * Handles both files and directories, recursively processing directories if needed.
551
+ * @internal
552
+ */
553
+ export async function toCollectionEntry({ context, octokit, options, signal, force = false, }) {
554
+ const { owner, repo, ref = "main" } = options || {};
555
+ if (typeof repo !== "string" || typeof owner !== "string")
556
+ throw new TypeError(INVALID_STRING_ERROR);
557
+ // Get logger from context - it should be our Logger instance (initialize early)
558
+ const logger = context.logger;
559
+ // Repository-level caching - simple all-or-nothing approach
560
+ const configName = options.name || `${owner}/${repo}`;
561
+ const configId = createConfigId(options);
562
+ if (!force) {
563
+ try {
564
+ const state = await loadImportState(process.cwd());
565
+ const currentState = state.imports[configId];
566
+ if (currentState && currentState.lastCommitSha) {
567
+ logger.debug(`🔍 Checking repository changes for ${configName}...`);
568
+ const latestCommit = await getLatestCommitInfo(octokit, options, signal);
569
+ if (latestCommit && currentState.lastCommitSha === latestCommit.sha) {
570
+ logger.info(`✅ Repository ${configName} unchanged (${latestCommit.sha.slice(0, 7)}) - skipping import`);
571
+ return {
572
+ processed: 0,
573
+ updated: 0,
574
+ unchanged: 0,
575
+ assetsDownloaded: 0,
576
+ assetsCached: 0,
577
+ };
578
+ }
579
+ else if (latestCommit) {
580
+ logger.info(`🔄 Repository ${configName} changed (${currentState.lastCommitSha?.slice(0, 7) || 'unknown'} -> ${latestCommit.sha.slice(0, 7)}) - proceeding with import`);
581
+ }
582
+ }
583
+ else {
584
+ logger.debug(`📥 First time importing ${configName} - no previous state found`);
585
+ }
586
+ }
587
+ catch (error) {
588
+ logger.warn(`Failed to check repository state for ${configName}: ${error instanceof Error ? error.message : String(error)}`);
589
+ // Continue with import if state check fails
590
+ }
591
+ }
592
+ else {
593
+ logger.info(`🔄 Force mode enabled for ${configName} - proceeding with full import`);
594
+ }
595
+ // Get all unique directory prefixes from include patterns to limit scanning
596
+ const directoriesToScan = new Set();
597
+ if (options.includes && options.includes.length > 0) {
598
+ for (const includePattern of options.includes) {
599
+ // Extract directory part from pattern (before any glob wildcards)
600
+ const pattern = includePattern.pattern;
601
+ const beforeGlob = pattern.split(/[*?{]/)[0];
602
+ const dirPart = beforeGlob.includes('/') ? beforeGlob.substring(0, beforeGlob.lastIndexOf('/')) : '';
603
+ directoriesToScan.add(dirPart);
604
+ }
605
+ }
606
+ else {
607
+ // If no includes specified, scan from root
608
+ directoriesToScan.add('');
609
+ }
610
+ // Collect all files first (with content transforms applied)
611
+ const allFiles = [];
612
+ for (const dirPath of directoriesToScan) {
613
+ const files = await collectFilesRecursively(dirPath);
614
+ allFiles.push(...files);
615
+ }
616
+ // Track statistics
617
+ const stats = {
618
+ processed: 0,
619
+ updated: 0,
620
+ unchanged: 0,
621
+ assetsDownloaded: 0,
622
+ assetsCached: 0,
623
+ };
624
+ // Apply link transformation if configured
625
+ let processedFiles = allFiles;
626
+ if (options.linkTransform) {
627
+ logger.verbose(`Applying link transformation to ${allFiles.length} files`);
628
+ // Generate automatic link mappings from pathMappings
629
+ const autoGeneratedMappings = options.includes
630
+ ? generateAutoLinkMappings(options.includes, options.linkTransform.stripPrefixes)
631
+ : [];
632
+ // Combine auto-generated mappings with user-defined mappings
633
+ const allLinkMappings = [
634
+ ...autoGeneratedMappings,
635
+ ...(options.linkTransform.linkMappings || [])
636
+ ];
637
+ logger.debug(`Generated ${autoGeneratedMappings.length} automatic link mappings from pathMappings`);
638
+ processedFiles = globalLinkTransform(allFiles, {
639
+ stripPrefixes: options.linkTransform.stripPrefixes,
640
+ customHandlers: options.linkTransform.customHandlers,
641
+ linkMappings: allLinkMappings,
642
+ logger,
643
+ });
644
+ }
645
+ // Now store all processed files
646
+ stats.processed = processedFiles.length;
647
+ for (const file of processedFiles) {
648
+ logger.logFileProcessing("Storing", file.sourcePath);
649
+ const result = await storeProcessedFile(file, context, options);
650
+ if (result) {
651
+ stats.updated++;
652
+ }
653
+ else {
654
+ stats.unchanged++;
655
+ }
656
+ }
657
+ return stats;
658
+ // Helper function to collect files without storing them
659
+ async function collectFilesRecursively(path) {
660
+ const collectedFiles = [];
661
+ // Fetch the content
662
+ const { data, status } = await octokit.rest.repos.getContent({
663
+ owner,
664
+ repo,
665
+ path,
666
+ ref,
667
+ request: { signal },
668
+ });
669
+ if (status !== 200)
670
+ throw new Error(INVALID_SERVICE_RESPONSE);
671
+ // Handle single file
672
+ if (!Array.isArray(data)) {
673
+ const filePath = data.path;
674
+ if (data.type === "file") {
675
+ const fileData = await collectFileData({ url: data.download_url, editUrl: data.url }, filePath);
676
+ if (fileData) {
677
+ collectedFiles.push(fileData);
678
+ }
679
+ }
680
+ return collectedFiles;
681
+ }
682
+ // Directory listing - process files and recurse into subdirectories
683
+ const filteredEntries = data
684
+ .filter(({ type, path }) => {
685
+ // Always include directories for recursion
686
+ if (type === "dir")
687
+ return true;
688
+ // Apply filtering logic to files
689
+ if (type === "file") {
690
+ return shouldIncludeFile(path, options).included;
691
+ }
692
+ return false;
693
+ });
694
+ for (const { type, path, download_url, url } of filteredEntries) {
695
+ if (type === "dir") {
696
+ // Recurse into subdirectory
697
+ const subDirFiles = await collectFilesRecursively(path);
698
+ collectedFiles.push(...subDirFiles);
699
+ }
700
+ else if (type === "file") {
701
+ // Process file
702
+ const fileData = await collectFileData({ url: download_url, editUrl: url }, path);
703
+ if (fileData) {
704
+ collectedFiles.push(fileData);
705
+ }
706
+ }
707
+ }
708
+ return collectedFiles;
709
+ }
710
// Helper function to collect file data with content transforms applied.
// Downloads the file at `url` (honoring 304 Not Modified via the on-disk
// copy), optionally rewrites embedded assets, then runs global and
// pattern-specific content transforms. Returns the processed file record,
// or null when no usable download URL is provided.
async function collectFileData({ url, editUrl }, filePath) {
    // null (and any other non-string) has typeof !== "string".
    if (typeof url !== "string") {
        return null;
    }
    const urlObj = new URL(url);
    // Determine whether an include pattern matched, and whether that pattern
    // remaps this file's path; the entry ID must reflect the mapped path.
    const includeCheck = shouldIncludeFile(filePath, options);
    const matchedPattern = includeCheck.included ? includeCheck.matchedPattern : null;
    const hasPathMapping = matchedPattern &&
        options?.includes &&
        matchedPattern.index < options.includes.length &&
        options.includes[matchedPattern.index].pathMappings &&
        options.includes[matchedPattern.index].pathMappings[filePath];
    // finalPath is the on-disk destination; it doubles as the ID source when
    // a path mapping applies.
    const finalPath = generatePath(filePath, matchedPattern, options);
    const id = hasPathMapping ? generateId(finalPath) : generateId(filePath);
    let contents;
    logger.logFileProcessing("Fetching", filePath, `from ${urlObj.toString()}`);
    // Conditional request: etag/last-modified come from the meta store.
    const init = { signal, headers: getHeaders({ init: {}, meta: context.meta, id }) };
    let res = null;
    // Fetch with retries (simplified version of syncEntry logic).
    // 304 is a successful cache hit, so stop retrying on it too — breaking
    // only on res.ok caused redundant refetches of unmodified files.
    for (let attempt = 0; attempt < 3; attempt++) {
        try {
            res = await fetch(urlObj, init);
            if (res.ok || res.status === 304)
                break;
        }
        catch (error) {
            if (attempt === 2)
                throw error;
            // Linear backoff between attempts.
            await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)));
        }
    }
    if (!res) {
        throw new Error(`No response received for ${urlObj.toString()}`);
    }
    if (res.status === 304) {
        // File not modified: reuse the copy already written to disk.
        const diskPath = fileURLToPath(pathToFileURL(finalPath));
        if (existsSync(diskPath)) {
            logger.logFileProcessing("Using cached", filePath, "304 not modified");
            // Use the top-level fs.promises import; no dynamic import needed.
            contents = await fs.readFile(diskPath, 'utf-8');
        }
        else {
            // File is missing locally, re-fetch without cache headers.
            logger.logFileProcessing("Re-fetching", filePath, "missing locally despite 304");
            const freshInit = { ...init };
            freshInit.headers = new Headers(init.headers);
            freshInit.headers.delete('If-None-Match');
            freshInit.headers.delete('If-Modified-Since');
            res = await fetch(urlObj, freshInit);
            if (!res.ok) {
                throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
            }
            contents = await res.text();
        }
    }
    else if (!res.ok) {
        throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
    }
    else {
        contents = await res.text();
    }
    // Process assets FIRST so content transforms see rewritten asset URLs.
    if (options.assetsPath && options.assetsBaseUrl) {
        try {
            const assetResult = await processAssets(contents, filePath, options, octokit, logger, signal);
            contents = assetResult.content;
        }
        catch (error) {
            // Best-effort: a failed asset pass should not abort the import.
            logger.warn(`Asset processing failed for ${id}: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    // Apply content transforms: global transforms first, then transforms
    // attached to the include pattern that matched this file.
    const transformsToApply = [];
    if (options.transforms && options.transforms.length > 0) {
        transformsToApply.push(...options.transforms);
    }
    if (includeCheck.included && includeCheck.matchedPattern && options.includes) {
        const matchedInclude = options.includes[includeCheck.matchedPattern.index];
        if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
            transformsToApply.push(...matchedInclude.transforms);
        }
    }
    if (transformsToApply.length > 0) {
        const transformContext = {
            id,
            path: filePath,
            options,
            matchedPattern: includeCheck.included ? includeCheck.matchedPattern : undefined,
        };
        for (const transform of transformsToApply) {
            try {
                contents = transform(contents, transformContext);
            }
            catch (error) {
                // A single broken transform should not lose the file.
                context.logger?.warn(`Transform failed for ${id}: ${error}`);
            }
        }
    }
    // Build link context for this file (consumed later by link rewriting).
    const linkContext = includeCheck.included && includeCheck.matchedPattern ? {
        sourcePath: filePath,
        targetPath: finalPath,
        basePath: includeCheck.matchedPattern.basePath,
        pathMappings: options.includes?.[includeCheck.matchedPattern.index]?.pathMappings,
        matchedPattern: includeCheck.matchedPattern,
    } : undefined;
    return {
        sourcePath: filePath,
        targetPath: finalPath,
        content: contents,
        id,
        linkContext,
    };
}
842
// Helper function to store a processed file: resolves its entry type,
// writes it to disk when absent, parses frontmatter, optionally renders,
// and records the result in the content store.
async function storeProcessedFile(file, context, options) {
    const { store, generateDigest, entryTypes, logger, parseData, config } = context;
    // Resolve the entry type handler from the file extension.
    const lookupEntryType = (filePath) => {
        const ext = filePath.split(".").at(-1);
        if (!ext) {
            logger.warn(`No extension found for ${filePath}`);
            return;
        }
        return entryTypes?.get(`.${ext}`);
    };
    const entryType = lookupEntryType(file.sourcePath || "tmp.md");
    if (!entryType)
        throw new Error("No entry type found");
    const fileUrl = pathToFileURL(file.targetPath);
    const { body, data } = await entryType.getEntryInfo({
        contents: file.content,
        fileUrl: fileUrl,
    });
    // Generate digest for storage (repository-level caching handles change detection).
    const digest = generateDigest(file.content);
    logger.debug(store.get(file.id)
        ? `🔄 File ${file.id} - updating`
        : `📄 File ${file.id} - adding`);
    // Write the file to disk only if it is not already present.
    const diskPath = fileURLToPath(fileUrl);
    if (!existsSync(diskPath)) {
        logger.verbose(`Writing ${file.id} to ${fileUrl}`);
        await syncFile(diskPath, file.content);
    }
    const parsedData = await parseData({
        id: file.id,
        data,
        filePath: fileUrl.toString(),
    });
    // Fields shared by every store.set() variant below.
    const baseEntry = {
        id: file.id,
        data: parsedData,
        body,
        filePath: file.targetPath,
        digest,
    };
    if (entryType.getRenderFunction) {
        // Entry type supports eager rendering; render now and store the result.
        logger.verbose(`Rendering ${file.id}`);
        const render = await entryType.getRenderFunction(config);
        let rendered = undefined;
        try {
            rendered = await render?.({
                id: file.id,
                data,
                body,
                filePath: fileUrl.toString(),
                digest,
            });
        }
        catch (error) {
            logger.error(`Error rendering ${file.id}: ${error.message}`);
        }
        logger.debug(`🔍 Storing collection entry: ${file.id} (${file.sourcePath} -> ${file.targetPath})`);
        store.set({ ...baseEntry, rendered });
    }
    else if ("contentModuleTypes" in entryType) {
        // Rendering is deferred to the content module pipeline.
        store.set({ ...baseEntry, deferredRender: true });
    }
    else {
        store.set(baseEntry);
    }
    return { id: file.id, filePath: file.targetPath };
}
928
// Recursively walks a repository path via the GitHub contents API,
// syncing included files and descending into subdirectories sequentially.
async function processDirectoryRecursively(path) {
    // Fetch the content listing (or single-file metadata) for this path.
    const { data, status } = await octokit.rest.repos.getContent({
        owner,
        repo,
        path,
        ref,
        request: { signal },
    });
    if (status !== 200)
        throw new Error(INVALID_SERVICE_RESPONSE);
    // A non-array response means the path resolved to a single file.
    if (!Array.isArray(data)) {
        if (data.type !== "file")
            throw new Error("Invalid type");
        return await syncEntry(context, { url: data.download_url, editUrl: data.url }, data.path, options, octokit, { signal });
    }
    // Directory listing: keep directories (for recursion) and files that
    // pass the include/exclude filters; process entries sequentially.
    const relevantEntries = data.filter((entry) => {
        if (entry.type === "dir")
            return true;
        if (entry.type === "file")
            return shouldIncludeFile(entry.path, options).included;
        return false;
    });
    const results = [];
    for (const entry of relevantEntries) {
        if (entry.type === "dir") {
            results.push(await processDirectoryRecursively(entry.path));
        }
        else if (entry.type === "file") {
            results.push(await syncEntry(context, { url: entry.download_url, editUrl: entry.url }, entry.path, options, octokit, { signal }));
        }
        else {
            throw new Error("Invalid type");
        }
    }
    return results;
} // End of processDirectoryRecursively function
979
+ }
980
/**
 * Get the headers needed to make a conditional request.
 * Uses the etag and last-modified values from the meta store;
 * an etag takes precedence over a last-modified timestamp.
 * @internal
 */
export function getHeaders({ init, meta, id, }) {
    const headers = new Headers(init);
    const cachedEtag = meta.get(`${id}-etag`);
    if (cachedEtag) {
        headers.set("If-None-Match", cachedEtag);
        return headers;
    }
    const cachedLastModified = meta.get(`${id}-last-modified`);
    if (cachedLastModified) {
        headers.set("If-Modified-Since", cachedLastModified);
    }
    return headers;
}
999
/**
 * Store the etag or last-modified headers from a response in the meta store.
 * Any previously stored values for this id are cleared first; etag wins
 * over last-modified when both are present.
 * @internal
 */
export function syncHeaders({ headers, meta, id, }) {
    const etagKey = `${id}-etag`;
    const lastModifiedKey = `${id}-last-modified`;
    // Drop stale values before recording the fresh ones.
    meta.delete(etagKey);
    meta.delete(lastModifiedKey);
    const etag = headers.get("etag");
    if (etag) {
        meta.set(etagKey, etag);
        return;
    }
    const lastModified = headers.get("last-modified");
    if (lastModified) {
        meta.set(lastModifiedKey, lastModified);
    }
}