@larkiny/astro-github-loader 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1205 @@
1
+ import { existsSync, promises as fs } from "node:fs";
2
+ import { fileURLToPath, pathToFileURL } from "node:url";
3
+ import path, { join, dirname, basename, extname } from "node:path";
4
+ import picomatch from "picomatch";
5
+ import { globalLinkTransform, generateAutoLinkMappings, type ImportedFile } from "./github.link-transform.js";
6
+ import type { Logger } from "./github.logger.js";
7
+ import { getLatestCommitInfo, loadImportState, createConfigId } from "./github.dryrun.js";
8
+
9
+ import {
10
+ INVALID_SERVICE_RESPONSE,
11
+ INVALID_STRING_ERROR,
12
+ INVALID_URL_ERROR,
13
+ } from "./github.constants.js";
14
+
15
+ import type { LoaderContext, CollectionEntryOptions, ImportOptions, RenderedContent, MatchedPattern } from "./github.types.js";
16
+
17
/**
 * Aggregate counters summarizing a single import run.
 * Populated by `toCollectionEntry` and returned to the caller for reporting.
 */
export interface ImportStats {
  /** Number of files collected and considered for storage this run. */
  processed: number;
  /** Files whose content changed and were (re)stored. */
  updated: number;
  /** Files left untouched because their stored digest matched. */
  unchanged: number;
  /** Assets fetched from the repository (only meaningful when asset handling is enabled). */
  assetsDownloaded?: number;
  /** Assets already present locally and reused (only meaningful when asset handling is enabled). */
  assetsCached?: number;
}
24
+
25
+ /**
26
+ * Generates a unique identifier from a file path by removing the extension
27
+ * @param filePath - The file path to generate ID from
28
+ * @return {string} The generated identifier as a string with extension removed
29
+ * @internal
30
+ */
31
+ export function generateId(filePath: string): string {
32
+ let id = filePath;
33
+
34
+ // Remove file extension for ID generation
35
+ const lastDotIndex = id.lastIndexOf('.');
36
+ if (lastDotIndex > 0) {
37
+ id = id.substring(0, lastDotIndex);
38
+ }
39
+ return id;
40
+ }
41
+
42
+
43
+ /**
44
+ * Applies path mapping logic to get the final filename for a file
45
+ *
46
+ * Supports two types of path mappings:
47
+ * - **File mapping**: Exact file path match (e.g., 'docs/README.md' -> 'docs/overview.md')
48
+ * - **Folder mapping**: Folder path with trailing slash (e.g., 'docs/capabilities/' -> 'docs/')
49
+ *
50
+ * @param filePath - Original source file path
51
+ * @param matchedPattern - The pattern that matched this file
52
+ * @param options - Import options containing path mappings
53
+ * @returns Final filename after applying path mapping logic
54
+ * @internal
55
+ */
56
+ export function applyRename(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string {
57
+ if (options?.includes && matchedPattern && matchedPattern.index < options.includes.length) {
58
+ const includePattern = options.includes[matchedPattern.index];
59
+
60
+ if (includePattern.pathMappings) {
61
+ // First check for exact file match (current behavior - backwards compatible)
62
+ if (includePattern.pathMappings[filePath]) {
63
+ const mappingValue = includePattern.pathMappings[filePath];
64
+ return typeof mappingValue === 'string' ? mappingValue : mappingValue.target;
65
+ }
66
+
67
+ // Then check for folder-to-folder mappings
68
+ for (const [sourceFolder, mappingValue] of Object.entries(includePattern.pathMappings)) {
69
+ // Check if this is a folder mapping (ends with /) and file is within it
70
+ if (sourceFolder.endsWith('/') && filePath.startsWith(sourceFolder)) {
71
+ // Replace the source folder path with target folder path
72
+ const targetFolder = typeof mappingValue === 'string' ? mappingValue : mappingValue.target;
73
+ const relativePath = filePath.slice(sourceFolder.length);
74
+ return path.posix.join(targetFolder, relativePath);
75
+ }
76
+ }
77
+ }
78
+ }
79
+
80
+ // Return original filename if no path mapping found
81
+ return basename(filePath);
82
+ }
83
+
84
/**
 * Generates a local file path based on the matched pattern and file path.
 *
 * The relative path is derived by stripping the static (pre-glob) prefix of
 * the matched include pattern from the source path; path mappings from
 * `applyRename` are then layered on top, and the result is joined onto the
 * pattern's `basePath`.
 *
 * @param filePath - The original file path from the repository
 * @param matchedPattern - The pattern that matched this file (or null if no includes specified)
 * @param options - Import options containing includes patterns for path mapping lookups
 * @return {string} The local file path where this content should be stored
 * @throws Error when no matched pattern is provided (includes are required)
 * @internal
 */
export function generatePath(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string {
  if (matchedPattern) {
    // Extract the directory part from the pattern (before any glob wildcards)
    const pattern = matchedPattern.pattern;
    const beforeGlob = pattern.split(/[*?{]/)[0];

    // Remove the pattern prefix from the file path to get the relative path
    let relativePath = filePath;
    if (beforeGlob && filePath.startsWith(beforeGlob)) {
      relativePath = filePath.substring(beforeGlob.length);
      // Remove leading slash if present
      if (relativePath.startsWith('/')) {
        relativePath = relativePath.substring(1);
      }
    }

    // If no relative path remains, use just the filename
    if (!relativePath) {
      relativePath = basename(filePath);
    }

    // Apply path mapping logic
    const finalFilename = applyRename(filePath, matchedPattern, options);
    // Always apply path mapping if applyRename returned something different from the original basename
    // OR if there are pathMappings configured (since empty string mappings might return same basename)
    const hasPathMappings = options?.includes?.[matchedPattern.index]?.pathMappings &&
      Object.keys(options.includes[matchedPattern.index].pathMappings!).length > 0;
    if (finalFilename !== basename(filePath) || hasPathMappings) {
      // Check if applyRename returned a full path (contains path separators) or just a filename
      if (finalFilename.includes('/') || finalFilename.includes('\\')) {
        // applyRename returned a full relative path - need to extract relative part
        // Remove the pattern prefix to get the relative path within the pattern context
        const beforeGlob = pattern.split(/[*?{]/)[0];
        if (beforeGlob && finalFilename.startsWith(beforeGlob)) {
          relativePath = finalFilename.substring(beforeGlob.length);
          // Remove leading slash if present
          if (relativePath.startsWith('/')) {
            relativePath = relativePath.substring(1);
          }
        } else {
          relativePath = finalFilename;
        }
      } else {
        // applyRename returned just a filename
        // If the filename is different due to pathMapping, use it directly
        // This handles cases where pathMappings flatten directory structures
        relativePath = finalFilename;
      }
    }

    // NOTE(review): `join` is platform-specific, so on Windows this yields
    // backslash-separated local paths — appears intentional for filesystem
    // output, but confirm downstream consumers (pathToFileURL callers) agree.
    return join(matchedPattern.basePath, relativePath);
  }

  // Should not happen since we always use includes
  throw new Error("No matched pattern provided - includes are required");
}
148
+
149
+ /**
150
+ * Synchronizes a file by ensuring the target directory exists and then writing the specified content to the file at the given path.
151
+ *
152
+ * @param {string} path - The path of the file to synchronize, including its directory and filename.
153
+ * @param {string} content - The content to write into the file.
154
+ * @return {Promise<void>} - A promise that resolves when the file has been successfully written.
155
+ * @internal
156
+ */
157
+ export async function syncFile(path: string, content: string) {
158
+ const dir = path.substring(0, path.lastIndexOf("/"));
159
+
160
+ // Ensure the directory exists
161
+ if (dir && !existsSync(dir)) {
162
+ await fs.mkdir(dir, { recursive: true });
163
+ }
164
+
165
+ // Write the file to the filesystem and store
166
+ await fs.writeFile(path, content, "utf-8");
167
+ }
168
+
169
/**
 * Default asset file extensions (lowercase, dot-prefixed) covering common
 * image/media types. Used by `detectAssets` when the caller does not supply
 * its own pattern list.
 * @internal
 */
const DEFAULT_ASSET_PATTERNS = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp'];
174
+
175
+ /**
176
+ * Checks if a file path should be included and returns the matching pattern
177
+ * @param filePath - The file path to check (relative to the repository root)
178
+ * @param options - Import options containing includes patterns
179
+ * @returns Object with include status and matched pattern, or null if not included
180
+ * @internal
181
+ */
182
+ export function shouldIncludeFile(filePath: string, options: ImportOptions): { included: true; matchedPattern: MatchedPattern | null } | { included: false; matchedPattern: null } {
183
+ const { includes } = options;
184
+
185
+ // If no include patterns specified, include all files
186
+ if (!includes || includes.length === 0) {
187
+ return { included: true, matchedPattern: null };
188
+ }
189
+
190
+ // Check each include pattern to find a match
191
+ for (let i = 0; i < includes.length; i++) {
192
+ const includePattern = includes[i];
193
+ const matcher = picomatch(includePattern.pattern);
194
+
195
+ if (matcher(filePath)) {
196
+ return {
197
+ included: true,
198
+ matchedPattern: {
199
+ pattern: includePattern.pattern,
200
+ basePath: includePattern.basePath,
201
+ index: i
202
+ }
203
+ };
204
+ }
205
+ }
206
+
207
+ // No patterns matched
208
+ return { included: false, matchedPattern: null };
209
+ }
210
+
211
+ /**
212
+ * Detects asset references in markdown content using regex patterns
213
+ * @param content - The markdown content to parse
214
+ * @param assetPatterns - File extensions to treat as assets
215
+ * @returns Array of detected asset paths
216
+ * @internal
217
+ */
218
+ export function detectAssets(content: string, assetPatterns: string[] = DEFAULT_ASSET_PATTERNS): string[] {
219
+ const assets: string[] = [];
220
+ const patterns = assetPatterns.map(ext => ext.toLowerCase());
221
+
222
+ // Match markdown images: ![alt](path)
223
+ const imageRegex = /!\[[^\]]*\]\(([^)]+)\)/g;
224
+ let match;
225
+
226
+ while ((match = imageRegex.exec(content)) !== null) {
227
+ const assetPath = match[1];
228
+ // Only include relative paths and assets matching our patterns
229
+ if (assetPath.startsWith('./') || assetPath.startsWith('../') || !assetPath.includes('://')) {
230
+ const ext = extname(assetPath).toLowerCase();
231
+ if (patterns.includes(ext)) {
232
+ assets.push(assetPath);
233
+ }
234
+ }
235
+ }
236
+
237
+ // Match HTML img tags: <img src="path">
238
+ const htmlImgRegex = /<img[^>]+src\s*=\s*["']([^"']+)["'][^>]*>/gi;
239
+ while ((match = htmlImgRegex.exec(content)) !== null) {
240
+ const assetPath = match[1];
241
+ if (assetPath.startsWith('./') || assetPath.startsWith('../') || !assetPath.includes('://')) {
242
+ const ext = extname(assetPath).toLowerCase();
243
+ if (patterns.includes(ext)) {
244
+ assets.push(assetPath);
245
+ }
246
+ }
247
+ }
248
+
249
+ return [...new Set(assets)]; // Remove duplicates
250
+ }
251
+
252
/**
 * Downloads an asset from GitHub and saves it locally.
 *
 * First resolves the asset's metadata via the contents API (to validate it is
 * a file and obtain its `download_url`), then streams the raw bytes with
 * `fetch` and writes them to disk, creating the parent directory if needed.
 *
 * @param octokit - GitHub API client (typed `any`; must expose `rest.repos.getContent`)
 * @param owner - Repository owner
 * @param repo - Repository name
 * @param ref - Git reference
 * @param assetPath - Path to the asset in the repository
 * @param localPath - Local path where the asset should be saved
 * @param signal - Abort signal for cancellation (applied to both API call and download)
 * @returns Promise that resolves when the asset is downloaded
 * @throws Error with a friendlier message when the API returns 404; rethrows other failures
 * @internal
 */
export async function downloadAsset(
  octokit: any,
  owner: string,
  repo: string,
  ref: string,
  assetPath: string,
  localPath: string,
  signal?: AbortSignal
): Promise<void> {
  try {
    // Metadata lookup; for a single file this yields an object with download_url.
    const { data } = await octokit.rest.repos.getContent({
      owner,
      repo,
      path: assetPath,
      ref,
      request: { signal },
    });

    // An array response means the path is a directory; reject non-files too.
    if (Array.isArray(data) || data.type !== 'file' || !data.download_url) {
      throw new Error(`Asset ${assetPath} is not a valid file (type: ${data.type}, downloadUrl: ${data.download_url})`);
    }

    const response = await fetch(data.download_url, { signal });
    if (!response.ok) {
      throw new Error(`Failed to download asset: ${response.status} ${response.statusText}`);
    }

    const buffer = await response.arrayBuffer();
    const dir = dirname(localPath);

    // Ensure the destination directory exists before writing.
    if (!existsSync(dir)) {
      await fs.mkdir(dir, { recursive: true });
    }

    await fs.writeFile(localPath, new Uint8Array(buffer));
  } catch (error: any) {
    // Map the API's 404 into a clearer error; everything else propagates as-is.
    if (error.status === 404) {
      throw new Error(`Asset not found: ${assetPath}`);
    }
    throw error;
  }
}
306
+
307
+ /**
308
+ * Transforms asset references in markdown content to use local paths
309
+ * @param content - The markdown content to transform
310
+ * @param assetMap - Map of original asset paths to new local paths
311
+ * @returns Transformed content with updated asset references
312
+ * @internal
313
+ */
314
+ export function transformAssetReferences(content: string, assetMap: Map<string, string>): string {
315
+ let transformedContent = content;
316
+
317
+ for (const [originalPath, newPath] of assetMap) {
318
+ // Transform markdown images
319
+ const imageRegex = new RegExp(`(!)\\[([^\\]]*)\\]\\(\\s*${escapeRegExp(originalPath)}\\s*\\)`, 'g');
320
+ transformedContent = transformedContent.replace(imageRegex, `$1[$2](${newPath})`);
321
+
322
+ // Transform HTML img tags
323
+ const htmlRegex = new RegExp(`(<img[^>]+src\\s*=\\s*["'])${escapeRegExp(originalPath)}(["'][^>]*>)`, 'gi');
324
+ transformedContent = transformedContent.replace(htmlRegex, `$1${newPath}$2`);
325
+ }
326
+
327
+ return transformedContent;
328
+ }
329
+
330
+ /**
331
+ * Escapes special regex characters in a string
332
+ * @internal
333
+ */
334
+ function escapeRegExp(string: string): string {
335
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
336
+ }
337
+
338
/**
 * Processes assets in markdown content by detecting, downloading, and
 * transforming references.
 *
 * Pipeline: detect candidate asset paths in the content, resolve each one
 * relative to the containing markdown file, download (or reuse) it under
 * `assetsPath` with a uniquified filename, then rewrite the content so all
 * references point at `assetsBaseUrl`. Requires both `assetsPath` and
 * `assetsBaseUrl`; otherwise the content is returned untouched.
 *
 * @param content - The markdown content to process
 * @param filePath - Repository path of the markdown file (used to resolve relative asset paths)
 * @param options - Configuration options including asset settings
 * @param octokit - GitHub API client
 * @param logger - Logger used for verbose/debug/asset-progress output
 * @param signal - Abort signal for cancellation
 * @returns Promise resolving to transformed content plus download/cache counters
 * @internal
 */
async function processAssets(
  content: string,
  filePath: string,
  options: ImportOptions,
  octokit: any,
  logger: Logger,
  signal?: AbortSignal
): Promise<{ content: string; assetsDownloaded: number; assetsCached: number }> {
  const { owner, repo, ref = 'main', assetsPath, assetsBaseUrl, assetPatterns } = options;

  logger.verbose(`🖼️ Processing assets for ${filePath}`);
  logger.debug(`  assetsPath: ${assetsPath}`);
  logger.debug(`  assetsBaseUrl: ${assetsBaseUrl}`);

  // Asset handling is opt-in: both destination path and public base URL are required.
  if (!assetsPath || !assetsBaseUrl) {
    logger.verbose(`  ⏭️ Skipping asset processing - missing assetsPath or assetsBaseUrl`);
    return { content, assetsDownloaded: 0, assetsCached: 0 };
  }

  // Detect assets in the content
  const detectedAssets = detectAssets(content, assetPatterns);
  logger.verbose(`  📸 Detected ${detectedAssets.length} assets`);
  if (detectedAssets.length > 0) {
    logger.debug(`  Assets: ${detectedAssets.join(', ')}`);
  }

  if (detectedAssets.length === 0) {
    return { content, assetsDownloaded: 0, assetsCached: 0 };
  }

  const assetMap = new Map<string, string>();
  // Counter mutation inside Promise.all is safe here: JS callbacks run on one thread.
  let assetsDownloaded = 0;
  let assetsCached = 0;

  // Process each detected asset concurrently; individual failures are logged, not fatal.
  await Promise.all(detectedAssets.map(async (assetPath) => {
    logger.logAssetProcessing("Processing", assetPath);
    try {
      // Resolve the asset path relative to the current markdown file
      const resolvedAssetPath = resolveAssetPath(filePath, assetPath);
      logger.debug(`  🔗 Resolved path: ${resolvedAssetPath}`);

      // Generate unique filename to avoid conflicts.
      // NOTE(review): embedding Date.now() means the existsSync cache check
      // below will essentially never hit across runs — confirm whether a
      // content- or path-derived name was intended for real caching.
      const originalFilename = basename(assetPath);
      const ext = extname(originalFilename);
      const nameWithoutExt = basename(originalFilename, ext);
      const uniqueFilename = `${nameWithoutExt}-${Date.now()}${ext}`;
      const localPath = join(assetsPath, uniqueFilename);
      logger.debug(`  💾 Local path: ${localPath}`);

      // Check if asset already exists (simple cache check)
      if (existsSync(localPath)) {
        logger.logAssetProcessing("Cached", assetPath);
        assetsCached++;
      } else {
        // Download the asset
        logger.logAssetProcessing("Downloading", assetPath, `from ${owner}/${repo}@${ref}:${resolvedAssetPath}`);
        await downloadAsset(octokit, owner, repo, ref, resolvedAssetPath, localPath, signal);
        logger.logAssetProcessing("Downloaded", assetPath);
        assetsDownloaded++;
      }

      // Generate URL for the transformed reference.
      // NOTE(review): the slash-collapsing replace would also mangle a
      // protocol in assetsBaseUrl ('https://' -> 'https:/'); presumably
      // assetsBaseUrl is a root-relative path — verify.
      const assetUrl = `${assetsBaseUrl}/${uniqueFilename}`.replace(/\/+/g, '/');
      logger.debug(`  🔄 Transform: ${assetPath} -> ${assetUrl}`);

      // Map the transformation
      assetMap.set(assetPath, assetUrl);
    } catch (error) {
      logger.warn(`  ❌ Failed to process asset ${assetPath}: ${error}`);
    }
  }));

  logger.verbose(`  🗺️ Processed ${assetMap.size} assets: ${assetsDownloaded} downloaded, ${assetsCached} cached`);

  // Transform the content with new asset references
  const transformedContent = transformAssetReferences(content, assetMap);
  return { content: transformedContent, assetsDownloaded, assetsCached };
}
426
+
427
+ /**
428
+ * Resolves an asset path relative to a base path
429
+ * @internal
430
+ */
431
+ function resolveAssetPath(basePath: string, assetPath: string): string {
432
+ if (assetPath.startsWith('./')) {
433
+ return join(dirname(basePath), assetPath.slice(2));
434
+ } else if (assetPath.startsWith('../')) {
435
+ return join(dirname(basePath), assetPath);
436
+ }
437
+ return assetPath;
438
+ }
439
+
440
/**
 * Synchronizes an entry by fetching its contents, validating its metadata, and storing or rendering it as needed.
 *
 * Pipeline order matters: (1) conditional fetch (with 304 recovery when the
 * local file is missing), (2) asset processing — run FIRST so detection sees
 * the original markdown links, (3) content transforms (global then
 * pattern-specific), (4) digest check against the store, (5) file write,
 * parse, optional render, and store update.
 *
 * @param {LoaderContext} context - The loader context containing the required utilities, metadata, and configuration.
 * @param {Object} urls - Object containing URL data.
 * @param {string | URL | null} urls.url - The URL of the entry to fetch. Throws an error if null or invalid.
 * @param {string} urls.editUrl - The URL for editing the entry. NOTE(review): currently unused in this function body.
 * @param {string} filePath - Repository path of the file being synchronized.
 * @param {ImportOptions} options - Configuration settings for processing the entry such as file paths and custom options.
 * @param {any} octokit - GitHub API client for downloading assets.
 * @param {RequestInit} [init] - Optional parameter for customizing the fetch request.
 * @return {Promise<void>} Resolves when the entry has been successfully processed and stored. Throws errors if invalid URL, missing configuration, or other issues occur.
 * @internal
 */
export async function syncEntry(
  context: LoaderContext,
  { url, editUrl }: { url: string | URL | null; editUrl: string },
  filePath: string,
  options: ImportOptions,
  octokit: any,
  init: RequestInit = {},
) {
  // Exit on null or if the URL is invalid
  if (url === null || (typeof url !== "string" && !(url instanceof URL))) {
    throw new TypeError(INVALID_URL_ERROR);
  }
  // Validate URL
  if (typeof url === "string") url = new URL(url);

  const { meta, store, generateDigest, entryTypes, logger, parseData, config } =
    context;

  // Looks up the Astro entry-type handler for a file by its extension.
  function configForFile(file: string) {
    const ext = file.split(".").at(-1);
    if (!ext) {
      logger.warn(`No extension found for ${file}`);
      return;
    }
    return entryTypes?.get(`.${ext}`);
  }
  // Custom ID, TODO: Allow custom id generators
  let id = generateId(filePath);

  // getHeaders is defined elsewhere in this module; given the 304/ETag
  // handling below, it presumably adds conditional-request headers from meta.
  init.headers = getHeaders({
    init: init.headers,
    meta,
    id,
  });

  let res = await fetch(url, init);

  if (res.status === 304) {
    // Only skip if the local file actually exists
    const includeResult = shouldIncludeFile(filePath, options);
    const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
    const fileUrl = pathToFileURL(relativePath);

    if (existsSync(fileURLToPath(fileUrl))) {
      logger.info(`Skipping ${id} as it has not changed`);
      return;
    } else {
      logger.info(`File ${id} missing locally, re-fetching despite 304`);
      // File is missing locally, fetch without ETag headers
      const freshInit = { ...init };
      freshInit.headers = new Headers(init.headers);
      freshInit.headers.delete('If-None-Match');
      freshInit.headers.delete('If-Modified-Since');

      res = await fetch(url, freshInit);
      if (!res.ok) throw new Error(res.statusText);
    }
  }
  if (!res.ok) throw new Error(res.statusText);
  let contents = await res.text();
  const entryType = configForFile(filePath || "tmp.md");
  if (!entryType) throw new Error("No entry type found");

  // Process assets FIRST if configuration is provided - before content transforms
  // This ensures asset detection works with original markdown links before they get transformed
  if (options.assetsPath && options.assetsBaseUrl) {
    try {
      // Create a dummy logger for syncEntry since it uses Astro's logger
      const dummyLogger = {
        verbose: (msg: string) => logger.info(msg),
        debug: (msg: string) => logger.debug(msg),
        warn: (msg: string) => logger.warn(msg),
        logAssetProcessing: (action: string, path: string, details?: string) => {
          const msg = details ? `Asset ${action}: ${path} - ${details}` : `Asset ${action}: ${path}`;
          logger.info(msg);
        }
      };
      const assetResult = await processAssets(contents, filePath, options, octokit, dummyLogger as Logger, init.signal || undefined);
      contents = assetResult.content;
    } catch (error: any) {
      // Asset failures are non-fatal: the entry is still imported untransformed.
      logger.warn(`Asset processing failed for ${id}: ${error.message}`);
    }
  }

  // Apply content transforms if provided - both global and pattern-specific
  // This runs after asset processing so transforms work with processed content
  const includeResultForTransforms = shouldIncludeFile(filePath, options);
  const transformsToApply: any[] = [];

  // Add global transforms first
  if (options.transforms && options.transforms.length > 0) {
    transformsToApply.push(...options.transforms);
  }

  // Add pattern-specific transforms
  if (includeResultForTransforms.included && includeResultForTransforms.matchedPattern && options.includes) {
    const matchedInclude = options.includes[includeResultForTransforms.matchedPattern.index];
    if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
      transformsToApply.push(...matchedInclude.transforms);
    }
  }

  if (transformsToApply.length > 0) {
    const transformContext = {
      id,
      path: filePath,
      options,
      matchedPattern: includeResultForTransforms.included ? includeResultForTransforms.matchedPattern : undefined,
    };

    // Transforms run sequentially; a failing transform is skipped, not fatal.
    for (const transform of transformsToApply) {
      try {
        contents = transform(contents, transformContext);
      } catch (error) {
        logger.warn(`Transform failed for ${id}: ${error}`);
      }
    }
  }

  const includeResult = shouldIncludeFile(filePath, options);
  const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
  const fileUrl = pathToFileURL(relativePath);
  const { body, data } = await entryType.getEntryInfo({
    contents,
    fileUrl: fileUrl,
  });

  const existingEntry = store.get(id);

  // Digest is computed over the fully transformed contents.
  const digest = generateDigest(contents);

  // Unchanged entry that already has a filePath: nothing to do.
  if (
    existingEntry &&
    existingEntry.digest === digest &&
    existingEntry.filePath
  ) {
    return;
  }
  // Write file to path (only when it does not already exist on disk)
  if (!existsSync(fileURLToPath(fileUrl))) {
    (logger as any).verbose(`Writing ${id} to ${fileUrl}`);
    await syncFile(fileURLToPath(fileUrl), contents);
  }

  const parsedData = await parseData({
    id,
    data,
    filePath: fileUrl.toString(),
  });

  if (entryType.getRenderFunction) {
    (logger as any).verbose(`Rendering ${id}`);
    const render = await entryType.getRenderFunction(config);
    let rendered: RenderedContent | undefined = undefined;
    try {
      rendered = await render?.({
        id,
        data,
        body,
        filePath: fileUrl.toString(),
        digest,
      });
    } catch (error: any) {
      // Render failures are logged; the entry is stored without rendered output.
      logger.error(`Error rendering ${id}: ${error.message}`);
    }
    store.set({
      id,
      data: parsedData,
      body,
      filePath: relativePath,
      digest,
      rendered,
    });
  } else if ("contentModuleTypes" in entryType) {
    // Entry types with content modules defer rendering to Astro.
    store.set({
      id,
      data: parsedData,
      body,
      filePath: relativePath,
      digest,
      deferredRender: true,
    });
  } else {
    store.set({ id, data: parsedData, body, filePath: relativePath, digest });
  }

  // syncHeaders is defined elsewhere in this module; presumably it persists
  // response caching headers (ETag/Last-Modified) into meta for the next run.
  syncHeaders({
    headers: res.headers,
    meta,
    id,
  });
}
645
+
646
+ /**
647
+ * Converts a given GitHub repository path into a collection entry by fetching the content
648
+ * from the GitHub repository using the provided Octokit instance and options.
649
+ * Handles both files and directories, recursively processing directories if needed.
650
+ * @internal
651
+ */
652
+ export async function toCollectionEntry({
653
+ context,
654
+ octokit,
655
+ options,
656
+ signal,
657
+ force = false,
658
+ }: CollectionEntryOptions): Promise<ImportStats> {
659
+ const { owner, repo, ref = "main" } = options || {};
660
+ if (typeof repo !== "string" || typeof owner !== "string")
661
+ throw new TypeError(INVALID_STRING_ERROR);
662
+
663
+ // Get logger from context - it should be our Logger instance (initialize early)
664
+ const logger = context.logger as unknown as Logger;
665
+
666
+ // Repository-level caching - simple all-or-nothing approach
667
+ const configName = options.name || `${owner}/${repo}`;
668
+ const configId = createConfigId(options);
669
+
670
+ if (!force) {
671
+ try {
672
+ const state = await loadImportState(process.cwd());
673
+ const currentState = state.imports[configId];
674
+
675
+ if (currentState && currentState.lastCommitSha) {
676
+ logger.debug(`🔍 Checking repository changes for ${configName}...`);
677
+ const latestCommit = await getLatestCommitInfo(octokit, options, signal);
678
+
679
+ if (latestCommit && currentState.lastCommitSha === latestCommit.sha) {
680
+ logger.info(`✅ Repository ${configName} unchanged (${latestCommit.sha.slice(0, 7)}) - skipping import`);
681
+ return {
682
+ processed: 0,
683
+ updated: 0,
684
+ unchanged: 0,
685
+ assetsDownloaded: 0,
686
+ assetsCached: 0,
687
+ };
688
+ } else if (latestCommit) {
689
+ logger.info(`🔄 Repository ${configName} changed (${currentState.lastCommitSha?.slice(0, 7) || 'unknown'} -> ${latestCommit.sha.slice(0, 7)}) - proceeding with import`);
690
+ }
691
+ } else {
692
+ logger.debug(`📥 First time importing ${configName} - no previous state found`);
693
+ }
694
+ } catch (error) {
695
+ logger.warn(`Failed to check repository state for ${configName}: ${error instanceof Error ? error.message : String(error)}`);
696
+ // Continue with import if state check fails
697
+ }
698
+ } else {
699
+ logger.info(`🔄 Force mode enabled for ${configName} - proceeding with full import`);
700
+ }
701
+
702
+ // Get all unique directory prefixes from include patterns to limit scanning
703
+ const directoriesToScan = new Set<string>();
704
+ if (options.includes && options.includes.length > 0) {
705
+ for (const includePattern of options.includes) {
706
+ // Extract directory part from pattern (before any glob wildcards)
707
+ const pattern = includePattern.pattern;
708
+ const beforeGlob = pattern.split(/[*?{]/)[0];
709
+ const dirPart = beforeGlob.includes('/') ? beforeGlob.substring(0, beforeGlob.lastIndexOf('/')) : '';
710
+ directoriesToScan.add(dirPart);
711
+ }
712
+ } else {
713
+ // If no includes specified, scan from root
714
+ directoriesToScan.add('');
715
+ }
716
+
717
+ // Collect all files first (with content transforms applied)
718
+ const allFiles: ImportedFile[] = [];
719
+
720
+ for (const dirPath of directoriesToScan) {
721
+ const files = await collectFilesRecursively(dirPath);
722
+ allFiles.push(...files);
723
+ }
724
+
725
+ // Track statistics
726
+ const stats: ImportStats = {
727
+ processed: 0,
728
+ updated: 0,
729
+ unchanged: 0,
730
+ assetsDownloaded: 0,
731
+ assetsCached: 0,
732
+ };
733
+
734
+ // Apply link transformation if configured
735
+ let processedFiles = allFiles;
736
+ if (options.linkTransform) {
737
+ logger.verbose(`Applying link transformation to ${allFiles.length} files`);
738
+
739
+ // Generate automatic link mappings from pathMappings
740
+ const autoGeneratedMappings = options.includes
741
+ ? generateAutoLinkMappings(options.includes, options.linkTransform.stripPrefixes)
742
+ : [];
743
+
744
+ // Combine auto-generated mappings with user-defined mappings
745
+ const allLinkMappings = [
746
+ ...autoGeneratedMappings,
747
+ ...(options.linkTransform.linkMappings || [])
748
+ ];
749
+
750
+ logger.debug(`Generated ${autoGeneratedMappings.length} automatic link mappings from pathMappings`);
751
+
752
+ processedFiles = globalLinkTransform(allFiles, {
753
+ stripPrefixes: options.linkTransform.stripPrefixes,
754
+ customHandlers: options.linkTransform.customHandlers,
755
+ linkMappings: allLinkMappings,
756
+ logger,
757
+ });
758
+ }
759
+
760
+ // Now store all processed files
761
+ stats.processed = processedFiles.length;
762
+ for (const file of processedFiles) {
763
+ logger.logFileProcessing("Storing", file.sourcePath);
764
+ const result = await storeProcessedFile(file, context, options);
765
+ if (result) {
766
+ stats.updated++;
767
+ } else {
768
+ stats.unchanged++;
769
+ }
770
+ }
771
+
772
+ return stats;
773
+
774
+ // Helper function to collect files without storing them
775
+ async function collectFilesRecursively(path: string): Promise<ImportedFile[]> {
776
+ const collectedFiles: ImportedFile[] = [];
777
+
778
+ // Fetch the content
779
+ const { data, status } = await octokit.rest.repos.getContent({
780
+ owner,
781
+ repo,
782
+ path,
783
+ ref,
784
+ request: { signal },
785
+ });
786
+ if (status !== 200) throw new Error(INVALID_SERVICE_RESPONSE);
787
+
788
+ // Handle single file
789
+ if (!Array.isArray(data)) {
790
+ const filePath = data.path;
791
+ if (data.type === "file") {
792
+ const fileData = await collectFileData(
793
+ { url: data.download_url, editUrl: data.url },
794
+ filePath
795
+ );
796
+ if (fileData) {
797
+ collectedFiles.push(fileData);
798
+ }
799
+ }
800
+ return collectedFiles;
801
+ }
802
+
803
+ // Directory listing - process files and recurse into subdirectories
804
+ const filteredEntries = data
805
+ .filter(({ type, path }) => {
806
+ // Always include directories for recursion
807
+ if (type === "dir") return true;
808
+ // Apply filtering logic to files
809
+ if (type === "file") {
810
+ return shouldIncludeFile(path, options).included;
811
+ }
812
+ return false;
813
+ });
814
+
815
+ for (const { type, path, download_url, url } of filteredEntries) {
816
+ if (type === "dir") {
817
+ // Recurse into subdirectory
818
+ const subDirFiles = await collectFilesRecursively(path);
819
+ collectedFiles.push(...subDirFiles);
820
+ } else if (type === "file") {
821
+ // Process file
822
+ const fileData = await collectFileData(
823
+ { url: download_url, editUrl: url },
824
+ path
825
+ );
826
+ if (fileData) {
827
+ collectedFiles.push(fileData);
828
+ }
829
+ }
830
+ }
831
+
832
+ return collectedFiles;
833
+ }
834
+
835
// Helper function to collect file data with content transforms applied.
// Downloads one file from GitHub (honoring etag/last-modified caching),
// runs asset processing and content transforms over it, and returns an
// ImportedFile describing source path, final target path, and content.
// Returns null when the entry has no usable download URL.
async function collectFileData(
  { url, editUrl }: { url: string | null; editUrl: string },
  filePath: string
): Promise<ImportedFile | null> {
  // Entries such as submodules can have a null download_url — skip them
  if (url === null || typeof url !== "string") {
    return null;
  }

  const urlObj = new URL(url);

  // Determine if file needs renaming and generate appropriate ID
  const includeCheck = shouldIncludeFile(filePath, options);
  const matchedPattern = includeCheck.included ? includeCheck.matchedPattern : null;

  // Check if this file has an explicit per-file path mapping on the
  // include pattern that matched it
  const hasPathMapping = matchedPattern &&
    options?.includes &&
    matchedPattern.index < options.includes.length &&
    options.includes[matchedPattern.index].pathMappings &&
    options.includes[matchedPattern.index].pathMappings![filePath];

  // Generate ID based on appropriate path
  const id = hasPathMapping ?
    generateId(generatePath(filePath, matchedPattern, options)) : // Use path-mapped path for ID
    generateId(filePath); // Use original path for ID

  // Final on-disk destination for the file (path mapping applied)
  const finalPath = generatePath(filePath, matchedPattern, options);
  let contents: string;

  logger.logFileProcessing("Fetching", filePath, `from ${urlObj.toString()}`);

  // Download file content with conditional headers (If-None-Match /
  // If-Modified-Since) derived from the meta store for this id
  const init = { signal, headers: getHeaders({ init: {}, meta: context.meta, id }) };
  let res: Response | null = null;

  // Fetch with retries (simplified version of syncEntry logic).
  // NOTE(review): only thrown errors trigger the backoff delay; a non-ok
  // HTTP status retries immediately — confirm this is intentional.
  for (let attempt = 0; attempt < 3; attempt++) {
    try {
      res = await fetch(urlObj, init);
      if (res.ok) break;
    } catch (error) {
      if (attempt === 2) throw error;
      // Linear backoff: 1s, then 2s
      await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)));
    }
  }

  if (!res) {
    throw new Error(`No response received for ${urlObj.toString()}`);
  }

  if (res.status === 304) {
    // File not modified, read existing content from disk if it exists
    const includeResult = shouldIncludeFile(filePath, options);
    const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
    const fileUrl = pathToFileURL(relativePath);

    if (existsSync(fileURLToPath(fileUrl))) {
      logger.logFileProcessing("Using cached", filePath, "304 not modified");
      const { promises: fs } = await import('node:fs');
      contents = await fs.readFile(fileURLToPath(fileUrl), 'utf-8');
    } else {
      // File is missing locally despite the 304 (e.g. the local copy was
      // deleted) — re-fetch without the conditional cache headers
      logger.logFileProcessing("Re-fetching", filePath, "missing locally despite 304");
      const freshInit = { ...init };
      freshInit.headers = new Headers(init.headers);
      freshInit.headers.delete('If-None-Match');
      freshInit.headers.delete('If-Modified-Since');

      res = await fetch(urlObj, freshInit);
      if (!res.ok) {
        throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
      }
      contents = await res.text();
    }
  } else if (!res.ok) {
    throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
  } else {
    contents = await res.text();
  }

  // Process assets FIRST (before content transforms) if configured, so
  // transforms see the rewritten asset URLs. Failures are logged, not fatal.
  let fileAssetsDownloaded = 0;
  let fileAssetsCached = 0;
  if (options.assetsPath && options.assetsBaseUrl) {
    try {
      const assetResult = await processAssets(contents, filePath, options, octokit, logger, signal);
      contents = assetResult.content;
      // NOTE(review): these per-file asset counters are never propagated to
      // the returned ImportedFile or the caller's stats — confirm whether
      // asset stats should be surfaced.
      fileAssetsDownloaded = assetResult.assetsDownloaded;
      fileAssetsCached = assetResult.assetsCached;
    } catch (error) {
      logger.warn(`Asset processing failed for ${id}: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  // Apply content transforms: global transforms run first, then any
  // transforms attached to the include pattern that matched this file
  const includeResult = shouldIncludeFile(filePath, options);
  const transformsToApply: any[] = [];

  // Add global transforms first
  if (options.transforms && options.transforms.length > 0) {
    transformsToApply.push(...options.transforms);
  }

  // Add pattern-specific transforms
  if (includeResult.included && includeResult.matchedPattern && options.includes) {
    const matchedInclude = options.includes[includeResult.matchedPattern.index];
    if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
      transformsToApply.push(...matchedInclude.transforms);
    }
  }

  if (transformsToApply.length > 0) {
    const transformContext = {
      id,
      path: filePath,
      options,
      matchedPattern: includeResult.included ? includeResult.matchedPattern : undefined,
    };

    // A failing transform is skipped (with a warning); later transforms
    // still run against the last successful contents
    for (const transform of transformsToApply) {
      try {
        contents = transform(contents, transformContext);
      } catch (error) {
        context.logger?.warn(`Transform failed for ${id}: ${error}`);
      }
    }
  }

  // Build link context for this file so cross-file link rewriting can map
  // source-repo paths to the imported target paths
  const linkContext = includeResult.included && includeResult.matchedPattern ? {
    sourcePath: filePath,
    targetPath: finalPath,
    basePath: includeResult.matchedPattern.basePath,
    pathMappings: options.includes?.[includeResult.matchedPattern.index]?.pathMappings,
    matchedPattern: includeResult.matchedPattern,
  } : undefined;

  // Use the finalPath we already computed
  return {
    sourcePath: filePath,
    targetPath: finalPath,
    content: contents,
    id,
    linkContext,
  };
}
982
+
983
// Helper function to store a processed file in Astro's content store.
// Resolves the entry type from the file extension, parses frontmatter/body,
// optionally renders the entry, writes the file to disk if missing, and
// records the entry (with digest) in the store. Returns a small summary
// object; throws if no entry type is registered for the extension.
async function storeProcessedFile(
  file: ImportedFile,
  context: any,
  options: ImportOptions
): Promise<any> {
  const { store, generateDigest, entryTypes, logger, parseData, config } = context;

  // Look up the Astro entry type (e.g. markdown/MDX handler) by extension
  function configForFile(filePath: string) {
    const ext = filePath.split(".").at(-1);
    if (!ext) {
      logger.warn(`No extension found for ${filePath}`);
      return;
    }
    return entryTypes?.get(`.${ext}`);
  }

  const entryType = configForFile(file.sourcePath || "tmp.md");
  if (!entryType) throw new Error("No entry type found");

  const fileUrl = pathToFileURL(file.targetPath);
  const { body, data } = await entryType.getEntryInfo({
    contents: file.content,
    fileUrl: fileUrl,
  });

  // Generate digest for storage (repository-level caching handles change detection)
  const digest = generateDigest(file.content);
  const existingEntry = store.get(file.id);

  if (existingEntry) {
    logger.debug(`🔄 File ${file.id} - updating`);
  } else {
    logger.debug(`📄 File ${file.id} - adding`);
  }

  // Write file to disk.
  // NOTE(review): the file is only written when it does not already exist
  // locally — changed content is presumably refreshed by an upstream
  // cleanup step; confirm stale files cannot linger on disk.
  if (!existsSync(fileURLToPath(fileUrl))) {
    logger.verbose(`Writing ${file.id} to ${fileUrl}`);
    await syncFile(fileURLToPath(fileUrl), file.content);
  }

  // Validate/coerce frontmatter data against the collection schema
  const parsedData = await parseData({
    id: file.id,
    data,
    filePath: fileUrl.toString(),
  });

  // Store in content store. Three cases mirror Astro's loader contract:
  // eager render, deferred render (content modules), or data-only.
  if (entryType.getRenderFunction) {
    logger.verbose(`Rendering ${file.id}`);
    const render = await entryType.getRenderFunction(config);
    let rendered = undefined;
    try {
      rendered = await render?.({
        id: file.id,
        data,
        body,
        filePath: fileUrl.toString(),
        digest,
      });
    } catch (error: any) {
      // Render failures are logged; the entry is still stored without
      // rendered output
      logger.error(`Error rendering ${file.id}: ${error.message}`);
    }
    logger.debug(`🔍 Storing collection entry: ${file.id} (${file.sourcePath} -> ${file.targetPath})`);
    store.set({
      id: file.id,
      data: parsedData,
      body,
      filePath: file.targetPath,
      digest,
      rendered,
    });
  } else if ("contentModuleTypes" in entryType) {
    // Entry type renders lazily at build time
    store.set({
      id: file.id,
      data: parsedData,
      body,
      filePath: file.targetPath,
      digest,
      deferredRender: true,
    });
  } else {
    // Plain data entry with no render step
    store.set({
      id: file.id,
      data: parsedData,
      body,
      filePath: file.targetPath,
      digest
    });
  }

  return { id: file.id, filePath: file.targetPath };
}
1077
+
1078
+ async function processDirectoryRecursively(path: string): Promise<any> {
1079
+ // Fetch the content
1080
+ const { data, status } = await octokit.rest.repos.getContent({
1081
+ owner,
1082
+ repo,
1083
+ path,
1084
+ ref,
1085
+ request: { signal },
1086
+ });
1087
+ if (status !== 200) throw new Error(INVALID_SERVICE_RESPONSE);
1088
+
1089
+ // Matches for regular files
1090
+ if (!Array.isArray(data)) {
1091
+ const filePath = data.path;
1092
+ switch (data.type) {
1093
+ // Return
1094
+ case "file":
1095
+ return await syncEntry(
1096
+ context,
1097
+ { url: data.download_url, editUrl: data.url },
1098
+ filePath,
1099
+ options,
1100
+ octokit,
1101
+ { signal },
1102
+ );
1103
+ default:
1104
+ throw new Error("Invalid type");
1105
+ }
1106
+ }
1107
+
1108
+ // Directory listing with filtering - process sequentially
1109
+ const filteredEntries = data
1110
+ .filter(({ type, path }) => {
1111
+ // Always include directories for recursion
1112
+ if (type === "dir") return true;
1113
+ // Apply filtering logic to files
1114
+ if (type === "file") {
1115
+ return shouldIncludeFile(path, options).included;
1116
+ }
1117
+ return false;
1118
+ });
1119
+
1120
+ const results = [];
1121
+ for (const { type, path, download_url, url } of filteredEntries) {
1122
+ switch (type) {
1123
+ // Recurse
1124
+ case "dir":
1125
+ results.push(await processDirectoryRecursively(path));
1126
+ break;
1127
+ // Return
1128
+ case "file":
1129
+ results.push(await syncEntry(
1130
+ context,
1131
+ { url: download_url, editUrl: url },
1132
+ path,
1133
+ options,
1134
+ octokit,
1135
+ { signal },
1136
+ ));
1137
+ break;
1138
+ default:
1139
+ throw new Error("Invalid type");
1140
+ }
1141
+ }
1142
+ return results;
1143
+ } // End of processDirectoryRecursively function
1144
+ }
1145
+
1146
+
1147
+
1148
+ /**
1149
+ * Get the headers needed to make a conditional request.
1150
+ * Uses the etag and last-modified values from the meta store.
1151
+ * @internal
1152
+ */
1153
+ export function getHeaders({
1154
+ init,
1155
+ meta,
1156
+ id,
1157
+ }: {
1158
+ /** Initial headers to include */
1159
+ init?: RequestInit["headers"];
1160
+ /** Meta store to get etag and last-modified values from */
1161
+ meta: LoaderContext["meta"];
1162
+ id: string;
1163
+ }): Headers {
1164
+ const tag = `${id}-etag`;
1165
+ const lastModifiedTag = `${id}-last-modified`;
1166
+ const etag = meta.get(tag);
1167
+ const lastModified = meta.get(lastModifiedTag);
1168
+ const headers = new Headers(init);
1169
+
1170
+ if (etag) {
1171
+ headers.set("If-None-Match", etag);
1172
+ } else if (lastModified) {
1173
+ headers.set("If-Modified-Since", lastModified);
1174
+ }
1175
+ return headers;
1176
+ }
1177
+
1178
+ /**
1179
+ * Store the etag or last-modified headers from a response in the meta store.
1180
+ * @internal
1181
+ */
1182
+ export function syncHeaders({
1183
+ headers,
1184
+ meta,
1185
+ id,
1186
+ }: {
1187
+ /** Headers from the response */
1188
+ headers: Headers;
1189
+ /** Meta store to store etag and last-modified values in */
1190
+ meta: LoaderContext["meta"];
1191
+ /** id string */
1192
+ id: string;
1193
+ }) {
1194
+ const etag = headers.get("etag");
1195
+ const lastModified = headers.get("last-modified");
1196
+ const tag = `${id}-etag`;
1197
+ const lastModifiedTag = `${id}-last-modified`;
1198
+ meta.delete(tag);
1199
+ meta.delete(lastModifiedTag);
1200
+ if (etag) {
1201
+ meta.set(tag, etag);
1202
+ } else if (lastModified) {
1203
+ meta.set(lastModifiedTag, lastModified);
1204
+ }
1205
+ }