@larkiny/astro-github-loader 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +675 -0
- package/dist/github.cleanup.d.ts +5 -0
- package/dist/github.cleanup.js +216 -0
- package/dist/github.constants.d.ts +24 -0
- package/dist/github.constants.js +24 -0
- package/dist/github.content.d.ts +138 -0
- package/dist/github.content.js +1016 -0
- package/dist/github.dryrun.d.ts +72 -0
- package/dist/github.dryrun.js +247 -0
- package/dist/github.link-transform.d.ts +77 -0
- package/dist/github.link-transform.js +321 -0
- package/dist/github.loader.d.ts +14 -0
- package/dist/github.loader.js +143 -0
- package/dist/github.loader.spec.d.ts +1 -0
- package/dist/github.loader.spec.js +96 -0
- package/dist/github.logger.d.ts +132 -0
- package/dist/github.logger.js +260 -0
- package/dist/github.sync.d.ts +5 -0
- package/dist/github.sync.js +292 -0
- package/dist/github.types.d.ts +315 -0
- package/dist/github.types.js +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/package.json +66 -0
- package/src/github.cleanup.ts +243 -0
- package/src/github.constants.ts +25 -0
- package/src/github.content.ts +1205 -0
- package/src/github.dryrun.ts +339 -0
- package/src/github.link-transform.ts +452 -0
- package/src/github.loader.spec.ts +106 -0
- package/src/github.loader.ts +189 -0
- package/src/github.logger.ts +324 -0
- package/src/github.types.ts +339 -0
- package/src/index.ts +5 -0
|
@@ -0,0 +1,1205 @@
|
|
|
1
|
+
import { existsSync, promises as fs } from "node:fs";
|
|
2
|
+
import { fileURLToPath, pathToFileURL } from "node:url";
|
|
3
|
+
import path, { join, dirname, basename, extname } from "node:path";
|
|
4
|
+
import picomatch from "picomatch";
|
|
5
|
+
import { globalLinkTransform, generateAutoLinkMappings, type ImportedFile } from "./github.link-transform.js";
|
|
6
|
+
import type { Logger } from "./github.logger.js";
|
|
7
|
+
import { getLatestCommitInfo, loadImportState, createConfigId } from "./github.dryrun.js";
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
INVALID_SERVICE_RESPONSE,
|
|
11
|
+
INVALID_STRING_ERROR,
|
|
12
|
+
INVALID_URL_ERROR,
|
|
13
|
+
} from "./github.constants.js";
|
|
14
|
+
|
|
15
|
+
import type { LoaderContext, CollectionEntryOptions, ImportOptions, RenderedContent, MatchedPattern } from "./github.types.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Aggregate counters describing the outcome of a single import run.
 */
export interface ImportStats {
  // Total number of files examined during the run.
  processed: number;
  // Files whose content changed and was (re)written.
  updated: number;
  // Files skipped because their content was unchanged.
  unchanged: number;
  // Assets fetched from the remote repository (only set when asset handling is enabled).
  assetsDownloaded?: number;
  // Assets reused from the local cache (only set when asset handling is enabled).
  assetsCached?: number;
}
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Generates a unique identifier from a file path by removing the extension
|
|
27
|
+
* @param filePath - The file path to generate ID from
|
|
28
|
+
* @return {string} The generated identifier as a string with extension removed
|
|
29
|
+
* @internal
|
|
30
|
+
*/
|
|
31
|
+
export function generateId(filePath: string): string {
|
|
32
|
+
let id = filePath;
|
|
33
|
+
|
|
34
|
+
// Remove file extension for ID generation
|
|
35
|
+
const lastDotIndex = id.lastIndexOf('.');
|
|
36
|
+
if (lastDotIndex > 0) {
|
|
37
|
+
id = id.substring(0, lastDotIndex);
|
|
38
|
+
}
|
|
39
|
+
return id;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Applies path mapping logic to get the final filename for a file
|
|
45
|
+
*
|
|
46
|
+
* Supports two types of path mappings:
|
|
47
|
+
* - **File mapping**: Exact file path match (e.g., 'docs/README.md' -> 'docs/overview.md')
|
|
48
|
+
* - **Folder mapping**: Folder path with trailing slash (e.g., 'docs/capabilities/' -> 'docs/')
|
|
49
|
+
*
|
|
50
|
+
* @param filePath - Original source file path
|
|
51
|
+
* @param matchedPattern - The pattern that matched this file
|
|
52
|
+
* @param options - Import options containing path mappings
|
|
53
|
+
* @returns Final filename after applying path mapping logic
|
|
54
|
+
* @internal
|
|
55
|
+
*/
|
|
56
|
+
export function applyRename(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string {
|
|
57
|
+
if (options?.includes && matchedPattern && matchedPattern.index < options.includes.length) {
|
|
58
|
+
const includePattern = options.includes[matchedPattern.index];
|
|
59
|
+
|
|
60
|
+
if (includePattern.pathMappings) {
|
|
61
|
+
// First check for exact file match (current behavior - backwards compatible)
|
|
62
|
+
if (includePattern.pathMappings[filePath]) {
|
|
63
|
+
const mappingValue = includePattern.pathMappings[filePath];
|
|
64
|
+
return typeof mappingValue === 'string' ? mappingValue : mappingValue.target;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Then check for folder-to-folder mappings
|
|
68
|
+
for (const [sourceFolder, mappingValue] of Object.entries(includePattern.pathMappings)) {
|
|
69
|
+
// Check if this is a folder mapping (ends with /) and file is within it
|
|
70
|
+
if (sourceFolder.endsWith('/') && filePath.startsWith(sourceFolder)) {
|
|
71
|
+
// Replace the source folder path with target folder path
|
|
72
|
+
const targetFolder = typeof mappingValue === 'string' ? mappingValue : mappingValue.target;
|
|
73
|
+
const relativePath = filePath.slice(sourceFolder.length);
|
|
74
|
+
return path.posix.join(targetFolder, relativePath);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Return original filename if no path mapping found
|
|
81
|
+
return basename(filePath);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
 * Generates a local file path based on the matched pattern and file path.
 *
 * The relative portion of the file path (everything after the non-glob prefix
 * of the matched pattern) is joined onto the pattern's basePath. Path
 * mappings configured on the matched include pattern (applied via
 * applyRename) may rename the file or relocate it first.
 *
 * @param filePath - The original file path from the repository
 * @param matchedPattern - The pattern that matched this file (or null if no includes specified)
 * @param options - Import options containing includes patterns for path mapping lookups
 * @return {string} The local file path where this content should be stored
 * @throws Error when no matched pattern is provided (includes are required)
 * @internal
 */
export function generatePath(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string {
  if (matchedPattern) {
    // Extract the directory part from the pattern (before any glob wildcards)
    const pattern = matchedPattern.pattern;
    const beforeGlob = pattern.split(/[*?{]/)[0];

    // Remove the pattern prefix from the file path to get the relative path
    let relativePath = filePath;
    if (beforeGlob && filePath.startsWith(beforeGlob)) {
      relativePath = filePath.substring(beforeGlob.length);
      // Remove leading slash if present
      if (relativePath.startsWith('/')) {
        relativePath = relativePath.substring(1);
      }
    }

    // If no relative path remains (file sat exactly at the pattern prefix),
    // fall back to just the filename
    if (!relativePath) {
      relativePath = basename(filePath);
    }

    // Apply path mapping logic
    const finalFilename = applyRename(filePath, matchedPattern, options);
    // Always apply path mapping if applyRename returned something different from the original basename
    // OR if there are pathMappings configured (since empty string mappings might return same basename)
    const hasPathMappings = options?.includes?.[matchedPattern.index]?.pathMappings &&
      Object.keys(options.includes[matchedPattern.index].pathMappings!).length > 0;
    if (finalFilename !== basename(filePath) || hasPathMappings) {
      // Check if applyRename returned a full path (contains path separators) or just a filename
      if (finalFilename.includes('/') || finalFilename.includes('\\')) {
        // applyRename returned a full relative path - need to extract relative part
        // Remove the pattern prefix to get the relative path within the pattern context
        const beforeGlob = pattern.split(/[*?{]/)[0];
        if (beforeGlob && finalFilename.startsWith(beforeGlob)) {
          relativePath = finalFilename.substring(beforeGlob.length);
          // Remove leading slash if present
          if (relativePath.startsWith('/')) {
            relativePath = relativePath.substring(1);
          }
        } else {
          // Mapped path lies outside the pattern prefix; use it as-is
          relativePath = finalFilename;
        }
      } else {
        // applyRename returned just a filename
        // If the filename is different due to pathMapping, use it directly
        // This handles cases where pathMappings flatten directory structures
        relativePath = finalFilename;
      }
    }

    return join(matchedPattern.basePath, relativePath);
  }

  // Should not happen since we always use includes
  throw new Error("No matched pattern provided - includes are required");
}
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Synchronizes a file by ensuring the target directory exists and then writing the specified content to the file at the given path.
|
|
151
|
+
*
|
|
152
|
+
* @param {string} path - The path of the file to synchronize, including its directory and filename.
|
|
153
|
+
* @param {string} content - The content to write into the file.
|
|
154
|
+
* @return {Promise<void>} - A promise that resolves when the file has been successfully written.
|
|
155
|
+
* @internal
|
|
156
|
+
*/
|
|
157
|
+
export async function syncFile(path: string, content: string) {
|
|
158
|
+
const dir = path.substring(0, path.lastIndexOf("/"));
|
|
159
|
+
|
|
160
|
+
// Ensure the directory exists
|
|
161
|
+
if (dir && !existsSync(dir)) {
|
|
162
|
+
await fs.mkdir(dir, { recursive: true });
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Write the file to the filesystem and store
|
|
166
|
+
await fs.writeFile(path, content, "utf-8");
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/**
 * Default asset patterns for common image and media file types.
 * Used by detectAssets when no explicit patterns are supplied; extensions
 * are compared case-insensitively (both sides are lowercased).
 * @internal
 */
const DEFAULT_ASSET_PATTERNS = ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.bmp'];
|
|
174
|
+
|
|
175
|
+
/**
|
|
176
|
+
* Checks if a file path should be included and returns the matching pattern
|
|
177
|
+
* @param filePath - The file path to check (relative to the repository root)
|
|
178
|
+
* @param options - Import options containing includes patterns
|
|
179
|
+
* @returns Object with include status and matched pattern, or null if not included
|
|
180
|
+
* @internal
|
|
181
|
+
*/
|
|
182
|
+
export function shouldIncludeFile(filePath: string, options: ImportOptions): { included: true; matchedPattern: MatchedPattern | null } | { included: false; matchedPattern: null } {
|
|
183
|
+
const { includes } = options;
|
|
184
|
+
|
|
185
|
+
// If no include patterns specified, include all files
|
|
186
|
+
if (!includes || includes.length === 0) {
|
|
187
|
+
return { included: true, matchedPattern: null };
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// Check each include pattern to find a match
|
|
191
|
+
for (let i = 0; i < includes.length; i++) {
|
|
192
|
+
const includePattern = includes[i];
|
|
193
|
+
const matcher = picomatch(includePattern.pattern);
|
|
194
|
+
|
|
195
|
+
if (matcher(filePath)) {
|
|
196
|
+
return {
|
|
197
|
+
included: true,
|
|
198
|
+
matchedPattern: {
|
|
199
|
+
pattern: includePattern.pattern,
|
|
200
|
+
basePath: includePattern.basePath,
|
|
201
|
+
index: i
|
|
202
|
+
}
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// No patterns matched
|
|
208
|
+
return { included: false, matchedPattern: null };
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Detects asset references in markdown content using regex patterns
|
|
213
|
+
* @param content - The markdown content to parse
|
|
214
|
+
* @param assetPatterns - File extensions to treat as assets
|
|
215
|
+
* @returns Array of detected asset paths
|
|
216
|
+
* @internal
|
|
217
|
+
*/
|
|
218
|
+
export function detectAssets(content: string, assetPatterns: string[] = DEFAULT_ASSET_PATTERNS): string[] {
|
|
219
|
+
const assets: string[] = [];
|
|
220
|
+
const patterns = assetPatterns.map(ext => ext.toLowerCase());
|
|
221
|
+
|
|
222
|
+
// Match markdown images: 
|
|
223
|
+
const imageRegex = /!\[[^\]]*\]\(([^)]+)\)/g;
|
|
224
|
+
let match;
|
|
225
|
+
|
|
226
|
+
while ((match = imageRegex.exec(content)) !== null) {
|
|
227
|
+
const assetPath = match[1];
|
|
228
|
+
// Only include relative paths and assets matching our patterns
|
|
229
|
+
if (assetPath.startsWith('./') || assetPath.startsWith('../') || !assetPath.includes('://')) {
|
|
230
|
+
const ext = extname(assetPath).toLowerCase();
|
|
231
|
+
if (patterns.includes(ext)) {
|
|
232
|
+
assets.push(assetPath);
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Match HTML img tags: <img src="path">
|
|
238
|
+
const htmlImgRegex = /<img[^>]+src\s*=\s*["']([^"']+)["'][^>]*>/gi;
|
|
239
|
+
while ((match = htmlImgRegex.exec(content)) !== null) {
|
|
240
|
+
const assetPath = match[1];
|
|
241
|
+
if (assetPath.startsWith('./') || assetPath.startsWith('../') || !assetPath.includes('://')) {
|
|
242
|
+
const ext = extname(assetPath).toLowerCase();
|
|
243
|
+
if (patterns.includes(ext)) {
|
|
244
|
+
assets.push(assetPath);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
return [...new Set(assets)]; // Remove duplicates
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Downloads an asset from GitHub and saves it locally
|
|
254
|
+
* @param octokit - GitHub API client
|
|
255
|
+
* @param owner - Repository owner
|
|
256
|
+
* @param repo - Repository name
|
|
257
|
+
* @param ref - Git reference
|
|
258
|
+
* @param assetPath - Path to the asset in the repository
|
|
259
|
+
* @param localPath - Local path where the asset should be saved
|
|
260
|
+
* @param signal - Abort signal for cancellation
|
|
261
|
+
* @returns Promise that resolves when the asset is downloaded
|
|
262
|
+
* @internal
|
|
263
|
+
*/
|
|
264
|
+
export async function downloadAsset(
|
|
265
|
+
octokit: any,
|
|
266
|
+
owner: string,
|
|
267
|
+
repo: string,
|
|
268
|
+
ref: string,
|
|
269
|
+
assetPath: string,
|
|
270
|
+
localPath: string,
|
|
271
|
+
signal?: AbortSignal
|
|
272
|
+
): Promise<void> {
|
|
273
|
+
try {
|
|
274
|
+
const { data } = await octokit.rest.repos.getContent({
|
|
275
|
+
owner,
|
|
276
|
+
repo,
|
|
277
|
+
path: assetPath,
|
|
278
|
+
ref,
|
|
279
|
+
request: { signal },
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
if (Array.isArray(data) || data.type !== 'file' || !data.download_url) {
|
|
283
|
+
throw new Error(`Asset ${assetPath} is not a valid file (type: ${data.type}, downloadUrl: ${data.download_url})`);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
const response = await fetch(data.download_url, { signal });
|
|
287
|
+
if (!response.ok) {
|
|
288
|
+
throw new Error(`Failed to download asset: ${response.status} ${response.statusText}`);
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
const buffer = await response.arrayBuffer();
|
|
292
|
+
const dir = dirname(localPath);
|
|
293
|
+
|
|
294
|
+
if (!existsSync(dir)) {
|
|
295
|
+
await fs.mkdir(dir, { recursive: true });
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
await fs.writeFile(localPath, new Uint8Array(buffer));
|
|
299
|
+
} catch (error: any) {
|
|
300
|
+
if (error.status === 404) {
|
|
301
|
+
throw new Error(`Asset not found: ${assetPath}`);
|
|
302
|
+
}
|
|
303
|
+
throw error;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
/**
|
|
308
|
+
* Transforms asset references in markdown content to use local paths
|
|
309
|
+
* @param content - The markdown content to transform
|
|
310
|
+
* @param assetMap - Map of original asset paths to new local paths
|
|
311
|
+
* @returns Transformed content with updated asset references
|
|
312
|
+
* @internal
|
|
313
|
+
*/
|
|
314
|
+
export function transformAssetReferences(content: string, assetMap: Map<string, string>): string {
|
|
315
|
+
let transformedContent = content;
|
|
316
|
+
|
|
317
|
+
for (const [originalPath, newPath] of assetMap) {
|
|
318
|
+
// Transform markdown images
|
|
319
|
+
const imageRegex = new RegExp(`(!)\\[([^\\]]*)\\]\\(\\s*${escapeRegExp(originalPath)}\\s*\\)`, 'g');
|
|
320
|
+
transformedContent = transformedContent.replace(imageRegex, `$1[$2](${newPath})`);
|
|
321
|
+
|
|
322
|
+
// Transform HTML img tags
|
|
323
|
+
const htmlRegex = new RegExp(`(<img[^>]+src\\s*=\\s*["'])${escapeRegExp(originalPath)}(["'][^>]*>)`, 'gi');
|
|
324
|
+
transformedContent = transformedContent.replace(htmlRegex, `$1${newPath}$2`);
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
return transformedContent;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* Escapes special regex characters in a string
|
|
332
|
+
* @internal
|
|
333
|
+
*/
|
|
334
|
+
function escapeRegExp(string: string): string {
|
|
335
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
/**
|
|
339
|
+
* Processes assets in markdown content by detecting, downloading, and transforming references
|
|
340
|
+
* @param content - The markdown content to process
|
|
341
|
+
* @param options - Configuration options including asset settings
|
|
342
|
+
* @param octokit - GitHub API client
|
|
343
|
+
* @param signal - Abort signal for cancellation
|
|
344
|
+
* @returns Promise that resolves to transformed content
|
|
345
|
+
* @internal
|
|
346
|
+
*/
|
|
347
|
+
async function processAssets(
|
|
348
|
+
content: string,
|
|
349
|
+
filePath: string,
|
|
350
|
+
options: ImportOptions,
|
|
351
|
+
octokit: any,
|
|
352
|
+
logger: Logger,
|
|
353
|
+
signal?: AbortSignal
|
|
354
|
+
): Promise<{ content: string; assetsDownloaded: number; assetsCached: number }> {
|
|
355
|
+
const { owner, repo, ref = 'main', assetsPath, assetsBaseUrl, assetPatterns } = options;
|
|
356
|
+
|
|
357
|
+
logger.verbose(`🖼️ Processing assets for ${filePath}`);
|
|
358
|
+
logger.debug(` assetsPath: ${assetsPath}`);
|
|
359
|
+
logger.debug(` assetsBaseUrl: ${assetsBaseUrl}`);
|
|
360
|
+
|
|
361
|
+
if (!assetsPath || !assetsBaseUrl) {
|
|
362
|
+
logger.verbose(` ⏭️ Skipping asset processing - missing assetsPath or assetsBaseUrl`);
|
|
363
|
+
return { content, assetsDownloaded: 0, assetsCached: 0 };
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
// Detect assets in the content
|
|
367
|
+
const detectedAssets = detectAssets(content, assetPatterns);
|
|
368
|
+
logger.verbose(` 📸 Detected ${detectedAssets.length} assets`);
|
|
369
|
+
if (detectedAssets.length > 0) {
|
|
370
|
+
logger.debug(` Assets: ${detectedAssets.join(', ')}`);
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
if (detectedAssets.length === 0) {
|
|
374
|
+
return { content, assetsDownloaded: 0, assetsCached: 0 };
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
const assetMap = new Map<string, string>();
|
|
378
|
+
let assetsDownloaded = 0;
|
|
379
|
+
let assetsCached = 0;
|
|
380
|
+
|
|
381
|
+
// Process each detected asset
|
|
382
|
+
await Promise.all(detectedAssets.map(async (assetPath) => {
|
|
383
|
+
logger.logAssetProcessing("Processing", assetPath);
|
|
384
|
+
try {
|
|
385
|
+
// Resolve the asset path relative to the current markdown file
|
|
386
|
+
const resolvedAssetPath = resolveAssetPath(filePath, assetPath);
|
|
387
|
+
logger.debug(` 🔗 Resolved path: ${resolvedAssetPath}`);
|
|
388
|
+
|
|
389
|
+
// Generate unique filename to avoid conflicts
|
|
390
|
+
const originalFilename = basename(assetPath);
|
|
391
|
+
const ext = extname(originalFilename);
|
|
392
|
+
const nameWithoutExt = basename(originalFilename, ext);
|
|
393
|
+
const uniqueFilename = `${nameWithoutExt}-${Date.now()}${ext}`;
|
|
394
|
+
const localPath = join(assetsPath, uniqueFilename);
|
|
395
|
+
logger.debug(` 💾 Local path: ${localPath}`);
|
|
396
|
+
|
|
397
|
+
// Check if asset already exists (simple cache check)
|
|
398
|
+
if (existsSync(localPath)) {
|
|
399
|
+
logger.logAssetProcessing("Cached", assetPath);
|
|
400
|
+
assetsCached++;
|
|
401
|
+
} else {
|
|
402
|
+
// Download the asset
|
|
403
|
+
logger.logAssetProcessing("Downloading", assetPath, `from ${owner}/${repo}@${ref}:${resolvedAssetPath}`);
|
|
404
|
+
await downloadAsset(octokit, owner, repo, ref, resolvedAssetPath, localPath, signal);
|
|
405
|
+
logger.logAssetProcessing("Downloaded", assetPath);
|
|
406
|
+
assetsDownloaded++;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
// Generate URL for the transformed reference
|
|
410
|
+
const assetUrl = `${assetsBaseUrl}/${uniqueFilename}`.replace(/\/+/g, '/');
|
|
411
|
+
logger.debug(` 🔄 Transform: ${assetPath} -> ${assetUrl}`);
|
|
412
|
+
|
|
413
|
+
// Map the transformation
|
|
414
|
+
assetMap.set(assetPath, assetUrl);
|
|
415
|
+
} catch (error) {
|
|
416
|
+
logger.warn(` ❌ Failed to process asset ${assetPath}: ${error}`);
|
|
417
|
+
}
|
|
418
|
+
}));
|
|
419
|
+
|
|
420
|
+
logger.verbose(` 🗺️ Processed ${assetMap.size} assets: ${assetsDownloaded} downloaded, ${assetsCached} cached`);
|
|
421
|
+
|
|
422
|
+
// Transform the content with new asset references
|
|
423
|
+
const transformedContent = transformAssetReferences(content, assetMap);
|
|
424
|
+
return { content: transformedContent, assetsDownloaded, assetsCached };
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
/**
|
|
428
|
+
* Resolves an asset path relative to a base path
|
|
429
|
+
* @internal
|
|
430
|
+
*/
|
|
431
|
+
function resolveAssetPath(basePath: string, assetPath: string): string {
|
|
432
|
+
if (assetPath.startsWith('./')) {
|
|
433
|
+
return join(dirname(basePath), assetPath.slice(2));
|
|
434
|
+
} else if (assetPath.startsWith('../')) {
|
|
435
|
+
return join(dirname(basePath), assetPath);
|
|
436
|
+
}
|
|
437
|
+
return assetPath;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
/**
 * Synchronizes an entry by fetching its contents, validating its metadata,
 * and storing or rendering it as needed.
 *
 * Conditional-request headers from earlier runs are attached via getHeaders();
 * a 304 response short-circuits the sync as long as the local file still
 * exists. Asset processing runs before content transforms so asset detection
 * sees the original markdown links.
 *
 * @param {LoaderContext} context - The loader context containing the required utilities, metadata, and configuration.
 * @param {Object} urls - Object containing URL data.
 * @param {string | URL | null} urls.url - The URL of the entry to fetch. Throws an error if null or invalid.
 * @param {string} urls.editUrl - The URL for editing the entry (currently unused by this implementation).
 * @param {string} filePath - Repository-relative path of the file being synchronized.
 * @param {ImportOptions} options - Configuration settings for processing the entry such as file paths and custom options.
 * @param {any} octokit - GitHub API client for downloading assets.
 * @param {RequestInit} [init] - Optional parameter for customizing the fetch request.
 * @return {Promise<void>} Resolves when the entry has been successfully processed and stored. Throws errors if invalid URL, missing configuration, or other issues occur.
 * @internal
 */
export async function syncEntry(
  context: LoaderContext,
  { url, editUrl }: { url: string | URL | null; editUrl: string },
  filePath: string,
  options: ImportOptions,
  octokit: any,
  init: RequestInit = {},
) {
  // Exit on null or if the URL is invalid
  if (url === null || (typeof url !== "string" && !(url instanceof URL))) {
    throw new TypeError(INVALID_URL_ERROR);
  }
  // Normalize a string URL into a URL instance (the constructor validates it)
  if (typeof url === "string") url = new URL(url);

  const { meta, store, generateDigest, entryTypes, logger, parseData, config } =
    context;

  // Looks up the Astro entry-type handler for a file by its extension.
  function configForFile(file: string) {
    const ext = file.split(".").at(-1);
    if (!ext) {
      logger.warn(`No extension found for ${file}`);
      return;
    }
    return entryTypes?.get(`.${ext}`);
  }
  // Custom ID, TODO: Allow custom id generators
  let id = generateId(filePath);

  // Attach conditional-request headers (ETag / Last-Modified) recorded for
  // this id in a previous run so unchanged files come back as 304.
  // NOTE(review): getHeaders/syncHeaders are defined elsewhere in this module.
  init.headers = getHeaders({
    init: init.headers,
    meta,
    id,
  });

  let res = await fetch(url, init);

  if (res.status === 304) {
    // Only skip if the local file actually exists
    const includeResult = shouldIncludeFile(filePath, options);
    const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
    const fileUrl = pathToFileURL(relativePath);

    if (existsSync(fileURLToPath(fileUrl))) {
      logger.info(`Skipping ${id} as it has not changed`);
      return;
    } else {
      logger.info(`File ${id} missing locally, re-fetching despite 304`);
      // File is missing locally, fetch without ETag headers
      const freshInit = { ...init };
      freshInit.headers = new Headers(init.headers);
      freshInit.headers.delete('If-None-Match');
      freshInit.headers.delete('If-Modified-Since');

      res = await fetch(url, freshInit);
      if (!res.ok) throw new Error(res.statusText);
    }
  }
  if (!res.ok) throw new Error(res.statusText);
  let contents = await res.text();
  const entryType = configForFile(filePath || "tmp.md");
  if (!entryType) throw new Error("No entry type found");

  // Process assets FIRST if configuration is provided - before content transforms
  // This ensures asset detection works with original markdown links before they get transformed
  if (options.assetsPath && options.assetsBaseUrl) {
    try {
      // Create a dummy logger for syncEntry since it uses Astro's logger
      const dummyLogger = {
        verbose: (msg: string) => logger.info(msg),
        debug: (msg: string) => logger.debug(msg),
        warn: (msg: string) => logger.warn(msg),
        logAssetProcessing: (action: string, path: string, details?: string) => {
          const msg = details ? `Asset ${action}: ${path} - ${details}` : `Asset ${action}: ${path}`;
          logger.info(msg);
        }
      };
      const assetResult = await processAssets(contents, filePath, options, octokit, dummyLogger as Logger, init.signal || undefined);
      contents = assetResult.content;
    } catch (error: any) {
      // Asset failures are non-fatal: continue with the original references.
      logger.warn(`Asset processing failed for ${id}: ${error.message}`);
    }
  }

  // Apply content transforms if provided - both global and pattern-specific
  // This runs after asset processing so transforms work with processed content
  const includeResultForTransforms = shouldIncludeFile(filePath, options);
  const transformsToApply: any[] = [];

  // Add global transforms first
  if (options.transforms && options.transforms.length > 0) {
    transformsToApply.push(...options.transforms);
  }

  // Add pattern-specific transforms
  if (includeResultForTransforms.included && includeResultForTransforms.matchedPattern && options.includes) {
    const matchedInclude = options.includes[includeResultForTransforms.matchedPattern.index];
    if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
      transformsToApply.push(...matchedInclude.transforms);
    }
  }

  if (transformsToApply.length > 0) {
    const transformContext = {
      id,
      path: filePath,
      options,
      matchedPattern: includeResultForTransforms.included ? includeResultForTransforms.matchedPattern : undefined,
    };

    // Transforms run in order; a failing transform is logged and skipped,
    // leaving the content from the previous step intact.
    for (const transform of transformsToApply) {
      try {
        contents = transform(contents, transformContext);
      } catch (error) {
        logger.warn(`Transform failed for ${id}: ${error}`);
      }
    }
  }

  const includeResult = shouldIncludeFile(filePath, options);
  const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
  const fileUrl = pathToFileURL(relativePath);
  const { body, data } = await entryType.getEntryInfo({
    contents,
    fileUrl: fileUrl,
  });

  const existingEntry = store.get(id);

  const digest = generateDigest(contents);

  // Skip the store update entirely when the stored digest already matches.
  if (
    existingEntry &&
    existingEntry.digest === digest &&
    existingEntry.filePath
  ) {
    return;
  }
  // Write file to path (only when it does not exist locally yet)
  if (!existsSync(fileURLToPath(fileUrl))) {
    (logger as any).verbose(`Writing ${id} to ${fileUrl}`);
    await syncFile(fileURLToPath(fileUrl), contents);
  }

  const parsedData = await parseData({
    id,
    data,
    filePath: fileUrl.toString(),
  });

  if (entryType.getRenderFunction) {
    // Entry type supports eager rendering; render now and store the result.
    (logger as any).verbose(`Rendering ${id}`);
    const render = await entryType.getRenderFunction(config);
    let rendered: RenderedContent | undefined = undefined;
    try {
      rendered = await render?.({
        id,
        data,
        body,
        filePath: fileUrl.toString(),
        digest,
      });
    } catch (error: any) {
      // Render failures still store the entry, just without rendered output.
      logger.error(`Error rendering ${id}: ${error.message}`);
    }
    store.set({
      id,
      data: parsedData,
      body,
      filePath: relativePath,
      digest,
      rendered,
    });
  } else if ("contentModuleTypes" in entryType) {
    // Entry types that declare content module types defer rendering.
    store.set({
      id,
      data: parsedData,
      body,
      filePath: relativePath,
      digest,
      deferredRender: true,
    });
  } else {
    store.set({ id, data: parsedData, body, filePath: relativePath, digest });
  }

  // Persist the response's caching headers for the next run's conditional request.
  syncHeaders({
    headers: res.headers,
    meta,
    id,
  });
}
|
|
645
|
+
|
|
646
|
+
/**
 * Converts a given GitHub repository path into a collection entry by fetching the content
 * from the GitHub repository using the provided Octokit instance and options.
 * Handles both files and directories, recursively processing directories if needed.
 *
 * High-level flow:
 *  1. Optional repository-level cache check: if the last imported commit SHA matches the
 *     repo's latest commit, the whole import is skipped (unless `force` is set).
 *  2. Derive the minimal set of directories to scan from the include patterns.
 *  3. Recursively collect matching files (downloading content, processing assets,
 *     and applying content transforms per file).
 *  4. Optionally run the global link transformation pass over all collected files.
 *  5. Store every processed file into the Astro content store and return stats.
 *
 * @internal
 */
export async function toCollectionEntry({
  context,
  octokit,
  options,
  signal,
  force = false,
}: CollectionEntryOptions): Promise<ImportStats> {
  const { owner, repo, ref = "main" } = options || {};
  if (typeof repo !== "string" || typeof owner !== "string")
    throw new TypeError(INVALID_STRING_ERROR);

  // Get logger from context - it should be our Logger instance (initialize early)
  const logger = context.logger as unknown as Logger;

  // Repository-level caching - simple all-or-nothing approach
  const configName = options.name || `${owner}/${repo}`;
  const configId = createConfigId(options);

  if (!force) {
    try {
      // Compare the last imported commit SHA (persisted import state) against the
      // repository's current head; a match means nothing changed and we can skip.
      const state = await loadImportState(process.cwd());
      const currentState = state.imports[configId];

      if (currentState && currentState.lastCommitSha) {
        logger.debug(`🔍 Checking repository changes for ${configName}...`);
        const latestCommit = await getLatestCommitInfo(octokit, options, signal);

        if (latestCommit && currentState.lastCommitSha === latestCommit.sha) {
          logger.info(`✅ Repository ${configName} unchanged (${latestCommit.sha.slice(0, 7)}) - skipping import`);
          // All-zero stats signal "skipped" to the caller.
          return {
            processed: 0,
            updated: 0,
            unchanged: 0,
            assetsDownloaded: 0,
            assetsCached: 0,
          };
        } else if (latestCommit) {
          logger.info(`🔄 Repository ${configName} changed (${currentState.lastCommitSha?.slice(0, 7) || 'unknown'} -> ${latestCommit.sha.slice(0, 7)}) - proceeding with import`);
        }
      } else {
        logger.debug(`📥 First time importing ${configName} - no previous state found`);
      }
    } catch (error) {
      logger.warn(`Failed to check repository state for ${configName}: ${error instanceof Error ? error.message : String(error)}`);
      // Continue with import if state check fails
    }
  } else {
    logger.info(`🔄 Force mode enabled for ${configName} - proceeding with full import`);
  }

  // Get all unique directory prefixes from include patterns to limit scanning
  const directoriesToScan = new Set<string>();
  if (options.includes && options.includes.length > 0) {
    for (const includePattern of options.includes) {
      // Extract directory part from pattern (before any glob wildcards)
      const pattern = includePattern.pattern;
      const beforeGlob = pattern.split(/[*?{]/)[0];
      // Keep only a complete directory prefix; a pattern with no '/' before the
      // first wildcard falls back to scanning the repository root ('').
      const dirPart = beforeGlob.includes('/') ? beforeGlob.substring(0, beforeGlob.lastIndexOf('/')) : '';
      directoriesToScan.add(dirPart);
    }
  } else {
    // If no includes specified, scan from root
    directoriesToScan.add('');
  }

  // Collect all files first (with content transforms applied)
  const allFiles: ImportedFile[] = [];

  for (const dirPath of directoriesToScan) {
    const files = await collectFilesRecursively(dirPath);
    allFiles.push(...files);
  }

  // Track statistics
  const stats: ImportStats = {
    processed: 0,
    updated: 0,
    unchanged: 0,
    assetsDownloaded: 0,
    assetsCached: 0,
  };
  // NOTE(review): per-file asset counters computed in collectFileData never reach
  // these stats; assetsDownloaded/assetsCached always report 0 — confirm intended.

  // Apply link transformation if configured
  let processedFiles = allFiles;
  if (options.linkTransform) {
    logger.verbose(`Applying link transformation to ${allFiles.length} files`);

    // Generate automatic link mappings from pathMappings
    const autoGeneratedMappings = options.includes
      ? generateAutoLinkMappings(options.includes, options.linkTransform.stripPrefixes)
      : [];

    // Combine auto-generated mappings with user-defined mappings
    // (auto-generated first, so user-defined entries appear after them).
    const allLinkMappings = [
      ...autoGeneratedMappings,
      ...(options.linkTransform.linkMappings || [])
    ];

    logger.debug(`Generated ${autoGeneratedMappings.length} automatic link mappings from pathMappings`);

    processedFiles = globalLinkTransform(allFiles, {
      stripPrefixes: options.linkTransform.stripPrefixes,
      customHandlers: options.linkTransform.customHandlers,
      linkMappings: allLinkMappings,
      logger,
    });
  }

  // Now store all processed files
  stats.processed = processedFiles.length;
  for (const file of processedFiles) {
    logger.logFileProcessing("Storing", file.sourcePath);
    const result = await storeProcessedFile(file, context, options);
    // NOTE(review): storeProcessedFile always returns a truthy object, so
    // stats.unchanged is never incremented here — confirm intended.
    if (result) {
      stats.updated++;
    } else {
      stats.unchanged++;
    }
  }

  return stats;

  // Helper function to collect files without storing them.
  // Recursively walks `path` via the GitHub contents API; directories are always
  // descended into, files are filtered through shouldIncludeFile().
  async function collectFilesRecursively(path: string): Promise<ImportedFile[]> {
    const collectedFiles: ImportedFile[] = [];

    // Fetch the content
    const { data, status } = await octokit.rest.repos.getContent({
      owner,
      repo,
      path,
      ref,
      request: { signal },
    });
    if (status !== 200) throw new Error(INVALID_SERVICE_RESPONSE);

    // Handle single file (getContent returns an object, not an array, for files)
    if (!Array.isArray(data)) {
      const filePath = data.path;
      if (data.type === "file") {
        const fileData = await collectFileData(
          { url: data.download_url, editUrl: data.url },
          filePath
        );
        if (fileData) {
          collectedFiles.push(fileData);
        }
      }
      return collectedFiles;
    }

    // Directory listing - process files and recurse into subdirectories
    const filteredEntries = data
      .filter(({ type, path }) => {
        // Always include directories for recursion
        if (type === "dir") return true;
        // Apply filtering logic to files
        if (type === "file") {
          return shouldIncludeFile(path, options).included;
        }
        // Anything else (symlink, submodule, ...) is dropped.
        return false;
      });

    for (const { type, path, download_url, url } of filteredEntries) {
      if (type === "dir") {
        // Recurse into subdirectory
        const subDirFiles = await collectFilesRecursively(path);
        collectedFiles.push(...subDirFiles);
      } else if (type === "file") {
        // Process file
        const fileData = await collectFileData(
          { url: download_url, editUrl: url },
          path
        );
        if (fileData) {
          collectedFiles.push(fileData);
        }
      }
    }

    return collectedFiles;
  }

  // Helper function to collect file data with content transforms applied.
  // Downloads one file (with retry + conditional-request caching), runs asset
  // processing and content transforms, and returns the ImportedFile record.
  // Returns null when the entry has no usable download URL.
  async function collectFileData(
    { url, editUrl }: { url: string | null; editUrl: string },
    filePath: string
  ): Promise<ImportedFile | null> {
    if (url === null || typeof url !== "string") {
      return null;
    }

    const urlObj = new URL(url);

    // Determine if file needs renaming and generate appropriate ID
    const includeCheck = shouldIncludeFile(filePath, options);
    const matchedPattern = includeCheck.included ? includeCheck.matchedPattern : null;

    // Check if this file has a path mapping
    const hasPathMapping = matchedPattern &&
      options?.includes &&
      matchedPattern.index < options.includes.length &&
      options.includes[matchedPattern.index].pathMappings &&
      options.includes[matchedPattern.index].pathMappings![filePath];

    // Generate ID based on appropriate path
    const id = hasPathMapping ?
      generateId(generatePath(filePath, matchedPattern, options)) : // Use path-mapped path for ID
      generateId(filePath); // Use original path for ID

    const finalPath = generatePath(filePath, matchedPattern, options);
    let contents: string;

    logger.logFileProcessing("Fetching", filePath, `from ${urlObj.toString()}`);

    // Download file content; getHeaders adds If-None-Match / If-Modified-Since
    // from the meta store so GitHub can answer 304 for unchanged files.
    const init = { signal, headers: getHeaders({ init: {}, meta: context.meta, id }) };
    let res: Response | null = null;

    // Fetch with retries (simplified version of syncEntry logic):
    // up to 3 attempts with linear backoff (1s, 2s); only thrown errors are
    // retried — a non-ok HTTP status falls through and is handled below.
    for (let attempt = 0; attempt < 3; attempt++) {
      try {
        res = await fetch(urlObj, init);
        if (res.ok) break;
      } catch (error) {
        if (attempt === 2) throw error;
        await new Promise(resolve => setTimeout(resolve, 1000 * (attempt + 1)));
      }
    }

    if (!res) {
      throw new Error(`No response received for ${urlObj.toString()}`);
    }

    if (res.status === 304) {
      // File not modified, read existing content from disk if it exists
      const includeResult = shouldIncludeFile(filePath, options);
      const relativePath = generatePath(filePath, includeResult.included ? includeResult.matchedPattern : null, options);
      const fileUrl = pathToFileURL(relativePath);

      if (existsSync(fileURLToPath(fileUrl))) {
        logger.logFileProcessing("Using cached", filePath, "304 not modified");
        const { promises: fs } = await import('node:fs');
        contents = await fs.readFile(fileURLToPath(fileUrl), 'utf-8');
      } else {
        // File is missing locally, re-fetch without cache headers
        logger.logFileProcessing("Re-fetching", filePath, "missing locally despite 304");
        const freshInit = { ...init };
        freshInit.headers = new Headers(init.headers);
        freshInit.headers.delete('If-None-Match');
        freshInit.headers.delete('If-Modified-Since');

        res = await fetch(urlObj, freshInit);
        if (!res.ok) {
          throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
        }
        contents = await res.text();
      }
    } else if (!res.ok) {
      throw new Error(`Failed to fetch file content from ${urlObj.toString()}: ${res.status} ${res.statusText || 'Unknown error'}`);
    } else {
      contents = await res.text();
    }

    // Process assets FIRST if configuration is provided
    // (so subsequent transforms see the rewritten asset URLs).
    let fileAssetsDownloaded = 0;
    let fileAssetsCached = 0;
    if (options.assetsPath && options.assetsBaseUrl) {
      try {
        const assetResult = await processAssets(contents, filePath, options, octokit, logger, signal);
        contents = assetResult.content;
        fileAssetsDownloaded = assetResult.assetsDownloaded;
        fileAssetsCached = assetResult.assetsCached;
      } catch (error) {
        // Asset failures are non-fatal: keep the untransformed content.
        logger.warn(`Asset processing failed for ${id}: ${error instanceof Error ? error.message : String(error)}`);
      }
    }

    // Apply content transforms
    const includeResult = shouldIncludeFile(filePath, options);
    const transformsToApply: any[] = [];

    // Add global transforms first
    if (options.transforms && options.transforms.length > 0) {
      transformsToApply.push(...options.transforms);
    }

    // Add pattern-specific transforms
    if (includeResult.included && includeResult.matchedPattern && options.includes) {
      const matchedInclude = options.includes[includeResult.matchedPattern.index];
      if (matchedInclude.transforms && matchedInclude.transforms.length > 0) {
        transformsToApply.push(...matchedInclude.transforms);
      }
    }

    if (transformsToApply.length > 0) {
      const transformContext = {
        id,
        path: filePath,
        options,
        matchedPattern: includeResult.included ? includeResult.matchedPattern : undefined,
      };

      for (const transform of transformsToApply) {
        try {
          contents = transform(contents, transformContext);
        } catch (error) {
          // A failing transform is skipped; earlier transforms' output is kept.
          context.logger?.warn(`Transform failed for ${id}: ${error}`);
        }
      }
    }

    // Build link context for this file (consumed by the link-transform pass)
    const linkContext = includeResult.included && includeResult.matchedPattern ? {
      sourcePath: filePath,
      targetPath: finalPath,
      basePath: includeResult.matchedPattern.basePath,
      pathMappings: options.includes?.[includeResult.matchedPattern.index]?.pathMappings,
      matchedPattern: includeResult.matchedPattern,
    } : undefined;

    // Use the finalPath we already computed
    return {
      sourcePath: filePath,
      targetPath: finalPath,
      content: contents,
      id,
      linkContext,
    };
  }

  // Helper function to store a processed file: parses it via the matching
  // Astro entry type, writes it to disk if absent, renders it when the entry
  // type supports rendering, and records it in the content store.
  async function storeProcessedFile(
    file: ImportedFile,
    context: any,
    options: ImportOptions
  ): Promise<any> {
    const { store, generateDigest, entryTypes, logger, parseData, config } = context;

    // Resolve the Astro entry type handler from the file extension.
    function configForFile(filePath: string) {
      const ext = filePath.split(".").at(-1);
      if (!ext) {
        logger.warn(`No extension found for ${filePath}`);
        return;
      }
      return entryTypes?.get(`.${ext}`);
    }

    const entryType = configForFile(file.sourcePath || "tmp.md");
    if (!entryType) throw new Error("No entry type found");

    const fileUrl = pathToFileURL(file.targetPath);
    const { body, data } = await entryType.getEntryInfo({
      contents: file.content,
      fileUrl: fileUrl,
    });

    // Generate digest for storage (repository-level caching handles change detection)
    const digest = generateDigest(file.content);
    const existingEntry = store.get(file.id);

    if (existingEntry) {
      logger.debug(`🔄 File ${file.id} - updating`);
    } else {
      logger.debug(`📄 File ${file.id} - adding`);
    }

    // Write file to disk
    // NOTE(review): only written when absent — an existing on-disk file is not
    // refreshed with new content here; confirm intended.
    if (!existsSync(fileURLToPath(fileUrl))) {
      logger.verbose(`Writing ${file.id} to ${fileUrl}`);
      await syncFile(fileURLToPath(fileUrl), file.content);
    }

    const parsedData = await parseData({
      id: file.id,
      data,
      filePath: fileUrl.toString(),
    });

    // Store in content store. Three cases mirror Astro's loader contract:
    // renderable entry types, deferred-render content module types, and plain data.
    if (entryType.getRenderFunction) {
      logger.verbose(`Rendering ${file.id}`);
      const render = await entryType.getRenderFunction(config);
      let rendered = undefined;
      try {
        rendered = await render?.({
          id: file.id,
          data,
          body,
          filePath: fileUrl.toString(),
          digest,
        });
      } catch (error: any) {
        // Render failures are logged; the entry is still stored (rendered stays undefined).
        logger.error(`Error rendering ${file.id}: ${error.message}`);
      }
      logger.debug(`🔍 Storing collection entry: ${file.id} (${file.sourcePath} -> ${file.targetPath})`);
      store.set({
        id: file.id,
        data: parsedData,
        body,
        filePath: file.targetPath,
        digest,
        rendered,
      });
    } else if ("contentModuleTypes" in entryType) {
      store.set({
        id: file.id,
        data: parsedData,
        body,
        filePath: file.targetPath,
        digest,
        deferredRender: true,
      });
    } else {
      store.set({
        id: file.id,
        data: parsedData,
        body,
        filePath: file.targetPath,
        digest
      });
    }

    return { id: file.id, filePath: file.targetPath };
  }

  // NOTE(review): this helper is defined but not called anywhere inside
  // toCollectionEntry (the collect/store pipeline above is used instead) —
  // possibly legacy/dead code; confirm before removing.
  async function processDirectoryRecursively(path: string): Promise<any> {
    // Fetch the content
    const { data, status } = await octokit.rest.repos.getContent({
      owner,
      repo,
      path,
      ref,
      request: { signal },
    });
    if (status !== 200) throw new Error(INVALID_SERVICE_RESPONSE);

    // Matches for regular files
    if (!Array.isArray(data)) {
      const filePath = data.path;
      switch (data.type) {
        // Return
        case "file":
          return await syncEntry(
            context,
            { url: data.download_url, editUrl: data.url },
            filePath,
            options,
            octokit,
            { signal },
          );
        default:
          throw new Error("Invalid type");
      }
    }

    // Directory listing with filtering - process sequentially
    const filteredEntries = data
      .filter(({ type, path }) => {
        // Always include directories for recursion
        if (type === "dir") return true;
        // Apply filtering logic to files
        if (type === "file") {
          return shouldIncludeFile(path, options).included;
        }
        return false;
      });

    const results = [];
    for (const { type, path, download_url, url } of filteredEntries) {
      switch (type) {
        // Recurse
        case "dir":
          results.push(await processDirectoryRecursively(path));
          break;
        // Return
        case "file":
          results.push(await syncEntry(
            context,
            { url: download_url, editUrl: url },
            path,
            options,
            octokit,
            { signal },
          ));
          break;
        default:
          throw new Error("Invalid type");
      }
    }
    return results;
  } // End of processDirectoryRecursively function
}
|
|
1145
|
+
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
/**
|
|
1149
|
+
* Get the headers needed to make a conditional request.
|
|
1150
|
+
* Uses the etag and last-modified values from the meta store.
|
|
1151
|
+
* @internal
|
|
1152
|
+
*/
|
|
1153
|
+
export function getHeaders({
|
|
1154
|
+
init,
|
|
1155
|
+
meta,
|
|
1156
|
+
id,
|
|
1157
|
+
}: {
|
|
1158
|
+
/** Initial headers to include */
|
|
1159
|
+
init?: RequestInit["headers"];
|
|
1160
|
+
/** Meta store to get etag and last-modified values from */
|
|
1161
|
+
meta: LoaderContext["meta"];
|
|
1162
|
+
id: string;
|
|
1163
|
+
}): Headers {
|
|
1164
|
+
const tag = `${id}-etag`;
|
|
1165
|
+
const lastModifiedTag = `${id}-last-modified`;
|
|
1166
|
+
const etag = meta.get(tag);
|
|
1167
|
+
const lastModified = meta.get(lastModifiedTag);
|
|
1168
|
+
const headers = new Headers(init);
|
|
1169
|
+
|
|
1170
|
+
if (etag) {
|
|
1171
|
+
headers.set("If-None-Match", etag);
|
|
1172
|
+
} else if (lastModified) {
|
|
1173
|
+
headers.set("If-Modified-Since", lastModified);
|
|
1174
|
+
}
|
|
1175
|
+
return headers;
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
/**
|
|
1179
|
+
* Store the etag or last-modified headers from a response in the meta store.
|
|
1180
|
+
* @internal
|
|
1181
|
+
*/
|
|
1182
|
+
export function syncHeaders({
|
|
1183
|
+
headers,
|
|
1184
|
+
meta,
|
|
1185
|
+
id,
|
|
1186
|
+
}: {
|
|
1187
|
+
/** Headers from the response */
|
|
1188
|
+
headers: Headers;
|
|
1189
|
+
/** Meta store to store etag and last-modified values in */
|
|
1190
|
+
meta: LoaderContext["meta"];
|
|
1191
|
+
/** id string */
|
|
1192
|
+
id: string;
|
|
1193
|
+
}) {
|
|
1194
|
+
const etag = headers.get("etag");
|
|
1195
|
+
const lastModified = headers.get("last-modified");
|
|
1196
|
+
const tag = `${id}-etag`;
|
|
1197
|
+
const lastModifiedTag = `${id}-last-modified`;
|
|
1198
|
+
meta.delete(tag);
|
|
1199
|
+
meta.delete(lastModifiedTag);
|
|
1200
|
+
if (etag) {
|
|
1201
|
+
meta.set(tag, etag);
|
|
1202
|
+
} else if (lastModified) {
|
|
1203
|
+
meta.set(lastModifiedTag, lastModified);
|
|
1204
|
+
}
|
|
1205
|
+
}
|