@larkiny/astro-github-loader 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,216 @@
1
+ import { promises as fs } from "node:fs";
2
+ import { existsSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { generateId, generatePath, shouldIncludeFile } from "./github.content.js";
5
const SLEEP_BETWEEN_DELETES = 10; // ms between file deletions
/**
 * Sleep utility for pacing file operations.
 * Resolves after `delayMs` milliseconds.
 */
function sleep(delayMs) {
    return new Promise((resolve) => {
        setTimeout(resolve, delayMs);
    });
}
12
/**
 * Gets all files that should exist locally based on remote repository state.
 *
 * Only the directory prefixes implied by the include patterns are scanned
 * (instead of the whole repository tree) to limit GitHub API calls.
 *
 * @param octokit - GitHub API client
 * @param options - Import options ({ owner, repo, ref, includes, ... })
 * @param signal - Optional AbortSignal for cancellation
 * @returns Set of absolute local file paths the remote currently maps to
 */
async function getExpectedFiles(octokit, options, signal) {
    const { owner, repo, ref = "main" } = options;
    const expectedFiles = new Set();
    // Resolve a repository file path to the absolute local path it syncs to
    // and record it. Calls shouldIncludeFile exactly once per file (the
    // original computed it twice for single-file responses).
    function recordExpectedFile(repoPath) {
        const includeResult = shouldIncludeFile(repoPath, options);
        if (!includeResult.included)
            return;
        const localPath = generatePath(repoPath, includeResult.matchedPattern, options);
        // Convert to absolute path for consistent comparison
        const absolutePath = localPath.startsWith('/') ? localPath : join(process.cwd(), localPath);
        expectedFiles.add(absolutePath);
    }
    // Get all unique directory prefixes from include patterns to limit scanning
    const directoriesToScan = new Set();
    if (options.includes && options.includes.length > 0) {
        for (const includePattern of options.includes) {
            // Extract directory part from pattern (before any glob wildcards)
            const pattern = includePattern.pattern;
            const beforeGlob = pattern.split(/[*?{]/)[0];
            const dirPart = beforeGlob.includes('/') ? beforeGlob.substring(0, beforeGlob.lastIndexOf('/')) : '';
            directoriesToScan.add(dirPart);
        }
    }
    else {
        // If no includes specified, scan from root
        directoriesToScan.add('');
    }
    async function processDirectory(dirPath) {
        try {
            const { data } = await octokit.rest.repos.getContent({
                owner,
                repo,
                path: dirPath,
                ref,
                request: { signal }
            });
            if (!Array.isArray(data)) {
                // Single file response
                if (data.type === 'file') {
                    recordExpectedFile(data.path);
                }
                return;
            }
            // Directory listing: recurse into subdirectories, record included files
            const promises = data
                .filter(({ type, path }) => {
                    if (type === "dir")
                        return true;
                    if (type === "file")
                        return shouldIncludeFile(path, options).included;
                    return false;
                })
                .map(async ({ type, path: itemPath }) => {
                    if (type === "dir") {
                        await processDirectory(itemPath);
                    }
                    else if (type === "file") {
                        recordExpectedFile(itemPath);
                    }
                });
            await Promise.all(promises);
        }
        catch (error) {
            // Aborts must propagate; any other failure is logged and skipped so
            // one unreadable directory does not abort the whole scan.
            if (signal?.aborted)
                throw error;
            console.warn(`Failed to process directory ${dirPath}:`, error);
        }
    }
    // Process only the directories that match our include patterns
    for (const dirPath of directoriesToScan) {
        await processDirectory(dirPath);
    }
    return expectedFiles;
}
90
/**
 * Gets all existing local files in the basePath as absolute paths.
 *
 * Performs a recursive pre-order walk of basePath; dot-prefixed entries
 * (manifest and other system files/directories) are skipped entirely.
 * Unreadable directories are logged and skipped rather than thrown.
 */
async function getExistingFiles(basePath) {
    const found = new Set();
    if (!existsSync(basePath)) {
        return found;
    }
    const isVisible = (name) => !name.startsWith('.');
    async function walk(currentDir) {
        let entries;
        try {
            entries = await fs.readdir(currentDir, { withFileTypes: true });
        }
        catch (error) {
            console.warn(`Failed to read directory ${currentDir}:`, error);
            return;
        }
        for (const entry of entries) {
            if (!isVisible(entry.name)) {
                continue; // skip manifest/system entries
            }
            const entryPath = join(currentDir, entry.name);
            if (entry.isDirectory()) {
                await walk(entryPath);
            }
            else if (entry.isFile()) {
                found.add(entryPath);
            }
        }
    }
    await walk(basePath);
    return found;
}
124
/**
 * Performs selective cleanup of obsolete files.
 *
 * Compares the files currently on disk under each include pattern's basePath
 * with the files the remote repository still provides, and deletes the local
 * files that no longer have a remote counterpart. Deletions are paced, and
 * individual failures are logged without aborting the sync. Only the
 * `deleted` counter is populated here; added/updated/unchanged are counted
 * by the main sync process.
 */
export async function performSelectiveCleanup(config, context, octokit, signal) {
    const startedAt = Date.now();
    const { logger } = context;
    const configName = config.name || `${config.owner}/${config.repo}`;
    // Zero-valued stats with the duration measured at call time.
    const emptyStats = () => ({
        added: 0,
        updated: 0,
        deleted: 0,
        unchanged: 0,
        duration: Date.now() - startedAt
    });
    if (!config.includes || config.includes.length === 0) {
        // No cleanup needed if no include patterns specified
        return emptyStats();
    }
    logger.debug(`Starting selective cleanup for ${configName}`);
    try {
        // Union of files currently on disk under every include basePath.
        const localFiles = new Set();
        for (const { basePath } of config.includes) {
            for (const file of await getExistingFiles(basePath)) {
                localFiles.add(file);
            }
        }
        // If no existing files, skip cleanup (fresh import)
        if (localFiles.size === 0) {
            logger.debug(`No existing files found in any base paths, skipping cleanup`);
            return emptyStats();
        }
        // Get expected files from remote repository
        const remoteFiles = await getExpectedFiles(octokit, config, signal);
        // Anything on disk the remote no longer provides is obsolete.
        const obsoleteFiles = [...localFiles].filter((file) => !remoteFiles.has(file));
        // Delete obsolete files, pacing between unlinks.
        let deletedCount = 0;
        for (const filePath of obsoleteFiles) {
            try {
                if (existsSync(filePath)) {
                    await fs.unlink(filePath);
                    logger.debug(`Deleted obsolete file: ${filePath}`);
                    deletedCount++;
                    await sleep(SLEEP_BETWEEN_DELETES);
                }
            }
            catch (error) {
                logger.warn(`Failed to delete ${filePath}: ${error}`);
            }
        }
        const duration = Date.now() - startedAt;
        const stats = {
            added: 0, // Will be counted by main sync process
            updated: 0, // Will be counted by main sync process
            deleted: deletedCount,
            unchanged: 0, // Will be counted by main sync process
            duration
        };
        if (deletedCount > 0) {
            logger.info(`Cleanup completed for ${configName}: ${deletedCount} obsolete files deleted (${duration}ms)`);
        }
        else {
            logger.debug(`No cleanup needed for ${configName} (${duration}ms)`);
        }
        return stats;
    }
    catch (error) {
        if (signal?.aborted) {
            logger.info(`Cleanup cancelled for ${configName}`);
            throw error;
        }
        const duration = Date.now() - startedAt;
        logger.error(`Cleanup failed for ${configName} after ${duration}ms: ${error}`);
        // Don't throw - let the main sync process continue
        return {
            added: 0,
            updated: 0,
            deleted: 0,
            unchanged: 0,
            duration
        };
    }
}
@@ -0,0 +1,24 @@
1
/**
 * Error message used to flag inputs that do not meet the required string
 * format or criteria.
 *
 * @internal
 */
export declare const INVALID_STRING_ERROR = "Invalid string";
/**
 * Error message used when a provided URL does not conform to the expected
 * format or requirements.
 *
 * @internal
 */
export declare const INVALID_URL_ERROR = "Invalid url";
/**
 * Error message used when a service or API response does not meet the
 * expected format, structure, or criteria.
 *
 * @internal
 */
export declare const INVALID_SERVICE_RESPONSE = "Invalid service response";
@@ -0,0 +1,24 @@
1
/**
 * Error message used to flag inputs that do not meet the required string
 * format or criteria.
 *
 * @internal
 */
export const INVALID_STRING_ERROR = "Invalid string";
/**
 * Error message used when a provided URL does not conform to the expected
 * format or requirements.
 *
 * @internal
 */
export const INVALID_URL_ERROR = "Invalid url";
/**
 * Error message used when a service or API response does not meet the
 * expected format, structure, or criteria.
 *
 * @internal
 */
export const INVALID_SERVICE_RESPONSE = "Invalid service response";
@@ -0,0 +1,138 @@
1
+ import type { LoaderContext, CollectionEntryOptions, ImportOptions, MatchedPattern } from "./github.types.js";
2
/**
 * Counters summarizing an import run (returned by toCollectionEntry).
 */
export interface ImportStats {
    /** Entries examined during the import. */
    processed: number;
    /** Entries that were written or rewritten. */
    updated: number;
    /** Entries left untouched. */
    unchanged: number;
    /** Count of assets downloaded, when asset handling is active. */
    assetsDownloaded?: number;
    /** Count of assets reused from cache, when asset handling is active. */
    assetsCached?: number;
}
9
/**
 * Generates a unique identifier from a file path by removing the extension
 * @param filePath - The file path to generate ID from
 * @returns The generated identifier: the path with its extension removed
 * @internal
 */
export declare function generateId(filePath: string): string;
/**
 * Applies path mapping logic to get the final filename for a file
 *
 * Supports two types of path mappings:
 * - **File mapping**: Exact file path match (e.g., 'docs/README.md' -> 'docs/overview.md')
 * - **Folder mapping**: Folder path with trailing slash (e.g., 'docs/capabilities/' -> 'docs/')
 *
 * @param filePath - Original source file path
 * @param matchedPattern - The pattern that matched this file
 * @param options - Import options containing path mappings
 * @returns Final filename after applying path mapping logic
 * @internal
 */
export declare function applyRename(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string;
/**
 * Generates a local file path based on the matched pattern and file path
 * @param filePath - The original file path from the repository
 * @param matchedPattern - The pattern that matched this file (or null if no includes specified)
 * @param options - Import options containing includes patterns for path mapping lookups
 * @returns The local file path where this content should be stored
 * @internal
 */
export declare function generatePath(filePath: string, matchedPattern?: MatchedPattern | null, options?: ImportOptions): string;
39
/**
 * Synchronizes a file by ensuring the target directory exists and then writing the specified content to the file at the given path.
 *
 * @param path - The path of the file to synchronize, including its directory and filename.
 * @param content - The content to write into the file.
 * @returns A promise that resolves when the file has been successfully written.
 * @internal
 */
export declare function syncFile(path: string, content: string): Promise<void>;
/**
 * Checks if a file path should be included and returns the matching pattern.
 *
 * Note: always returns an object (never null) — `included` reports the
 * decision, and `matchedPattern` is the include pattern that matched (or
 * null when no includes were specified or the file is excluded).
 *
 * @param filePath - The file path to check (relative to the repository root)
 * @param options - Import options containing includes patterns
 * @returns Object with the include status and matched pattern
 * @internal
 */
export declare function shouldIncludeFile(filePath: string, options: ImportOptions): {
    included: true;
    matchedPattern: MatchedPattern | null;
} | {
    included: false;
    matchedPattern: null;
};
62
/**
 * Detects asset references in markdown content using regex patterns
 * @param content - The markdown content to parse
 * @param assetPatterns - File extensions to treat as assets
 * @returns Array of detected asset paths
 * @internal
 */
export declare function detectAssets(content: string, assetPatterns?: string[]): string[];
/**
 * Downloads an asset from GitHub and saves it locally
 * @param octokit - GitHub API client
 * @param owner - Repository owner
 * @param repo - Repository name
 * @param ref - Git reference
 * @param assetPath - Path to the asset in the repository
 * @param localPath - Local path where the asset should be saved
 * @param signal - Abort signal for cancellation
 * @returns Promise that resolves when the asset is downloaded
 * @internal
 */
export declare function downloadAsset(octokit: any, owner: string, repo: string, ref: string, assetPath: string, localPath: string, signal?: AbortSignal): Promise<void>;
/**
 * Transforms asset references in markdown content to use local paths
 * @param content - The markdown content to transform
 * @param assetMap - Map of original asset paths to new local paths
 * @returns Transformed content with updated asset references
 * @internal
 */
export declare function transformAssetReferences(content: string, assetMap: Map<string, string>): string;
91
/**
 * Synchronizes an entry by fetching its contents, validating its metadata, and storing or rendering it as needed.
 *
 * @param context - The loader context containing the required utilities, metadata, and configuration.
 * @param urls - Object containing URL data.
 * @param urls.url - The URL of the entry to fetch. Throws an error if null or invalid.
 * @param urls.editUrl - The URL for editing the entry.
 * @param filePath - Repository path of the file being synchronized.
 * @param options - Import options for processing the entry such as file paths and custom settings.
 * @param octokit - GitHub API client for downloading assets.
 * @param init - Optional parameter for customizing the fetch request.
 * @returns Resolves when the entry has been successfully processed and stored. Throws errors if invalid URL, missing configuration, or other issues occur.
 * @internal
 */
export declare function syncEntry(context: LoaderContext, { url, editUrl }: {
    url: string | URL | null;
    editUrl: string;
}, filePath: string, options: ImportOptions, octokit: any, init?: RequestInit): Promise<void>;
/**
 * Converts a given GitHub repository path into a collection entry by fetching the content
 * from the GitHub repository using the provided Octokit instance and options.
 * Handles both files and directories, recursively processing directories if needed.
 * @returns Aggregate ImportStats for the processed entries.
 * @internal
 */
export declare function toCollectionEntry({ context, octokit, options, signal, force, }: CollectionEntryOptions): Promise<ImportStats>;
115
/**
 * Get the headers needed to make a conditional request.
 * Uses the etag and last-modified values from the meta store.
 * @internal
 */
export declare function getHeaders({ init, meta, id, }: {
    /** Initial headers to include */
    init?: RequestInit["headers"];
    /** Meta store to get etag and last-modified values from */
    meta: LoaderContext["meta"];
    /** Entry id whose stored validators should be applied */
    id: string;
}): Headers;
/**
 * Store the etag or last-modified headers from a response in the meta store.
 * @internal
 */
export declare function syncHeaders({ headers, meta, id, }: {
    /** Headers from the response */
    headers: Headers;
    /** Meta store to store etag and last-modified values in */
    meta: LoaderContext["meta"];
    /** Entry id the stored validators are keyed by */
    id: string;
}): void;