@larkiny/astro-github-loader 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +675 -0
- package/dist/github.cleanup.d.ts +5 -0
- package/dist/github.cleanup.js +216 -0
- package/dist/github.constants.d.ts +24 -0
- package/dist/github.constants.js +24 -0
- package/dist/github.content.d.ts +138 -0
- package/dist/github.content.js +1016 -0
- package/dist/github.dryrun.d.ts +72 -0
- package/dist/github.dryrun.js +247 -0
- package/dist/github.link-transform.d.ts +77 -0
- package/dist/github.link-transform.js +321 -0
- package/dist/github.loader.d.ts +14 -0
- package/dist/github.loader.js +143 -0
- package/dist/github.loader.spec.d.ts +1 -0
- package/dist/github.loader.spec.js +96 -0
- package/dist/github.logger.d.ts +132 -0
- package/dist/github.logger.js +260 -0
- package/dist/github.sync.d.ts +5 -0
- package/dist/github.sync.js +292 -0
- package/dist/github.types.d.ts +315 -0
- package/dist/github.types.js +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/package.json +66 -0
- package/src/github.cleanup.ts +243 -0
- package/src/github.constants.ts +25 -0
- package/src/github.content.ts +1205 -0
- package/src/github.dryrun.ts +339 -0
- package/src/github.link-transform.ts +452 -0
- package/src/github.loader.spec.ts +106 -0
- package/src/github.loader.ts +189 -0
- package/src/github.logger.ts +324 -0
- package/src/github.types.ts +339 -0
- package/src/index.ts +5 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Algorand Foundation
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,675 @@
|
|
|
1
|
+
# Astro GitHub Loader
|
|
2
|
+
|
|
3
|
+
Load content from GitHub repositories into Astro content collections with flexible pattern-based import, asset management, content transformations, and intelligent change detection.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🎯 **Pattern-Based Import** - Use glob patterns to selectively import content with per-pattern configuration
|
|
8
|
+
- 🖼️ **Asset Management** - Automatically download and transform asset references in markdown files
|
|
9
|
+
- 🛠️ **Content Transforms** - Apply custom transformations to content during import, with pattern-specific transforms
|
|
10
|
+
- ⚡ **Intelligent Change Detection** - Ref-aware commit tracking that only triggers re-imports when your target branch/tag actually changes
|
|
11
|
+
- 🔒 **Stable Imports** - Non-destructive approach that preserves local content collections
|
|
12
|
+
- 🚀 **Optimized Performance** - Smart directory scanning to minimize GitHub API calls
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import { defineCollection } from "astro:content";
|
|
18
|
+
import { docsLoader } from "@astrojs/starlight/loaders";
|
|
19
|
+
import { docsSchema } from "@astrojs/starlight/schema";
|
|
20
|
+
import { Octokit } from "octokit";
|
|
21
|
+
import { githubLoader } from "@larkiny/astro-github-loader";
|
|
22
|
+
import type {
|
|
23
|
+
ImportOptions,
|
|
24
|
+
LoaderContext,
|
|
25
|
+
} from "@larkiny/astro-github-loader";
|
|
26
|
+
|
|
27
|
+
const REMOTE_CONTENT: ImportOptions[] = [
|
|
28
|
+
{
|
|
29
|
+
name: "Documentation",
|
|
30
|
+
owner: "your-org",
|
|
31
|
+
repo: "your-docs-repo",
|
|
32
|
+
ref: "main",
|
|
33
|
+
includes: [
|
|
34
|
+
{
|
|
35
|
+
pattern: "docs/**/*.md",
|
|
36
|
+
basePath: "src/content/docs/imported",
|
|
37
|
+
},
|
|
38
|
+
],
|
|
39
|
+
},
|
|
40
|
+
];
|
|
41
|
+
|
|
42
|
+
const octokit = new Octokit({ auth: import.meta.env.GITHUB_TOKEN });
|
|
43
|
+
|
|
44
|
+
export const collections = {
|
|
45
|
+
docs: defineCollection({
|
|
46
|
+
loader: {
|
|
47
|
+
name: "docs",
|
|
48
|
+
load: async (context) => {
|
|
49
|
+
await docsLoader().load(context);
|
|
50
|
+
|
|
51
|
+
for (const config of REMOTE_CONTENT) {
|
|
52
|
+
await githubLoader({
|
|
53
|
+
octokit,
|
|
54
|
+
configs: [config],
|
|
55
|
+
clear: config.clear,
|
|
56
|
+
dryRun: false, // Set to true for change detection only
|
|
57
|
+
}).load(context as LoaderContext);
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
},
|
|
61
|
+
schema: docsSchema(),
|
|
62
|
+
}),
|
|
63
|
+
};
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Multi-Ref Configuration Example
|
|
67
|
+
|
|
68
|
+
Track multiple git references from the same repository independently:
|
|
69
|
+
|
|
70
|
+
```typescript
|
|
71
|
+
import { defineCollection } from "astro:content";
|
|
72
|
+
import { docsLoader } from "@astrojs/starlight/loaders";
|
|
73
|
+
import { docsSchema } from "@astrojs/starlight/schema";
|
|
74
|
+
import { Octokit } from "octokit";
|
|
75
|
+
import { githubLoader } from "@larkiny/astro-github-loader";
|
|
76
|
+
import type { ImportOptions } from "@larkiny/astro-github-loader";
|
|
77
|
+
|
|
78
|
+
const MULTI_REF_CONTENT: ImportOptions[] = [
|
|
79
|
+
{
|
|
80
|
+
name: "Stable Docs",
|
|
81
|
+
owner: "myorg",
|
|
82
|
+
repo: "docs",
|
|
83
|
+
ref: "v2.0.0", // Immutable tag - never re-imports
|
|
84
|
+
includes: [
|
|
85
|
+
{
|
|
86
|
+
pattern: "docs/**/*.md",
|
|
87
|
+
basePath: "src/content/docs/v2",
|
|
88
|
+
},
|
|
89
|
+
],
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
name: "Latest Docs",
|
|
93
|
+
owner: "myorg",
|
|
94
|
+
repo: "docs",
|
|
95
|
+
ref: "main", // Live branch - re-imports only on main commits
|
|
96
|
+
includes: [
|
|
97
|
+
{
|
|
98
|
+
pattern: "docs/**/*.md",
|
|
99
|
+
basePath: "src/content/docs/latest",
|
|
100
|
+
},
|
|
101
|
+
],
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
name: "Beta Features",
|
|
105
|
+
owner: "myorg",
|
|
106
|
+
repo: "docs",
|
|
107
|
+
ref: "beta", // Feature branch - ignores main/other branch commits
|
|
108
|
+
includes: [
|
|
109
|
+
{
|
|
110
|
+
pattern: "experimental/**/*.md",
|
|
111
|
+
basePath: "src/content/docs/beta",
|
|
112
|
+
},
|
|
113
|
+
],
|
|
114
|
+
},
|
|
115
|
+
];
|
|
116
|
+
|
|
117
|
+
const octokit = new Octokit({ auth: import.meta.env.GITHUB_TOKEN });
|
|
118
|
+
|
|
119
|
+
export const collections = {
|
|
120
|
+
docs: defineCollection({
|
|
121
|
+
loader: {
|
|
122
|
+
name: "docs",
|
|
123
|
+
load: async (context) => {
|
|
124
|
+
await docsLoader().load(context);
|
|
125
|
+
|
|
126
|
+
// Each config is tracked independently by ref
|
|
127
|
+
for (const config of MULTI_REF_CONTENT) {
|
|
128
|
+
await githubLoader({
|
|
129
|
+
octokit,
|
|
130
|
+
configs: [config],
|
|
131
|
+
dryRun: false,
|
|
132
|
+
}).load(context);
|
|
133
|
+
}
|
|
134
|
+
},
|
|
135
|
+
},
|
|
136
|
+
schema: docsSchema(),
|
|
137
|
+
}),
|
|
138
|
+
};
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
In this example:
|
|
142
|
+
- **Stable docs** (v2.0.0 tag): Never re-imports; provides a stable reference
|
|
143
|
+
- **Latest docs** (main branch): Only re-imports when main branch changes
|
|
144
|
+
- **Beta features** (beta branch): Only re-imports when beta branch changes
|
|
145
|
+
|
|
146
|
+
Commits to `develop`, `feature-xyz`, or any other branches are completely ignored by all three configs.
|
|
147
|
+
|
|
148
|
+
## Processing Pipeline
|
|
149
|
+
|
|
150
|
+
The astro-github-loader processes files through a well-defined pipeline with a clear order of operations.
|
|
151
|
+
|
|
152
|
+
To understand more about the content processing flow, see the [detailed guide](PROCESSING_FLOW.md).
|
|
153
|
+
|
|
154
|
+
### Order of Operations
|
|
155
|
+
|
|
156
|
+
1. **File Discovery and Collection**: Scan repository using include patterns and fetch file contents
|
|
157
|
+
2. **Individual File Processing**: For each file:
|
|
158
|
+
- Apply asset processing (download and transform asset references)
|
|
159
|
+
- Apply path mappings to determine target paths
|
|
160
|
+
- Apply content transformations (global transforms, then pattern-specific transforms)
|
|
161
|
+
3. **Global Link Transformation**: Process all markdown links across all imported files using `linkMappings`
|
|
162
|
+
4. **File Storage**: Write processed files to Astro content store
|
|
163
|
+
|
|
164
|
+
### Path vs Link Transformations
|
|
165
|
+
|
|
166
|
+
Understanding when and why to use each type of transformation:
|
|
167
|
+
|
|
168
|
+
- **`pathMappings`**: Controls where files are imported to (changes file system paths)
|
|
169
|
+
|
|
170
|
+
- Applied during import process
|
|
171
|
+
- Affects the final location of files on disk
|
|
172
|
+
- **Use when**: You need to restructure the imported files differently than they exist in the source repository
|
|
173
|
+
- Example: `'docs/capabilities/': 'docs/'` moves files from capabilities folder up one level
|
|
174
|
+
|
|
175
|
+
- **`linkMappings`**: Controls how markdown links are transformed (changes URLs in content)
|
|
176
|
+
- Applied after all content is imported
|
|
177
|
+
- Affects links within markdown content
|
|
178
|
+
- **Use when**: You have restructured files (with `pathMappings`) OR need to handle links to files outside the imported document set
|
|
179
|
+
- Example: Transform `../cli/index.md` to `/reference/algokit-cli/` (external reference)
|
|
180
|
+
|
|
181
|
+
## Pattern-Based Import System
|
|
182
|
+
|
|
183
|
+
The `includes` system allows you to define multiple import patterns, each with its own destination path and transforms:
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
const REMOTE_CONTENT: ImportOptions[] = [
|
|
187
|
+
{
|
|
188
|
+
name: "Multi-Pattern Import",
|
|
189
|
+
owner: "your-org",
|
|
190
|
+
repo: "your-docs-repo",
|
|
191
|
+
includes: [
|
|
192
|
+
// Import main documentation with path restructuring
|
|
193
|
+
{
|
|
194
|
+
pattern: "docs/**/*.md",
|
|
195
|
+
basePath: "src/content/docs/guides",
|
|
196
|
+
pathMappings: {
|
|
197
|
+
// Move files from capabilities subfolder up one level
|
|
198
|
+
"docs/capabilities/": "docs/",
|
|
199
|
+
// Rename specific files
|
|
200
|
+
"docs/README.md": "docs/overview.md",
|
|
201
|
+
},
|
|
202
|
+
transforms: [addGuideMetadata],
|
|
203
|
+
},
|
|
204
|
+
// Import API reference to different location
|
|
205
|
+
{
|
|
206
|
+
pattern: "api-reference/**/*.md",
|
|
207
|
+
basePath: "src/content/docs/api",
|
|
208
|
+
pathMappings: {
|
|
209
|
+
// Flatten API structure
|
|
210
|
+
"api-reference/v1/": "api-reference/",
|
|
211
|
+
},
|
|
212
|
+
transforms: [addApiMetadata, formatApiDocs],
|
|
213
|
+
},
|
|
214
|
+
// Import specific files
|
|
215
|
+
{
|
|
216
|
+
pattern: "README.md",
|
|
217
|
+
basePath: "src/content/docs",
|
|
218
|
+
transforms: [convertReadmeToOverview],
|
|
219
|
+
},
|
|
220
|
+
],
|
|
221
|
+
},
|
|
222
|
+
];
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Pattern Features
|
|
226
|
+
|
|
227
|
+
- **Glob patterns**: Use `**/*.md`, `docs/guides/*.md`, specific files, etc.
|
|
228
|
+
- **Per-pattern basePath**: Each pattern can target a different local directory
|
|
229
|
+
- **Per-pattern transforms**: Apply different transformations to different content types
|
|
230
|
+
- **Per-pattern pathMappings**: Restructure file paths within each pattern
|
|
231
|
+
- **Directory structure preservation**: Relative paths within patterns are preserved
|
|
232
|
+
|
|
233
|
+
### Path Mappings
|
|
234
|
+
|
|
235
|
+
Use `pathMappings` to restructure files during import.
|
|
236
|
+
|
|
237
|
+
**Common use cases**:
|
|
238
|
+
|
|
239
|
+
- Flatten nested folder structures (e.g., move `docs/capabilities/` files to `docs/`)
|
|
240
|
+
- Rename specific files (e.g., `README.md` → `overview.md`)
|
|
241
|
+
- Reorganize content for better site structure
|
|
242
|
+
- Remove unwanted path segments from imported files
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
{
|
|
246
|
+
pattern: "docs/**/*.md",
|
|
247
|
+
basePath: "src/content/docs/guides",
|
|
248
|
+
pathMappings: {
|
|
249
|
+
// File mappings (exact paths)
|
|
250
|
+
'docs/README.md': 'docs/overview.md',
|
|
251
|
+
'docs/getting-started.md': 'docs/quickstart.md',
|
|
252
|
+
|
|
253
|
+
// Folder mappings (require trailing slash)
|
|
254
|
+
'docs/capabilities/': 'docs/', // Move all files up one level
|
|
255
|
+
'docs/legacy/guides/': 'docs/archive/', // Move to different folder
|
|
256
|
+
},
|
|
257
|
+
}
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
**Important**: Folder mappings require trailing slashes to distinguish them from file mappings:
|
|
261
|
+
|
|
262
|
+
- ✅ `'docs/capabilities/': 'docs/'` (folder mapping - moves all files)
|
|
263
|
+
- ❌ `'docs/capabilities': 'docs/'` (treated as exact file match)
|
|
264
|
+
|
|
265
|
+
### Common Pattern Examples
|
|
266
|
+
|
|
267
|
+
- **`**/*.md`** - All markdown files in the repository
|
|
268
|
+
- **`docs/**/*`** - All files in the docs directory and subdirectories
|
|
269
|
+
- **`guides/*.md`** - Only markdown files directly in the guides directory
|
|
270
|
+
- **`api-reference/**/*.{md,mdx}`** - Markdown and MDX files in api-reference
|
|
271
|
+
- **`README.md`** - Specific file at repository root
|
|
272
|
+
- **`docs/getting-started.md`** - Specific file at specific path
|
|
273
|
+
|
|
274
|
+
## Content & Link Transformations
|
|
275
|
+
|
|
276
|
+
The loader supports both content transformations (modifying file contents) and link transformations (fixing cross-references):
|
|
277
|
+
|
|
278
|
+
### Content Transformations
|
|
279
|
+
|
|
280
|
+
Apply content transformations globally or per-pattern.
|
|
281
|
+
|
|
282
|
+
**Use content transforms when you need to**:
|
|
283
|
+
|
|
284
|
+
- Add frontmatter (metadata) to imported files
|
|
285
|
+
- Convert H1 headings to frontmatter titles
|
|
286
|
+
- Add import tracking information
|
|
287
|
+
- Modify content structure or formatting
|
|
288
|
+
- Add badges, labels, or other metadata specific to your site
|
|
289
|
+
|
|
290
|
+
```typescript
|
|
291
|
+
import { githubLoader } from "@larkiny/astro-github-loader";
|
|
292
|
+
import type { TransformFunction } from "@larkiny/astro-github-loader";
|
|
293
|
+
|
|
294
|
+
// Global transform functions
|
|
295
|
+
const addImportMetadata: TransformFunction = (content, context) => {
|
|
296
|
+
return `---
|
|
297
|
+
imported_from: ${context.options.owner}/${context.options.repo}
|
|
298
|
+
original_path: ${context.path}
|
|
299
|
+
imported_at: ${new Date().toISOString()}
|
|
300
|
+
---
|
|
301
|
+
${content}`;
|
|
302
|
+
};
|
|
303
|
+
|
|
304
|
+
// Pattern-specific transform
|
|
305
|
+
const addApiDocsBadge: TransformFunction = (content, context) => {
|
|
306
|
+
const lines = content.split("\n");
|
|
307
|
+
const frontmatterEnd = lines.findIndex((line, i) => i > 0 && line === "---");
|
|
308
|
+
if (frontmatterEnd > 0) {
|
|
309
|
+
lines.splice(frontmatterEnd, 0, "sidebar:", ' badge: "API"');
|
|
310
|
+
}
|
|
311
|
+
return lines.join("\n");
|
|
312
|
+
};
|
|
313
|
+
|
|
314
|
+
// Convert H1 to title frontmatter
|
|
315
|
+
const convertH1ToTitle: TransformFunction = (content, context) => {
|
|
316
|
+
const h1Match = content.match(/^#\s+(.+)$/m);
|
|
317
|
+
if (h1Match) {
|
|
318
|
+
const title = h1Match[1];
|
|
319
|
+
// Remove the H1 from content
|
|
320
|
+
content = content.replace(/^#\s+.+$/m, "").trim();
|
|
321
|
+
// Add to frontmatter
|
|
322
|
+
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
|
|
323
|
+
if (frontmatterMatch) {
|
|
324
|
+
const existingFrontmatter = frontmatterMatch[1];
|
|
325
|
+
const newFrontmatter = `---\ntitle: "${title}"\n${existingFrontmatter}\n---`;
|
|
326
|
+
content = content.replace(/^---\n[\s\S]*?\n---/, newFrontmatter);
|
|
327
|
+
} else {
|
|
328
|
+
content = `---\ntitle: "${title}"\n---\n\n${content}`;
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
return content;
|
|
332
|
+
};
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Link Transformations
|
|
336
|
+
|
|
337
|
+
Configure link transformations to handle cross-repository links and restructured file references.
|
|
338
|
+
|
|
339
|
+
**Use link mappings when**:
|
|
340
|
+
|
|
341
|
+
- You've restructured files with `pathMappings` and need to update internal links
|
|
342
|
+
- Links reference files outside the imported document set (external repositories, different sections)
|
|
343
|
+
- Links need to be transformed for your site's URL structure (e.g., Starlight routing)
|
|
344
|
+
- You need to handle broken or outdated links in the source content
|
|
345
|
+
|
|
346
|
+
```typescript
|
|
347
|
+
import { createStarlightLinkMappings } from "./transforms/links.js";
|
|
348
|
+
|
|
349
|
+
const REMOTE_CONTENT: ImportOptions[] = [
|
|
350
|
+
{
|
|
351
|
+
name: "Docs with Full Transformations",
|
|
352
|
+
owner: "your-org",
|
|
353
|
+
repo: "docs-repo",
|
|
354
|
+
|
|
355
|
+
// Global content transforms applied to all includes
|
|
356
|
+
transforms: [addImportMetadata, convertH1ToTitle],
|
|
357
|
+
|
|
358
|
+
includes: [
|
|
359
|
+
{
|
|
360
|
+
pattern: "docs/**/*.md",
|
|
361
|
+
basePath: "src/content/docs/guides",
|
|
362
|
+
pathMappings: {
|
|
363
|
+
"docs/capabilities/": "docs/",
|
|
364
|
+
"docs/README.md": "docs/overview.md",
|
|
365
|
+
},
|
|
366
|
+
// Pattern-specific content transforms
|
|
367
|
+
transforms: [addGuideFormatting],
|
|
368
|
+
},
|
|
369
|
+
{
|
|
370
|
+
pattern: "api/**/*.md",
|
|
371
|
+
basePath: "src/content/docs/api",
|
|
372
|
+
transforms: [addApiDocsBadge, formatApiContent],
|
|
373
|
+
},
|
|
374
|
+
],
|
|
375
|
+
|
|
376
|
+
// Link transformations (applied after content transforms)
|
|
377
|
+
linkTransform: {
|
|
378
|
+
stripPrefixes: ["src/content/docs"],
|
|
379
|
+
linkMappings: [
|
|
380
|
+
// Apply Starlight-specific link transformations
|
|
381
|
+
...createStarlightLinkMappings(),
|
|
382
|
+
|
|
383
|
+
// Custom link mappings for external references
|
|
384
|
+
{
|
|
385
|
+
pattern: /^\.\.\/cli\/?$/,
|
|
386
|
+
replacement: (match: string, anchor: string) => {
|
|
387
|
+
return `/reference/algokit-cli`;
|
|
388
|
+
},
|
|
389
|
+
global: true,
|
|
390
|
+
description: "Map CLI reference links to reference section",
|
|
391
|
+
},
|
|
392
|
+
|
|
393
|
+
// Transform README links to introduction
|
|
394
|
+
{
|
|
395
|
+
pattern: /^\.\.\/\.\.\/README\.md$/,
|
|
396
|
+
replacement: (match: string, anchor: string) => {
|
|
397
|
+
return `/introduction`;
|
|
398
|
+
},
|
|
399
|
+
global: true,
|
|
400
|
+
description: "Map README links to introduction page",
|
|
401
|
+
},
|
|
402
|
+
],
|
|
403
|
+
},
|
|
404
|
+
},
|
|
405
|
+
];
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Link Transformation Utilities
|
|
409
|
+
|
|
410
|
+
Handle markdown links with anchor fragments using built-in utilities:
|
|
411
|
+
|
|
412
|
+
```typescript
|
|
413
|
+
import {
|
|
414
|
+
createLinkTransform,
|
|
415
|
+
extractAnchor,
|
|
416
|
+
removeMarkdownExtension,
|
|
417
|
+
} from "@larkiny/astro-github-loader";
|
|
418
|
+
|
|
419
|
+
const linkTransform = createLinkTransform({
|
|
420
|
+
baseUrl: "/docs/imported",
|
|
421
|
+
pathTransform: (path, context) => {
|
|
422
|
+
const { path: cleanPath, anchor } = extractAnchor(path);
|
|
423
|
+
|
|
424
|
+
// Custom link handling logic
|
|
425
|
+
if (cleanPath === "README.md") {
|
|
426
|
+
return `/docs/imported/overview${anchor}`;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
// Use utility to remove .md extension and preserve anchors
|
|
430
|
+
return `/docs/imported/${removeMarkdownExtension(path)}`;
|
|
431
|
+
},
|
|
432
|
+
});
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Link Transform Utilities
|
|
436
|
+
|
|
437
|
+
- **`extractAnchor(path)`** - Returns `{path, anchor}` separating the anchor fragment
|
|
438
|
+
- **`removeMarkdownExtension(path)`** - Removes `.md`/`.mdx` extensions while preserving anchors
|
|
439
|
+
- **`createLinkTransform(options)`** - Main transform with custom path handling
|
|
440
|
+
|
|
441
|
+
## Asset Import and Management
|
|
442
|
+
|
|
443
|
+
Automatically detect, download, and transform asset references:
|
|
444
|
+
|
|
445
|
+
```typescript
|
|
446
|
+
const REMOTE_CONTENT_WITH_ASSETS: ImportOptions[] = [
|
|
447
|
+
{
|
|
448
|
+
name: "Docs with Assets",
|
|
449
|
+
owner: "your-org",
|
|
450
|
+
repo: "docs-repo",
|
|
451
|
+
includes: [
|
|
452
|
+
{
|
|
453
|
+
pattern: "documentation/**/*.md",
|
|
454
|
+
basePath: "src/content/docs/imported",
|
|
455
|
+
},
|
|
456
|
+
],
|
|
457
|
+
// Asset configuration
|
|
458
|
+
assetsPath: "src/assets/imported",
|
|
459
|
+
assetsBaseUrl: "~/assets/imported", // or "/assets/imported"
|
|
460
|
+
assetPatterns: [".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"],
|
|
461
|
+
},
|
|
462
|
+
];
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
### Asset Management Features
|
|
466
|
+
|
|
467
|
+
- **Automatic detection**: Finds image references in markdown
|
|
468
|
+
- **Smart downloading**: Only downloads assets that have changed
|
|
469
|
+
- **Path transformation**: Updates markdown to use local asset paths
|
|
470
|
+
- **Multiple formats**: Supports various image formats
|
|
471
|
+
|
|
472
|
+
## File Management Strategy
|
|
473
|
+
|
|
474
|
+
> **⚠️ Important: Do not use `clear: true`**
|
|
475
|
+
>
|
|
476
|
+
> The `clear: true` option should not be used with the current implementation due to how Astro content collection syncing works. Mass file deletions can cause Astro to invalidate entire content collections, leading to 404 errors and build instability.
|
|
477
|
+
>
|
|
478
|
+
> **Instead**: If you need to handle file deletions, renames, or path restructuring from the source repository:
|
|
479
|
+
>
|
|
480
|
+
> 1. Manually delete the local import folders (e.g., `src/content/docs/imported`)
|
|
481
|
+
> 2. Re-run the import process
|
|
482
|
+
> 3. Fresh content will be imported with the new structure
|
|
483
|
+
>
|
|
484
|
+
> This approach ensures your site remains stable while handling structural changes.
|
|
485
|
+
|
|
486
|
+
## Change Detection & Dry-Run Mode
|
|
487
|
+
|
|
488
|
+
Check for repository changes without importing:
|
|
489
|
+
|
|
490
|
+
```typescript
|
|
491
|
+
// In your content config
|
|
492
|
+
await githubLoader({
|
|
493
|
+
octokit,
|
|
494
|
+
configs: REMOTE_CONTENT,
|
|
495
|
+
clear: false,
|
|
496
|
+
dryRun: process.env.IMPORT_DRY_RUN === "true",
|
|
497
|
+
}).load(context);
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### Setting up Dry-Run Scripts
|
|
501
|
+
|
|
502
|
+
Add to your `package.json`:
|
|
503
|
+
|
|
504
|
+
```json
|
|
505
|
+
{
|
|
506
|
+
"scripts": {
|
|
507
|
+
"import:check": "IMPORT_DRY_RUN=true astro sync"
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
### Dry-Run Output
|
|
513
|
+
|
|
514
|
+
```bash
|
|
515
|
+
npm run import:check
|
|
516
|
+
|
|
517
|
+
# Output:
|
|
518
|
+
📊 Repository Import Status:
|
|
519
|
+
✅ Documentation: Up to date
|
|
520
|
+
Last imported: 2 hours ago
|
|
521
|
+
🔄 API Reference: Needs re-import
|
|
522
|
+
Latest commit: Add new endpoints
|
|
523
|
+
Committed: 30 minutes ago
|
|
524
|
+
Last imported: 1 day ago
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
### How Change Detection Works
|
|
528
|
+
|
|
529
|
+
The loader uses intelligent, ref-aware change detection:
|
|
530
|
+
|
|
531
|
+
- **Per-ref tracking**: Each `owner/repo@ref` combination is tracked separately
|
|
532
|
+
- **Branch isolation**: Commits to other branches are completely ignored
|
|
533
|
+
- **Tag immutability**: Fixed tags (e.g., `v1.0.0`) never trigger re-imports
|
|
534
|
+
- **Efficient checking**: Only the latest commit of your target ref is checked
|
|
535
|
+
|
|
536
|
+
**Examples**:
|
|
537
|
+
- Config tracking `main` branch → only `main` commits trigger re-import
|
|
538
|
+
- Config tracking `v2.1.0` tag → never re-imports (tags are immutable)
|
|
539
|
+
- Config tracking `feature-branch` → ignores commits to `main`, `develop`, etc.
|
|
540
|
+
- Multiple configs for same repo with different refs → tracked independently
|
|
541
|
+
|
|
542
|
+
This means you can safely track multiple refs from the same repository without unnecessary re-imports when unrelated branches change.
|
|
543
|
+
|
|
544
|
+
## Configuration Options
|
|
545
|
+
|
|
546
|
+
### ImportOptions Interface
|
|
547
|
+
|
|
548
|
+
```typescript
|
|
549
|
+
interface ImportOptions {
|
|
550
|
+
/** Display name for this configuration (used in logging) */
|
|
551
|
+
name?: string;
|
|
552
|
+
|
|
553
|
+
/** GitHub repository owner */
|
|
554
|
+
owner: string;
|
|
555
|
+
|
|
556
|
+
/** GitHub repository name */
|
|
557
|
+
repo: string;
|
|
558
|
+
|
|
559
|
+
/** Git reference (branch, tag, or commit SHA) */
|
|
560
|
+
ref?: string; // defaults to "main"
|
|
561
|
+
|
|
562
|
+
/** Whether this configuration is enabled */
|
|
563
|
+
enabled?: boolean; // defaults to true
|
|
564
|
+
|
|
565
|
+
/** Whether to clear content store (recommend: false) */
|
|
566
|
+
clear?: boolean; // defaults to false
|
|
567
|
+
|
|
568
|
+
/** Array of transform functions applied to all includes */
|
|
569
|
+
transforms?: TransformFunction[];
|
|
570
|
+
|
|
571
|
+
/** Pattern-based import configuration */
|
|
572
|
+
includes?: IncludePattern[];
|
|
573
|
+
|
|
574
|
+
/** Asset management options */
|
|
575
|
+
assetsPath?: string; // Local directory for downloaded assets
|
|
576
|
+
assetsBaseUrl?: string; // Base URL for asset references
|
|
577
|
+
assetPatterns?: string[]; // File extensions to treat as assets
|
|
578
|
+
|
|
579
|
+
/** Link transformation options (applied after all content transforms) */
|
|
580
|
+
linkTransform?: ImportLinkTransformOptions;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
interface ImportLinkTransformOptions {
|
|
584
|
+
/** Base paths to strip from final URLs (e.g., ["src/content/docs"]) */
|
|
585
|
+
stripPrefixes: string[];
|
|
586
|
+
|
|
587
|
+
/** Link mappings to transform URLs in markdown links */
|
|
588
|
+
linkMappings?: LinkMapping[];
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
interface LinkMapping {
|
|
592
|
+
/** Pattern to match (string or regex) */
|
|
593
|
+
pattern: string | RegExp;
|
|
594
|
+
|
|
595
|
+
/** Replacement string or function */
|
|
596
|
+
replacement:
|
|
597
|
+
| string
|
|
598
|
+
| ((match: string, anchor: string, context: any) => string);
|
|
599
|
+
|
|
600
|
+
/** Apply to all links, not just unresolved internal links (default: false) */
|
|
601
|
+
global?: boolean;
|
|
602
|
+
|
|
603
|
+
/** Description for debugging (optional) */
|
|
604
|
+
description?: string;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
interface IncludePattern {
|
|
608
|
+
/** Glob pattern to match files (relative to repository root) */
|
|
609
|
+
pattern: string;
|
|
610
|
+
|
|
611
|
+
/** Local base path where matching files should be imported */
|
|
612
|
+
basePath: string;
|
|
613
|
+
|
|
614
|
+
/** Transforms to apply only to files matching this pattern */
|
|
615
|
+
transforms?: TransformFunction[];
|
|
616
|
+
|
|
617
|
+
/**
|
|
618
|
+
* Map of source paths to target paths for controlling where files are imported.
|
|
619
|
+
*
|
|
620
|
+
* Supports two types of mappings:
|
|
621
|
+
* - **File mapping**: `'docs/README.md': 'docs/overview.md'` - moves a specific file to a new path
|
|
622
|
+
* - **Folder mapping**: `'docs/capabilities/': 'docs/'` - moves all files from source folder to target folder
|
|
623
|
+
*
|
|
624
|
+
* **Important**: Folder mappings require trailing slashes to distinguish from file mappings.
|
|
625
|
+
* - ✅ `'docs/capabilities/': 'docs/'` (folder mapping - moves all files)
|
|
626
|
+
* - ❌ `'docs/capabilities': 'docs/'` (treated as exact file match)
|
|
627
|
+
*/
|
|
628
|
+
pathMappings?: Record<string, string>;
|
|
629
|
+
}
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
### Transform Function Interface
|
|
633
|
+
|
|
634
|
+
```typescript
|
|
635
|
+
interface TransformContext {
|
|
636
|
+
/** Generated ID for the content */
|
|
637
|
+
id: string;
|
|
638
|
+
|
|
639
|
+
/** File path within the repository */
|
|
640
|
+
path: string;
|
|
641
|
+
|
|
642
|
+
/** Full configuration options */
|
|
643
|
+
options: ImportOptions;
|
|
644
|
+
|
|
645
|
+
/** Information about which include pattern matched (if any) */
|
|
646
|
+
matchedPattern?: MatchedPattern;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
type TransformFunction = (content: string, context: TransformContext) => string;
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
## Performance Optimizations
|
|
653
|
+
|
|
654
|
+
The loader includes several optimizations:
|
|
655
|
+
|
|
656
|
+
- **Smart directory scanning**: Only scans directories that match include patterns
|
|
657
|
+
- **Efficient API usage**: Minimizes GitHub API calls through targeted requests
|
|
658
|
+
- **Ref-aware change detection**: Tracks commit SHA for specific git references (branches/tags) to avoid unnecessary downloads when unrelated branches change
|
|
659
|
+
- **Concurrent processing**: Downloads and processes files in parallel
|
|
660
|
+
|
|
661
|
+
## Installation & Setup
|
|
662
|
+
|
|
663
|
+
```bash
|
|
664
|
+
npm install @larkiny/astro-github-loader octokit
|
|
665
|
+
```
|
|
666
|
+
|
|
667
|
+
Set up your GitHub token in `.env`:
|
|
668
|
+
|
|
669
|
+
```bash
|
|
670
|
+
GITHUB_TOKEN=your_github_token_here
|
|
671
|
+
```
|
|
672
|
+
|
|
673
|
+
## License
|
|
674
|
+
|
|
675
|
+
MIT - See LICENSE file for details.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { ImportOptions, LoaderContext, SyncStats } from "./github.types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Performs selective cleanup of obsolete files
|
|
4
|
+
*/
|
|
5
|
+
export declare function performSelectiveCleanup(config: ImportOptions, context: LoaderContext, octokit: any, signal?: AbortSignal): Promise<SyncStats>;
|