@larkiny/astro-github-loader 0.10.1 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,11 +17,11 @@ Load content from GitHub repositories into Astro content collections with flexib
17
17
  import { defineCollection } from "astro:content";
18
18
  import { docsLoader } from "@astrojs/starlight/loaders";
19
19
  import { docsSchema } from "@astrojs/starlight/schema";
20
- import { Octokit } from "octokit";
21
- import { githubLoader } from "@larkiny/astro-github-loader";
22
- import type {
23
- ImportOptions,
24
- LoaderContext,
20
+ import {
21
+ githubLoader,
22
+ createOctokitFromEnv,
23
+ type ImportOptions,
24
+ type LoaderContext,
25
25
  } from "@larkiny/astro-github-loader";
26
26
 
27
27
  const REMOTE_CONTENT: ImportOptions[] = [
@@ -39,7 +39,8 @@ const REMOTE_CONTENT: ImportOptions[] = [
39
39
  },
40
40
  ];
41
41
 
42
- const octokit = new Octokit({ auth: import.meta.env.GITHUB_TOKEN });
42
+ // Automatically uses GitHub App or Personal Access Token based on env vars
43
+ const octokit = createOctokitFromEnv();
43
44
 
44
45
  export const collections = {
45
46
  docs: defineCollection({
@@ -63,6 +64,116 @@ export const collections = {
63
64
  };
64
65
  ```
65
66
 
67
+ ## Authentication
68
+
69
+ The loader supports two authentication methods with different rate limits:
70
+
71
+ | Method | Rate Limit | Best For |
72
+ |--------|-----------|----------|
73
+ | **GitHub App** (Recommended) | 15,000 requests/hour | Production, large imports, organizational use |
74
+ | **Personal Access Token** | 5,000 requests/hour | Development, small imports |
75
+
76
+ ### Option 1: GitHub App Authentication (Recommended - 3x Rate Limit)
77
+
78
+ **Step 1: Create a GitHub App**
79
+
80
+ 1. Go to GitHub Settings → Developer settings → GitHub Apps → [New GitHub App](https://github.com/settings/apps/new)
81
+ 2. Fill in the required fields:
82
+ - **GitHub App name**: `your-org-docs-loader` (or any name)
83
+ - **Homepage URL**: Your documentation site URL
84
+ - **Webhook**: Uncheck "Active" (not needed)
85
+ 3. Set **Repository permissions**:
86
+ - Contents: **Read-only**
87
+ 4. Click **Create GitHub App**
88
+
89
+ **Step 2: Generate Private Key**
90
+
91
+ 1. In your GitHub App settings, scroll to "Private keys"
92
+ 2. Click **Generate a private key**
93
+ 3. Save the downloaded `.pem` file securely
94
+
95
+ **Step 3: Install the App**
96
+
97
+ 1. In your GitHub App settings, click **Install App**
98
+ 2. Select your organization or personal account
99
+ 3. Choose **All repositories** or **Only select repositories**
100
+ 4. Note the **Installation ID** from the URL: `https://github.com/settings/installations/{installation_id}`
101
+
102
+ **Step 4: Configure Environment Variables**
103
+
104
+ ```bash
105
+ # .env
106
+ GITHUB_APP_ID=123456
107
+ GITHUB_APP_INSTALLATION_ID=12345678
108
+ # For the private key, you have two options:
109
+
110
+ # Option A: Direct PEM content (multiline)
111
+ GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----
112
+ MIIEpAIBAAKCAQEA...
113
+ ...
114
+ -----END RSA PRIVATE KEY-----"
115
+
116
+ # Option B: Base64 encoded (single line - easier for .env files)
117
+ # Run: cat your-app.private-key.pem | base64 | tr -d '\n'
118
+ GITHUB_APP_PRIVATE_KEY="LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0..."
119
+ ```
120
+
121
+ **Step 5: Use in Your Config**
122
+
123
+ ```typescript
124
+ import { createOctokitFromEnv } from "@larkiny/astro-github-loader";
125
+
126
+ // Automatically uses GitHub App if env vars are set
127
+ const octokit = createOctokitFromEnv();
128
+ ```
129
+
130
+ ### Option 2: Personal Access Token (PAT)
131
+
132
+ **Step 1: Create a Token**
133
+
134
+ 1. Go to GitHub Settings → Developer settings → Personal access tokens → [Tokens (classic)](https://github.com/settings/tokens)
135
+ 2. Click **Generate new token (classic)**
136
+ 3. Select scopes:
137
+ - `public_repo` (for public repositories)
138
+ - `repo` (for private repositories)
139
+ 4. Generate and copy the token
140
+
141
+ **Step 2: Configure Environment Variable**
142
+
143
+ ```bash
144
+ # .env
145
+ GITHUB_TOKEN=ghp_your_token_here
146
+ ```
147
+
148
+ **Step 3: Use in Your Config**
149
+
150
+ ```typescript
151
+ import { createOctokitFromEnv } from "@larkiny/astro-github-loader";
152
+
153
+ // Automatically falls back to PAT if GitHub App vars aren't set
154
+ const octokit = createOctokitFromEnv();
155
+ ```
156
+
157
+ ### Manual Authentication (Advanced)
158
+
159
+ For more control, you can manually create the Octokit instance:
160
+
161
+ ```typescript
162
+ import { createAuthenticatedOctokit } from "@larkiny/astro-github-loader";
163
+
164
+ // GitHub App (explicit)
165
+ const octokit = createAuthenticatedOctokit({
166
+ appId: process.env.GITHUB_APP_ID!,
167
+ privateKey: process.env.GITHUB_APP_PRIVATE_KEY!,
168
+ installationId: process.env.GITHUB_APP_INSTALLATION_ID!,
169
+ });
170
+
171
+ // Personal Access Token (explicit)
172
+ const octokit = createAuthenticatedOctokit({
173
+ token: process.env.GITHUB_TOKEN!,
174
+ });
175
+ ```
176
+
66
177
  ## Multi-Ref Configuration Example
67
178
 
68
179
  Track multiple git references from the same repository independently:
@@ -71,9 +182,11 @@ Track multiple git references from the same repository independently:
71
182
  import { defineCollection } from "astro:content";
72
183
  import { docsLoader } from "@astrojs/starlight/loaders";
73
184
  import { docsSchema } from "@astrojs/starlight/schema";
74
- import { Octokit } from "octokit";
75
- import { githubLoader } from "@larkiny/astro-github-loader";
76
- import type { ImportOptions } from "@larkiny/astro-github-loader";
185
+ import {
186
+ githubLoader,
187
+ createOctokitFromEnv,
188
+ type ImportOptions,
189
+ } from "@larkiny/astro-github-loader";
77
190
 
78
191
  const MULTI_REF_CONTENT: ImportOptions[] = [
79
192
  {
@@ -114,7 +227,7 @@ const MULTI_REF_CONTENT: ImportOptions[] = [
114
227
  },
115
228
  ];
116
229
 
117
- const octokit = new Octokit({ auth: import.meta.env.GITHUB_TOKEN });
230
+ const octokit = createOctokitFromEnv();
118
231
 
119
232
  export const collections = {
120
233
  docs: defineCollection({
@@ -661,15 +774,23 @@ The loader includes several optimizations:
661
774
  ## Installation & Setup
662
775
 
663
776
  ```bash
664
- npm install @larkiny/astro-github-loader octokit
777
+ npm install @larkiny/astro-github-loader
665
778
  ```
666
779
 
667
- Set up your GitHub token in `.env`:
780
+ Set up your authentication in `.env`:
668
781
 
669
782
  ```bash
670
- GITHUB_TOKEN=your_github_token_here
783
+ # Option 1: GitHub App (recommended - 15,000 requests/hour)
784
+ GITHUB_APP_ID=123456
785
+ GITHUB_APP_INSTALLATION_ID=12345678
786
+ GITHUB_APP_PRIVATE_KEY="-----BEGIN RSA PRIVATE KEY-----..."
787
+
788
+ # Option 2: Personal Access Token (5,000 requests/hour)
789
+ GITHUB_TOKEN=ghp_your_token_here
671
790
  ```
672
791
 
792
+ See the [Authentication](#authentication) section for detailed setup instructions.
793
+
673
794
  ## License
674
795
 
675
796
  MIT - See LICENSE file for details.
@@ -0,0 +1,83 @@
1
+ import { Octokit } from "octokit";
2
+ /**
3
+ * Configuration options for GitHub App authentication
4
+ */
5
+ export interface GitHubAppAuthConfig {
6
+ /** GitHub App ID */
7
+ appId: string | number;
8
+ /** GitHub App private key (PEM format) */
9
+ privateKey: string;
10
+ /** GitHub App installation ID */
11
+ installationId: string | number;
12
+ }
13
+ /**
14
+ * Configuration options for Personal Access Token authentication
15
+ */
16
+ export interface GitHubPATAuthConfig {
17
+ /** Personal Access Token (classic or fine-grained) */
18
+ token: string;
19
+ }
20
+ /**
21
+ * Union type for authentication configuration
22
+ */
23
+ export type GitHubAuthConfig = GitHubAppAuthConfig | GitHubPATAuthConfig;
24
+ /**
25
+ * Creates an authenticated Octokit instance with support for both Personal Access Tokens
26
+ * and GitHub App authentication.
27
+ *
28
+ * **Rate Limits:**
29
+ * - Personal Access Token: 5,000 requests/hour
30
+ * - GitHub App: 15,000 requests/hour (3x higher)
31
+ *
32
+ * **GitHub App Setup:**
33
+ * 1. Create a GitHub App: https://github.com/settings/apps/new
34
+ * 2. Grant required permissions: Contents (read-only)
35
+ * 3. Install the app to your organization/repositories
36
+ * 4. Generate and download a private key
37
+ * 5. Note your App ID and Installation ID
38
+ *
39
+ * @param config - Authentication configuration (PAT or GitHub App)
40
+ * @returns Authenticated Octokit instance
41
+ *
42
+ * @example
43
+ * // Using Personal Access Token
44
+ * const octokit = createAuthenticatedOctokit({
45
+ * token: process.env.GITHUB_TOKEN
46
+ * });
47
+ *
48
+ * @example
49
+ * // Using GitHub App (recommended for higher rate limits)
50
+ * const octokit = createAuthenticatedOctokit({
51
+ * appId: process.env.GITHUB_APP_ID,
52
+ * privateKey: process.env.GITHUB_APP_PRIVATE_KEY,
53
+ * installationId: process.env.GITHUB_APP_INSTALLATION_ID
54
+ * });
55
+ */
56
+ export declare function createAuthenticatedOctokit(config: GitHubAuthConfig): Octokit;
57
+ /**
58
+ * Creates an authenticated Octokit instance from environment variables.
59
+ * Automatically detects whether to use GitHub App or PAT authentication based on
60
+ * which environment variables are present.
61
+ *
62
+ * **Priority:**
63
+ * 1. GitHub App (if GITHUB_APP_ID, GITHUB_APP_PRIVATE_KEY, and GITHUB_APP_INSTALLATION_ID are set)
64
+ * 2. Personal Access Token (if GITHUB_TOKEN is set)
65
+ *
66
+ * **Environment Variables:**
67
+ *
68
+ * For GitHub App (recommended - 15,000 req/hour):
69
+ * - `GITHUB_APP_ID` - Your GitHub App ID
70
+ * - `GITHUB_APP_PRIVATE_KEY` - Private key in PEM format (can be multiline or base64 encoded)
71
+ * - `GITHUB_APP_INSTALLATION_ID` - Installation ID for your org/repos
72
+ *
73
+ * For Personal Access Token (5,000 req/hour):
74
+ * - `GITHUB_TOKEN` - Your personal access token
75
+ *
76
+ * @returns Authenticated Octokit instance
77
+ * @throws Error if no valid authentication credentials are found
78
+ *
79
+ * @example
80
+ * // In your Astro config or content.config.ts
81
+ * const octokit = createOctokitFromEnv();
82
+ */
83
+ export declare function createOctokitFromEnv(): Octokit;
@@ -0,0 +1,119 @@
1
+ import { Octokit } from "octokit";
2
+ import { createAppAuth } from "@octokit/auth-app";
3
+ /**
4
+ * Type guard to check if config is GitHub App authentication
5
+ */
6
+ function isGitHubAppAuth(config) {
7
+ return 'appId' in config && 'privateKey' in config && 'installationId' in config;
8
+ }
9
+ /**
10
+ * Creates an authenticated Octokit instance with support for both Personal Access Tokens
11
+ * and GitHub App authentication.
12
+ *
13
+ * **Rate Limits:**
14
+ * - Personal Access Token: 5,000 requests/hour
15
+ * - GitHub App: 15,000 requests/hour (3x higher)
16
+ *
17
+ * **GitHub App Setup:**
18
+ * 1. Create a GitHub App: https://github.com/settings/apps/new
19
+ * 2. Grant required permissions: Contents (read-only)
20
+ * 3. Install the app to your organization/repositories
21
+ * 4. Generate and download a private key
22
+ * 5. Note your App ID and Installation ID
23
+ *
24
+ * @param config - Authentication configuration (PAT or GitHub App)
25
+ * @returns Authenticated Octokit instance
26
+ *
27
+ * @example
28
+ * // Using Personal Access Token
29
+ * const octokit = createAuthenticatedOctokit({
30
+ * token: process.env.GITHUB_TOKEN
31
+ * });
32
+ *
33
+ * @example
34
+ * // Using GitHub App (recommended for higher rate limits)
35
+ * const octokit = createAuthenticatedOctokit({
36
+ * appId: process.env.GITHUB_APP_ID,
37
+ * privateKey: process.env.GITHUB_APP_PRIVATE_KEY,
38
+ * installationId: process.env.GITHUB_APP_INSTALLATION_ID
39
+ * });
40
+ */
41
+ export function createAuthenticatedOctokit(config) {
42
+ if (isGitHubAppAuth(config)) {
43
+ // GitHub App authentication (15,000 requests/hour)
44
+ return new Octokit({
45
+ authStrategy: createAppAuth,
46
+ auth: {
47
+ appId: config.appId,
48
+ privateKey: config.privateKey,
49
+ installationId: config.installationId,
50
+ },
51
+ });
52
+ }
53
+ else {
54
+ // Personal Access Token authentication (5,000 requests/hour)
55
+ return new Octokit({
56
+ auth: config.token,
57
+ });
58
+ }
59
+ }
60
+ /**
61
+ * Creates an authenticated Octokit instance from environment variables.
62
+ * Automatically detects whether to use GitHub App or PAT authentication based on
63
+ * which environment variables are present.
64
+ *
65
+ * **Priority:**
66
+ * 1. GitHub App (if GITHUB_APP_ID, GITHUB_APP_PRIVATE_KEY, and GITHUB_APP_INSTALLATION_ID are set)
67
+ * 2. Personal Access Token (if GITHUB_TOKEN is set)
68
+ *
69
+ * **Environment Variables:**
70
+ *
71
+ * For GitHub App (recommended - 15,000 req/hour):
72
+ * - `GITHUB_APP_ID` - Your GitHub App ID
73
+ * - `GITHUB_APP_PRIVATE_KEY` - Private key in PEM format (can be multiline or base64 encoded)
74
+ * - `GITHUB_APP_INSTALLATION_ID` - Installation ID for your org/repos
75
+ *
76
+ * For Personal Access Token (5,000 req/hour):
77
+ * - `GITHUB_TOKEN` - Your personal access token
78
+ *
79
+ * @returns Authenticated Octokit instance
80
+ * @throws Error if no valid authentication credentials are found
81
+ *
82
+ * @example
83
+ * // In your Astro config or content.config.ts
84
+ * const octokit = createOctokitFromEnv();
85
+ */
86
+ export function createOctokitFromEnv() {
87
+ // Check for GitHub App credentials (preferred)
88
+ const appId = process.env.GITHUB_APP_ID;
89
+ const privateKey = process.env.GITHUB_APP_PRIVATE_KEY;
90
+ const installationId = process.env.GITHUB_APP_INSTALLATION_ID;
91
+ if (appId && privateKey && installationId) {
92
+ // Decode private key if it's base64 encoded (for easier .env storage)
93
+ let decodedPrivateKey = privateKey;
94
+ if (!privateKey.includes('BEGIN RSA PRIVATE KEY') && !privateKey.includes('BEGIN PRIVATE KEY')) {
95
+ try {
96
+ decodedPrivateKey = Buffer.from(privateKey, 'base64').toString('utf-8');
97
+ }
98
+ catch {
99
+ // If decoding fails, use as-is (might already be plaintext)
100
+ }
101
+ }
102
+ console.log('✓ Using GitHub App authentication (15,000 requests/hour)');
103
+ return createAuthenticatedOctokit({
104
+ appId,
105
+ privateKey: decodedPrivateKey,
106
+ installationId,
107
+ });
108
+ }
109
+ // Fallback to Personal Access Token
110
+ const token = process.env.GITHUB_TOKEN;
111
+ if (token) {
112
+ console.log('✓ Using Personal Access Token authentication (5,000 requests/hour)');
113
+ console.log('💡 Consider switching to GitHub App for 3x higher rate limits');
114
+ return createAuthenticatedOctokit({ token });
115
+ }
116
+ throw new Error('No GitHub authentication credentials found. Please set either:\n' +
117
+ ' - GITHUB_TOKEN (for PAT authentication)\n' +
118
+ ' - GITHUB_APP_ID, GITHUB_APP_PRIVATE_KEY, GITHUB_APP_INSTALLATION_ID (for GitHub App authentication)');
119
+ }
@@ -555,26 +555,71 @@ export async function toCollectionEntry({ context, octokit, options, signal, for
555
555
  throw new TypeError(INVALID_STRING_ERROR);
556
556
  // Get logger from context - it should be our Logger instance (initialize early)
557
557
  const logger = context.logger;
558
- // Get all unique directory prefixes from include patterns to limit scanning
559
- const directoriesToScan = new Set();
560
- if (options.includes && options.includes.length > 0) {
561
- for (const includePattern of options.includes) {
562
- // Extract directory part from pattern (before any glob wildcards)
563
- const pattern = includePattern.pattern;
564
- const beforeGlob = pattern.split(/[*?{]/)[0];
565
- const dirPart = beforeGlob.includes('/') ? beforeGlob.substring(0, beforeGlob.lastIndexOf('/')) : '';
566
- directoriesToScan.add(dirPart);
567
- }
568
- }
569
- else {
570
- // If no includes specified, scan from root
571
- directoriesToScan.add('');
572
- }
558
+ /**
559
+ * OPTIMIZATION: Use Git Trees API for efficient file discovery
560
+ *
561
+ * This replaces the previous recursive directory traversal approach which made
562
+ * N API calls (one per directory) with a single API call to fetch the entire
563
+ * repository tree structure.
564
+ *
565
+ * Benefits:
566
+ * - Reduces API calls by 50-70% for typical repositories
567
+ * - Single getTree() call retrieves all file paths at once
568
+ * - Reduces rate limit pressure significantly
569
+ * - Faster for large repositories with deep directory structures
570
+ *
571
+ * Previous approach:
572
+ * - Called repos.getContent() recursively for each directory
573
+ * - Example: 10 directories = 10 API calls
574
+ *
575
+ * New approach:
576
+ * - 1 call to repos.listCommits() to get commit SHA
577
+ * - 1 call to git.getTree() to get entire file tree
578
+ * - Total: 2 API calls regardless of repository structure
579
+ */
580
+ logger.debug(`Using Git Trees API for efficient file discovery`);
581
+ // Get the commit SHA for the ref
582
+ const { data: commits } = await octokit.rest.repos.listCommits({
583
+ owner,
584
+ repo,
585
+ sha: ref,
586
+ per_page: 1,
587
+ request: { signal }
588
+ });
589
+ if (commits.length === 0) {
590
+ throw new Error(`No commits found for ref ${ref}`);
591
+ }
592
+ const commitSha = commits[0].sha;
593
+ const treeSha = commits[0].commit.tree.sha;
594
+ logger.debug(`Fetching repository tree for commit ${commitSha.slice(0, 7)}`);
595
+ // Get the entire repository tree in a single API call
596
+ const { data: treeData } = await octokit.rest.git.getTree({
597
+ owner,
598
+ repo,
599
+ tree_sha: treeSha,
600
+ recursive: "true",
601
+ request: { signal }
602
+ });
603
+ logger.debug(`Retrieved ${treeData.tree.length} items from repository tree`);
604
+ // Filter tree to only include files (not dirs/submodules) that match our patterns
605
+ const fileEntries = treeData.tree.filter((item) => {
606
+ if (item.type !== 'blob')
607
+ return false; // Only process files (blobs)
608
+ const includeCheck = shouldIncludeFile(item.path, options);
609
+ return includeCheck.included;
610
+ });
611
+ logger.info(`Found ${fileEntries.length} files matching include patterns (filtered from ${treeData.tree.length} total items)`);
573
612
  // Collect all files first (with content transforms applied)
574
613
  const allFiles = [];
575
- for (const dirPath of directoriesToScan) {
576
- const files = await collectFilesRecursively(dirPath);
577
- allFiles.push(...files);
614
+ for (const treeItem of fileEntries) {
615
+ const filePath = treeItem.path;
616
+ // Construct the download URL (raw.githubusercontent.com format)
617
+ const downloadUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${commitSha}/${filePath}`;
618
+ const editUrl = treeItem.url || ''; // Git blob URL (use empty string as fallback)
619
+ const fileData = await collectFileData({ url: downloadUrl, editUrl }, filePath);
620
+ if (fileData) {
621
+ allFiles.push(fileData);
622
+ }
578
623
  }
579
624
  // Track statistics
580
625
  const stats = {
@@ -618,58 +663,6 @@ export async function toCollectionEntry({ context, octokit, options, signal, for
618
663
  }
619
664
  }
620
665
  return stats;
621
- // Helper function to collect files without storing them
622
- async function collectFilesRecursively(path) {
623
- const collectedFiles = [];
624
- // Fetch the content
625
- const { data, status } = await octokit.rest.repos.getContent({
626
- owner,
627
- repo,
628
- path,
629
- ref,
630
- request: { signal },
631
- });
632
- if (status !== 200)
633
- throw new Error(INVALID_SERVICE_RESPONSE);
634
- // Handle single file
635
- if (!Array.isArray(data)) {
636
- const filePath = data.path;
637
- if (data.type === "file") {
638
- const fileData = await collectFileData({ url: data.download_url, editUrl: data.url }, filePath);
639
- if (fileData) {
640
- collectedFiles.push(fileData);
641
- }
642
- }
643
- return collectedFiles;
644
- }
645
- // Directory listing - process files and recurse into subdirectories
646
- const filteredEntries = data
647
- .filter(({ type, path }) => {
648
- // Always include directories for recursion
649
- if (type === "dir")
650
- return true;
651
- // Apply filtering logic to files
652
- if (type === "file") {
653
- return shouldIncludeFile(path, options).included;
654
- }
655
- return false;
656
- });
657
- for (const { type, path, download_url, url } of filteredEntries) {
658
- if (type === "dir") {
659
- // Recurse into subdirectory
660
- const subDirFiles = await collectFilesRecursively(path);
661
- collectedFiles.push(...subDirFiles);
662
- }
663
- else if (type === "file") {
664
- // Process file
665
- const fileData = await collectFileData({ url: download_url, editUrl: url }, path);
666
- if (fileData) {
667
- collectedFiles.push(fileData);
668
- }
669
- }
670
- }
671
- return collectedFiles;
672
- }
673
666
  // Helper function to collect file data with content transforms applied
674
667
  async function collectFileData({ url, editUrl }, filePath) {
675
668
  if (url === null || typeof url !== "string") {
@@ -0,0 +1 @@
1
+ export {};