@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-github 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/config.d.ts CHANGED
@@ -15,7 +15,7 @@ export interface Config {
15
15
  * @visibility backend
16
16
  */
17
17
  appId: string | number;
18
- /**
18
+ /**
19
19
  * GitHub App private key
20
20
  * @visibility secret
21
21
  */
@@ -7,12 +7,20 @@ const createGitHubService = async ({
7
7
  const { App } = await import('octokit');
8
8
  const owner = config.getString("aiAssistant.ingestors.github.owner");
9
9
  const appId = config.getString("aiAssistant.ingestors.github.appId");
10
- const privateKey = config.getString("aiAssistant.ingestors.github.privateKey");
11
- const installationId = config.getNumber("aiAssistant.ingestors.github.installationId");
12
- const baseUrl = config.getOptionalString("aiAssistant.ingestors.github.baseUrl");
10
+ const privateKey = config.getString(
11
+ "aiAssistant.ingestors.github.privateKey"
12
+ );
13
+ const installationId = config.getNumber(
14
+ "aiAssistant.ingestors.github.installationId"
15
+ );
16
+ const baseUrl = config.getOptionalString(
17
+ "aiAssistant.ingestors.github.baseUrl"
18
+ );
13
19
  logger.info(`Connecting to GitHub App for owner: ${owner}`);
14
20
  if (!owner || !appId || !privateKey || !installationId) {
15
- throw new Error("GitHub owner, appId, privateKey, and installationId are required");
21
+ throw new Error(
22
+ "GitHub owner, appId, privateKey, and installationId are required"
23
+ );
16
24
  }
17
25
  const app = new App({
18
26
  appId,
@@ -63,9 +71,13 @@ const createGitHubService = async ({
63
71
  throw new Error(`Expected file but got directory for path: ${path}`);
64
72
  }
65
73
  if (!("content" in fileContent) || fileContent.type !== "file") {
66
- throw new Error(`Expected file but got ${fileContent.type} for path: ${path}`);
74
+ throw new Error(
75
+ `Expected file but got ${fileContent.type} for path: ${path}`
76
+ );
67
77
  }
68
- const content = Buffer.from(fileContent.content, "base64").toString("utf-8");
78
+ const content = Buffer.from(fileContent.content, "base64").toString(
79
+ "utf-8"
80
+ );
69
81
  return content;
70
82
  };
71
83
  return { owner, getRepos, getRepoFiles, getRepoFileContent };
@@ -1 +1 @@
1
- {"version":3,"file":"github.cjs.js","sources":["../../src/services/github.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\n\nexport const createGitHubService = async ({\n config,\n logger,\n}: {\n config: RootConfigService;\n logger: LoggerService;\n}) => {\n // Dynamic import for ESM-only octokit v5\n const { App } = await import('octokit');\n // Get configuration values\n const owner = config.getString('aiAssistant.ingestors.github.owner');\n const appId = config.getString('aiAssistant.ingestors.github.appId');\n const privateKey = config.getString('aiAssistant.ingestors.github.privateKey');\n const installationId = config.getNumber('aiAssistant.ingestors.github.installationId');\n const baseUrl = config.getOptionalString('aiAssistant.ingestors.github.baseUrl');\n\n logger.info(`Connecting to GitHub App for owner: ${owner}`);\n\n if (!owner || !appId || !privateKey || !installationId) {\n throw new Error('GitHub owner, appId, privateKey, and installationId are required');\n }\n\n // Create GitHub App instance\n const app = new App({\n appId,\n privateKey,\n ...(baseUrl && { baseUrl }),\n });\n\n // Get installation-specific Octokit instance\n const octokit = await app.getInstallationOctokit(installationId);\n\n logger.info(`Connected to GitHub App for owner: ${owner}`);\n\n /**\n * Get a list of repositories for the specified GitHub owner\n * @returns List of repositories for the specified GitHub owner\n */\n const getRepos = async () => {\n const { data: repositories } = await octokit.rest.apps.listReposAccessibleToInstallation({\n per_page: 100,\n });\n\n // Filter repositories by owner if needed\n const repos = repositories.repositories.filter(repo => \n repo.owner?.login?.toLowerCase() === owner.toLowerCase()\n );\n\n logger.info(`Found ${repos.length} repositories for owner ${owner}`);\n\n return repos;\n };\n\n /**\n * Get a list of files in the specified GitHub repository\n * @param repoName The name of the repository\n * @param fileTypes Optional list of file types to filter by\n * @returns List of files in the specified GitHub repository\n */\n const getRepoFiles = async (repoName: string, fileTypes?: string[]) => {\n const { data: tree } = await octokit.rest.git.getTree({\n owner,\n repo: repoName,\n tree_sha: 'HEAD',\n recursive: 'true',\n });\n\n // Filter to only files (not directories)\n const files = tree.tree.filter((item: any) => item.type === 'blob');\n\n logger.info(`Found ${files.length} files in GitHub repository ${repoName}`);\n\n if (fileTypes && fileTypes.length > 0) {\n const filteredFiles = files.filter((file: any) =>\n fileTypes.some(type => file.path?.endsWith(type))\n );\n logger.info(\n `Filtered to ${filteredFiles.length} files with types: ${fileTypes.join(\n ', ',\n )}`,\n );\n return filteredFiles;\n }\n\n return files;\n };\n\n /**\n * Get the content of a specific file in a GitHub repository\n * @param repoName The name of the repository\n * @param path The path of the file\n * @returns The content of the file\n */\n const getRepoFileContent = async (repoName: string, path: string) => {\n const { data: fileContent } = await octokit.rest.repos.getContent({\n owner,\n repo: repoName,\n path,\n });\n\n if (Array.isArray(fileContent)) {\n throw new Error(`Expected file but got directory for path: ${path}`);\n }\n\n if (!('content' in fileContent) || fileContent.type !== 'file') {\n throw new Error(`Expected file but got ${fileContent.type} for path: ${path}`);\n }\n\n // Decode base64 content\n const content = Buffer.from(fileContent.content, 'base64').toString('utf-8');\n\n return content;\n };\n\n return { owner, getRepos, getRepoFiles, getRepoFileContent };\n};\n"],"names":[],"mappings":";;AAKO,MAAM,sBAAsB,OAAO;AAAA,EACxC,MAAA;AAAA,EACA;AACF,CAAA,KAGM;AAEJ,EAAA,MAAM,EAAE,GAAA,EAAI,GAAI,MAAM,OAAO,SAAS,CAAA;AAEtC,EAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,SAAA,CAAU,oCAAoC,CAAA;AACnE,EAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,SAAA,CAAU,oCAAoC,CAAA;AACnE,EAAA,MAAM,UAAA,GAAa,MAAA,CAAO,SAAA,CAAU,yCAAyC,CAAA;AAC7E,EAAA,MAAM,cAAA,GAAiB,MAAA,CAAO,SAAA,CAAU,6CAA6C,CAAA;AACrF,EAAA,MAAM,OAAA,GAAU,MAAA,CAAO,iBAAA,CAAkB,sCAAsC,CAAA;AAE/E,EAAA,MAAA,CAAO,IAAA,CAAK,CAAA,oCAAA,EAAuC,KAAK,CAAA,CAAE,CAAA;AAE1D,EAAA,IAAI,CAAC,KAAA,IAAS,CAAC,SAAS,CAAC,UAAA,IAAc,CAAC,cAAA,EAAgB;AACtD,IAAA,MAAM,IAAI,MAAM,kEAAkE,CAAA;AAAA,EACpF;AAGA,EAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI;AAAA,IAClB,KAAA;AAAA,IACA,UAAA;AAAA,IACA,GAAI,OAAA,IAAW,EAAE,OAAA;AAAQ,GAC1B,CAAA;AAGD,EAAA,MAAM,OAAA,GAAU,MAAM,GAAA,CAAI,sBAAA,CAAuB,cAAc,CAAA;AAE/D,EAAA,MAAA,CAAO,IAAA,CAAK,CAAA,mCAAA,EAAsC,KAAK,CAAA,CAAE,CAAA;AAMzD,EAAA,MAAM,WAAW,YAAY;AAC3B,IAAA,MAAM,EAAE,MAAM,YAAA,EAAa,GAAI,MAAM,OAAA,CAAQ,IAAA,CAAK,KAAK,iCAAA,CAAkC;AAAA,MACvF,QAAA,EAAU;AAAA,KACX,CAAA;AAGD,IAAA,MAAM,KAAA,GAAQ,aAAa,YAAA,CAAa,MAAA;AAAA,MAAO,UAC7C,IAAA,CAAK,KAAA,EAAO,OAAO,WAAA,EAAY,KAAM,MAAM,WAAA;AAAY,KACzD;AAEA,IAAA,MAAA,CAAO,KAAK,CAAA,MAAA,EAAS,KAAA,CAAM,MAAM,CAAA,wBAAA,EAA2B,KAAK,CAAA,CAAE,CAAA;AAEnE,IAAA,OAAO,KAAA;AAAA,EACT,CAAA;AAQA,EAAA,MAAM,YAAA,GAAe,OAAO,QAAA,EAAkB,SAAA,KAAyB;AACrE,IAAA,MAAM,EAAE,MAAM,IAAA,EAAK,GAAI,MAAM,OAAA,CAAQ,IAAA,CAAK,IAAI,OAAA,CAAQ;AAAA,MACpD,KAAA;AAAA,MACA,IAAA,EAAM,QAAA;AAAA,MACN,QAAA,EAAU,MAAA;AAAA,MACV,SAAA,EAAW;AAAA,KACZ,CAAA;AAGD,IAAA,MAAM,KAAA,GAAQ,KAAK,IAAA,CAAK,MAAA,CAAO,CAAC,IAAA,KAAc,IAAA,CAAK,SAAS,MAAM,CAAA;AAElE,IAAA,MAAA,CAAO,KAAK,CAAA,MAAA,EAAS,KAAA,CAAM,MAAM,CAAA,4BAAA,EAA+B,QAAQ,CAAA,CAAE,CAAA;AAE1E,IAAA,IAAI,SAAA,IAAa,SAAA,CAAU,MAAA,GAAS,CAAA,EAAG;AACrC,MAAA,MAAM,gBAAgB,KAAA,CAAM,MAAA;AAAA,QAAO,CAAC,SAClC,SAAA,CAAU,IAAA,CAAK,UAAQ,IAAA,CAAK,IAAA,EAAM,QAAA,CAAS,IAAI,CAAC;AAAA,OAClD;AACA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,YAAA,EAAe,aAAA,CAAc,MAAM,CAAA,mBAAA,EAAsB,SAAA,CAAU,IAAA;AAAA,UACjE;AAAA,SACD,CAAA;AAAA,OACH;AACA,MAAA,OAAO,aAAA;AAAA,IACT;AAEA,IAAA,OAAO,KAAA;AAAA,EACT,CAAA;AAQA,EAAA,MAAM,kBAAA,GAAqB,OAAO,QAAA,EAAkB,IAAA,KAAiB;AACnE,IAAA,MAAM,EAAE,MAAM,WAAA,EAAY,GAAI,MAAM,OAAA,CAAQ,IAAA,CAAK,MAAM,UAAA,CAAW;AAAA,MAChE,KAAA;AAAA,MACA,IAAA,EAAM,QAAA;AAAA,MACN;AAAA,KACD,CAAA;AAED,IAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,0CAAA,EAA6C,IAAI,CAAA,CAAE,CAAA;AAAA,IACrE;AAEA,IAAA,IAAI,EAAE,SAAA,IAAa,WAAA,CAAA,IAAgB,WAAA,CAAY,SAAS,MAAA,EAAQ;AAC9D,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,sBAAA,EAAyB,YAAY,IAAI,CAAA,WAAA,EAAc,IAAI,CAAA,CAAE,CAAA;AAAA,IAC/E;AAGA,IAAA,MAAM,OAAA,GAAU,OAAO,IAAA,CAAK,WAAA,CAAY,SAAS,QAAQ,CAAA,CAAE,SAAS,OAAO,CAAA;AAE3E,IAAA,OAAO,OAAA;AAAA,EACT,CAAA;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,QAAA,EAAU,YAAA,EAAc,kBAAA,EAAmB;AAC7D;;;;"}
1
+ {"version":3,"file":"github.cjs.js","sources":["../../src/services/github.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\n\nexport const createGitHubService = async ({\n config,\n logger,\n}: {\n config: RootConfigService;\n logger: LoggerService;\n}) => {\n // Dynamic import for ESM-only octokit v5\n const { App } = await import('octokit');\n // Get configuration values\n const owner = config.getString('aiAssistant.ingestors.github.owner');\n const appId = config.getString('aiAssistant.ingestors.github.appId');\n const privateKey = config.getString(\n 'aiAssistant.ingestors.github.privateKey',\n );\n const installationId = config.getNumber(\n 'aiAssistant.ingestors.github.installationId',\n );\n const baseUrl = config.getOptionalString(\n 'aiAssistant.ingestors.github.baseUrl',\n );\n\n logger.info(`Connecting to GitHub App for owner: ${owner}`);\n\n if (!owner || !appId || !privateKey || !installationId) {\n throw new Error(\n 'GitHub owner, appId, privateKey, and installationId are required',\n );\n }\n\n // Create GitHub App instance\n const app = new App({\n appId,\n privateKey,\n ...(baseUrl && { baseUrl }),\n });\n\n // Get installation-specific Octokit instance\n const octokit = await app.getInstallationOctokit(installationId);\n\n logger.info(`Connected to GitHub App for owner: ${owner}`);\n\n /**\n * Get a list of repositories for the specified GitHub owner\n * @returns List of repositories for the specified GitHub owner\n */\n const getRepos = async () => {\n const { data: repositories } =\n await octokit.rest.apps.listReposAccessibleToInstallation({\n per_page: 100,\n });\n\n // Filter repositories by owner if needed\n const repos = repositories.repositories.filter(\n repo => repo.owner?.login?.toLowerCase() === owner.toLowerCase(),\n );\n\n logger.info(`Found ${repos.length} repositories for owner ${owner}`);\n\n return repos;\n };\n\n /**\n * Get a list of files in the specified GitHub repository\n * @param repoName The name of the repository\n * @param fileTypes Optional list of file types to filter by\n * @returns List of files in the specified GitHub repository\n */\n const getRepoFiles = async (repoName: string, fileTypes?: string[]) => {\n const { data: tree } = await octokit.rest.git.getTree({\n owner,\n repo: repoName,\n tree_sha: 'HEAD',\n recursive: 'true',\n });\n\n // Filter to only files (not directories)\n const files = tree.tree.filter((item: any) => item.type === 'blob');\n\n logger.info(`Found ${files.length} files in GitHub repository ${repoName}`);\n\n if (fileTypes && fileTypes.length > 0) {\n const filteredFiles = files.filter((file: any) =>\n fileTypes.some(type => file.path?.endsWith(type)),\n );\n logger.info(\n `Filtered to ${filteredFiles.length} files with types: ${fileTypes.join(\n ', ',\n )}`,\n );\n return filteredFiles;\n }\n\n return files;\n };\n\n /**\n * Get the content of a specific file in a GitHub repository\n * @param repoName The name of the repository\n * @param path The path of the file\n * @returns The content of the file\n */\n const getRepoFileContent = async (repoName: string, path: string) => {\n const { data: fileContent } = await octokit.rest.repos.getContent({\n owner,\n repo: repoName,\n path,\n });\n\n if (Array.isArray(fileContent)) {\n throw new Error(`Expected file but got directory for path: ${path}`);\n }\n\n if (!('content' in fileContent) || fileContent.type !== 'file') {\n throw new Error(\n `Expected file but got ${fileContent.type} for path: ${path}`,\n );\n }\n\n // Decode base64 content\n const content = Buffer.from(fileContent.content, 'base64').toString(\n 'utf-8',\n );\n\n return content;\n };\n\n return { owner, getRepos, getRepoFiles, getRepoFileContent };\n};\n"],"names":[],"mappings":";;AAKO,MAAM,sBAAsB,OAAO;AAAA,EACxC,MAAA;AAAA,EACA;AACF,CAAA,KAGM;AAEJ,EAAA,MAAM,EAAE,GAAA,EAAI,GAAI,MAAM,OAAO,SAAS,CAAA;AAEtC,EAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,SAAA,CAAU,oCAAoC,CAAA;AACnE,EAAA,MAAM,KAAA,GAAQ,MAAA,CAAO,SAAA,CAAU,oCAAoC,CAAA;AACnE,EAAA,MAAM,aAAa,MAAA,CAAO,SAAA;AAAA,IACxB;AAAA,GACF;AACA,EAAA,MAAM,iBAAiB,MAAA,CAAO,SAAA;AAAA,IAC5B;AAAA,GACF;AACA,EAAA,MAAM,UAAU,MAAA,CAAO,iBAAA;AAAA,IACrB;AAAA,GACF;AAEA,EAAA,MAAA,CAAO,IAAA,CAAK,CAAA,oCAAA,EAAuC,KAAK,CAAA,CAAE,CAAA;AAE1D,EAAA,IAAI,CAAC,KAAA,IAAS,CAAC,SAAS,CAAC,UAAA,IAAc,CAAC,cAAA,EAAgB;AACtD,IAAA,MAAM,IAAI,KAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAGA,EAAA,MAAM,GAAA,GAAM,IAAI,GAAA,CAAI;AAAA,IAClB,KAAA;AAAA,IACA,UAAA;AAAA,IACA,GAAI,OAAA,IAAW,EAAE,OAAA;AAAQ,GAC1B,CAAA;AAGD,EAAA,MAAM,OAAA,GAAU,MAAM,GAAA,CAAI,sBAAA,CAAuB,cAAc,CAAA;AAE/D,EAAA,MAAA,CAAO,IAAA,CAAK,CAAA,mCAAA,EAAsC,KAAK,CAAA,CAAE,CAAA;AAMzD,EAAA,MAAM,WAAW,YAAY;AAC3B,IAAA,MAAM,EAAE,MAAM,YAAA,EAAa,GACzB,MAAM,OAAA,CAAQ,IAAA,CAAK,KAAK,iCAAA,CAAkC;AAAA,MACxD,QAAA,EAAU;AAAA,KACX,CAAA;AAGH,IAAA,MAAM,KAAA,GAAQ,aAAa,YAAA,CAAa,MAAA;AAAA,MACtC,UAAQ,IAAA,CAAK,KAAA,EAAO,OAAO,WAAA,EAAY,KAAM,MAAM,WAAA;AAAY,KACjE;AAEA,IAAA,MAAA,CAAO,KAAK,CAAA,MAAA,EAAS,KAAA,CAAM,MAAM,CAAA,wBAAA,EAA2B,KAAK,CAAA,CAAE,CAAA;AAEnE,IAAA,OAAO,KAAA;AAAA,EACT,CAAA;AAQA,EAAA,MAAM,YAAA,GAAe,OAAO,QAAA,EAAkB,SAAA,KAAyB;AACrE,IAAA,MAAM,EAAE,MAAM,IAAA,EAAK,GAAI,MAAM,OAAA,CAAQ,IAAA,CAAK,IAAI,OAAA,CAAQ;AAAA,MACpD,KAAA;AAAA,MACA,IAAA,EAAM,QAAA;AAAA,MACN,QAAA,EAAU,MAAA;AAAA,MACV,SAAA,EAAW;AAAA,KACZ,CAAA;AAGD,IAAA,MAAM,KAAA,GAAQ,KAAK,IAAA,CAAK,MAAA,CAAO,CAAC,IAAA,KAAc,IAAA,CAAK,SAAS,MAAM,CAAA;AAElE,IAAA,MAAA,CAAO,KAAK,CAAA,MAAA,EAAS,KAAA,CAAM,MAAM,CAAA,4BAAA,EAA+B,QAAQ,CAAA,CAAE,CAAA;AAE1E,IAAA,IAAI,SAAA,IAAa,SAAA,CAAU,MAAA,GAAS,CAAA,EAAG;AACrC,MAAA,MAAM,gBAAgB,KAAA,CAAM,MAAA;AAAA,QAAO,CAAC,SAClC,SAAA,CAAU,IAAA,CAAK,UAAQ,IAAA,CAAK,IAAA,EAAM,QAAA,CAAS,IAAI,CAAC;AAAA,OAClD;AACA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,YAAA,EAAe,aAAA,CAAc,MAAM,CAAA,mBAAA,EAAsB,SAAA,CAAU,IAAA;AAAA,UACjE;AAAA,SACD,CAAA;AAAA,OACH;AACA,MAAA,OAAO,aAAA;AAAA,IACT;AAEA,IAAA,OAAO,KAAA;AAAA,EACT,CAAA;AAQA,EAAA,MAAM,kBAAA,GAAqB,OAAO,QAAA,EAAkB,IAAA,KAAiB;AACnE,IAAA,MAAM,EAAE,MAAM,WAAA,EAAY,GAAI,MAAM,OAAA,CAAQ,IAAA,CAAK,MAAM,UAAA,CAAW;AAAA,MAChE,KAAA;AAAA,MACA,IAAA,EAAM,QAAA;AAAA,MACN;AAAA,KACD,CAAA;AAED,IAAA,IAAI,KAAA,CAAM,OAAA,CAAQ,WAAW,CAAA,EAAG;AAC9B,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,0CAAA,EAA6C,IAAI,CAAA,CAAE,CAAA;AAAA,IACrE;AAEA,IAAA,IAAI,EAAE,SAAA,IAAa,WAAA,CAAA,IAAgB,WAAA,CAAY,SAAS,MAAA,EAAQ;AAC9D,MAAA,MAAM,IAAI,KAAA;AAAA,QACR,CAAA,sBAAA,EAAyB,WAAA,CAAY,IAAI,CAAA,WAAA,EAAc,IAAI,CAAA;AAAA,OAC7D;AAAA,IACF;AAGA,IAAA,MAAM,UAAU,MAAA,CAAO,IAAA,CAAK,WAAA,CAAY,OAAA,EAAS,QAAQ,CAAA,CAAE,QAAA;AAAA,MACzD;AAAA,KACF;AAEA,IAAA,OAAO,OAAA;AAAA,EACT,CAAA;AAEA,EAAA,OAAO,EAAE,KAAA,EAAO,QAAA,EAAU,YAAA,EAAc,kBAAA,EAAmB;AAC7D;;;;"}
@@ -11,12 +11,8 @@ const createGitHubIngestor = async ({
11
11
  }) => {
12
12
  const defaultFileTypes = [".md", ".json"];
13
13
  const repositoriesFilter = config.getOptional("aiAssistant.ingestors.github.repositories");
14
- const fileTypes = config.getOptionalStringArray(
15
- "aiAssistant.ingestors.github.fileTypes"
16
- ) ?? defaultFileTypes;
17
- const filesBatchSize = config.getOptionalNumber(
18
- "aiAssistant.ingestors.github.filesBatchSize"
19
- ) ?? defaultFileBatchSize.DEFAULT_FILE_BATCH_SIZE;
14
+ const fileTypes = config.getOptionalStringArray("aiAssistant.ingestors.github.fileTypes") ?? defaultFileTypes;
15
+ const filesBatchSize = config.getOptionalNumber("aiAssistant.ingestors.github.filesBatchSize") ?? defaultFileBatchSize.DEFAULT_FILE_BATCH_SIZE;
20
16
  const githubService = await github.createGitHubService({ config, logger });
21
17
  const ingestRepositoryByFileBatch = async ({
22
18
  repo,
@@ -44,7 +40,10 @@ const createGitHubIngestor = async ({
44
40
  repo.name,
45
41
  file.path
46
42
  );
47
- const completionStats = backstagePluginAiAssistantCommon.getProgressStats(globalIndex + 1, files.length);
43
+ const completionStats = backstagePluginAiAssistantCommon.getProgressStats(
44
+ globalIndex + 1,
45
+ files.length
46
+ );
48
47
  logger.info(
49
48
  `Retrieved content for GitHub file: "${file.path}" in repository: "${repo.name}" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`
50
49
  );
@@ -1 +1 @@
1
- {"version":3,"file":"ingestor.cjs.js","sources":["../../src/services/ingestor.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { createGitHubService } from './github';\nimport {\n EmbeddingDocument,\n Ingestor,\n IngestorOptions,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { MODULE_ID } from '../constants/module';\nimport { Config } from '../../config';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_FILE_BATCH_SIZE } from '../constants/default-file-batch-size';\n\nexport const createGitHubIngestor = async ({\n config,\n logger,\n}: {\n config: RootConfigService;\n logger: LoggerService;\n}): Promise<Ingestor> => {\n // Default to common file types if none are specified\n const defaultFileTypes = ['.md', '.json'];\n\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['github']['repositories']\n >('aiAssistant.ingestors.github.repositories');\n\n const fileTypes =\n config.getOptionalStringArray(\n 'aiAssistant.ingestors.github.fileTypes',\n ) ?? defaultFileTypes;\n\n // Get batch size for processing files (default to 50 files per batch)\n const filesBatchSize =\n config.getOptionalNumber(\n 'aiAssistant.ingestors.github.filesBatchSize',\n ) ?? DEFAULT_FILE_BATCH_SIZE;\n\n // Create GitHub service\n const githubService = await createGitHubService({ config, logger });\n\n /** Ingest GitHub repository files in batches\n * @param repo - The repository to ingest files from\n * @param files - The list of files to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepositoryByFileBatch = async ({\n repo,\n files,\n saveDocumentsBatch,\n }: {\n repo: any;\n files: any[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${files.length} files from repository \"${repo.name}\" in batches of ${filesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process files in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(files.length / filesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < files.length;\n batchStart += filesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + filesBatchSize, files.length);\n const filesBatch = files.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / filesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${filesBatch.length} files) for repository \"${repo.name}\"`,\n );\n\n // Generate embedding documents for each file in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < filesBatch.length; index++) {\n const file = filesBatch[index];\n const globalIndex = batchStart + index;\n\n try {\n const content = await githubService.getRepoFileContent(\n repo.name,\n file.path!,\n );\n\n const completionStats = getProgressStats(globalIndex + 1, files.length);\n\n logger.info(\n `Retrieved content for GitHub file: \"${file.path}\" in repository: \"${repo.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n // Generate proper GitHub URL for the file\n const githubUrl = `https://github.com/${githubService.owner}/${repo.name}/blob/${repo.default_branch || 'main'}/${file.path}`;\n \n // Create enhanced content with URL reference and metadata\n const enhancedContent = `Repository: ${repo.name}\n File Path: ${file.path}\n GitHub URL: ${githubUrl}\n ${repo.description ? `Repository Description: ${repo.description}` : ''}\n \n Content:\n ${content}`;\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repo.id}:${file.path}`,\n url: githubUrl,\n owner: githubService.owner,\n repository: repo.name,\n filePath: file.path,\n fileName: file.path?.split('/').pop() || '',\n branch: repo.default_branch || 'main',\n repositoryDescription: repo.description || '',\n },\n content: enhancedContent,\n };\n\n documents.push(document);\n } catch (error) {\n logger.warn(\n `Failed to retrieve content for GitHub file: ${file.path}. Error: ${error}`,\n );\n // Continue with other files even if one fails\n continue;\n }\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for GitHub repository: ${repo.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest GitHub repositories in batches */\n const ingestGitHubBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await githubService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found for the GitHub owner');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from GitHub`,\n );\n\n // Get files from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the files to be ingested from the repository based on the file types filter\n const files = await githubService.getRepoFiles(\n repo.name,\n repositoryFileTypesFilter,\n );\n\n if (files.length === 0) {\n logger.warn(\n `No files found for ingestion in the GitHub repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n logger.debug(`Files: ${JSON.stringify(files, null, 2)}`);\n\n const { totalDocumentsIngested } = await ingestRepositoryByFileBatch({\n repo,\n files,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the GitHub repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for GitHub repository: ${repo.name}`,\n );\n }\n };\n\n const ingest: Ingestor['ingest'] = async ({ saveDocumentsBatch }) => {\n await ingestGitHubBatch(saveDocumentsBatch);\n };\n\n return {\n id: MODULE_ID,\n ingest,\n };\n};\n"],"names":["DEFAULT_FILE_BATCH_SIZE","createGitHubService","getProgressStats","MODULE_ID"],"mappings":";;;;;;;AAeO,MAAM,uBAAuB,OAAO;AAAA,EACzC,MAAA;AAAA,EACA;AACF,CAAA,KAGyB;AAEvB,EAAA,MAAM,gBAAA,GAAmB,CAAC,KAAA,EAAO,OAAO,CAAA;AAGxC,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,2CAA2C,CAAA;AAE7C,EAAA,MAAM,YACJ,MAAA,CAAO,sBAAA;AAAA,IACL;AAAA,GACF,IAAK,gBAAA;AAGP,EAAA,MAAM,iBACJ,MAAA,CAAO,iBAAA;AAAA,IACL;AAAA,GACF,IAAKA,4CAAA;AAGP,EAAA,MAAM,gBAAgB,MAAMC,0BAAA,CAAoB,EAAE,MAAA,EAAQ,QAAQ,CAAA;AAQlE,EAAA,MAAM,8BAA8B,OAAO;AAAA,IACzC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,IAAA,CAAK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACjG;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OAC3G;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,IAAI;AACF,UAAA,MAAM,OAAA,GAAU,MAAM,aAAA,CAAc,kBAAA;AAAA,YAClC,IAAA,CAAK,IAAA;AAAA,YACL,IAAA,CAAK;AAAA,WACP;AAEA,UAAA,MAAM,eAAA,GAAkBC,iDAAA,CAAiB,WAAA,GAAc,CAAA,EAAG,MAAM,MAAM,CAAA;AAEtE,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,kBAAA,EAAqB,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,WACjL;AAGA,UAAA,MAAM,SAAA,GAAY,CAAA,mBAAA,EAAsB,aAAA,CAAc,KAAK,CAAA,CAAA,EAAI,IAAA,CAAK,IAAI,CAAA,MAAA,EAAS,IAAA,CAAK,cAAA,IAAkB,MAAM,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAG3H,UAAA,MAAM,eAAA,GAAkB,CAAA,YAAA,EAAe,IAAA,CAAK,IAAI;AAAA,qBAAA,EACnC,KAAK,IAAI;AAAA,sBAAA,EACR,SAAS;AAAA,UAAA,EACrB,KAAK,WAAA,GAAc,CAAA,wBAAA,EAA2B,IAAA,CAAK,WAAW,KAAK,EAAE;AAAA;AAAA;AAAA,UAAA,EAGrE,OAAO,CAAA,CAAA;AAET,UAAA,MAAM,QAAA,GAA8B;AAAA,YAClC,QAAA,EAAU;AAAA,cACR,MAAA,EAAQC,kBAAA;AAAA,cACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,cAC3B,GAAA,EAAK,SAAA;AAAA,cACL,OAAO,aAAA,CAAc,KAAA;AAAA,cACrB,YAAY,IAAA,CAAK,IAAA;AAAA,cACjB,UAAU,IAAA,CAAK,IAAA;AAAA,cACf,UAAU,IAAA,CAAK,IAAA,EAAM,MAAM,GAAG,CAAA,CAAE,KAAI,IAAK,EAAA;AAAA,cACzC,MAAA,EAAQ,KAAK,cAAA,IAAkB,MAAA;AAAA,cAC/B,qBAAA,EAAuB,KAAK,WAAA,IAAe;AAAA,aAC7C;AAAA,YACA,OAAA,EAAS;AAAA,WACX;AAEA,UAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,QACzB,SAAS,KAAA,EAAO;AACd,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,4CAAA,EAA+C,IAAA,CAAK,IAAI,CAAA,SAAA,EAAY,KAAK,CAAA;AAAA,WAC3E;AAEA,UAAA;AAAA,QACF;AAAA,MACF;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,iBAAA,GAAoB,OACxB,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,aAAA,CAAc,QAAA,EAAS;AAEtD,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAK,WAAA;AAAY;AAC9D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,yBAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAK,WAAA;AAAY,SACnD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,aAAA,CAAc,YAAA;AAAA,QAChC,IAAA,CAAK,IAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,yDACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,2BAAA,CAA4B;AAAA,QACnE,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OAC/I;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,MAAM,MAAA,GAA6B,OAAO,EAAE,kBAAA,EAAmB,KAAM;AACnE,IAAA,MAAM,kBAAkB,kBAAkB,CAAA;AAAA,EAC5C,CAAA;AAEA,EAAA,OAAO;AAAA,IACL,EAAA,EAAIA,kBAAA;AAAA,IACJ;AAAA,GACF;AACF;;;;"}
1
+ {"version":3,"file":"ingestor.cjs.js","sources":["../../src/services/ingestor.ts"],"sourcesContent":["import {\n LoggerService,\n RootConfigService,\n} from '@backstage/backend-plugin-api';\nimport { createGitHubService } from './github';\nimport {\n EmbeddingDocument,\n Ingestor,\n IngestorOptions,\n} from '@sweetoburrito/backstage-plugin-ai-assistant-node';\nimport { MODULE_ID } from '../constants/module';\nimport { Config } from '../../config';\nimport { getProgressStats } from '@sweetoburrito/backstage-plugin-ai-assistant-common';\nimport { DEFAULT_FILE_BATCH_SIZE } from '../constants/default-file-batch-size';\n\nexport const createGitHubIngestor = async ({\n config,\n logger,\n}: {\n config: RootConfigService;\n logger: LoggerService;\n}): Promise<Ingestor> => {\n // Default to common file types if none are specified\n const defaultFileTypes = ['.md', '.json'];\n\n // Get configuration values\n const repositoriesFilter = config.getOptional<\n Config['aiAssistant']['ingestors']['github']['repositories']\n >('aiAssistant.ingestors.github.repositories');\n\n const fileTypes =\n config.getOptionalStringArray('aiAssistant.ingestors.github.fileTypes') ??\n defaultFileTypes;\n\n // Get batch size for processing files (default to 50 files per batch)\n const filesBatchSize =\n config.getOptionalNumber('aiAssistant.ingestors.github.filesBatchSize') ??\n DEFAULT_FILE_BATCH_SIZE;\n\n // Create GitHub service\n const githubService = await createGitHubService({ config, logger });\n\n /** Ingest GitHub repository files in batches\n * @param repo - The repository to ingest files from\n * @param files - The list of files to ingest from the repository\n * @param saveDocumentsBatch - Function to save a batch of embedding documents\n * @returns Total number of documents ingested and sent for embedding from the repository\n */\n const ingestRepositoryByFileBatch = async ({\n repo,\n files,\n saveDocumentsBatch,\n }: {\n repo: any;\n files: any[];\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'];\n }) => {\n logger.info(\n `Processing ${files.length} files from repository \"${repo.name}\" in batches of ${filesBatchSize}`,\n );\n\n let totalDocumentsIngested = 0;\n\n // Process files in batches to manage memory and performance\n\n // Calculate total number of batches\n const totalBatches = Math.ceil(files.length / filesBatchSize);\n\n // Process each batch\n for (\n let batchStart = 0;\n batchStart < files.length;\n batchStart += filesBatchSize\n ) {\n const batchEnd = Math.min(batchStart + filesBatchSize, files.length);\n const filesBatch = files.slice(batchStart, batchEnd);\n const batchNumber = Math.floor(batchStart / filesBatchSize) + 1;\n\n logger.info(\n `Processing batch ${batchNumber}/${totalBatches} (${filesBatch.length} files) for repository \"${repo.name}\"`,\n );\n\n // Generate embedding documents for each file in the current batch\n const documents: EmbeddingDocument[] = [];\n\n for (let index = 0; index < filesBatch.length; index++) {\n const file = filesBatch[index];\n const globalIndex = batchStart + index;\n\n try {\n const content = await githubService.getRepoFileContent(\n repo.name,\n file.path!,\n );\n\n const completionStats = getProgressStats(\n globalIndex + 1,\n files.length,\n );\n\n logger.info(\n `Retrieved content for GitHub file: \"${file.path}\" in repository: \"${repo.name}\" [Progress: ${completionStats.completed}/${completionStats.total} (${completionStats.percentage}%) completed of repository]`,\n );\n\n // Generate proper GitHub URL for the file\n const githubUrl = `https://github.com/${githubService.owner}/${\n repo.name\n }/blob/${repo.default_branch || 'main'}/${file.path}`;\n\n // Create enhanced content with URL reference and metadata\n const enhancedContent = `Repository: ${repo.name}\n File Path: ${file.path}\n GitHub URL: ${githubUrl}\n ${\n repo.description\n ? `Repository Description: ${repo.description}`\n : ''\n }\n \n Content:\n ${content}`;\n\n const document: EmbeddingDocument = {\n metadata: {\n source: MODULE_ID,\n id: `${repo.id}:${file.path}`,\n url: githubUrl,\n owner: githubService.owner,\n repository: repo.name,\n filePath: file.path,\n fileName: file.path?.split('/').pop() || '',\n branch: repo.default_branch || 'main',\n repositoryDescription: repo.description || '',\n },\n content: enhancedContent,\n };\n\n documents.push(document);\n } catch (error) {\n logger.warn(\n `Failed to retrieve content for GitHub file: ${file.path}. Error: ${error}`,\n );\n // Continue with other files even if one fails\n continue;\n }\n }\n\n // Save the current batch of documents\n await saveDocumentsBatch(documents);\n\n totalDocumentsIngested += documents.length;\n\n logger.info(\n `Batch ${batchNumber}/${totalBatches} completed: ${documents.length} documents ingested for GitHub repository: ${repo.name}`,\n );\n }\n\n return { totalDocumentsIngested };\n };\n\n /** Ingest GitHub repositories in batches */\n const ingestGitHubBatch = async (\n saveDocumentsBatch: IngestorOptions['saveDocumentsBatch'],\n ) => {\n const repositoriesList = await githubService.getRepos();\n\n if (repositoriesList.length === 0) {\n logger.warn('No repositories found for the GitHub owner');\n return;\n }\n\n logger.info(\n `Filtering for repositories: ${repositoriesFilter\n ?.map(repo => repo.name)\n .join(', ')}`,\n );\n\n // Filter repositories if a filter is provided in the config\n const repositoriesToIngest = repositoriesFilter\n ? repositoriesList.filter(repo =>\n repositoriesFilter?.some(\n filteredRepo =>\n filteredRepo.name.toLowerCase() === repo.name.toLowerCase(),\n ),\n )\n : repositoriesList;\n\n if (repositoriesToIngest.length === 0) {\n logger.warn(\n 'No repositories found for ingestion after applying the filter',\n );\n return;\n }\n\n logger.info(\n `Ingesting ${repositoriesToIngest.length} repositories from GitHub`,\n );\n\n // Get files from each repository and create documents to be embedded\n for (const repo of repositoriesToIngest) {\n logger.info(\n `Beginning ingestion for repository: ${repo.name} (${repo.id})`,\n );\n\n // Determine the file types to use for this repository or use default\n const repositoryFileTypesFilter =\n repositoriesFilter?.find(\n r => r.name.toLowerCase() === repo.name.toLowerCase(),\n )?.fileTypes ?? fileTypes;\n\n logger.info(\n `Processing file types for repository ${\n repo.name\n }: [${repositoryFileTypesFilter.join(', ')}]`,\n );\n\n // Get the files to be ingested from the repository based on the file types filter\n const files = await githubService.getRepoFiles(\n repo.name,\n repositoryFileTypesFilter,\n );\n\n if (files.length === 0) {\n logger.warn(\n `No files found for ingestion in the GitHub repository ${\n repo.name\n } (${\n repo.id\n }) with the specified file types filter: [${repositoryFileTypesFilter.join(\n ', ',\n )}]`,\n );\n continue;\n }\n\n logger.debug(`Files: ${JSON.stringify(files, null, 2)}`);\n\n const { totalDocumentsIngested } = await ingestRepositoryByFileBatch({\n repo,\n files,\n saveDocumentsBatch,\n });\n\n if (totalDocumentsIngested === 0) {\n logger.warn(\n `No documents were ingested and sent for embedding from the GitHub repository ${repo.name} (${repo.id})`,\n );\n continue;\n }\n\n logger.info(\n `Repository ingestion completed: ${totalDocumentsIngested} total documents ingested and sent for embedding for GitHub repository: ${repo.name}`,\n );\n }\n };\n\n const ingest: Ingestor['ingest'] = async ({ saveDocumentsBatch }) => {\n await ingestGitHubBatch(saveDocumentsBatch);\n };\n\n return {\n id: MODULE_ID,\n ingest,\n };\n};\n"],"names":["DEFAULT_FILE_BATCH_SIZE","createGitHubService","getProgressStats","MODULE_ID"],"mappings":";;;;;;;AAeO,MAAM,uBAAuB,OAAO;AAAA,EACzC,MAAA;AAAA,EACA;AACF,CAAA,KAGyB;AAEvB,EAAA,MAAM,gBAAA,GAAmB,CAAC,KAAA,EAAO,OAAO,CAAA;AAGxC,EAAA,MAAM,kBAAA,GAAqB,MAAA,CAAO,WAAA,CAEhC,2CAA2C,CAAA;AAE7C,EAAA,MAAM,SAAA,GACJ,MAAA,CAAO,sBAAA,CAAuB,wCAAwC,CAAA,IACtE,gBAAA;AAGF,EAAA,MAAM,cAAA,GACJ,MAAA,CAAO,iBAAA,CAAkB,6CAA6C,CAAA,IACtEA,4CAAA;AAGF,EAAA,MAAM,gBAAgB,MAAMC,0BAAA,CAAoB,EAAE,MAAA,EAAQ,QAAQ,CAAA;AAQlE,EAAA,MAAM,8BAA8B,OAAO;AAAA,IACzC,IAAA;AAAA,IACA,KAAA;AAAA,IACA;AAAA,GACF,KAIM;AACJ,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,cAAc,KAAA,CAAM,MAAM,2BAA2B,IAAA,CAAK,IAAI,mBAAmB,cAAc,CAAA;AAAA,KACjG;AAEA,IAAA,IAAI,sBAAA,GAAyB,CAAA;AAK7B,IAAA,MAAM,YAAA,GAAe,IAAA,CAAK,IAAA,CAAK,KAAA,CAAM,SAAS,cAAc,CAAA;AAG5D,IAAA,KAAA,IACM,aAAa,CAAA,EACjB,UAAA,GAAa,KAAA,CAAM,MAAA,EACnB,cAAc,cAAA,EACd;AACA,MAAA,MAAM,WAAW,IAAA,CAAK,GAAA,CAAI,UAAA,GAAa,cAAA,EAAgB,MAAM,MAAM,CAAA;AACnE,MAAA,MAAM,UAAA,GAAa,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,QAAQ,CAAA;AACnD,MAAA,MAAM,WAAA,GAAc,IAAA,CAAK,KAAA,CAAM,UAAA,GAAa,cAAc,CAAA,GAAI,CAAA;AAE9D,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,iBAAA,EAAoB,WAAW,CAAA,CAAA,EAAI,YAAY,KAAK,UAAA,CAAW,MAAM,CAAA,wBAAA,EAA2B,IAAA,CAAK,IAAI,CAAA,CAAA;AAAA,OAC3G;AAGA,MAAA,MAAM,YAAiC,EAAC;AAExC,MAAA,KAAA,IAAS,KAAA,GAAQ,CAAA,EAAG,KAAA,GAAQ,UAAA,CAAW,QAAQ,KAAA,EAAA,EAAS;AACtD,QAAA,MAAM,IAAA,GAAO,WAAW,KAAK,CAAA;AAC7B,QAAA,MAAM,cAAc,UAAA,GAAa,KAAA;AAEjC,QAAA,IAAI;AACF,UAAA,MAAM,OAAA,GAAU,MAAM,aAAA,CAAc,kBAAA;AAAA,YAClC,IAAA,CAAK,IAAA;AAAA,YACL,IAAA,CAAK;AAAA,WACP;AAEA,UAAA,MAAM,eAAA,GAAkBC,iDAAA;AAAA,YACtB,WAAA,GAAc,CAAA;AAAA,YACd,KAAA,CAAM;AAAA,WACR;AAEA,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,kBAAA,EAAqB,KAAK,IAAI,CAAA,aAAA,EAAgB,eAAA,CAAgB,SAAS,CAAA,CAAA,EAAI,eAAA,CAAgB,KAAK,CAAA,EAAA,EAAK,gBAAgB,UAAU,CAAA,2BAAA;AAAA,WACjL;AAGA,UAAA,MAAM,SAAA,GAAY,CAAA,mBAAA,EAAsB,aAAA,CAAc,KAAK,CAAA,CAAA,EACzD,IAAA,CAAK,IACP,CAAA,MAAA,EAAS,IAAA,CAAK,cAAA,IAAkB,MAAM,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAGnD,UAAA,MAAM,eAAA,GAAkB,CAAA,YAAA,EAAe,IAAA,CAAK,IAAI;AAAA,qBAAA,EACnC,KAAK,IAAI;AAAA,sBAAA,EACR,SAAS;AAAA,UAAA,EAErB,KAAK,WAAA,GACD,CAAA,wBAAA,EAA2B,IAAA,CAAK,WAAW,KAC3C,EACN;AAAA;AAAA;AAAA,UAAA,EAGE,OAAO,CAAA,CAAA;AAET,UAAA,MAAM,QAAA,GAA8B;AAAA,YAClC,QAAA,EAAU;AAAA,cACR,MAAA,EAAQC,kBAAA;AAAA,cACR,IAAI,CAAA,EAAG,IAAA,CAAK,EAAE,CAAA,CAAA,EAAI,KAAK,IAAI,CAAA,CAAA;AAAA,cAC3B,GAAA,EAAK,SAAA;AAAA,cACL,OAAO,aAAA,CAAc,KAAA;AAAA,cACrB,YAAY,IAAA,CAAK,IAAA;AAAA,cACjB,UAAU,IAAA,CAAK,IAAA;AAAA,cACf,UAAU,IAAA,CAAK,IAAA,EAAM,MAAM,GAAG,CAAA,CAAE,KAAI,IAAK,EAAA;AAAA,cACzC,MAAA,EAAQ,KAAK,cAAA,IAAkB,MAAA;AAAA,cAC/B,qBAAA,EAAuB,KAAK,WAAA,IAAe;AAAA,aAC7C;AAAA,YACA,OAAA,EAAS;AAAA,WACX;AAEA,UAAA,SAAA,CAAU,KAAK,QAAQ,CAAA;AAAA,QACzB,SAAS,KAAA,EAAO;AACd,UAAA,MAAA,CAAO,IAAA;AAAA,YACL,CAAA,4CAAA,EAA+C,IAAA,CAAK,IAAI,CAAA,SAAA,EAAY,KAAK,CAAA;AAAA,WAC3E;AAEA,UAAA;AAAA,QACF;AAAA,MACF;AAGA,MAAA,MAAM,mBAAmB,SAAS,CAAA;AAElC,MAAA,sBAAA,IAA0B,SAAA,CAAU,MAAA;AAEpC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,MAAA,EAAS,WAAW,CAAA,CAAA,EAAI,YAAY,eAAe,SAAA,CAAU,MAAM,CAAA,2CAAA,EAA8C,IAAA,CAAK,IAAI,CAAA;AAAA,OAC5H;AAAA,IACF;AAEA,IAAA,OAAO,EAAE,sBAAA,EAAuB;AAAA,EAClC,CAAA;AAGA,EAAA,MAAM,iBAAA,GAAoB,OACxB,kBAAA,KACG;AACH,IAAA,MAAM,gBAAA,GAAmB,MAAM,aAAA,CAAc,QAAA,EAAS;AAEtD,IAAA,IAAI,gBAAA,CAAiB,WAAW,CAAA,EAAG;AACjC,MAAA,MAAA,CAAO,KAAK,4CAA4C,CAAA;AACxD,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,4BAAA,EAA+B,oBAC3B,GAAA,CAAI,CAAA,IAAA,KAAQ,KAAK,IAAI,CAAA,CACtB,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,KACf;AAGA,IAAA,MAAM,oBAAA,GAAuB,qBACzB,gBAAA,CAAiB,MAAA;AAAA,MAAO,UACtB,kBAAA,EAAoB,IAAA;AAAA,QAClB,kBACE,YAAA,CAAa,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAK,WAAA;AAAY;AAC9D,KACF,GACA,gBAAA;AAEJ,IAAA,IAAI,oBAAA,CAAqB,WAAW,CAAA,EAAG;AACrC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL;AAAA,OACF;AACA,MAAA;AAAA,IACF;AAEA,IAAA,MAAA,CAAO,IAAA;AAAA,MACL,CAAA,UAAA,EAAa,qBAAqB,MAAM,CAAA,yBAAA;AAAA,KAC1C;AAGA,IAAA,KAAA,MAAW,QAAQ,oBAAA,EAAsB;AACvC,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,oCAAA,EAAuC,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,OAC9D;AAGA,MAAA,MAAM,4BACJ,kBAAA,EAAoB,IAAA;AAAA,QAClB,OAAK,CAAA,CAAE,IAAA,CAAK,aAAY,KAAM,IAAA,CAAK,KAAK,WAAA;AAAY,SACnD,SAAA,IAAa,SAAA;AAElB,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,wCACE,IAAA,CAAK,IACP,MAAM,yBAAA,CAA0B,IAAA,CAAK,IAAI,CAAC,CAAA,CAAA;AAAA,OAC5C;AAGA,MAAA,MAAM,KAAA,GAAQ,MAAM,aAAA,CAAc,YAAA;AAAA,QAChC,IAAA,CAAK,IAAA;AAAA,QACL;AAAA,OACF;AAEA,MAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,yDACE,IAAA,CAAK,IACP,KACE,IAAA,CAAK,EACP,4CAA4C,yBAAA,CAA0B,IAAA;AAAA,YACpE;AAAA,WACD,CAAA,CAAA;AAAA,SACH;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,KAAA,CAAM,UAAU,IAAA,CAAK,SAAA,CAAU,OAAO,IAAA,EAAM,CAAC,CAAC,CAAA,CAAE,CAAA;AAEvD,MAAA,MAAM,EAAE,sBAAA,EAAuB,GAAI,MAAM,2BAAA,CAA4B;AAAA,QACnE,IAAA;AAAA,QACA,KAAA;AAAA,QACA;AAAA,OACD,CAAA;AAED,MAAA,IAAI,2BAA2B,CAAA,EAAG;AAChC,QAAA,MAAA,CAAO,IAAA;AAAA,UACL,CAAA,6EAAA,EAAgF,IAAA,CAAK,IAAI,CAAA,EAAA,EAAK,KAAK,EAAE,CAAA,CAAA;AAAA,SACvG;AACA,QAAA;AAAA,MACF;AAEA,MAAA,MAAA,CAAO,IAAA;AAAA,QACL,CAAA,gCAAA,EAAmC,sBAAsB,CAAA,wEAAA,EAA2E,IAAA,CAAK,IAAI,CAAA;AAAA,OAC/I;AAAA,IACF;AAAA,EACF,CAAA;AAEA,EAAA,MAAM,MAAA,GAA6B,OAAO,EAAE,kBAAA,EAAmB,KAAM;AACnE,IAAA,MAAM,kBAAkB,kBAAkB,CAAA;AAAA,EAC5C,CAAA;AAEA,EAAA,OAAO;AAAA,IACL,EAAA,EAAIA,kBAAA;AAAA,IACJ;AAAA,GACF;AACF;;;;"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sweetoburrito/backstage-plugin-ai-assistant-backend-module-ingestor-github",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "license": "Apache-2.0",
5
5
  "description": "The ingestor-github backend module for the ai-assistant plugin.",
6
6
  "main": "dist/index.cjs.js",
@@ -29,8 +29,8 @@
29
29
  },
30
30
  "dependencies": {
31
31
  "@backstage/backend-plugin-api": "^1.4.1",
32
- "@sweetoburrito/backstage-plugin-ai-assistant-common": "^0.4.0",
33
- "@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.4.0",
32
+ "@sweetoburrito/backstage-plugin-ai-assistant-common": "^0.5.0",
33
+ "@sweetoburrito/backstage-plugin-ai-assistant-node": "^0.5.0",
34
34
  "octokit": "^5.0.0"
35
35
  },
36
36
  "devDependencies": {