ghcr-manager 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/dist/cli/_args.d.ts +6 -0
- package/dist/cli/_args.js +38 -0
- package/dist/cli/_logger.d.ts +9 -0
- package/dist/cli/_logger.js +24 -0
- package/dist/cli/_scan-command.d.ts +1 -0
- package/dist/cli/_scan-command.js +32 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +30 -0
- package/dist/core/_types.d.ts +49 -0
- package/dist/core/_types.js +1 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.js +1 -0
- package/dist/db/_manifest-reachability.d.ts +2 -0
- package/dist/db/_manifest-reachability.js +94 -0
- package/dist/db/_scan-writer.d.ts +18 -0
- package/dist/db/_scan-writer.js +176 -0
- package/dist/db/_schema.d.ts +2 -0
- package/dist/db/_schema.js +19 -0
- package/dist/db/_snapshot-repository.d.ts +24 -0
- package/dist/db/_snapshot-repository.js +98 -0
- package/dist/db/index.d.ts +4 -0
- package/dist/db/index.js +9 -0
- package/dist/ingest/github/_manifest-client.d.ts +8 -0
- package/dist/ingest/github/_manifest-client.js +100 -0
- package/dist/ingest/github/_manifest-ingest.d.ts +3 -0
- package/dist/ingest/github/_manifest-ingest.js +104 -0
- package/dist/ingest/github/_package-version-page-load.d.ts +13 -0
- package/dist/ingest/github/_package-version-page-load.js +52 -0
- package/dist/ingest/github/_packages-client.d.ts +10 -0
- package/dist/ingest/github/_packages-client.js +59 -0
- package/dist/ingest/github/_paginated-ingest.d.ts +11 -0
- package/dist/ingest/github/_paginated-ingest.js +28 -0
- package/dist/ingest/github/_parallel-paginated-ingest.d.ts +11 -0
- package/dist/ingest/github/_parallel-paginated-ingest.js +49 -0
- package/dist/ingest/github/_registry-token-client.d.ts +6 -0
- package/dist/ingest/github/_registry-token-client.js +67 -0
- package/dist/ingest/github/_shared.d.ts +28 -0
- package/dist/ingest/github/_shared.js +102 -0
- package/dist/ingest/github/index.d.ts +7 -0
- package/dist/ingest/github/index.js +26 -0
- package/dist/tuning/index.d.ts +6 -0
- package/dist/tuning/index.js +6 -0
- package/package.json +59 -0
- package/resources/sql/schema/001_schema.sql +109 -0
- package/resources/sql/views/001_v_latest_scan_per_package.sql +27 -0
- package/resources/sql/views/002_v_missing_digests.sql +32 -0
- package/resources/sql/views/003_v_missing_digests_related_manifests.sql +78 -0
- package/resources/sql/views/004_v_manifests_related_manifests.sql +142 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
export function initializeSchema(database) {
|
|
4
|
+
_initializeSqlDirectory(database, "schema");
|
|
5
|
+
_initializeSqlDirectory(database, "views");
|
|
6
|
+
}
|
|
7
|
+
function _initializeSqlDirectory(database, directoryName) {
|
|
8
|
+
const sqlDirectory = join(process.cwd(), "resources", "sql", directoryName);
|
|
9
|
+
if (!existsSync(sqlDirectory)) {
|
|
10
|
+
return;
|
|
11
|
+
}
|
|
12
|
+
const sqlFiles = readdirSync(sqlDirectory)
|
|
13
|
+
.filter((file) => file.endsWith(".sql"))
|
|
14
|
+
.sort();
|
|
15
|
+
for (const sqlFile of sqlFiles) {
|
|
16
|
+
const sql = readFileSync(join(sqlDirectory, sqlFile), "utf8");
|
|
17
|
+
database.exec(sql);
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type Database from "better-sqlite3";
|
|
2
|
+
/**
 * Read-side queries over the rows recorded for one scan, identified by
 * `scanId` in every method.
 */
export declare class SnapshotRepository {
    #private;
    /** @param database Open database handle that all queries run against. */
    constructor(database: Database.Database);
    /**
     * Reads owner, package name and completion time from `package_scans`.
     *
     * @throws Error when no row exists for `scanId` or the scan has no
     * completion timestamp.
     */
    getPackageMetadata(scanId: number): {
        owner: string;
        packageName: string;
        scanCompletedAt: string;
    };
    /** Distinct digests that have at least one row in `tags`. */
    getTaggedDigests(scanId: number): Set<string>;
    /** Distinct digests of the given tag names; an empty `tags` list yields an empty set. */
    getDigestsForTags(scanId: number, tags: string[]): Set<string>;
    /**
     * Child digests of every `manifest_edges` row whose parent is one of
     * `parentDigests`. One entry per edge, so a digest may repeat.
     */
    getChildDigests(scanId: number, parentDigests: Iterable<string>): string[];
    /** Package versions whose `created_at` is below `cutoffTimestamp`, ordered by version id. */
    getVersionsCreatedBefore(scanId: number, cutoffTimestamp: string): Array<{
        versionId: number;
        digest: string;
    }>;
    /** Distinct version ids that carry at least one tag, in ascending order. */
    getTaggedVersionIds(scanId: number): number[];
    /** Number of rows in `package_versions`. */
    countPackageVersions(scanId: number): number;
    /** Number of distinct version ids present in `tags`. */
    countTaggedVersions(scanId: number): number;
    /** Number of rows in `tags`. */
    countTags(scanId: number): number;
    /** Number of rows in `manifests`. */
    countManifests(scanId: number): number;
    /** Number of rows in `manifest_edges`. */
    countManifestEdges(scanId: number): number;
    /** Digest of every package version, ordered by version id. */
    listPackageVersionDigests(scanId: number): string[];
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
export class SnapshotRepository {
|
|
2
|
+
#database;
|
|
3
|
+
constructor(database) {
|
|
4
|
+
this.#database = database;
|
|
5
|
+
}
|
|
6
|
+
getPackageMetadata(scanId) {
|
|
7
|
+
const row = this.#database
|
|
8
|
+
.prepare(`
|
|
9
|
+
SELECT owner, package_name, scan_completed_at
|
|
10
|
+
FROM package_scans
|
|
11
|
+
WHERE scan_id = ?
|
|
12
|
+
`)
|
|
13
|
+
.get(scanId);
|
|
14
|
+
if (!row) {
|
|
15
|
+
throw new Error(`database does not contain package scan for scan_id=${scanId}`);
|
|
16
|
+
}
|
|
17
|
+
if (!row.scan_completed_at) {
|
|
18
|
+
throw new Error(`scan ${scanId} has not completed`);
|
|
19
|
+
}
|
|
20
|
+
return {
|
|
21
|
+
owner: row.owner,
|
|
22
|
+
packageName: row.package_name,
|
|
23
|
+
scanCompletedAt: row.scan_completed_at
|
|
24
|
+
};
|
|
25
|
+
}
|
|
26
|
+
getTaggedDigests(scanId) {
|
|
27
|
+
return _getDigestSet(this.#database.prepare("SELECT DISTINCT digest FROM tags WHERE scan_id = ?").all(scanId), "digest");
|
|
28
|
+
}
|
|
29
|
+
getDigestsForTags(scanId, tags) {
|
|
30
|
+
if (tags.length === 0) {
|
|
31
|
+
return new Set();
|
|
32
|
+
}
|
|
33
|
+
const placeholders = tags.map(() => "?").join(", ");
|
|
34
|
+
const rows = this.#database
|
|
35
|
+
.prepare(`SELECT DISTINCT digest FROM tags WHERE scan_id = ? AND tag IN (${placeholders})`)
|
|
36
|
+
.all(scanId, ...tags);
|
|
37
|
+
return _getDigestSet(rows, "digest");
|
|
38
|
+
}
|
|
39
|
+
getChildDigests(scanId, parentDigests) {
|
|
40
|
+
const digestList = [...parentDigests];
|
|
41
|
+
if (digestList.length === 0) {
|
|
42
|
+
return [];
|
|
43
|
+
}
|
|
44
|
+
const placeholders = digestList.map(() => "?").join(", ");
|
|
45
|
+
const rows = this.#database
|
|
46
|
+
.prepare(`SELECT child_digest FROM manifest_edges WHERE scan_id = ? AND parent_digest IN (${placeholders})`)
|
|
47
|
+
.all(scanId, ...digestList);
|
|
48
|
+
return rows.map((row) => row.child_digest);
|
|
49
|
+
}
|
|
50
|
+
getVersionsCreatedBefore(scanId, cutoffTimestamp) {
|
|
51
|
+
const rows = this.#database
|
|
52
|
+
.prepare(`
|
|
53
|
+
SELECT version_id, digest
|
|
54
|
+
FROM package_versions
|
|
55
|
+
WHERE scan_id = ? AND created_at < ?
|
|
56
|
+
ORDER BY version_id
|
|
57
|
+
`)
|
|
58
|
+
.all(scanId, cutoffTimestamp);
|
|
59
|
+
return rows.map((row) => ({
|
|
60
|
+
versionId: row.version_id,
|
|
61
|
+
digest: row.digest
|
|
62
|
+
}));
|
|
63
|
+
}
|
|
64
|
+
getTaggedVersionIds(scanId) {
|
|
65
|
+
const rows = this.#database
|
|
66
|
+
.prepare("SELECT DISTINCT version_id FROM tags WHERE scan_id = ? ORDER BY version_id")
|
|
67
|
+
.all(scanId);
|
|
68
|
+
return rows.map((row) => row.version_id);
|
|
69
|
+
}
|
|
70
|
+
countPackageVersions(scanId) {
|
|
71
|
+
return _count(this.#database, "SELECT COUNT(*) AS total FROM package_versions WHERE scan_id = ?", "total", scanId);
|
|
72
|
+
}
|
|
73
|
+
countTaggedVersions(scanId) {
|
|
74
|
+
return _count(this.#database, "SELECT COUNT(DISTINCT version_id) AS total FROM tags WHERE scan_id = ?", "total", scanId);
|
|
75
|
+
}
|
|
76
|
+
countTags(scanId) {
|
|
77
|
+
return _count(this.#database, "SELECT COUNT(*) AS total FROM tags WHERE scan_id = ?", "total", scanId);
|
|
78
|
+
}
|
|
79
|
+
countManifests(scanId) {
|
|
80
|
+
return _count(this.#database, "SELECT COUNT(*) AS total FROM manifests WHERE scan_id = ?", "total", scanId);
|
|
81
|
+
}
|
|
82
|
+
countManifestEdges(scanId) {
|
|
83
|
+
return _count(this.#database, "SELECT COUNT(*) AS total FROM manifest_edges WHERE scan_id = ?", "total", scanId);
|
|
84
|
+
}
|
|
85
|
+
listPackageVersionDigests(scanId) {
|
|
86
|
+
const rows = this.#database
|
|
87
|
+
.prepare("SELECT digest FROM package_versions WHERE scan_id = ? ORDER BY version_id")
|
|
88
|
+
.all(scanId);
|
|
89
|
+
return rows.map((row) => row.digest);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
function _getDigestSet(rows, key) {
|
|
93
|
+
return new Set(rows.map((row) => row[key]));
|
|
94
|
+
}
|
|
95
|
+
function _count(database, sql, field, ...params) {
|
|
96
|
+
const row = database.prepare(sql).get(...params);
|
|
97
|
+
return row[field];
|
|
98
|
+
}
|
package/dist/db/index.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import Database from "better-sqlite3";
|
|
2
|
+
import { initializeSchema } from "./_schema.js";
|
|
3
|
+
export { ScanWriter } from "./_scan-writer.js";
|
|
4
|
+
export { SnapshotRepository } from "./_snapshot-repository.js";
|
|
5
|
+
/**
 * Opens the database at `databasePath` and makes sure the schema exists.
 *
 * @param databasePath Location handed to the `better-sqlite3` constructor.
 * @returns The open handle with `initializeSchema` already applied.
 * @throws Rethrows whatever `initializeSchema` throws; the handle is closed
 * first so a failed initialization does not leave the file open.
 */
export function openDatabase(databasePath) {
    const database = new Database(databasePath);
    try {
        initializeSchema(database);
    }
    catch (error) {
        database.close();
        throw error;
    }
    return database;
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { ManifestDescriptorRecord, ManifestEdgeRecord, ManifestRecord } from "../../core/index.js";
|
|
2
|
+
import { type FetchLike, type GitHubScanOptions } from "./_shared.js";
|
|
3
|
+
/**
 * Fetches one manifest by digest from the registry and derives the records
 * describing it.
 *
 * @param fetchImpl Fetch function used for the request.
 * @param registryBaseUrl Base URL the `/v2/.../manifests/<digest>` path is resolved against.
 * @param digest Digest of the manifest to load.
 * @param registryToken Bearer token sent in the `Authorization` header.
 * @param options Scan options; supplies owner, package name and logger.
 * @returns The manifest record, one descriptor record and one edge record per
 * usable child manifest (plus a referrer edge when a subject digest is
 * present), and the parsed document re-serialized as JSON.
 */
export declare function loadManifestGraph(
    fetchImpl: FetchLike,
    registryBaseUrl: string,
    digest: string,
    registryToken: string,
    options: GitHubScanOptions
): Promise<{
    record: ManifestRecord;
    descriptorRecords: ManifestDescriptorRecord[];
    edgeRecords: ManifestEdgeRecord[];
    rawJson: string;
}>;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import { acceptedManifestMediaTypes, buildFetchTransportErrorMessage, buildHttpErrorMessage, withFetchRetry } from "./_shared.js";
|
|
2
|
+
/**
 * Fetches one manifest by digest from the registry and derives the records
 * describing it.
 *
 * The request goes through `withFetchRetry`; a response with status 429, 502,
 * 503 or 504 is turned into an error inside the attempt so the retry helper
 * can decide (via `_shouldRetryError`) whether to try again.
 *
 * @throws Error when the request ultimately fails, when the final response is
 * not OK, or when neither the document nor the `content-type` header names a
 * media type.
 */
export async function loadManifestGraph(fetchImpl, registryBaseUrl, digest, registryToken, options) {
    const startedAt = Date.now();
    const manifestUrl = new URL(`/v2/${options.owner}/${options.packageName}/manifests/${digest}`, registryBaseUrl).toString();
    // One request attempt: hands back any response that should not be retried.
    const requestOnce = async () => {
        const attemptResponse = await fetchImpl(manifestUrl, {
            headers: {
                Accept: acceptedManifestMediaTypes,
                Authorization: `Bearer ${registryToken}`,
                "User-Agent": "ghcr-manager"
            }
        });
        if (attemptResponse.ok || !_shouldRetryStatus(attemptResponse.status)) {
            return attemptResponse;
        }
        throw new Error(await buildHttpErrorMessage(attemptResponse, `GHCR manifest request for ${digest} failed`));
    };
    let response;
    try {
        response = await withFetchRetry(requestOnce, {
            logger: options.logger,
            label: `GHCR manifest request for ${digest}`,
            shouldRetry: (error) => _shouldRetryError(error)
        });
    }
    catch (error) {
        const message = buildFetchTransportErrorMessage(error, `GHCR manifest request for ${digest} failed`);
        throw new Error(message, { cause: error });
    }
    if (!response.ok) {
        throw new Error(await buildHttpErrorMessage(response, `GHCR manifest request for ${digest} failed`));
    }
    // Only the part before any ";" parameters of the content type is kept.
    const headerMediaType = response.headers.get("content-type")?.split(";")[0];
    const document = await response.json();
    const rawJson = JSON.stringify(document);
    // The document's own mediaType wins; the header is the fallback.
    const mediaType = document.mediaType ?? headerMediaType;
    if (!mediaType) {
        throw new Error(`manifest response for ${digest} did not include a media type`);
    }
    const elapsedMs = Date.now() - startedAt;
    options.logger.debug(`Loaded GHCR manifest ${digest} in ${elapsedMs}ms (${mediaType})`);
    const record = {
        digest,
        mediaType,
        artifactType: document.artifactType,
        configMediaType: document.config?.mediaType,
        subjectDigest: document.subject?.digest,
        annotations: document.annotations
    };
    return {
        rawJson,
        record,
        descriptorRecords: _buildDescriptorRecords(digest, document),
        edgeRecords: _buildEdges(digest, document)
    };
}
|
|
55
|
+
/**
 * Builds one descriptor record per entry of `document.manifests` that has
 * both a digest and a media type; other entries are skipped.
 *
 * @param parentDigest Digest of the manifest the entries belong to.
 * @param document Parsed manifest document; `manifests` may be absent.
 */
function _buildDescriptorRecords(parentDigest, document) {
    const descriptors = [];
    for (const { digest: childDigest, mediaType, artifactType, platform } of document.manifests ?? []) {
        if (childDigest && mediaType) {
            descriptors.push({ parentDigest, childDigest, mediaType, artifactType, platform });
        }
    }
    return descriptors;
}
|
|
71
|
+
/**
 * Derives the graph edges contributed by one manifest document.
 *
 * Each child manifest with a digest and media type yields an `"image-child"`
 * edge from `parentDigest` to the child. A `subject.digest`, when present,
 * yields a `"referrer"` edge pointing from the subject to this manifest.
 */
function _buildEdges(parentDigest, document) {
    const edges = [];
    for (const { digest: childDigest, mediaType } of document.manifests ?? []) {
        if (childDigest && mediaType) {
            edges.push({ parentDigest, childDigest, edgeKind: "image-child" });
        }
    }
    const subjectDigest = document.subject?.digest;
    if (subjectDigest) {
        // The subject is the parent here: this manifest refers to it.
        edges.push({ parentDigest: subjectDigest, childDigest: parentDigest, edgeKind: "referrer" });
    }
    return edges;
}
|
|
92
|
+
/** Reports whether an HTTP status (429, 502, 503 or 504) should be retried. */
function _shouldRetryStatus(status) {
    return [429, 502, 503, 504].includes(status);
}
|
|
95
|
+
/**
 * Reports whether a thrown value is worth retrying: it must be an `Error`
 * whose message mentions "fetch failed" or one of the statuses 429, 502, 503
 * or 504.
 */
function _shouldRetryError(error) {
    return error instanceof Error
        && /fetch failed|status 429|status 502|status 503|status 504/.test(error.message);
}
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { ScanWriter, SnapshotRepository } from "../../db/index.js";
|
|
2
|
+
import { type FetchLike, type GitHubScanOptions } from "./_shared.js";
|
|
3
|
+
/**
 * Fetches the manifest of every package version recorded for `scanId`, plus
 * the manifests those reference, and stores them through `writer`.
 *
 * @param fetchImpl Fetch function used for registry requests.
 * @param registryBaseUrl Base URL of the registry.
 * @param options Scan options, including the logger used for progress output.
 * @param writer Receives manifests, payloads, descriptors and edges.
 * @param repository Supplies the initial list of package-version digests.
 * @param scanId Scan whose package versions are processed.
 */
export declare function ingestManifests(fetchImpl: FetchLike, registryBaseUrl: string, options: GitHubScanOptions, writer: ScanWriter, repository: SnapshotRepository, scanId: number): Promise<void>;
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { manifestFetchConcurrency, manifestIngestProgressStepRatio } from "../../tuning/index.js";
|
|
2
|
+
import { loadManifestGraph } from "./_manifest-client.js";
|
|
3
|
+
import { loadRegistryPullToken } from "./_registry-token-client.js";
|
|
4
|
+
/**
 * Fetches the manifest of every package version recorded for `scanId`, plus
 * every manifest discovered through them, and stores the results via `writer`.
 *
 * Up to `manifestFetchConcurrency` manifests are loaded at once. Loads append
 * newly discovered digests to the pending queue, so the loop continues until
 * the queue is empty and nothing is in flight. Afterwards the collected edges
 * whose two endpoints were both persisted are inserted and reachability is
 * rebuilt.
 *
 * @throws Rethrows the first load failure. Loads still in flight at that point
 * are awaited first, so nothing keeps writing to `writer` after this function
 * has rejected.
 */
export async function ingestManifests(fetchImpl, registryBaseUrl, options, writer, repository, scanId) {
    const pendingDigests = repository.listPackageVersionDigests(scanId);
    const initialDigestCount = pendingDigests.length;
    const progressStep = Math.max(1, Math.ceil(initialDigestCount * manifestIngestProgressStepRatio));
    const queuedDigests = new Set(pendingDigests);
    const fetchedDigests = new Set();
    const persistedDigests = new Set();
    // Digests whose load has been started; guards against fetching the same
    // digest twice when the initial list contains it more than once.
    const startedDigests = new Set();
    const registryPullTokenState = {};
    options.logger.info(`Fetching manifests for ${pendingDigests.length} package versions`);
    let completed = 0;
    const edgeRecords = [];
    const activeLoads = new Set();
    const getRegistryToken = async () => (await _getRegistryPullToken(fetchImpl, registryBaseUrl, options, registryPullTokenState)).token;
    const onComplete = () => {
        completed += 1;
        if (completed % progressStep === 0 || pendingDigests.length === 0) {
            options.logger.info(`Fetched manifests ${completed}/${queuedDigests.size}`);
        }
    };
    try {
        while (pendingDigests.length > 0 || activeLoads.size > 0) {
            // Top up the in-flight set until the concurrency limit is reached.
            while (pendingDigests.length > 0 && activeLoads.size < manifestFetchConcurrency) {
                const digest = pendingDigests.shift();
                if (!digest || fetchedDigests.has(digest) || startedDigests.has(digest)) {
                    continue;
                }
                startedDigests.add(digest);
                const load = _loadQueuedManifest(digest, fetchImpl, registryBaseUrl, options, writer, pendingDigests, queuedDigests, fetchedDigests, persistedDigests, edgeRecords, completed, getRegistryToken, onComplete).finally(() => {
                    activeLoads.delete(load);
                });
                activeLoads.add(load);
            }
            if (activeLoads.size > 0) {
                // Wake up as soon as any load settles so a free slot can be refilled.
                await Promise.race(activeLoads);
            }
        }
    }
    catch (error) {
        // Drain the remaining loads before reporting the failure; their own
        // outcomes are irrelevant once the first error is being propagated.
        for (const load of [...activeLoads]) {
            try {
                await load;
            }
            catch {
                // Superseded by `error`.
            }
        }
        throw error;
    }
    options.logger.info(`Starting manifest graph processing for ${edgeRecords.length} edges`);
    let persistedEdgeCount = 0;
    for (const edge of edgeRecords) {
        // Skip edges touching a manifest that was never stored (for example a missing one).
        if (!persistedDigests.has(edge.parentDigest) || !persistedDigests.has(edge.childDigest)) {
            continue;
        }
        writer.insertManifestEdge(edge);
        persistedEdgeCount += 1;
    }
    options.logger.info(`Inserted ${persistedEdgeCount} manifest edges; rebuilding reachability`);
    writer.rebuildManifestReachability();
    options.logger.info("Completed manifest graph processing");
}
|
|
49
|
+
/**
 * Loads one queued manifest, stores it, and queues every digest it references.
 *
 * A manifest the registry reports as missing (see `_isMissingManifestError`)
 * is logged and counted as done without storing anything; any other failure is
 * rethrown. `completed` is only used for the debug message.
 */
async function _loadQueuedManifest(digest, fetchImpl, registryBaseUrl, options, writer, pendingDigests, queuedDigests, fetchedDigests, persistedDigests, edgeRecords, completed, getRegistryToken, onComplete) {
    options.logger.debug(`Fetching manifest ${completed + 1}/${queuedDigests.size}: ${digest}`);
    const markFinished = () => {
        fetchedDigests.add(digest);
        onComplete();
    };
    const enqueue = (candidate) => {
        _enqueueDigest(candidate, pendingDigests, queuedDigests, fetchedDigests);
    };
    let manifest;
    try {
        const registryToken = await getRegistryToken();
        manifest = await loadManifestGraph(fetchImpl, registryBaseUrl, digest, registryToken, options);
    }
    catch (error) {
        if (!_isMissingManifestError(error)) {
            throw error;
        }
        options.logger.warn(`Skipping missing GHCR manifest ${digest}`);
        markFinished();
        return;
    }
    const { record, rawJson, descriptorRecords, edgeRecords: manifestEdges } = manifest;
    writer.insertManifest(record);
    persistedDigests.add(record.digest);
    writer.insertManifestPayload(record.digest, rawJson);
    for (const descriptor of descriptorRecords) {
        writer.insertManifestDescriptor(descriptor);
        enqueue(descriptor.childDigest);
    }
    // Edges are only collected here; the caller inserts them later.
    edgeRecords.push(...manifestEdges);
    for (const { parentDigest, childDigest } of manifestEdges) {
        enqueue(parentDigest);
        enqueue(childDigest);
    }
    markFinished();
}
|
|
79
|
+
/**
 * Reports whether a thrown value is an `Error` whose message matches a failed
 * GHCR manifest request with status 404.
 */
function _isMissingManifestError(error) {
    return error instanceof Error
        && /GHCR manifest request .* failed - status 404/.test(error.message);
}
|
|
85
|
+
/**
 * Returns a registry pull token, reusing the one cached in
 * `registryPullTokenState` while it has more than five seconds left.
 *
 * Concurrent callers share one in-progress `loadRegistryPullToken` call via
 * `registryPullTokenState.load`, which is cleared again once that call settles.
 */
async function _getRegistryPullToken(fetchImpl, registryBaseUrl, options, registryPullTokenState) {
    const cachedToken = registryPullTokenState.token;
    if (cachedToken && Date.now() < cachedToken.expiresAt - 5000) {
        return cachedToken;
    }
    let pendingLoad = registryPullTokenState.load;
    if (!pendingLoad) {
        pendingLoad = loadRegistryPullToken(fetchImpl, registryBaseUrl, options).finally(() => {
            registryPullTokenState.load = undefined;
        });
        registryPullTokenState.load = pendingLoad;
    }
    const freshToken = await pendingLoad;
    registryPullTokenState.token = freshToken;
    return freshToken;
}
|
|
98
|
+
function _enqueueDigest(digest, pendingDigests, queuedDigests, fetchedDigests) {
|
|
99
|
+
if (queuedDigests.has(digest) || fetchedDigests.has(digest)) {
|
|
100
|
+
return;
|
|
101
|
+
}
|
|
102
|
+
pendingDigests.push(digest);
|
|
103
|
+
queuedDigests.add(digest);
|
|
104
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { type FetchLike, type GitHubScanOptions } from "./_shared.js";
|
|
2
|
+
/** One element of a package-version page as returned by `loadPackageVersionPage`. */
export interface GitHubPackageVersionPageItem {
    /** Version identifier; becomes `versionId` after normalization. */
    id: number;
    /** Version name; stored as the version's digest after normalization. */
    name: string;
    /** Creation timestamp as delivered in the response. */
    created_at: string;
    /** Last-update timestamp as delivered in the response. */
    updated_at: string;
    /** Optional metadata; container versions may list their tags here. */
    metadata?: {
        container?: {
            tags?: string[];
        };
    };
}
|
|
13
|
+
/**
 * Loads one page of container package versions from the GitHub API.
 *
 * @param fetchImpl Fetch function used for the request.
 * @param githubApiBaseUrl Base URL the versions endpoint is resolved against.
 * @param options Scan options; supplies owner, package name, token and logger.
 * @param page Page number to request.
 * @returns The items of that page as parsed from the response body.
 */
export declare function loadPackageVersionPage(fetchImpl: FetchLike, githubApiBaseUrl: string, options: GitHubScanOptions, page: number): Promise<GitHubPackageVersionPageItem[]>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { buildFetchTransportErrorMessage, buildHttpErrorMessage, withFetchRetry } from "./_shared.js";
|
|
2
|
+
/**
 * Loads one page of container package versions from the GitHub API.
 *
 * The request goes through `withFetchRetry`; a response with status 429, 502,
 * 503 or 504 is turned into an error inside the attempt so the retry helper
 * can decide (via `_shouldRetryError`) whether to try again.
 *
 * @returns The page's items as parsed from the response body.
 * @throws Error when the request ultimately fails, when the final response is
 * not OK, or when the response body is not a JSON array.
 */
export async function loadPackageVersionPage(fetchImpl, githubApiBaseUrl, options, page) {
    const startTime = Date.now();
    const url = buildPackageVersionPageUrl(githubApiBaseUrl, options, page);
    let response;
    try {
        response = await withFetchRetry(async () => {
            const pageResponse = await fetchImpl(url, {
                headers: {
                    Accept: "application/vnd.github+json",
                    Authorization: `Bearer ${options.token}`,
                    "User-Agent": "ghcr-manager",
                    "X-GitHub-Api-Version": "2022-11-28"
                }
            });
            if (!pageResponse.ok && _shouldRetryStatus(pageResponse.status)) {
                throw new Error(await buildHttpErrorMessage(pageResponse, `GitHub Packages request for page ${page} failed`));
            }
            return pageResponse;
        }, {
            logger: options.logger,
            label: `GitHub Packages request for page ${page}`,
            shouldRetry: (error) => _shouldRetryError(error)
        });
    }
    catch (error) {
        throw new Error(buildFetchTransportErrorMessage(error, `GitHub Packages request for page ${page} failed`), {
            cause: error
        });
    }
    if (!response.ok) {
        // NOTE(review): this message omits the page number, unlike the ones
        // above. Left unchanged because callers may match on its exact text.
        throw new Error(await buildHttpErrorMessage(response, "GitHub Packages request failed"));
    }
    const pageItems = await response.json();
    // Everything downstream reads `.length` and iterates, so reject any other
    // JSON shape here with a clear message.
    if (!Array.isArray(pageItems)) {
        throw new Error(`GitHub Packages response for page ${page} was not a JSON array`);
    }
    options.logger.debug(`Loaded GitHub package-version page ${page} in ${Date.now() - startTime}ms (${pageItems.length} items)`);
    return pageItems;
}
|
|
38
|
+
/**
 * Builds the URL of one page (100 items per page) of the organization
 * container package versions endpoint. Owner and package name are
 * percent-encoded as single path segments.
 */
function buildPackageVersionPageUrl(githubApiBaseUrl, options, page) {
    const ownerSegment = encodeURIComponent(options.owner);
    const packageSegment = encodeURIComponent(options.packageName);
    const pageUrl = new URL(`/orgs/${ownerSegment}/packages/container/${packageSegment}/versions`, githubApiBaseUrl);
    const query = pageUrl.searchParams;
    query.set("per_page", "100");
    query.set("page", String(page));
    return pageUrl.toString();
}
|
|
44
|
+
/** Reports whether an HTTP status (429, 502, 503 or 504) should be retried. */
function _shouldRetryStatus(status) {
    switch (status) {
        case 429:
        case 502:
        case 503:
        case 504:
            return true;
        default:
            return false;
    }
}
|
|
47
|
+
/**
 * Reports whether a thrown value is worth retrying: it must be an `Error`
 * whose message mentions "fetch failed" or one of the statuses 429, 502, 503
 * or 504.
 */
function _shouldRetryError(error) {
    const retryableMessage = /fetch failed|status 429|status 502|status 503|status 504/;
    return error instanceof Error ? retryableMessage.test(error.message) : false;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { PackageVersionRecord, TagRecord } from "../../core/index.js";
|
|
2
|
+
import type { ScanWriter } from "../../db/index.js";
|
|
3
|
+
import { type GitHubPackageVersionPageItem } from "./_package-version-page-load.js";
|
|
4
|
+
import { type FetchLike, type GitHubScanOptions } from "./_shared.js";
|
|
5
|
+
/**
 * Loads every page of package versions and writes the versions, their raw
 * payloads and their tags through `writer`.
 *
 * @returns How many package versions and how many tags were processed.
 */
export declare function ingestPackageVersions(
    fetchImpl: FetchLike,
    githubApiBaseUrl: string,
    options: GitHubScanOptions,
    writer: ScanWriter
): Promise<{
    packageVersions: number;
    tags: number;
}>;
|
|
9
|
+
/**
 * Produces one tag record per string tag found in each version's container
 * metadata, sorted by tag name.
 */
export declare function buildTags(packageVersions: PackageVersionRecord[]): TagRecord[];
|
|
10
|
+
/**
 * Converts API page items into package-version records (the item's `name`
 * becomes the record's digest), sorted by ascending version id.
 */
export declare function normalizePackageVersions(packageVersions: GitHubPackageVersionPageItem[]): PackageVersionRecord[];
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { loadPackageVersionPage } from "./_package-version-page-load.js";
|
|
2
|
+
import { ingestParallelPaginated } from "./_parallel-paginated-ingest.js";
|
|
3
|
+
/**
 * Loads every page of package versions through `ingestParallelPaginated` and
 * writes each page via `writer`.
 *
 * @returns `packageVersions`: the item count reported by the paginated
 * ingest; `tags`: the number of tags counted across all written pages.
 */
export async function ingestPackageVersions(fetchImpl, githubApiBaseUrl, options, writer) {
    let tags = 0;
    const loadPage = (page) => loadPackageVersionPage(fetchImpl, githubApiBaseUrl, options, page);
    const writePage = (pageItems) => {
        _writePage(writer, pageItems);
        tags += _countTags(pageItems);
    };
    const { items: packageVersions } = await ingestParallelPaginated({
        logger: options.logger,
        loadPage,
        writePage
    });
    return { packageVersions, tags };
}
|
|
17
|
+
export function buildTags(packageVersions) {
|
|
18
|
+
const tags = [];
|
|
19
|
+
for (const version of packageVersions) {
|
|
20
|
+
const metadata = version.metadata?.container;
|
|
21
|
+
const tagNames = Array.isArray(metadata?.tags)
|
|
22
|
+
? metadata.tags.filter((tag) => typeof tag === "string")
|
|
23
|
+
: [];
|
|
24
|
+
for (const tagName of tagNames) {
|
|
25
|
+
tags.push({
|
|
26
|
+
tag: tagName,
|
|
27
|
+
digest: version.digest,
|
|
28
|
+
versionId: version.versionId
|
|
29
|
+
});
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
return tags.sort((left, right) => left.tag.localeCompare(right.tag));
|
|
33
|
+
}
|
|
34
|
+
export function normalizePackageVersions(packageVersions) {
|
|
35
|
+
return packageVersions
|
|
36
|
+
.map((version) => ({
|
|
37
|
+
versionId: version.id,
|
|
38
|
+
digest: version.name,
|
|
39
|
+
createdAt: version.created_at,
|
|
40
|
+
updatedAt: version.updated_at,
|
|
41
|
+
metadata: version.metadata
|
|
42
|
+
}))
|
|
43
|
+
.sort((left, right) => left.versionId - right.versionId);
|
|
44
|
+
}
|
|
45
|
+
/**
 * Writes one page: every normalized version with its raw item serialized as
 * JSON, followed by every tag derived from those versions.
 */
function _writePage(writer, pageItems) {
    const versions = normalizePackageVersions(pageItems);
    // Raw items keyed by id so each version can be stored beside its original payload.
    const rawItemsByVersionId = new Map();
    for (const pageItem of pageItems) {
        rawItemsByVersionId.set(pageItem.id, pageItem);
    }
    for (const version of versions) {
        const rawItem = rawItemsByVersionId.get(version.versionId);
        writer.insertPackageVersion(version);
        writer.insertPackageVersionPayload(version.versionId, JSON.stringify(rawItem));
    }
    for (const tag of buildTags(versions)) {
        writer.insertTag(tag);
    }
}
|
|
57
|
+
/** Counts the tag records that `buildTags` derives from one page of items. */
function _countTags(pageItems) {
    const versions = normalizePackageVersions(pageItems);
    const tags = buildTags(versions);
    return tags.length;
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { GitHubScanLogger } from "./_shared.js";
|
|
2
|
+
/** Callbacks and logger consumed by `ingestPaginated`. */
export interface PaginatedIngestOptions<T> {
    /** Loads the items of one page by page number. */
    loadPage(page: number): Promise<T[]>;
    /** Stores the items of one loaded page; may be synchronous or asynchronous. */
    writePage(pageItems: T[], page: number): Promise<void> | void;
    /** Receives progress messages. */
    logger: GitHubScanLogger;
}
|
|
7
|
+
/** Totals reported by `ingestPaginated`. */
export interface PaginatedIngestResult {
    /** Number of the last page that was written (0 when nothing was written). */
    pages: number;
    /** Total number of items across all written pages. */
    items: number;
}
|
|
11
|
+
/**
 * Loads and writes pages one after another, starting at page 1, until a page
 * is empty or holds fewer items than a full page.
 */
export declare function ingestPaginated<T>(options: PaginatedIngestOptions<T>): Promise<PaginatedIngestResult>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { paginatedIngestProgressIntervalPages } from "../../tuning/index.js";
|
|
2
|
+
/** A page with fewer items than this is treated as the last page. */
const _DEFAULT_PAGE_SIZE = 100;
/** Noun used in progress messages. */
const _PROGRESS_LABEL = "GitHub package-version pages";
/**
 * Loads and writes pages sequentially, starting at page 1.
 *
 * Stops at the first empty page (which is not written) or after the first
 * page shorter than `_DEFAULT_PAGE_SIZE`. Progress is logged for page 1, for
 * every `paginatedIngestProgressIntervalPages`-th page, for the short final
 * page, and once more at the end if the last written page was not logged.
 *
 * @returns The number of the last written page and the total item count.
 */
export async function ingestPaginated(options) {
    let pages = 0;
    let items = 0;
    let lastLoggedPage = 0;
    let page = 1;
    while (true) {
        const pageItems = await options.loadPage(page);
        if (pageItems.length === 0) {
            break;
        }
        await options.writePage(pageItems, page);
        pages = page;
        items += pageItems.length;
        const isShortPage = pageItems.length < _DEFAULT_PAGE_SIZE;
        const isProgressPage = page === 1 || page % paginatedIngestProgressIntervalPages === 0;
        if (isProgressPage || isShortPage) {
            options.logger.info(`Loaded ${_PROGRESS_LABEL} ${page} (${items} items total)`);
            lastLoggedPage = page;
        }
        if (isShortPage) {
            break;
        }
        page += 1;
    }
    const finalPageUnreported = pages > 0 && lastLoggedPage !== pages;
    if (finalPageUnreported) {
        options.logger.info(`Loaded ${_PROGRESS_LABEL} ${pages} (${items} items total)`);
    }
    return { pages, items };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { GitHubScanLogger } from "./_shared.js";
|
|
2
|
+
/** Callbacks and logger consumed by `ingestParallelPaginated`. */
export interface ParallelPaginatedIngestOptions<T> {
    /** Loads the items of one page by page number; may be called for several pages at once. */
    loadPage(page: number): Promise<T[]>;
    /** Stores the items of one loaded page; may be synchronous or asynchronous. */
    writePage(pageItems: T[], page: number): Promise<void> | void;
    /** Receives progress messages. */
    logger: GitHubScanLogger;
}
|
|
7
|
+
/** Totals reported by `ingestParallelPaginated`. */
export interface ParallelPaginatedIngestResult {
    /** Highest page number that was written (0 when nothing was written). */
    pages: number;
    /** Total number of items across all written pages. */
    items: number;
}
|
|
11
|
+
/**
 * Loads and writes page 1 first, then loads the remaining pages with several
 * concurrent workers until an empty or short page marks the end.
 */
export declare function ingestParallelPaginated<T>(options: ParallelPaginatedIngestOptions<T>): Promise<ParallelPaginatedIngestResult>;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { packageVersionPageFetchConcurrency, paginatedIngestProgressIntervalPages } from "../../tuning/index.js";
|
|
2
|
+
/** A page with fewer items than this is treated as the last page. */
const _DEFAULT_PAGE_SIZE = 100;
/** Noun used in progress messages. */
const _PROGRESS_LABEL = "GitHub package-version pages";
/**
 * Loads page 1 on its own, then fetches the following pages with
 * `packageVersionPageFetchConcurrency` workers that each claim the next
 * unclaimed page number.
 *
 * An empty page or a page shorter than `_DEFAULT_PAGE_SIZE` lowers the shared
 * stop marker so no worker claims a page beyond the end; pages already
 * claimed at that moment are still loaded.
 *
 * @returns The highest written page number and the total item count.
 */
export async function ingestParallelPaginated(options) {
    const firstPageItems = await options.loadPage(1);
    if (firstPageItems.length === 0) {
        return { pages: 0, items: 0 };
    }
    await options.writePage(firstPageItems, 1);
    let pages = 1;
    let items = firstPageItems.length;
    options.logger.info(`Loaded ${_PROGRESS_LABEL} 1 (${items} items total)`);
    let lastLoggedPage = 1;
    if (firstPageItems.length < _DEFAULT_PAGE_SIZE) {
        return { pages, items };
    }
    let nextPage = 2;
    let stopPageExclusive = Number.POSITIVE_INFINITY;
    const runWorker = async () => {
        while (nextPage < stopPageExclusive) {
            // Claim the page number before awaiting so no other worker takes it.
            const page = nextPage;
            nextPage += 1;
            const pageItems = await options.loadPage(page);
            if (pageItems.length === 0) {
                stopPageExclusive = Math.min(stopPageExclusive, page);
                return;
            }
            await options.writePage(pageItems, page);
            pages = Math.max(pages, page);
            items += pageItems.length;
            const isShortPage = pageItems.length < _DEFAULT_PAGE_SIZE;
            if (page % paginatedIngestProgressIntervalPages === 0 || isShortPage) {
                options.logger.info(`Loaded ${_PROGRESS_LABEL} ${page} (${items} items total)`);
                lastLoggedPage = Math.max(lastLoggedPage, page);
            }
            if (isShortPage) {
                stopPageExclusive = Math.min(stopPageExclusive, page + 1);
                return;
            }
        }
    };
    const workers = Array.from({ length: packageVersionPageFetchConcurrency }, () => runWorker());
    await Promise.all(workers);
    const finalPageUnreported = pages > 0 && lastLoggedPage !== pages;
    if (finalPageUnreported) {
        options.logger.info(`Loaded ${_PROGRESS_LABEL} ${pages} (${items} items total)`);
    }
    return { pages, items };
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { type FetchLike, type GitHubScanOptions } from "./_shared.js";
|
|
2
|
+
/** A registry pull token together with its expiry. */
export interface RegistryPullToken {
    /** Token value sent as the bearer credential on manifest requests. */
    token: string;
    /** Expiry instant on the same scale as `Date.now()`, which it is compared against. */
    expiresAt: number;
}
|
|
6
|
+
/**
 * Obtains a registry pull token for the scanned package.
 *
 * NOTE(review): the implementation is not part of this excerpt; how the token
 * is requested and how `expiresAt` is derived should be confirmed there.
 */
export declare function loadRegistryPullToken(fetchImpl: FetchLike, registryBaseUrl: string, options: GitHubScanOptions): Promise<RegistryPullToken>;
|