hoolix 0.0.1-beta.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +337 -0
- package/STABILITY.md +109 -0
- package/bin/hoolix.js +50 -0
- package/bin/mcp-portal.js +12 -0
- package/bin/postinstall.js +61 -0
- package/dist/app/contracts.d.ts +105 -0
- package/dist/app/contracts.d.ts.map +1 -0
- package/dist/app/contracts.js +2 -0
- package/dist/app/contracts.js.map +1 -0
- package/dist/app/events.d.ts +13 -0
- package/dist/app/events.d.ts.map +1 -0
- package/dist/app/events.js +13 -0
- package/dist/app/events.js.map +1 -0
- package/dist/app/services/analytics.d.ts +42 -0
- package/dist/app/services/analytics.d.ts.map +1 -0
- package/dist/app/services/analytics.js +106 -0
- package/dist/app/services/analytics.js.map +1 -0
- package/dist/app/services/catalog.d.ts +14 -0
- package/dist/app/services/catalog.d.ts.map +1 -0
- package/dist/app/services/catalog.js +26 -0
- package/dist/app/services/catalog.js.map +1 -0
- package/dist/app/services/credentials.d.ts +42 -0
- package/dist/app/services/credentials.d.ts.map +1 -0
- package/dist/app/services/credentials.js +143 -0
- package/dist/app/services/credentials.js.map +1 -0
- package/dist/app/services/servers.d.ts +15 -0
- package/dist/app/services/servers.d.ts.map +1 -0
- package/dist/app/services/servers.js +445 -0
- package/dist/app/services/servers.js.map +1 -0
- package/dist/catalog/community.d.ts +19 -0
- package/dist/catalog/community.d.ts.map +1 -0
- package/dist/catalog/community.js +53 -0
- package/dist/catalog/community.js.map +1 -0
- package/dist/catalog/templates.d.ts +436 -0
- package/dist/catalog/templates.d.ts.map +1 -0
- package/dist/catalog/templates.js +489 -0
- package/dist/catalog/templates.js.map +1 -0
- package/dist/commands/audit.d.ts +2 -0
- package/dist/commands/audit.d.ts.map +1 -0
- package/dist/commands/audit.js +122 -0
- package/dist/commands/audit.js.map +1 -0
- package/dist/commands/bundle.d.ts +11 -0
- package/dist/commands/bundle.d.ts.map +1 -0
- package/dist/commands/bundle.js +299 -0
- package/dist/commands/bundle.js.map +1 -0
- package/dist/commands/clients.d.ts +6 -0
- package/dist/commands/clients.d.ts.map +1 -0
- package/dist/commands/clients.js +242 -0
- package/dist/commands/clients.js.map +1 -0
- package/dist/commands/completion.d.ts +18 -0
- package/dist/commands/completion.d.ts.map +1 -0
- package/dist/commands/completion.js +495 -0
- package/dist/commands/completion.js.map +1 -0
- package/dist/commands/connect.d.ts +10 -0
- package/dist/commands/connect.d.ts.map +1 -0
- package/dist/commands/connect.js +463 -0
- package/dist/commands/connect.js.map +1 -0
- package/dist/commands/create.d.ts +2 -0
- package/dist/commands/create.d.ts.map +1 -0
- package/dist/commands/create.js +607 -0
- package/dist/commands/create.js.map +1 -0
- package/dist/commands/delete.d.ts +2 -0
- package/dist/commands/delete.d.ts.map +1 -0
- package/dist/commands/delete.js +44 -0
- package/dist/commands/delete.js.map +1 -0
- package/dist/commands/doctor.d.ts +2 -0
- package/dist/commands/doctor.d.ts.map +1 -0
- package/dist/commands/doctor.js +259 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/export.d.ts +2 -0
- package/dist/commands/export.d.ts.map +1 -0
- package/dist/commands/export.js +93 -0
- package/dist/commands/export.js.map +1 -0
- package/dist/commands/gui.d.ts +2 -0
- package/dist/commands/gui.d.ts.map +1 -0
- package/dist/commands/gui.js +19 -0
- package/dist/commands/gui.js.map +1 -0
- package/dist/commands/import.d.ts +2 -0
- package/dist/commands/import.d.ts.map +1 -0
- package/dist/commands/import.js +102 -0
- package/dist/commands/import.js.map +1 -0
- package/dist/commands/info.d.ts +2 -0
- package/dist/commands/info.d.ts.map +1 -0
- package/dist/commands/info.js +151 -0
- package/dist/commands/info.js.map +1 -0
- package/dist/commands/list.d.ts +2 -0
- package/dist/commands/list.d.ts.map +1 -0
- package/dist/commands/list.js +90 -0
- package/dist/commands/list.js.map +1 -0
- package/dist/commands/reindex.d.ts +2 -0
- package/dist/commands/reindex.d.ts.map +1 -0
- package/dist/commands/reindex.js +186 -0
- package/dist/commands/reindex.js.map +1 -0
- package/dist/commands/rotate.d.ts +2 -0
- package/dist/commands/rotate.d.ts.map +1 -0
- package/dist/commands/rotate.js +67 -0
- package/dist/commands/rotate.js.map +1 -0
- package/dist/commands/secrets.d.ts +10 -0
- package/dist/commands/secrets.d.ts.map +1 -0
- package/dist/commands/secrets.js +293 -0
- package/dist/commands/secrets.js.map +1 -0
- package/dist/commands/start.d.ts +2 -0
- package/dist/commands/start.d.ts.map +1 -0
- package/dist/commands/start.js +234 -0
- package/dist/commands/start.js.map +1 -0
- package/dist/commands/stats.d.ts +2 -0
- package/dist/commands/stats.d.ts.map +1 -0
- package/dist/commands/stats.js +220 -0
- package/dist/commands/stats.js.map +1 -0
- package/dist/commands/stop.d.ts +2 -0
- package/dist/commands/stop.d.ts.map +1 -0
- package/dist/commands/stop.js +24 -0
- package/dist/commands/stop.js.map +1 -0
- package/dist/commands/templates.d.ts +2 -0
- package/dist/commands/templates.d.ts.map +1 -0
- package/dist/commands/templates.js +168 -0
- package/dist/commands/templates.js.map +1 -0
- package/dist/commands/trial.d.ts +2 -0
- package/dist/commands/trial.d.ts.map +1 -0
- package/dist/commands/trial.js +61 -0
- package/dist/commands/trial.js.map +1 -0
- package/dist/commands/uninstall.d.ts +2 -0
- package/dist/commands/uninstall.d.ts.map +1 -0
- package/dist/commands/uninstall.js +114 -0
- package/dist/commands/uninstall.js.map +1 -0
- package/dist/commands/update.d.ts +2 -0
- package/dist/commands/update.d.ts.map +1 -0
- package/dist/commands/update.js +104 -0
- package/dist/commands/update.js.map +1 -0
- package/dist/commands/verify.d.ts +2 -0
- package/dist/commands/verify.d.ts.map +1 -0
- package/dist/commands/verify.js +301 -0
- package/dist/commands/verify.js.map +1 -0
- package/dist/core/config.d.ts +25 -0
- package/dist/core/config.d.ts.map +1 -0
- package/dist/core/config.js +54 -0
- package/dist/core/config.js.map +1 -0
- package/dist/core/errors.d.ts +25 -0
- package/dist/core/errors.d.ts.map +1 -0
- package/dist/core/errors.js +53 -0
- package/dist/core/errors.js.map +1 -0
- package/dist/core/logger.d.ts +3 -0
- package/dist/core/logger.d.ts.map +1 -0
- package/dist/core/logger.js +12 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/paths.d.ts +15 -0
- package/dist/core/paths.d.ts.map +1 -0
- package/dist/core/paths.js +51 -0
- package/dist/core/paths.js.map +1 -0
- package/dist/core/registry.d.ts +474 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +186 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/core/updater.d.ts +16 -0
- package/dist/core/updater.d.ts.map +1 -0
- package/dist/core/updater.js +317 -0
- package/dist/core/updater.js.map +1 -0
- package/dist/core/version.d.ts +2 -0
- package/dist/core/version.d.ts.map +1 -0
- package/dist/core/version.js +3 -0
- package/dist/core/version.js.map +1 -0
- package/dist/index.d.ts +13 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +270 -0
- package/dist/index.js.map +1 -0
- package/dist/ingestion/chunker.d.ts +13 -0
- package/dist/ingestion/chunker.d.ts.map +1 -0
- package/dist/ingestion/chunker.js +107 -0
- package/dist/ingestion/chunker.js.map +1 -0
- package/dist/ingestion/cleaners.d.ts +10 -0
- package/dist/ingestion/cleaners.d.ts.map +1 -0
- package/dist/ingestion/cleaners.js +61 -0
- package/dist/ingestion/cleaners.js.map +1 -0
- package/dist/ingestion/detectors.d.ts +5 -0
- package/dist/ingestion/detectors.d.ts.map +1 -0
- package/dist/ingestion/detectors.js +25 -0
- package/dist/ingestion/detectors.js.map +1 -0
- package/dist/ingestion/fetchers.d.ts +38 -0
- package/dist/ingestion/fetchers.d.ts.map +1 -0
- package/dist/ingestion/fetchers.js +296 -0
- package/dist/ingestion/fetchers.js.map +1 -0
- package/dist/ingestion/github.d.ts +60 -0
- package/dist/ingestion/github.d.ts.map +1 -0
- package/dist/ingestion/github.js +314 -0
- package/dist/ingestion/github.js.map +1 -0
- package/dist/ingestion/pipeline.d.ts +3 -0
- package/dist/ingestion/pipeline.d.ts.map +1 -0
- package/dist/ingestion/pipeline.js +160 -0
- package/dist/ingestion/pipeline.js.map +1 -0
- package/dist/ingestion/types.d.ts +51 -0
- package/dist/ingestion/types.d.ts.map +1 -0
- package/dist/ingestion/types.js +2 -0
- package/dist/ingestion/types.js.map +1 -0
- package/dist/lib/auth.d.ts +2 -0
- package/dist/lib/auth.d.ts.map +1 -0
- package/dist/lib/auth.js +6 -0
- package/dist/lib/auth.js.map +1 -0
- package/dist/lib/embedding.d.ts +10 -0
- package/dist/lib/embedding.d.ts.map +1 -0
- package/dist/lib/embedding.js +21 -0
- package/dist/lib/embedding.js.map +1 -0
- package/dist/mcp/host.d.ts +16 -0
- package/dist/mcp/host.d.ts.map +1 -0
- package/dist/mcp/host.js +307 -0
- package/dist/mcp/host.js.map +1 -0
- package/dist/mcp/proxy-host.d.ts +25 -0
- package/dist/mcp/proxy-host.d.ts.map +1 -0
- package/dist/mcp/proxy-host.js +393 -0
- package/dist/mcp/proxy-host.js.map +1 -0
- package/dist/mcp/stdio-host.d.ts +19 -0
- package/dist/mcp/stdio-host.d.ts.map +1 -0
- package/dist/mcp/stdio-host.js +175 -0
- package/dist/mcp/stdio-host.js.map +1 -0
- package/dist/process/manager.d.ts +74 -0
- package/dist/process/manager.d.ts.map +1 -0
- package/dist/process/manager.js +322 -0
- package/dist/process/manager.js.map +1 -0
- package/dist/rag/models.d.ts +30 -0
- package/dist/rag/models.d.ts.map +1 -0
- package/dist/rag/models.js +30 -0
- package/dist/rag/models.js.map +1 -0
- package/dist/rag/store.d.ts +63 -0
- package/dist/rag/store.d.ts.map +1 -0
- package/dist/rag/store.js +505 -0
- package/dist/rag/store.js.map +1 -0
- package/dist/rag/types.d.ts +56 -0
- package/dist/rag/types.d.ts.map +1 -0
- package/dist/rag/types.js +2 -0
- package/dist/rag/types.js.map +1 -0
- package/dist/sources/plugins.d.ts +25 -0
- package/dist/sources/plugins.d.ts.map +1 -0
- package/dist/sources/plugins.js +55 -0
- package/dist/sources/plugins.js.map +1 -0
- package/dist/sources/registry.d.ts +19 -0
- package/dist/sources/registry.d.ts.map +1 -0
- package/dist/sources/registry.js +183 -0
- package/dist/sources/registry.js.map +1 -0
- package/dist/sources/types.d.ts +361 -0
- package/dist/sources/types.d.ts.map +1 -0
- package/dist/sources/types.js +59 -0
- package/dist/sources/types.js.map +1 -0
- package/dist/tui/index.d.ts +22 -0
- package/dist/tui/index.d.ts.map +1 -0
- package/dist/tui/index.js +711 -0
- package/dist/tui/index.js.map +1 -0
- package/dist/ui/format.d.ts +27 -0
- package/dist/ui/format.d.ts.map +1 -0
- package/dist/ui/format.js +80 -0
- package/dist/ui/format.js.map +1 -0
- package/dist/ui/help.d.ts +2 -0
- package/dist/ui/help.d.ts.map +1 -0
- package/dist/ui/help.js +106 -0
- package/dist/ui/help.js.map +1 -0
- package/dist/web/assets.d.ts +2 -0
- package/dist/web/assets.d.ts.map +1 -0
- package/dist/web/assets.js +659 -0
- package/dist/web/assets.js.map +1 -0
- package/dist/web/server.d.ts +9 -0
- package/dist/web/server.d.ts.map +1 -0
- package/dist/web/server.js +349 -0
- package/dist/web/server.js.map +1 -0
- package/package.json +105 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { FetchResult } from './fetchers.js';
|
|
2
|
+
import { fetchTextWithFallback as defaultFetchTextWithFallback, fetchWithRetry as defaultFetchWithRetry } from './fetchers.js';
|
|
3
|
+
/** The pair of fetch helpers from ./fetchers.js that setGitHubFetchersForTests can override. */
interface GitHubFetchers {
|
|
4
|
+
fetchTextWithFallback: typeof defaultFetchTextWithFallback;
|
|
5
|
+
fetchWithRetry: typeof defaultFetchWithRetry;
|
|
6
|
+
}
|
|
7
|
+
/** Test hook: overrides the fetch helpers used by this module. Helpers that are omitted, and a `null` argument, fall back to the defaults. */
export declare function setGitHubFetchersForTests(fetchers: Partial<GitHubFetchers> | null): void;
|
|
8
|
+
/**
 * Parsed form of a GitHub URL, as produced by parseGitHubRepoUrl.
 * - owner: first path segment of the URL.
 * - repo: second path segment of the URL.
 * - ref: the segment following /tree/ or /blob/, otherwise 'HEAD'.
 * - subpath: remaining path segments joined with '/', when there are any.
 * - originalUrl: the URL exactly as it was passed to the parser.
 */
export interface GitHubRepoInfo {
|
|
9
|
+
owner: string;
|
|
10
|
+
repo: string;
|
|
11
|
+
ref: string;
|
|
12
|
+
subpath?: string;
|
|
13
|
+
originalUrl: string;
|
|
14
|
+
}
|
|
15
|
+
/** Returns the GITHUB_TOKEN environment variable, else GH_TOKEN, else undefined (empty values count as unset). */
export declare function getGitHubToken(): string | undefined;
|
|
16
|
+
/** True when `url` parses as a URL whose host is github.com or www.github.com; the path is not inspected. */
export declare function isGitHubRepoUrl(url: string): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Parse GitHub repo URL into owner/repo/ref/subpath.
|
|
19
|
+
* Handles /tree/REF , /blob/ , trailing .git, subdirs.
|
|
20
|
+
* Defaults ref to 'HEAD' (GitHub resolves to default branch).
* Returns null when the URL cannot be parsed, is not on github.com / www.github.com,
* or has fewer than two path segments.
|
|
21
|
+
*/
|
|
22
|
+
export declare function parseGitHubRepoUrl(url: string): GitHubRepoInfo | null;
|
|
23
|
+
/** Try common llms + README locations (raw) for a GitHub repo. Returns first usable.
|
|
24
|
+
* Now supports private repos by passing Authorization header when token is provided.
* "Usable" means the fetched text is longer than 200 characters; resolves to null when no candidate qualifies.
* opts.token falls back to getGitHubToken(); opts.timeout is in milliseconds and defaults to 15000.
|
|
25
|
+
*/
|
|
26
|
+
export declare function fetchPrimaryGitHubContent(info: GitHubRepoInfo, opts?: {
|
|
27
|
+
token?: string;
|
|
28
|
+
timeout?: number;
|
|
29
|
+
}): Promise<FetchResult | null>;
|
|
30
|
+
/**
|
|
31
|
+
* Discover candidate doc files for a GitHub repo.
|
|
32
|
+
* - If token: use /git/trees?recursive=1 (rich, respects .gitignore if present).
|
|
33
|
+
* - Else: return a small set of likely README + docs/ .md + llms candidates (no API call).
|
|
34
|
+
* Returns list of {rawUrl, blobUrl, path} sorted with llms/README first.
* opts.token falls back to getGitHubToken(); opts.maxPages caps the number of entries returned (default 80).
|
|
35
|
+
*/
|
|
36
|
+
export declare function discoverGitHubDocFiles(info: GitHubRepoInfo, opts?: {
|
|
37
|
+
token?: string;
|
|
38
|
+
maxPages?: number;
|
|
39
|
+
onProgress?: (c: number, t: number) => void;
|
|
40
|
+
}): Promise<Array<{
|
|
41
|
+
rawUrl: string;
|
|
42
|
+
blobUrl: string;
|
|
43
|
+
path: string;
|
|
44
|
+
}>>;
|
|
45
|
+
/**
|
|
46
|
+
* High-level: given a github URL, return primary (best llms/README) + additional pages for multi-file ingest.
|
|
47
|
+
* Reuses existing fetch* helpers + parse for manifests.
|
|
48
|
+
* Always falls back safely.
* Resolves to null when the URL is not a parsable GitHub URL or no primary document could be fetched.
* opts.discoverLlms: pass false to skip fetching additional pages. opts.maxPages defaults to 80.
* `pages` is omitted from the result when no additional page was fetched.
|
|
49
|
+
*/
|
|
50
|
+
export declare function fetchGitHubRepoDocumentation(url: string, opts?: {
|
|
51
|
+
discoverLlms?: boolean;
|
|
52
|
+
maxPages?: number;
|
|
53
|
+
onProgress?: (completed: number, total: number) => void;
|
|
54
|
+
token?: string;
|
|
55
|
+
}): Promise<{
|
|
56
|
+
primary: FetchResult;
|
|
57
|
+
pages?: FetchResult[];
|
|
58
|
+
} | null>;
|
|
59
|
+
export {};
|
|
60
|
+
//# sourceMappingURL=github.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github.d.ts","sourceRoot":"","sources":["../../src/ingestion/github.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AACjD,OAAO,EACL,qBAAqB,IAAI,4BAA4B,EACrD,cAAc,IAAI,qBAAqB,EACxC,MAAM,eAAe,CAAC;AAEvB,UAAU,cAAc;IACtB,qBAAqB,EAAE,OAAO,4BAA4B,CAAC;IAC3D,cAAc,EAAE,OAAO,qBAAqB,CAAC;CAC9C;AAOD,wBAAgB,yBAAyB,CAAC,QAAQ,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,IAAI,GAAG,IAAI,CAKxF;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,wBAAgB,cAAc,IAAI,MAAM,GAAG,SAAS,CAEnD;AAED,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAOpD;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,GAAG,IAAI,CA0BrE;AAwBD;;GAEG;AACH,wBAAsB,yBAAyB,CAC7C,IAAI,EAAE,cAAc,EACpB,IAAI,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAA;CAAO,GAC9C,OAAO,CAAC,WAAW,GAAG,IAAI,CAAC,CAqD7B;AAwBD;;;;;GAKG;AACH,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,cAAc,EACpB,IAAI,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,KAAK,IAAI,CAAA;CAAO,GAC5F,OAAO,CAAC,KAAK,CAAC;IAAE,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAwFnE;AAED;;;;GAIG;AACH,wBAAsB,4BAA4B,CAChD,GAAG,EAAE,MAAM,EACX,IAAI,GAAE;IACJ,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;IACxD,KAAK,CAAC,EAAE,MAAM,CAAC;CACX,GACL,OAAO,CAAC;IAAE,OAAO,EAAE,WAAW,CAAC;IAAC,KAAK,CAAC,EAAE,WAAW,EAAE,CAAA;CAAE,GAAG,IAAI,CAAC,CAuEjE"}
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import { logger } from '../core/logger.js';
|
|
2
|
+
import { fetchTextWithFallback as defaultFetchTextWithFallback, fetchWithRetry as defaultFetchWithRetry, } from './fetchers.js';
|
|
3
|
+
/**
 * Fetch helpers used for the GitHub requests made by this module.
 * Held in a reassignable binding so tests can swap in stubs.
 */
let githubFetchers = {
    fetchTextWithFallback: defaultFetchTextWithFallback,
    fetchWithRetry: defaultFetchWithRetry,
};
/**
 * Test hook: replace one or both fetch helpers.
 *
 * @param {object|null} fetchers - Partial overrides. A helper that is missing or falsy,
 *   or a null/undefined argument, resets that helper to its default implementation.
 * @returns {void}
 */
export function setGitHubFetchersForTests(fetchers) {
    const { fetchTextWithFallback, fetchWithRetry } = fetchers ?? {};
    githubFetchers = {
        fetchTextWithFallback: fetchTextWithFallback || defaultFetchTextWithFallback,
        fetchWithRetry: fetchWithRetry || defaultFetchWithRetry,
    };
}
|
|
13
|
+
/**
 * Look up a GitHub token in the environment.
 *
 * @returns {string|undefined} The first non-empty value of GITHUB_TOKEN, then GH_TOKEN;
 *   undefined when neither is set.
 */
export function getGitHubToken() {
    for (const variable of ['GITHUB_TOKEN', 'GH_TOKEN']) {
        const value = process.env[variable];
        if (value) {
            return value;
        }
    }
    return undefined;
}
|
|
16
|
+
/**
 * Check whether a string is a URL hosted on GitHub.
 * Only the hostname is inspected; the path is not validated.
 *
 * @param {string} url - Candidate URL.
 * @returns {boolean} true for github.com / www.github.com, false otherwise
 *   (including input that does not parse as a URL).
 */
export function isGitHubRepoUrl(url) {
    let hostname;
    try {
        ({ hostname } = new URL(url));
    }
    catch {
        return false;
    }
    return ['github.com', 'www.github.com'].includes(hostname);
}
|
|
25
|
+
/**
 * Parse a GitHub repo URL into owner/repo/ref/subpath.
 * Handles /tree/REF, /blob/REF, a ".git" suffix on the repository name, and subdirectories.
 * Defaults ref to 'HEAD' (GitHub resolves to default branch).
 *
 * The ".git" suffix is removed from the repository segment itself, so it is
 * also handled when the URL has a trailing slash, query string or fragment,
 * and a subpath that happens to end in ".git" is left untouched.
 * Empty path segments (e.g. "owner//repo") are skipped.
 *
 * Limitation: the ref is taken as a single path segment, so branch names that
 * contain "/" cannot be told apart from a subpath.
 *
 * @param {string} url - URL to parse.
 * @returns {{owner: string, repo: string, ref: string, subpath: (string|undefined), originalUrl: string}|null}
 *   Parsed info, or null when the input is not a string, is not a valid URL,
 *   is not hosted on github.com / www.github.com, or lacks an owner/repo pair.
 */
export function parseGitHubRepoUrl(url) {
    if (typeof url !== 'string')
        return null;
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return null;
    }
    if (parsed.hostname !== 'github.com' && parsed.hostname !== 'www.github.com')
        return null;
    const parts = parsed.pathname.split('/').filter(Boolean);
    if (parts.length < 2)
        return null;
    const owner = parts[0];
    const repo = parts[1].replace(/\.git$/, '');
    if (!repo)
        return null;
    let ref = 'HEAD';
    let subpath;
    // /owner/repo/tree/REF[/sub]
    // /owner/repo[/sub]
    if (parts[2] === 'tree' || parts[2] === 'blob') {
        ref = parts[3] || 'HEAD';
        if (parts.length > 4)
            subpath = parts.slice(4).join('/');
    }
    else if (parts.length > 2) {
        subpath = parts.slice(2).join('/');
    }
    return { owner, repo, ref, subpath, originalUrl: url };
}
|
|
58
|
+
/**
 * Build the raw.githubusercontent.com URL prefix for a repo at its ref.
 *
 * @param {{owner: string, repo: string, ref: string}} info - Parsed repo info.
 * @returns {string} Prefix without a trailing slash.
 */
function rawBase(info) {
    const { owner, repo, ref } = info;
    return `https://raw.githubusercontent.com/${owner}/${repo}/${ref}`;
}
|
|
61
|
+
/**
 * Build the human-facing github.com "blob" URL prefix for a repo.
 * A ref of 'HEAD' is rewritten to 'main' as a best-effort guess for readable URLs.
 *
 * @param {{owner: string, repo: string, ref: string}} info - Parsed repo info.
 * @returns {string} Prefix without a trailing slash.
 */
function blobBase(info) {
    let branch = info.ref;
    if (branch === 'HEAD') {
        branch = 'main'; // best-effort for human urls
    }
    return `https://github.com/${info.owner}/${info.repo}/blob/${branch}`;
}
|
|
65
|
+
/**
 * Build the api.github.com REST prefix for a repository.
 *
 * @param {{owner: string, repo: string}} info - Parsed repo info.
 * @returns {string} Prefix without a trailing slash.
 */
function apiBase(info) {
    const repoSlug = `${info.owner}/${info.repo}`;
    return `https://api.github.com/repos/${repoSlug}`;
}
|
|
68
|
+
/**
 * Build request headers for GitHub REST API calls.
 *
 * @param {string|undefined} token - Token to send; no Authorization header is added when falsy.
 * @returns {Object<string, string>} User-Agent and Accept headers, plus Authorization when a token is given.
 */
function authHeaders(token) {
    const baseHeaders = {
        'User-Agent': 'hoolix/0.2 (https://github.com/JayLLM/hoolix)',
        Accept: 'application/vnd.github+json',
    };
    return token
        ? { ...baseHeaders, Authorization: `token ${token}` }
        : baseHeaders;
}
|
|
77
|
+
/**
 * Try common llms + README locations (raw) for a GitHub repo and return the first usable one.
 * Sends an Authorization header when a token is available, so private repos work.
 *
 * Candidates are probed in priority order: llms-full.txt / llms.txt under the
 * subpath, then at the root, then under docs/, followed by the README variants.
 * A candidate is "usable" when its text is longer than 200 characters.
 *
 * @param {{owner: string, repo: string, ref: string, subpath?: string}} info - Parsed repo info.
 * @param {{token?: string, timeout?: number}} [opts] - token defaults to getGitHubToken();
 *   timeout is per request in milliseconds (default 15000).
 * @returns {Promise<{content: string, contentType: string, url: string}|null>}
 *   The content with a github.com blob URL for grounding, or null when nothing usable was found.
 */
export async function fetchPrimaryGitHubContent(info, opts = {}) {
    const token = opts.token || getGitHubToken();
    const base = rawBase(info);
    const sub = info.subpath ? `/${info.subpath.replace(/^\//, '')}` : '';
    // llms priority (full then txt) at subpath + root + /docs, then READMEs.
    // The Set keeps insertion order and drops the duplicates that arise when there is
    // no subpath, so the same URL is never requested twice.
    const candidates = [...new Set([
        `${base}${sub}/llms-full.txt`,
        `${base}${sub}/llms.txt`,
        `${base}/llms-full.txt`,
        `${base}/llms.txt`,
        `${base}/docs/llms-full.txt`,
        `${base}/docs/llms.txt`,
        `${base}/README.md`,
        `${base}/docs/README.md`,
        `${base}/doc/README.md`,
        `${base}/README`,
    ])];
    const headers = { Accept: 'text/plain' };
    if (token) {
        headers['Authorization'] = `token ${token}`;
    }
    for (const cand of candidates) {
        try {
            const { text, contentType } = await githubFetchers.fetchTextWithFallback(cand, {
                timeout: opts.timeout || 15000,
                headers,
            });
            if (text && text.length > 200) {
                // Return a "blob" style url for grounding (human friendly).
                // Every candidate starts with `base`, so swap that prefix only; rewriting
                // by pattern could clobber an owner or directory that is literally named "raw".
                const blobUrl = `${blobBase(info)}${cand.slice(base.length)}`;
                return {
                    content: text,
                    contentType: contentType || 'text/markdown',
                    url: blobUrl,
                };
            }
        }
        catch (err) {
            // For private repos, 401/403 usually means missing/invalid token or no access.
            // 404 is common while probing optional llms/README candidates, so keep it quiet.
            if (token && (err?.message?.includes('401') || err?.message?.includes('403'))) {
                logger.warn(`GitHub raw fetch failed for private repo candidate ${cand}. Check GITHUB_TOKEN has 'repo' scope (classic) or appropriate fine-grained permissions.`);
            }
            else if (!token && (err?.message?.includes('401') || err?.message?.includes('403') || err?.message?.includes('404'))) {
                // Common for private repo without token; debug level only to avoid spam, user sees via higher level
                logger.debug(`GitHub candidate ${cand} failed (likely private; set GITHUB_TOKEN for access).`);
            }
        }
    }
    return null;
}
|
|
121
|
+
/** Directory names (lower-case) whose contents are never indexed. */
const HARD_IGNORED_DIRS = new Set([
    'node_modules', '.git', 'dist', 'build', 'out', '.next', 'target', 'vendor',
    'coverage', 'tmp', 'temp', '.cache', '__pycache__', '.venv', 'site-packages',
]);
/** File names (lower-case) that are never indexed. */
const HARD_IGNORED_FILES = new Set(['.ds_store']);
/** File extensions (lower-case) of binary/archive/media files that are never indexed. */
const HARD_IGNORED_EXTENSIONS = [
    '.exe', '.dll', '.so', '.dylib', '.bin', '.o', '.a', '.zip', '.tar', '.gz', '.7z',
    '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.pdf', '.mp4', '.mov',
];
/**
 * Translate one .gitignore rule (without a leading "!") into a RegExp over
 * repo-relative file paths. Supports "*", "?", "**", a leading "/" and a
 * trailing "/"; every other character is matched literally (character
 * classes such as "[abc]" are not interpreted).
 *
 * @param {string} rule - A trimmed, non-comment .gitignore line.
 * @returns {RegExp|null} Matcher, or null when the rule is empty.
 */
function gitignoreRuleToRegExp(rule) {
    let glob = rule;
    const dirOnly = glob.endsWith('/');
    if (dirOnly)
        glob = glob.slice(0, -1);
    // A slash at the start or in the middle pins the rule to the repo root;
    // otherwise it may match at any depth.
    const anchored = glob.includes('/');
    if (glob.startsWith('/'))
        glob = glob.slice(1);
    if (!glob)
        return null;
    let body = '';
    for (let i = 0; i < glob.length; i++) {
        const ch = glob[i];
        if (ch === '*' && glob[i + 1] === '*') {
            i++;
            if (glob[i + 1] === '/') {
                i++;
                body += '(?:.*/)?'; // "**/" spans zero or more directories
            }
            else {
                body += '.*';
            }
        }
        else if (ch === '*') {
            body += '[^/]*';
        }
        else if (ch === '?') {
            body += '[^/]';
        }
        else {
            body += ch.replace(/[.+^${}()|[\]\\]/g, '\\$&');
        }
    }
    const prefix = anchored ? '^' : '(?:^|/)';
    // Only file paths are tested, so a directory-only rule must be followed by "/".
    const suffix = dirOnly ? '/' : '(?:/|$)';
    return new RegExp(prefix + body + suffix, 'i');
}
/**
 * Crude .gitignore aware filter + hardcoded ignores.
 *
 * Hardcoded rules compare whole path segments and file-name suffixes, so
 * "docs/layout/intro.md" is not mistaken for something inside "out/".
 * .gitignore rules are applied in order and the last matching rule wins,
 * which lets "!pattern" lines re-include a path. Hardcoded ignores cannot be
 * overridden by the .gitignore.
 *
 * @param {string} path - Repo-relative file path using "/" separators.
 * @param {string} [gitignoreText] - Contents of the repository's root .gitignore, if available.
 * @returns {boolean} true when the path should be skipped.
 */
function shouldIgnore(path, gitignoreText) {
    const segments = path.toLowerCase().split('/');
    const fileName = segments[segments.length - 1];
    const directories = segments.slice(0, -1);
    if (directories.some((dir) => HARD_IGNORED_DIRS.has(dir)))
        return true;
    if (HARD_IGNORED_FILES.has(fileName))
        return true;
    if (HARD_IGNORED_EXTENSIONS.some((ext) => fileName.endsWith(ext)))
        return true;
    if (!gitignoreText)
        return false;
    let ignored = false;
    for (const rawLine of gitignoreText.split(/\r?\n/)) {
        const line = rawLine.trim();
        if (!line || line.startsWith('#'))
            continue;
        const negated = line.startsWith('!');
        const matcher = gitignoreRuleToRegExp(negated ? line.slice(1) : line);
        if (matcher && matcher.test(path))
            ignored = !negated;
    }
    return ignored;
}
|
|
145
|
+
/**
 * Discover candidate doc files for a GitHub repo.
 * - If token: use /git/trees?recursive=1 (rich, respects .gitignore if present).
 * - Else (or when the tree request fails): return a small set of likely
 *   README + docs/ .md + llms candidates (no API call).
 *
 * When the repo info carries a subpath, tree results are restricted to that
 * path (matched on a "/" boundary, so "docs" does not select "docs-old/").
 *
 * @param {{owner: string, repo: string, ref: string, subpath?: string}} info - Parsed repo info.
 * @param {{token?: string, maxPages?: number, onProgress?: Function}} [opts] - token defaults
 *   to getGitHubToken(); maxPages caps the number of entries (default 80). onProgress is
 *   accepted for interface compatibility and is not invoked by discovery.
 * @returns {Promise<Array<{rawUrl: string, blobUrl: string, path: string}>>}
 *   Entries sorted with llms/README first, without duplicates.
 */
export async function discoverGitHubDocFiles(info, opts = {}) {
    const token = opts.token || getGitHubToken();
    const max = opts.maxPages || 80;
    const rBase = rawBase(info);
    const bBase = blobBase(info);
    // Subpath without surrounding slashes; '' means "whole repository".
    const subDir = info.subpath ? info.subpath.replace(/^\/+|\/+$/g, '') : '';
    const inSubpath = (p) => !subDir || p === subDir || p.startsWith(`${subDir}/`);
    const toEntry = (p) => ({ rawUrl: `${rBase}/${p}`, blobUrl: `${bBase}/${p}`, path: p });
    // prioritize llms + readme + docs/
    const score = (p) => {
        const l = p.toLowerCase();
        if (l.includes('llms-full'))
            return 0;
        if (l.endsWith('llms.txt'))
            return 1;
        if (l.includes('readme'))
            return 2;
        if (l.startsWith('docs/') || l.startsWith('doc/'))
            return 3;
        return 10;
    };
    if (token) {
        try {
            const treeUrl = `${apiBase(info)}/git/trees/${encodeURIComponent(info.ref)}?recursive=1`;
            const res = await githubFetchers.fetchWithRetry(treeUrl, { headers: authHeaders(token), timeout: 15000 });
            if (res.status === 403 || res.status === 429) {
                // A token is already in use on this path, so advise checking it rather than setting one.
                const rem = res.headers.get('x-ratelimit-remaining');
                logger.warn(`GitHub API refused the tree request (status=${res.status}, rate limit remaining=${rem ?? '?'}) — falling back to limited discovery. ` +
                    `Check that the token is valid, can read this repository, and has not exhausted its rate limit.`);
                // fall through to limited direct
            }
            else if (res.ok) {
                const data = await res.json();
                const tree = data.tree || [];
                let gitignoreText;
                // fetch .gitignore if present (best effort) — pass token for private repos
                if (tree.some((e) => e.path === '.gitignore' && e.type === 'blob')) {
                    try {
                        const giHeaders = { Accept: 'text/plain', Authorization: `token ${token}` };
                        const { text } = await githubFetchers.fetchTextWithFallback(`${rBase}/.gitignore`, { timeout: 8000, headers: giHeaders });
                        gitignoreText = text;
                    }
                    catch {
                        // best effort: continue with the hardcoded ignores only
                    }
                }
                return tree
                    .filter((e) => e.type === 'blob' && /\.(md|mdx|txt|rst)$/i.test(e.path))
                    .map((e) => e.path)
                    .filter((p) => inSubpath(p) && !shouldIgnore(p, gitignoreText))
                    .sort((a, b) => score(a) - score(b) || a.localeCompare(b))
                    .slice(0, max)
                    .map(toEntry);
            }
            else {
                logger.debug(`GitHub tree discovery returned status ${res.status}, using limited candidates`);
            }
        }
        catch (e) {
            logger.debug(`GitHub tree discovery failed, using limited candidates: ${e?.message || e}`);
        }
    }
    // Limited direct candidates (no or failed token; always works for public).
    // The Set removes repeats so no file is listed (and later fetched) twice.
    const limited = [...new Set([
        ...(subDir ? [`${subDir}/llms-full.txt`, `${subDir}/llms.txt`] : []),
        'llms-full.txt', 'llms.txt',
        'README.md', 'docs/README.md', 'doc/README.md',
        'docs/index.md', 'README',
    ])];
    const results = [];
    for (const p of limited) {
        if (results.length >= 12)
            break;
        // With a subpath, keep only entries inside it plus the root-level llms files.
        if (subDir && !inSubpath(p) && !p.startsWith('llms'))
            continue;
        results.push(toEntry(p));
    }
    return results.slice(0, max);
}
|
|
242
|
+
/**
 * High-level: given a github URL, return primary (best llms/README) + additional pages for multi-file ingest.
 * Reuses existing fetch* helpers + parse for manifests.
 * Always falls back safely.
 *
 * @param {string} url - GitHub repository URL.
 * @param {object} [opts]
 * @param {boolean} [opts.discoverLlms] - Pass false to skip fetching additional pages.
 * @param {number} [opts.maxPages] - Upper bound on documents, primary included (default 80).
 * @param {(completed: number, total: number) => void} [opts.onProgress] - Called after each
 *   additional page settles (fetched, skipped or failed). Exceptions it throws are logged and ignored.
 * @param {string} [opts.token] - GitHub token; defaults to getGitHubToken().
 * @returns {Promise<{primary: object, pages?: object[]}|null>} null when the URL is not a
 *   parsable GitHub URL or no primary document could be fetched; `pages` is undefined when
 *   no additional page was retrieved.
 */
export async function fetchGitHubRepoDocumentation(url, opts = {}) {
    const info = parseGitHubRepoUrl(url);
    if (!info)
        return null;
    const token = opts.token || getGitHubToken();
    const maxPages = opts.maxPages || 80;
    if (!token) {
        logger.warn('No GITHUB_TOKEN set — using limited file discovery (~12 files max). ' +
            'For full repo indexing set: export GITHUB_TOKEN=<token> (needs "repo" scope or "Contents: Read" fine-grained permission).');
    }
    // Fresh header object per request; Authorization is threaded through for private repos.
    const rawHeaders = () => {
        const headers = { Accept: 'text/plain' };
        if (token)
            headers['Authorization'] = `token ${token}`;
        return headers;
    };
    // Primary (llms or README)
    let primary = await fetchPrimaryGitHubContent(info, { token });
    if (!primary) {
        // fallback to raw README at least (accepts short READMEs the primary probe rejects)
        try {
            const fallback = `${rawBase(info)}/README.md`;
            const { text } = await githubFetchers.fetchTextWithFallback(fallback, { timeout: 12000, headers: rawHeaders() });
            if (text)
                primary = { content: text, contentType: 'text/markdown', url: `${blobBase(info)}/README.md` };
        }
        catch (e) {
            if (!token && (e?.message?.includes('401') || e?.message?.includes('403') || e?.message?.includes('404'))) {
                logger.warn('GitHub raw fetch failed. For private repos set GITHUB_TOKEN (with repo scope) and re-run.');
            }
        }
    }
    if (!primary)
        return null;
    // Additional pages (for non-manifest github docs)
    let pages = [];
    if (opts.discoverLlms !== false) {
        const discovered = await discoverGitHubDocFiles(info, {
            token,
            maxPages,
            onProgress: opts.onProgress,
        });
        // Skip the primary if it was one of them, and leave room for it in the page budget.
        const batch = discovered
            .filter(d => d.rawUrl !== primary.url && d.blobUrl !== primary.url)
            .slice(0, maxPages - 1);
        if (batch.length > 0) {
            const total = batch.length;
            let completed = 0;
            // A misbehaving callback must not abort ingestion.
            const reportProgress = () => {
                completed += 1;
                if (typeof opts.onProgress !== 'function')
                    return;
                try {
                    opts.onProgress(completed, total);
                }
                catch (err) {
                    logger.debug(`onProgress callback threw: ${err?.message || err}`);
                }
            };
            // Fetch raw content concurrently, but keep the blob url for metadata.url.
            const fetchResults = await Promise.all(batch.map(async (d) => {
                try {
                    const { text } = await githubFetchers.fetchTextWithFallback(d.rawUrl, { timeout: 15000, headers: rawHeaders() });
                    if (text && text.length > 80) {
                        return { content: text, contentType: 'text/markdown', url: d.blobUrl };
                    }
                }
                catch (e) {
                    if (!token && (e?.message?.includes('401') || e?.message?.includes('403'))) {
                        logger.debug(`Private GitHub page fetch likely requires GITHUB_TOKEN: ${d.path}`);
                    }
                }
                finally {
                    reportProgress();
                }
                return null;
            }));
            pages = fetchResults.filter(Boolean);
        }
        // If primary was llms manifest, let normal pipeline expand it (parseLlmsManifestUrls will resolve relatives correctly against blob/raw urls)
    }
    return { primary, pages: pages.length ? pages : undefined };
}
|
|
314
|
+
//# sourceMappingURL=github.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"github.js","sourceRoot":"","sources":["../../src/ingestion/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,OAAO,EACL,qBAAqB,IAAI,4BAA4B,EACrD,cAAc,IAAI,qBAAqB,GACxC,MAAM,eAAe,CAAC;AAOvB,IAAI,cAAc,GAAmB;IACnC,qBAAqB,EAAE,4BAA4B;IACnD,cAAc,EAAE,qBAAqB;CACtC,CAAC;AAEF,MAAM,UAAU,yBAAyB,CAAC,QAAwC;IAChF,cAAc,GAAG;QACf,qBAAqB,EAAE,QAAQ,EAAE,qBAAqB,IAAI,4BAA4B;QACtF,cAAc,EAAE,QAAQ,EAAE,cAAc,IAAI,qBAAqB;KAClE,CAAC;AACJ,CAAC;AAUD,MAAM,UAAU,cAAc;IAC5B,OAAO,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,OAAO,CAAC,GAAG,CAAC,QAAQ,IAAI,SAAS,CAAC;AACvE,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,GAAW;IACzC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QACvB,OAAO,CAAC,CAAC,QAAQ,KAAK,YAAY,IAAI,CAAC,CAAC,QAAQ,KAAK,gBAAgB,CAAC;IACxE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7C,IAAI,CAAC,CAAC,QAAQ,KAAK,YAAY,IAAI,CAAC,CAAC,QAAQ,KAAK,gBAAgB;YAAE,OAAO,IAAI,CAAC;QAEhF,MAAM,KAAK,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9D,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,IAAI,CAAC;QAElC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,IAAI,GAAG,GAAG,MAAM,CAAC;QACjB,IAAI,OAA2B,CAAC;QAEhC,6BAA6B;QAC7B,oBAAoB;QACpB,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,MAAM,EAAE,CAAC;YAC/C,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC;YACzB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3D,CAAC;aAAM,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,EAAE,CAAC;IACzD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,OAAO,CAAC,IAAoB;IACnC,OAAO,qCAAqC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAA
I,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;AACpF,CAAC;AAED,SAAS,QAAQ,CAAC,IAAoB;IACpC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,6BAA6B;IAClF,OAAO,sBAAsB,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,SAAS,GAAG,EAAE,CAAC;AACrE,CAAC;AAED,SAAS,OAAO,CAAC,IAAoB;IACnC,OAAO,gCAAgC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;AACnE,CAAC;AAED,SAAS,WAAW,CAAC,KAAc;IACjC,MAAM,CAAC,GAA2B;QAChC,YAAY,EAAE,+CAA+C;QAC7D,MAAM,EAAE,6BAA6B;KACtC,CAAC;IACF,IAAI,KAAK;QAAE,CAAC,CAAC,eAAe,CAAC,GAAG,SAAS,KAAK,EAAE,CAAC;IACjD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,IAAoB,EACpB,OAA6C,EAAE;IAE/C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,cAAc,EAAE,CAAC;IAC7C,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAEtE,0DAA0D;IAC1D,UAAU,CAAC,IAAI,CACb,GAAG,IAAI,GAAG,GAAG,gBAAgB,EAC7B,GAAG,IAAI,GAAG,GAAG,WAAW,EACxB,GAAG,IAAI,gBAAgB,EACvB,GAAG,IAAI,WAAW,EAClB,GAAG,IAAI,qBAAqB,EAC5B,GAAG,IAAI,gBAAgB,EACvB,GAAG,IAAI,YAAY,EACnB,GAAG,IAAI,iBAAiB,EACxB,GAAG,IAAI,gBAAgB,EACvB,GAAG,IAAI,SAAS,CACjB,CAAC;IAEF,MAAM,OAAO,GAA2B,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;IACjE,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,eAAe,CAAC,GAAG,SAAS,KAAK,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC;YACH,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,GAAG,MAAM,cAAc,CAAC,qBAAqB,CAAC,IAAI,EAAE;gBAC7E,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,KAAK;gBAC9B,OAAO;aACR,CAAC,CAAC;YACH,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;gBAC9B,yEAAyE;gBACzE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,EAAE,SAAS,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;gBAC5G,OAAO;oBACL,OAAO,EAAE,IAAI;oBACb,WAAW,EAAE,WAAW,IAAI,eAAe;oBAC3C,GAAG,EAAE,OAAO;iBACb,CAAC;YACJ,CAAC;QACH,CAAC;QAAC,OAAO,GAAQ,EAAE,CAAC;YAClB,+EAA+E;YAC/E,iFAAiF;YACjF,IAAI,KAAK,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,
EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC9E,MAAM,CAAC,IAAI,CAAC,sDAAsD,IAAI,0FAA0F,CAAC,CAAC;YACpK,CAAC;iBAAM,IAAI,CAAC,KAAK,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvH,gHAAgH;gBAChH,MAAM,CAAC,KAAK,CAAC,oBAAoB,IAAI,wDAAwD,CAAC,CAAC;YACjG,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,yDAAyD;AACzD,SAAS,YAAY,CAAC,IAAY,EAAE,aAAsB;IACxD,MAAM,CAAC,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAC7B,MAAM,IAAI,GAAG;QACX,eAAe,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;QACnF,WAAW,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,cAAc,EAAE,QAAQ,EAAE,gBAAgB;QACnF,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK;QAC9F,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;KACxE,CAAC;IACF,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IAE/C,IAAI,CAAC,aAAa;QAAE,OAAO,KAAK,CAAC;IACjC,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IACnG,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QACzD,IAAI,CAAC;YACH,IAAI,IAAI,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBAAE,OAAO,IAAI,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC,CAAA,CAAC;IACZ,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,IAAoB,EACpB,OAA2F,EAAE;IAE7F,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,cAAc,EAAE,CAAC;IAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;IAChC,MAAM,OAAO,GAA6D,EAAE,CAAC;IAE7E,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,
EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAEtE,IAAI,KAAK,EAAE,CAAC;QACV,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,cAAc,CAAC;YACzF,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,cAAc,CAAC,OAAO,EAAE,EAAE,OAAO,EAAE,WAAW,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;YAC1G,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;gBAC7C,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;gBACrD,MAAM,CAAC,IAAI,CACT,sCAAsC,GAAG,IAAI,GAAG,qDAAqD;oBACrG,6FAA6F,CAC9F,CAAC;gBACF,iCAAiC;YACnC,CAAC;iBAAM,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC;gBAClB,MAAM,IAAI,GAAQ,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;gBACnC,IAAI,aAAiC,CAAC;gBACtC,MAAM,IAAI,GAAU,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;gBAEpC,2EAA2E;gBAC3E,MAAM,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,YAAY,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;gBAC/E,IAAI,EAAE,EAAE,CAAC;oBACP,IAAI,CAAC;wBACH,MAAM,KAAK,GAAG,GAAG,KAAK,aAAa,CAAC;wBACpC,MAAM,SAAS,GAA2B,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;wBACnE,IAAI,KAAK;4BAAE,SAAS,CAAC,eAAe,CAAC,GAAG,SAAS,KAAK,EAAE,CAAC;wBACzD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,cAAc,CAAC,qBAAqB,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;wBAC1G,aAAa,GAAG,IAAI,CAAC;oBACvB,CAAC;oBAAC,MAAM,CAAC,CAAA,CAAC;gBACZ,CAAC;gBAED,MAAM,UAAU,GAAG,IAAI;qBACpB,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,sBAAsB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;qBAC5E,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;qBACvB,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE;oBACpB,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;wBAAE,OAAO,KAAK,CAAC;oBAC/D,OAAO,CAAC,YAAY,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;gBACzC,CAAC,CAAC;qBACD,IAAI,CAAC,CAAC,CAAS,EAAE,CAAS,EAAE,EAAE;oBAC7B,mCAAmC;oBACnC,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE;wBAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAC;wBAC1B,IAAI,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC;4BAAE,OAAO,CAAC,CAAC;wBACtC,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC;4BAAE,OAAO,CAAC,CAAC;wBACrC,IAAI,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC;4BAA
E,OAAO,CAAC,CAAC;wBACnC,IAAI,CAAC,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;4BAAE,OAAO,CAAC,CAAC;wBAC5D,OAAO,EAAE,CAAC;oBACZ,CAAC,CAAC;oBACF,OAAO,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;gBACnD,CAAC,CAAC;qBACD,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAEjB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;oBAC3B,MAAM,MAAM,GAAG,GAAG,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC/B,MAAM,OAAO,GAAG,GAAG,KAAK,IAAI,CAAC,EAAE,CAAC;oBAChC,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,OAAO,OAAO,CAAC;YACjB,CAAC;QACH,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,MAAM,CAAC,KAAK,CAAC,2DAA2D,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC;QAC7F,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,MAAM,OAAO,GAAG;QACd,GAAG,GAAG,gBAAgB,EAAE,GAAG,GAAG,WAAW;QACzC,eAAe,EAAE,UAAU;QAC3B,WAAW,EAAE,gBAAgB,EAAE,eAAe;QAC9C,eAAe,EAAE,QAAQ;KAC1B,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;IAEjC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,MAAM,IAAI,EAAE;YAAE,MAAM;QAChC,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;YAAE,SAAS;QACpF,OAAO,CAAC,IAAI,CAAC;YACX,MAAM,EAAE,GAAG,KAAK,IAAI,CAAC,EAAE;YACvB,OAAO,EAAE,GAAG,KAAK,IAAI,CAAC,EAAE;YACxB,IAAI,EAAE,CAAC;SACR,CAAC,CAAC;IACL,CAAC;IACD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,4BAA4B,CAChD,GAAW,EACX,OAKI,EAAE;IAEN,MAAM,IAAI,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IAEvB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,cAAc,EAAE,CAAC;IAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;IAErC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,CAAC,IAAI,CACT,sEAAsE;YACtE,4HAA4H,CAC7H,CAAC;IACJ,CAAC;IAED,2BAA2B;IAC3B,IAAI,OAAO,GAAG,MAAM,yBAAyB,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;IAC/D,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,gEAAgE;QAChE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,YAAY,CAAC;YAC9C,MAAM,OAAO,GAA2B,EAAE,MAAM,EAAE,YAAY,
EAAE,CAAC;YACjE,IAAI,KAAK;gBAAE,OAAO,CAAC,eAAe,CAAC,GAAG,SAAS,KAAK,EAAE,CAAC;YACvD,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,cAAc,CAAC,qBAAqB,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YACnG,IAAI,IAAI;gBAAE,OAAO,GAAG,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,eAAe,EAAE,GAAG,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;QAC1G,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC1G,MAAM,CAAC,IAAI,CAAC,2FAA2F,CAAC,CAAC;YAC3G,CAAC;QACH,CAAC;IACH,CAAC;IACD,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,kDAAkD;IAClD,IAAI,KAAK,GAAkB,EAAE,CAAC;IAC9B,IAAI,IAAI,CAAC,YAAY,KAAK,KAAK,EAAE,CAAC;QAChC,MAAM,UAAU,GAAG,MAAM,sBAAsB,CAAC,IAAI,EAAE;YACpD,KAAK;YACL,QAAQ;YACR,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC,CAAC;QAEH,yCAAyC;QACzC,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAQ,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,KAAK,OAAQ,CAAC,GAAG,CAAC,CAAC;QAEhG,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,kFAAkF;YAClF,2GAA2G;YAC3G,MAAM,WAAW,GAA2B,EAAE,MAAM,EAAE,YAAY,EAAE,CAAC;YACrE,IAAI,KAAK;gBAAE,WAAW,CAAC,eAAe,CAAC,GAAG,SAAS,KAAK,EAAE,CAAC;YAC3D,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,GAAG,CACpC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;gBAC7C,IAAI,CAAC;oBACH,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,cAAc,CAAC,qBAAqB,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,WAAW,EAAE,CAAC,CAAC;oBAChH,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;wBAC7B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC,OAAO,EAAiB,CAAC;oBACxF,CAAC;gBACH,CAAC;gBAAC,OAAO,CAAM,EAAE,CAAC;oBAChB,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;wBAC3E,MAAM,CAAC,KAAK,CAAC,2DAA2D,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;oBACpF,CAAC;gBACH,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC,CAAC,CACH,CAAC;YACF,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,OAAO,CAAkB,CAAC
;QACxD,CAAC;QAED,6IAA6I;IAC/I,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;AAC9D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,gBAAgB,EAAE,eAAe,EAAmC,MAAM,YAAY,CAAC;AAWrG,wBAAsB,mBAAmB,CACvC,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,eAAe,CAAC,CA4K1B"}
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { logger } from '../core/logger.js';
|
|
2
|
+
import { fetchDocumentation, parseLlmsManifestUrls, fetchPagesConcurrently, } from './fetchers.js';
|
|
3
|
+
import { detectSourceType, isLikelyMarkdown, isGitHubRepoUrl } from './detectors.js';
|
|
4
|
+
import { htmlToMarkdown, normalizeMarkdown } from './cleaners.js';
|
|
5
|
+
import { chunkMarkdown } from './chunker.js';
|
|
6
|
+
import { fetchGitHubRepoDocumentation, getGitHubToken } from './github.js';
|
|
7
|
+
/**
 * Fetch a documentation source and turn it into chunks plus ingestion stats.
 *
 * Flow: fetch the primary URL, optionally expand it into more pages (links
 * listed in an llms.txt manifest, or files discovered in a GitHub repo),
 * convert every page to Markdown, chunk each page separately, and return the
 * chunks together with counters describing the run.
 *
 * @param {string} url - Documentation URL to ingest.
 * @param {object} [options]
 * @param {(progress: object) => void} [options.onProgress] - Receives stage updates.
 * @param {number} [options.maxChunks=8000] - Upper bound on returned chunks.
 * @param {number} [options.chunkSize=1100] - Passed to the chunker as `targetSize`.
 * @param {number} [options.chunkOverlap=180] - Passed to the chunker as `overlap`.
 * @param {number} [options.maxPages=80] - Page budget, primary page included.
 * @param {Record<string, string>} [options.headers] - Forwarded to the fetchers.
 * @returns {Promise<object>} `{ sourceUrl, sourceType, title, chunks, stats, rawMarkdown }`.
 * @throws {Error} When no page produced any chunk.
 */
export async function ingestDocumentation(url, options = {}) {
    const start = Date.now();
    const { onProgress, maxChunks = 8000, chunkSize = 1100, chunkOverlap = 180, maxPages = 80, } = options;
    const emit = (p) => onProgress?.(p);
    emit({ stage: 'detect', message: 'Detecting source type...' });
    // Primary fetch (prefers llms-full.txt sibling when input is llms.txt)
    emit({ stage: 'fetch', message: `Fetching ${url}...` });
    const fetched = await fetchDocumentation(url, { headers: options.headers || {} });
    emit({ stage: 'fetch', message: `Fetched ${fetched.content.length.toLocaleString()} chars` });
    const sourceType = detectSourceType(fetched.url, fetched.content);
    // pagesToProcess starts with primary; may expand for manifests below.
    let pagesToProcess = [fetched];
    let pagesProcessed = 1;
    let pagesDiscovered = 1;
    let truncated = false;
    // Manifest expansion (only for llms.txt that is not -full and contains links/URLs)
    const isLlmsManifest = fetched.url.includes('llms.txt') &&
        !fetched.url.includes('llms-full.txt') &&
        (fetched.content.includes('[') || /^- /m.test(fetched.content) || /https?:\/\//.test(fetched.content));
    if (isLlmsManifest) {
        emit({ stage: 'manifest', message: 'Parsing llms.txt manifest for page links...' });
        const manifestUrls = parseLlmsManifestUrls(fetched.content, fetched.url);
        pagesDiscovered = Math.max(pagesDiscovered, 1 + manifestUrls.length);
        // The primary page already uses one slot of the page budget.
        const toFetch = manifestUrls.slice(0, Math.max(0, maxPages - 1));
        if (manifestUrls.length > toFetch.length)
            truncated = true;
        if (toFetch.length > 0) {
            emit({
                stage: 'pages',
                message: `Fetching ${toFetch.length} pages from manifest (concurrency 3-4)...`,
                current: 0,
                total: toFetch.length,
            });
            const morePages = await fetchPagesConcurrently(toFetch, 3, (completed, total) => {
                emit({
                    stage: 'pages',
                    message: `Fetching pages (${completed}/${total})...`,
                    current: completed,
                    total,
                });
            }, options.headers || {});
            if (morePages.length > 0) {
                pagesToProcess = [fetched, ...morePages];
                pagesProcessed = pagesToProcess.length;
                emit({
                    stage: 'pages',
                    message: `Fetched ${morePages.length} additional pages`,
                    current: pagesProcessed,
                    total: pagesProcessed,
                });
            }
        }
    }
    // GitHub expansion (when not already handled by llms manifest and source looks like github repo)
    // Uses dedicated discovery (tree if token, limited candidates otherwise) so we get multiple .md/README/llms files
    // with correct per-file metadata.url (blob form for grounding).
    const isGh = sourceType === 'github' || isGitHubRepoUrl(fetched.url);
    if (isGh && !isLlmsManifest && pagesToProcess.length === 1) {
        try {
            emit({ stage: 'manifest', message: 'Discovering GitHub documentation files (README, docs/, llms)...' });
            const ghToken = getGitHubToken();
            const ghRes = await fetchGitHubRepoDocumentation(fetched.url, {
                discoverLlms: true,
                maxPages: maxPages || 80,
                onProgress: (c, t) => emit({ stage: 'pages', message: `GitHub files (${c}/${t})...`, current: c, total: t }),
                token: ghToken,
            });
            if (ghRes && ghRes.pages && ghRes.pages.length > 0) {
                pagesToProcess = [ghRes.primary, ...ghRes.pages];
                pagesProcessed = pagesToProcess.length;
                pagesDiscovered = Math.max(pagesDiscovered, pagesProcessed);
                emit({ stage: 'pages', message: `Fetched ${ghRes.pages.length} additional GitHub files`, current: pagesProcessed, total: pagesProcessed });
            }
        }
        catch (e) {
            logger.debug(`GitHub multi-file expansion failed, using primary only: ${e?.message || e}`);
            // keep the single primary; normal flow continues
        }
    }
    // Per-page clean+chunk so metadata.url is the real page (not root manifest) for grounding.
    emit({ stage: 'clean', message: `Converting ${pagesToProcess.length} page(s) to Markdown...` });
    let chunks = [];
    // Converted Markdown of the first page, kept so the overall title can be read
    // from it: an HTML page's raw content has no '# ' heading for extractTitle.
    let primaryMarkdown;
    for (const page of pagesToProcess) {
        let markdown;
        if (page.contentType.includes('html') && !isLikelyMarkdown(page.contentType, page.content)) {
            markdown = htmlToMarkdown(page.content, page.url);
        }
        else {
            markdown = normalizeMarkdown(page.content);
        }
        if (page === pagesToProcess[0] && markdown)
            primaryMarkdown = markdown;
        // Pages with almost no text are skipped (they still count in pagesProcessed).
        if (!markdown || markdown.length < 100)
            continue;
        const pageTitle = extractTitle(markdown, page.url);
        const pageChunks = chunkMarkdown(markdown, page.url, pageTitle, {
            targetSize: chunkSize,
            overlap: chunkOverlap,
        });
        chunks.push(...pageChunks);
        if (chunks.length > maxChunks) {
            logger.warn(`Truncating from ${chunks.length} to ${maxChunks} chunks`);
            chunks = chunks.slice(0, maxChunks);
            truncated = true;
            break;
        }
    }
    if (chunks.length === 0) {
        throw new Error('Could not extract meaningful content from the provided URL(s).');
    }
    const totalChars = chunks.reduce((sum, c) => sum + c.content.length, 0);
    emit({
        stage: 'chunk',
        message: 'Chunking documentation...',
    });
    const isLlmsFull = fetched.url.includes('llms-full.txt');
    const pagesInfo = isLlmsFull
        ? 'llms-full.txt (concatenated documentation)'
        : `${pagesProcessed} page(s)`;
    emit({
        stage: 'done',
        message: `Ingestion complete: ${chunks.length} chunks, ${totalChars.toLocaleString()} chars from ${pagesInfo}`,
    });
    // Prefer the converted Markdown for the title; fall back to the raw content
    // when the first page produced no Markdown at all.
    const baseTitle = extractTitle(primaryMarkdown || pagesToProcess[0]?.content || '', pagesToProcess[0]?.url || url);
    const result = {
        sourceUrl: fetched.url,
        sourceType,
        title: baseTitle,
        chunks,
        stats: {
            totalChunks: chunks.length,
            totalChars,
            pagesProcessed,
            pagesDiscovered,
            durationMs: Date.now() - start,
            truncated,
            maxChunks,
            maxPages,
        },
        // NOTE(review): this is the first page's content as fetched (HTML stays HTML),
        // capped at 50,000 chars; confirm whether consumers expect converted Markdown.
        rawMarkdown: (pagesToProcess[0]?.content || '').slice(0, 50_000),
    };
    return result;
}
|
|
148
|
+
/**
 * Pick a display title for a page.
 *
 * Uses the text of the first level-1 ATX heading (`# Title`) found in the
 * Markdown. When there is none, falls back to the URL's host + path, and to
 * the literal 'Documentation' when the URL cannot be parsed.
 *
 * @param {string} md - Markdown text to search for a heading.
 * @param {string} fallbackUrl - URL used when no heading is present.
 * @returns {string} The heading text, host + path, or 'Documentation'.
 */
function extractTitle(md, fallbackUrl) {
    // Only spaces/tabs may separate '#' from the text: `\s` also matches a
    // newline, which let a bare '#' line capture the following line as the
    // title. `\S` guarantees the captured title is not blank.
    const h1 = md.match(/^#[ \t]+(\S.*)$/m);
    if (h1)
        return h1[1].trim();
    try {
        const u = new URL(fallbackUrl);
        return u.hostname + u.pathname;
    }
    catch {
        // Not an absolute URL: use a generic label instead of failing.
        return 'Documentation';
    }
}
|
|
160
|
+
//# sourceMappingURL=pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/ingestion/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAE3C,OAAO,EACL,kBAAkB,EAClB,qBAAqB,EACrB,sBAAsB,GACvB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACrF,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAClE,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,4BAA4B,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE3E,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,GAAW,EACX,UAA4B,EAAE;IAE9B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,EACJ,UAAU,EACV,SAAS,GAAG,IAAI,EAChB,SAAS,GAAG,IAAI,EAChB,YAAY,GAAG,GAAG,EAClB,QAAQ,GAAG,EAAE,GACd,GAAG,OAAO,CAAC;IAEZ,MAAM,IAAI,GAAqB,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC;IAEtD,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,0BAA0B,EAAE,CAAC,CAAC;IAE/D,uEAAuE;IACvE,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,GAAG,KAAK,EAAE,CAAC,CAAC;IACxD,IAAI,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;IAEhF,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAE9F,MAAM,UAAU,GAAG,gBAAgB,CAAC,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAElE,sEAAsE;IACtE,IAAI,cAAc,GAAiE,CAAC,OAAO,CAAC,CAAC;IAC7F,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,mFAAmF;IACnF,MAAM,cAAc,GAClB,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC;QAChC,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC;QACtC,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC;IAEzG,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,6CAA6C,EAAE,CAAC,CAAC;QACpF,MAAM,YAAY,GAAG,qBAAqB,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC;QACzE,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC;QACrE,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,C
AAC,CAAC;QACjE,IAAI,YAAY,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM;YAAE,SAAS,GAAG,IAAI,CAAC;QAE3D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,IAAI,CAAC;gBACH,KAAK,EAAE,OAAO;gBACd,OAAO,EAAE,YAAY,OAAO,CAAC,MAAM,2CAA2C;gBAC9E,OAAO,EAAE,CAAC;gBACV,KAAK,EAAE,OAAO,CAAC,MAAM;aACtB,CAAC,CAAC;YAEH,MAAM,SAAS,GAAG,MAAM,sBAAsB,CAAC,OAAO,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,EAAE,EAAE;gBAC9E,IAAI,CAAC;oBACH,KAAK,EAAE,OAAO;oBACd,OAAO,EAAE,mBAAmB,SAAS,IAAI,KAAK,MAAM;oBACpD,OAAO,EAAE,SAAS;oBAClB,KAAK;iBACN,CAAC,CAAC;YACL,CAAC,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;YAE1B,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzB,cAAc,GAAG,CAAC,OAAO,EAAE,GAAG,SAAS,CAAC,CAAC;gBACzC,cAAc,GAAG,cAAc,CAAC,MAAM,CAAC;gBACvC,IAAI,CAAC;oBACH,KAAK,EAAE,OAAO;oBACd,OAAO,EAAE,WAAW,SAAS,CAAC,MAAM,mBAAmB;oBACvD,OAAO,EAAE,cAAc;oBACvB,KAAK,EAAE,cAAc;iBACtB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,iGAAiG;IACjG,kHAAkH;IAClH,gEAAgE;IAChE,MAAM,IAAI,GAAG,UAAU,KAAK,QAAQ,IAAI,eAAe,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACrE,IAAI,IAAI,IAAI,CAAC,cAAc,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3D,IAAI,CAAC;YACH,IAAI,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,iEAAiE,EAAE,CAAC,CAAC;YACxG,MAAM,OAAO,GAAG,cAAc,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,MAAM,4BAA4B,CAAC,OAAO,CAAC,GAAG,EAAE;gBAC5D,YAAY,EAAE,IAAI;gBAClB,QAAQ,EAAE,QAAQ,IAAI,EAAE;gBACxB,UAAU,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,iBAAiB,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;gBAC5G,KAAK,EAAE,OAAO;aACf,CAAC,CAAC;YACH,IAAI,KAAK,IAAI,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnD,cAAc,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;gBACjD,cAAc,GAAG,cAAc,CAAC,MAAM,CAAC;gBACvC,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;gBAC5D,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,KAAK,CAAC,KAAK,CAAC,MAAM,0BAA0B,EAAE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC;YAC7I,CAAC;QACH,CAAC;QAAC,OAAO,CAAM,EAAE,CAAC;YAChB,MAAM,CAAC,KAAK,CAAC,2DAA2D,CAAC,EAAE,OAAO,IAAI,CAAC,EAAE,CAAC,CAAC
;YAC3F,iDAAiD;QACnD,CAAC;IACH,CAAC;IAED,2FAA2F;IAC3F,IAAI,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,cAAc,CAAC,MAAM,yBAAyB,EAAE,CAAC,CAAC;IAEhG,IAAI,MAAM,GAAoB,EAAE,CAAC;IACjC,KAAK,MAAM,IAAI,IAAI,cAAc,EAAE,CAAC;QAClC,IAAI,QAAgB,CAAC;QACrB,IAAI,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3F,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QACpD,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7C,CAAC;QAED,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG;YAAE,SAAS;QAEjD,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QACnD,MAAM,UAAU,GAAG,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,SAAS,EAAE;YAC9D,UAAU,EAAE,SAAS;YACrB,OAAO,EAAE,YAAY;SACtB,CAAC,CAAC;QAEH,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE3B,IAAI,MAAM,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAC9B,MAAM,CAAC,IAAI,CAAC,mBAAmB,MAAM,CAAC,MAAM,OAAO,SAAS,SAAS,CAAC,CAAC;YACvE,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;YACpC,SAAS,GAAG,IAAI,CAAC;YACjB,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAExE,IAAI,CAAC;QACH,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,2BAA2B;KACrC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,UAAU;QAC1B,CAAC,CAAC,4CAA4C;QAC9C,CAAC,CAAC,GAAG,cAAc,UAAU,CAAC;IAEhC,IAAI,CAAC;QACH,KAAK,EAAE,MAAM;QACb,OAAO,EAAE,uBAAuB,MAAM,CAAC,MAAM,YAAY,UAAU,CAAC,cAAc,EAAE,eAAe,SAAS,EAAE;KAC/G,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,YAAY,CAC5B,cAAc,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,EAChC,cAAc,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,GAAG,CAC9B,CAAC;IAEF,MAAM,MAAM,GAAoB;QAC9B,SAAS,EAAE,OAAO,CAAC,GAAG;QACtB,UAAU;QACV,KAAK,EAAE,SAAS;QAChB,MAAM;QACN,KAAK,EAAE;YACL,WAAW,EAAE,MAAM,CAAC,MAAM;YAC1B,UAAU;YACV,cAAc;YACd,eAAe;YACf,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YAC9B,SAAS;YACT,SAAS;YACT
,QAAQ;SACT;QACD,WAAW,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC;KACjE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,EAAU,EAAE,WAAmB;IACnD,MAAM,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACnC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC;QAC/B,OAAO,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;IACjC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,eAAe,CAAC;IACzB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/** Classification of a documentation source; used as the type of `IngestionResult.sourceType`. */
export type SourceType = 'llms.txt' | 'github' | 'generic' | 'manual';
|
|
2
|
+
/** One chunk of ingested documentation text together with metadata about where it came from. */
export interface IngestedChunk {
|
|
3
|
+
id: string;
|
|
4
|
+
content: string; // Chunk text; ingestDocumentation sums content.length into stats.totalChars
|
|
5
|
+
metadata: {
|
|
6
|
+
url: string; // URL of the page the chunk came from (per page, not the root manifest)
|
|
7
|
+
title: string; // presumably the page title handed to the chunker — TODO confirm in chunker
|
|
8
|
+
sectionPath?: string;
|
|
9
|
+
headings?: string[];
|
|
10
|
+
charCount: number; // presumably content.length — TODO confirm in chunker
|
|
11
|
+
order: number; // presumably the chunk's position within its source — TODO confirm
|
|
12
|
+
sourceId?: string;
|
|
13
|
+
sourceType?: string; // NOTE(review): plain string here, not the SourceType union
|
|
14
|
+
sourceLabel?: string;
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
/** Value returned by ingestDocumentation: the chunks plus counters describing the run. */
export interface IngestionResult {
|
|
18
|
+
sourceUrl: string; // URL reported by the primary fetch (fetched.url)
|
|
19
|
+
sourceType: SourceType; // Result of detectSourceType on the primary page
|
|
20
|
+
title: string; // Title extracted for the first processed page
|
|
21
|
+
chunks: IngestedChunk[]; // At most stats.maxChunks entries
|
|
22
|
+
stats: {
|
|
23
|
+
totalChunks: number; // chunks.length
|
|
24
|
+
totalChars: number; // Sum of content.length over all chunks
|
|
25
|
+
pagesProcessed: number; // Pages handed to the clean/chunk loop; pages skipped as too short still count
|
|
26
|
+
pagesDiscovered?: number; // For manifests: 1 + number of manifest links, before the maxPages cap
|
|
27
|
+
durationMs: number; // Wall-clock time of the ingestDocumentation call
|
|
28
|
+
truncated: boolean; // True when manifest links exceeded the page budget or chunks exceeded maxChunks
|
|
29
|
+
maxChunks: number; // Effective chunk limit used for this run
|
|
30
|
+
maxPages: number; // Effective page limit used for this run
|
|
31
|
+
};
|
|
32
|
+
rawMarkdown?: string; // First page's content as fetched, cut to 50,000 characters
|
|
33
|
+
}
|
|
34
|
+
/** Progress event passed to the onProgress callback during ingestion. */
export interface IngestionProgress {
|
|
35
|
+
stage: 'detect' | 'fetch' | 'manifest' | 'pages' | 'clean' | 'chunk' | 'embed' | 'done'; // ingestDocumentation emits every stage except 'embed'
|
|
36
|
+
message: string; // Human-readable status line
|
|
37
|
+
current?: number; // Set on 'pages' events: items completed so far
|
|
38
|
+
total?: number; // Set on 'pages' events: items expected
|
|
39
|
+
percent?: number; // Not set by ingestDocumentation; presumably for other emitters — TODO confirm
|
|
40
|
+
}
|
|
41
|
+
/** Callback that receives each IngestionProgress event; its return value is ignored. */
export type ProgressCallback = (progress: IngestionProgress) => void;
|
|
42
|
+
/** Optional settings accepted by ingestDocumentation; defaults noted below are the ones that function applies. */
export interface IngestionOptions {
|
|
43
|
+
maxPages?: number; // Page budget including the primary page; defaults to 80
|
|
44
|
+
maxChunks?: number; // Chunk limit; defaults to 8000
|
|
45
|
+
chunkSize?: number; // Passed to the chunker as targetSize; defaults to 1100
|
|
46
|
+
chunkOverlap?: number; // Passed to the chunker as overlap; defaults to 180
|
|
47
|
+
onProgress?: ProgressCallback; // Called with each progress event
|
|
48
|
+
signal?: AbortSignal; // NOTE(review): not read by ingestDocumentation in pipeline.js — confirm whether cancellation is wired elsewhere
|
|
49
|
+
headers?: Record<string, string>; // Forwarded to the primary fetch and to manifest page fetches
|
|
50
|
+
}
|
|
51
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/ingestion/types.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,UAAU,GAAG,UAAU,GAAG,QAAQ,GAAG,SAAS,GAAG,QAAQ,CAAC;AAEtE,MAAM,WAAW,aAAa;IAC5B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE;QACR,GAAG,EAAE,MAAM,CAAC;QACZ,KAAK,EAAE,MAAM,CAAC;QACd,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;QACpB,SAAS,EAAE,MAAM,CAAC;QAClB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,UAAU,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,aAAa,EAAE,CAAC;IACxB,KAAK,EAAE;QACL,WAAW,EAAE,MAAM,CAAC;QACpB,UAAU,EAAE,MAAM,CAAC;QACnB,cAAc,EAAE,MAAM,CAAC;QACvB,eAAe,CAAC,EAAE,MAAM,CAAC;QACzB,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,OAAO,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,QAAQ,GAAG,OAAO,GAAG,UAAU,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,OAAO,GAAG,MAAM,CAAC;IACxF,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,MAAM,gBAAgB,GAAG,CAAC,QAAQ,EAAE,iBAAiB,KAAK,IAAI,CAAC;AAErE,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,gBAAgB,CAAC;IAC9B,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAClC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/ingestion/types.ts"],"names":[],"mappings":""}
|