@memvid/maw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -0
- package/dist/bin/maw.d.ts +6 -0
- package/dist/bin/maw.d.ts.map +1 -0
- package/dist/bin/maw.js +275 -0
- package/dist/bin/maw.js.map +1 -0
- package/dist/src/crawler/index.d.ts +71 -0
- package/dist/src/crawler/index.d.ts.map +1 -0
- package/dist/src/crawler/index.js +249 -0
- package/dist/src/crawler/index.js.map +1 -0
- package/dist/src/crawler/robots.d.ts +26 -0
- package/dist/src/crawler/robots.d.ts.map +1 -0
- package/dist/src/crawler/robots.js +179 -0
- package/dist/src/crawler/robots.js.map +1 -0
- package/dist/src/crawler/sitemap.d.ts +36 -0
- package/dist/src/crawler/sitemap.d.ts.map +1 -0
- package/dist/src/crawler/sitemap.js +209 -0
- package/dist/src/crawler/sitemap.js.map +1 -0
- package/dist/src/engine/detector.d.ts +18 -0
- package/dist/src/engine/detector.d.ts.map +1 -0
- package/dist/src/engine/detector.js +155 -0
- package/dist/src/engine/detector.js.map +1 -0
- package/dist/src/engine/fetch.d.ts +18 -0
- package/dist/src/engine/fetch.d.ts.map +1 -0
- package/dist/src/engine/fetch.js +53 -0
- package/dist/src/engine/fetch.js.map +1 -0
- package/dist/src/engine/index.d.ts +39 -0
- package/dist/src/engine/index.d.ts.map +1 -0
- package/dist/src/engine/index.js +116 -0
- package/dist/src/engine/index.js.map +1 -0
- package/dist/src/engine/playwright.d.ts +23 -0
- package/dist/src/engine/playwright.d.ts.map +1 -0
- package/dist/src/engine/playwright.js +88 -0
- package/dist/src/engine/playwright.js.map +1 -0
- package/dist/src/engine/rebrowser.d.ts +22 -0
- package/dist/src/engine/rebrowser.d.ts.map +1 -0
- package/dist/src/engine/rebrowser.js +142 -0
- package/dist/src/engine/rebrowser.js.map +1 -0
- package/dist/src/extractor/cleaner.d.ts +13 -0
- package/dist/src/extractor/cleaner.d.ts.map +1 -0
- package/dist/src/extractor/cleaner.js +122 -0
- package/dist/src/extractor/cleaner.js.map +1 -0
- package/dist/src/extractor/index.d.ts +29 -0
- package/dist/src/extractor/index.d.ts.map +1 -0
- package/dist/src/extractor/index.js +162 -0
- package/dist/src/extractor/index.js.map +1 -0
- package/dist/src/extractor/links.d.ts +22 -0
- package/dist/src/extractor/links.d.ts.map +1 -0
- package/dist/src/extractor/links.js +92 -0
- package/dist/src/extractor/links.js.map +1 -0
- package/dist/src/extractor/markdown.d.ts +13 -0
- package/dist/src/extractor/markdown.d.ts.map +1 -0
- package/dist/src/extractor/markdown.js +94 -0
- package/dist/src/extractor/markdown.js.map +1 -0
- package/dist/src/git/index.d.ts +40 -0
- package/dist/src/git/index.d.ts.map +1 -0
- package/dist/src/git/index.js +303 -0
- package/dist/src/git/index.js.map +1 -0
- package/dist/src/index.d.ts +103 -0
- package/dist/src/index.d.ts.map +1 -0
- package/dist/src/index.js +229 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/ingestor/index.d.ts +95 -0
- package/dist/src/ingestor/index.d.ts.map +1 -0
- package/dist/src/ingestor/index.js +471 -0
- package/dist/src/ingestor/index.js.map +1 -0
- package/dist/src/utils/dedup.d.ts +66 -0
- package/dist/src/utils/dedup.d.ts.map +1 -0
- package/dist/src/utils/dedup.js +296 -0
- package/dist/src/utils/dedup.js.map +1 -0
- package/dist/src/utils/index.d.ts +3 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/utils/index.js +3 -0
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/logger.d.ts +12 -0
- package/dist/src/utils/logger.d.ts.map +1 -0
- package/dist/src/utils/logger.js +49 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/dist/src/utils/ui.d.ts +126 -0
- package/dist/src/utils/ui.d.ts.map +1 -0
- package/dist/src/utils/ui.js +357 -0
- package/dist/src/utils/ui.js.map +1 -0
- package/dist/src/utils/url.d.ts +21 -0
- package/dist/src/utils/url.d.ts.map +1 -0
- package/dist/src/utils/url.js +107 -0
- package/dist/src/utils/url.js.map +1 -0
- package/package.json +71 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* maw - Feed the maw. It never forgets.
|
|
3
|
+
*
|
|
4
|
+
* One command to consume entire websites, git repos, and files into searchable .mv2 files.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Options accepted by `maw()`.
 * Defaults mentioned below are the fallbacks `maw()` applies when a field is omitted.
 */
export interface MawOptions {
|
|
7
|
+
/** Path of the .mv2 file to write; also the file whose size is reported in the result. */
output: string;
|
|
8
|
+
/** Crawl depth (default 2). Leaving it undefined also allows automatic single-page mode, which uses depth 0. */
depth?: number;
|
|
9
|
+
/** Crawler concurrency (default 10). */
concurrency?: number;
|
|
10
|
+
/** Page cap for a crawl (default 150); in single-page mode the cap is the number of input URLs. */
maxPages?: number;
|
|
11
|
+
/** Crawler rate limit (default 10). NOTE(review): unit is not visible here — confirm against the Crawler. */
rateLimit?: number;
|
|
12
|
+
/** Crawler timeout (default 10000). NOTE(review): presumably milliseconds — confirm against the Crawler. */
timeout?: number;
|
|
13
|
+
/** Forwarded unchanged to the Crawler as `includePattern`. */
includePattern?: RegExp;
|
|
14
|
+
/** Forwarded unchanged to the Crawler as `excludePattern`. */
excludePattern?: RegExp;
|
|
15
|
+
/** Whether the crawler uses the sitemap (default true); always false in single-page mode. */
useSitemap?: boolean;
|
|
16
|
+
/** Forwarded to the Crawler as `respectRobots` (default true). */
respectRobots?: boolean;
|
|
17
|
+
/** Forwarded unchanged to the Crawler as `forceEngine`. */
forceEngine?: 'fetch' | 'playwright' | 'rebrowser';
|
|
18
|
+
/** Label forwarded to the ingestor; git ingestion falls back to 'code' when unset. */
label?: string;
|
|
19
|
+
/** Forwarded unchanged to the ingestor as `memoryId`. */
memoryId?: string;
|
|
20
|
+
/** Memory name; when unset it is derived from the input hostnames (or repo names) joined with '-'. */
memoryName?: string;
|
|
21
|
+
/** Forwarded unchanged to the ingestor as `enableEmbedding`. */
enableEmbedding?: boolean;
|
|
22
|
+
/** Forwarded unchanged to the ingestor as `embeddingModel`. */
embeddingModel?: string;
|
|
23
|
+
/** Passed to `setLogMode` (treated as false when unset); also suppresses the mode/progress banner lines. */
quiet?: boolean;
|
|
24
|
+
/** Passed to `setLogMode` (treated as false when unset). */
verbose?: boolean;
|
|
25
|
+
}
|
|
26
|
+
/** Summary returned by `maw()` for both web crawls and git ingestion. */
export interface MawResult {
|
|
27
|
+
/** The output path that was passed in via `MawOptions.output`. */
output: string;
|
|
28
|
+
/** Count reported by the ingestor: pages for web crawls, files for git repos. */
pages: number;
|
|
29
|
+
/** Size of the output file as returned by `getFileSize`. */
size: number;
|
|
30
|
+
/** Elapsed time. The git path measures it with `Date.now()` (milliseconds); the web path reports the ingestor's value — presumably the same unit, confirm. */
duration: number;
|
|
31
|
+
/** Copied from the ingestor stats. */
stoppedAtLimit?: boolean;
|
|
32
|
+
/** Copied from the ingestor stats on the web path; not set on the git path. */
skippedDupes?: number;
|
|
33
|
+
/** Copied from the ingestor stats. */
memoryId?: string;
|
|
34
|
+
/** Engine counters: taken from `crawler.getStats()` for web crawls, synthesized for git ingestion. */
stats: {
|
|
35
|
+
/** On the git path this is set to the ingested file count. */
fetch: number;
|
|
36
|
+
/** Always 0 on the git path. */
playwright: number;
|
|
37
|
+
/** Always 0 on the git path. */
rebrowser: number;
|
|
38
|
+
/** Always 0 on the git path. */
blocked: number;
|
|
39
|
+
/** Falls back to all zeros on the web path when the crawler reports no dedup stats. */
dedup: {
|
|
40
|
+
localeSkipped: number;
|
|
41
|
+
similarSkipped: number;
|
|
42
|
+
/** NOTE(review): the git path stores the ingested file count here rather than a skip count — confirm intended meaning. */
total: number;
|
|
43
|
+
};
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Main maw function - crawl URLs, git repos, or files and save to .mv2 file
|
|
48
|
+
*/
|
|
49
|
+
export declare function maw(urls: string[], options: MawOptions): Promise<MawResult>;
|
|
50
|
+
/**
|
|
51
|
+
* Search in an .mv2 file
|
|
52
|
+
*/
|
|
53
|
+
export declare function find(path: string, query: string, options?: {
|
|
54
|
+
k?: number;
|
|
55
|
+
}): Promise<any>;
|
|
56
|
+
/**
|
|
57
|
+
* Ask a question using an .mv2 file
|
|
58
|
+
*/
|
|
59
|
+
export declare function ask(path: string, question: string, options?: {
|
|
60
|
+
model?: string;
|
|
61
|
+
apiKey?: string;
|
|
62
|
+
k?: number;
|
|
63
|
+
}): Promise<any>;
|
|
64
|
+
/**
|
|
65
|
+
* List documents in an .mv2 file
|
|
66
|
+
*/
|
|
67
|
+
export declare function list(path: string, options?: {
|
|
68
|
+
limit?: number;
|
|
69
|
+
}): Promise<any>;
|
|
70
|
+
/**
|
|
71
|
+
* Export documents from an .mv2 file with full content
|
|
72
|
+
*/
|
|
73
|
+
export declare function exportDocs(path: string, options?: {
|
|
74
|
+
limit?: number;
|
|
75
|
+
}): Promise<{
|
|
76
|
+
title: string;
|
|
77
|
+
uri: string;
|
|
78
|
+
content: string;
|
|
79
|
+
}[]>;
|
|
80
|
+
/** Result of `preview()`: a sitemap-based overview of a site. */
export interface PreviewResult {
|
|
81
|
+
/** Hostname of the normalized input URL. */
domain: string;
|
|
82
|
+
/** Number of entries returned by the sitemap parser. */
totalPages: number;
|
|
83
|
+
/** True when the sitemap parser returned at least one entry. */
hasSitemap: boolean;
|
|
84
|
+
/** Human-readable size estimate (e.g. "12MB (fits in free tier)"); undefined when no pages were found. */
estimatedSize?: string;
|
|
85
|
+
/** Pages sorted by `lastmod`, most recent first, truncated to the requested limit (default 20). */
recentPages: Array<{
|
|
86
|
+
url: string;
|
|
87
|
+
lastmod?: string;
|
|
88
|
+
}>;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Preview available pages on a site (sitemap discovery)
|
|
92
|
+
*/
|
|
93
|
+
export declare function preview(url: string, options?: {
|
|
94
|
+
limit?: number;
|
|
95
|
+
}): Promise<PreviewResult>;
|
|
96
|
+
export type { CrawlOptions, CrawlResult } from './crawler/index.js';
|
|
97
|
+
export type { ExtractResult } from './extractor/index.js';
|
|
98
|
+
export type { EngineResult, EngineOptions, EngineStats } from './engine/index.js';
|
|
99
|
+
export { Crawler } from './crawler/index.js';
|
|
100
|
+
export { Extractor } from './extractor/index.js';
|
|
101
|
+
export { EngineWaterfall } from './engine/index.js';
|
|
102
|
+
export { createLogger, setLogMode } from './utils/logger.js';
|
|
103
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAQH,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,WAAW,CAAC,EAAE,OAAO,GAAG,YAAY,GAAG,WAAW,CAAC;IACnD,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE;QACL,KAAK,EAAE,MAAM,CAAC;QACd,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,KAAK,EAAE;YACL,aAAa,EAAE,MAAM,CAAC;YACtB,cAAc,EAAE,MAAM,CAAC;YACvB,KAAK,EAAE,MAAM,CAAC;SACf,CAAC;KACH,CAAC;CACH;AAgCD;;GAEG;AACH,wBAAsB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,OAAO,EAAE,UAAU,GAAG,OAAO,CAAC,SAAS,CAAC,CAgFjF;AAkDD;;GAEG;AACH,wBAAsB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,GAAE;IAAE,CAAC,CAAC,EAAE,MAAM,CAAA;CAAO,gBAEnF;AAED;;GAEG;AACH,wBAAsB,GAAG,CACvB,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAA;CAAO,gBAG9D;AAED;;GAEG;AACH,wBAAsB,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAO,gBAExE;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAO;;;;KAE9E;AAED,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,OAAO,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,KAAK,CAAC;QAAE,GAAG,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,C
AAA;KAAE,CAAC,CAAC;CACvD;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAO,GAAG,OAAO,CAAC,aAAa,CAAC,CAoCnG;AAGD,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACpE,YAAY,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1D,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAClF,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* maw - Feed the maw. It never forgets.
|
|
3
|
+
*
|
|
4
|
+
* One command to consume entire websites, git repos, and files into searchable .mv2 files.
|
|
5
|
+
*/
|
|
6
|
+
import { Crawler, SitemapParser } from './crawler/index.js';
|
|
7
|
+
import { ingestToMv2, ingestGitToMv2, getFileSize, searchMv2, askMv2, listMv2, exportMv2 } from './ingestor/index.js';
|
|
8
|
+
import { createLogger, setLogMode } from './utils/logger.js';
|
|
9
|
+
import { normalizeUrl } from './utils/url.js';
|
|
10
|
+
import { isGitUrl, isLocalGitRepo, readGitRepo } from './git/index.js';
|
|
11
|
+
const log = createLogger();
|
|
12
|
+
/**
 * Decide whether a URL points at a specific page rather than a domain root.
 *
 * Examples:
 *   https://stripe.com/docs/api   -> true  (specific page)
 *   https://stripe.com/           -> false (domain root)
 *   https://stripe.com            -> false (domain root)
 *   https://stripe.com/index.html -> false (root-level index document)
 *
 * @param {string} url - Absolute URL to classify.
 * @returns {boolean} `true` when the path is neither the root nor a root-level
 *   index/default document; `false` otherwise, including when `url` cannot be parsed.
 */
function isSpecificPage(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch {
        // Unparseable input is never treated as a specific page.
        return false;
    }
    const isRoot = pathname === '/' || pathname === '';
    // Common index documents directly under the root count as the root itself.
    const isRootIndex = /^\/(index\.(html?|php|aspx?)|default\.(html?|aspx?))$/i.test(pathname);
    return !(isRoot || isRootIndex);
}
|
|
35
|
+
/**
 * Report whether an input refers to a git repo (URL or local path).
 *
 * @param {string} input - Raw input string to classify.
 * @returns The result of `isGitUrl(input)` when truthy, otherwise the result
 *   of `isLocalGitRepo(input)`.
 */
function isGitInput(input) {
    const matchesUrl = isGitUrl(input);
    // Only consult the local-repo check when the URL check did not match.
    return matchesUrl ? matchesUrl : isLocalGitRepo(input);
}
|
|
41
|
+
/**
 * Main maw function - crawl URLs, git repos, or files and save to .mv2 file.
 *
 * Routing:
 * - If any input is a git repo (see `isGitInput`), the git inputs are handed to
 *   `mawGit` and the remaining web inputs are NOT processed in this call; a log
 *   line reports the skipped inputs unless `options.quiet` is set.
 * - Otherwise the inputs are crawled. When every input is a specific page
 *   (see `isSpecificPage`) and `options.depth` is undefined, single-page mode is
 *   used: depth 0, `maxPages` equal to the number of inputs, sitemap disabled.
 *
 * Fallbacks applied when an option is omitted: depth 2, concurrency 10,
 * maxPages 150, rateLimit 10, timeout 10000, useSitemap true, respectRobots true.
 *
 * The crawler is always closed, even when crawling or ingestion throws.
 *
 * @param {string[]} urls - Web URLs and/or git repo URLs/paths.
 * @param {object} options - See `MawOptions`; `options.output` is required.
 * @returns {Promise<object>} A `MawResult` describing the written file and engine stats.
 */
export async function maw(urls, options) {
    setLogMode(options.quiet || false, options.verbose || false);
    // Classify each input exactly once (the git check may be non-trivial for local paths).
    const gitInputs = [];
    const webInputs = [];
    for (const input of urls) {
        (isGitInput(input) ? gitInputs : webInputs).push(input);
    }
    // Git inputs take over the whole run.
    if (gitInputs.length > 0) {
        if (webInputs.length > 0 && !options.quiet) {
            // Make the dropped inputs visible instead of ignoring them silently.
            log.info(` Skipping ${webInputs.length} web input${webInputs.length > 1 ? 's' : ''} (git and web inputs are not combined in one run)`);
        }
        return mawGit(gitInputs, options);
    }
    // Auto-detect single page mode: if ALL urls are specific pages, use single-page mode
    const allSpecificPages = webInputs.every(u => isSpecificPage(u));
    const singlePageMode = allSpecificPages && options.depth === undefined;
    // Show mode indicator (CLI shows header, this adds context)
    if (!options.quiet) {
        if (singlePageMode) {
            log.info(` Fetching ${webInputs.length} page${webInputs.length > 1 ? 's' : ''}...`);
        }
        else {
            log.info(` Crawling (depth ${options.depth ?? 2}, max ${options.maxPages ?? 150} pages)...`);
        }
    }
    const crawler = new Crawler({
        // Single page mode: depth=0, maxPages=urls.length, no sitemap
        depth: singlePageMode ? 0 : (options.depth ?? 2),
        concurrency: options.concurrency ?? 10,
        maxPages: singlePageMode ? webInputs.length : (options.maxPages ?? 150),
        rateLimit: options.rateLimit ?? 10,
        timeout: options.timeout ?? 10000,
        includePattern: options.includePattern,
        excludePattern: options.excludePattern,
        useSitemap: singlePageMode ? false : (options.useSitemap ?? true),
        respectRobots: options.respectRobots ?? true,
        forceEngine: options.forceEngine,
    });
    try {
        // Crawl and ingest
        const crawlResults = crawler.crawl(webInputs);
        // Generate memory name from URLs if not provided
        const memoryName = options.memoryName || webInputs.map(u => {
            try {
                // Strip only a leading "www." label; a plain string pattern would
                // remove the first "www." found anywhere in the hostname.
                return new URL(u).hostname.replace(/^www\./, '');
            }
            catch {
                // Not a parseable URL: fall back to the raw input.
                return u;
            }
        }).join('-');
        const ingestStats = await ingestToMv2(crawlResults, {
            output: options.output,
            label: options.label,
            memoryId: options.memoryId,
            memoryName,
            enableEmbedding: options.enableEmbedding,
            embeddingModel: options.embeddingModel,
        });
        // Get final stats
        const engineStats = crawler.getStats();
        const fileSize = await getFileSize(options.output);
        return {
            output: options.output,
            pages: ingestStats.pages,
            size: fileSize,
            duration: ingestStats.duration,
            stoppedAtLimit: ingestStats.stoppedAtLimit,
            skippedDupes: ingestStats.skippedDupes,
            memoryId: ingestStats.memoryId,
            stats: {
                fetch: engineStats.fetch,
                playwright: engineStats.playwright,
                rebrowser: engineStats.rebrowser,
                blocked: engineStats.blocked,
                dedup: engineStats.dedup || { localeSkipped: 0, similarSkipped: 0, total: 0 },
            },
        };
    }
    finally {
        await crawler.close();
    }
}
|
|
122
|
+
/**
 * Ingest git repos into .mv2 file.
 *
 * Only the FIRST repo in `repos` is read and ingested for now; when more are
 * supplied a log line reports the skipped ones unless `options.quiet` is set.
 * The default memory name is still built from all supplied repo names.
 *
 * @param {string[]} repos - Git repo URLs or local repo paths (non-empty).
 * @param {object} options - See `MawOptions`; `label` falls back to 'code'.
 * @returns {Promise<object>} A `MawResult`; `pages` and `stats.fetch` hold the
 *   ingested file count, and `duration` is wall-clock milliseconds measured here.
 */
async function mawGit(repos, options) {
    const startTime = Date.now();
    if (!options.quiet) {
        log.info(` Reading ${repos.length} repo${repos.length > 1 ? 's' : ''}...`);
        if (repos.length > 1) {
            // Surface the current one-repo limitation instead of dropping inputs silently.
            log.info(` Only the first repo (${repos[0]}) is ingested; skipping ${repos.length - 1} other${repos.length > 2 ? 's' : ''}`);
        }
    }
    // Derive a short name from a repo URL/path: last path segment without ".git".
    const repoLabel = (repo) => {
        // Trim trailing slashes so "path/to/repo/" yields "repo" from the path itself
        // rather than hitting the generic fallback.
        const trimmed = repo.replace(/\/+$/, '');
        const match = trimmed.match(/\/([^/]+?)(\.git)?$/);
        return match ? match[1] : trimmed.split('/').pop() || 'repo';
    };
    // Generate memory name from repo names
    const memoryName = options.memoryName || repos.map(repoLabel).join('-');
    // Read the git repo (one repo per run for now)
    const allFiles = readGitRepo(repos[0]);
    const ingestStats = await ingestGitToMv2(allFiles, {
        output: options.output,
        label: options.label || 'code',
        memoryId: options.memoryId,
        memoryName,
        enableEmbedding: options.enableEmbedding,
        embeddingModel: options.embeddingModel,
    });
    const fileSize = await getFileSize(options.output);
    const duration = Date.now() - startTime;
    return {
        output: options.output,
        pages: ingestStats.files,
        size: fileSize,
        duration,
        stoppedAtLimit: ingestStats.stoppedAtLimit,
        memoryId: ingestStats.memoryId,
        stats: {
            // No crawl engines are involved; every file is reported under "fetch".
            fetch: ingestStats.files,
            playwright: 0,
            rebrowser: 0,
            blocked: 0,
            // NOTE(review): `total` carries the file count here, unlike the all-zero
            // fallback on the web path — confirm the intended meaning.
            dedup: { localeSkipped: 0, similarSkipped: 0, total: ingestStats.files },
        },
    };
}
|
|
163
|
+
/**
 * Search in an .mv2 file.
 *
 * Thin wrapper: forwards all arguments unchanged to `searchMv2`.
 *
 * @param {string} path - The .mv2 file to search.
 * @param {string} query - Search query.
 * @param {{ k?: number }} [options] - Forwarded as-is; defaults to `{}`.
 * @returns {Promise<any>} Whatever `searchMv2` resolves to.
 */
export async function find(path, query, options = {}) {
    const hits = searchMv2(path, query, options);
    return hits;
}
|
|
169
|
+
/**
 * Ask a question using an .mv2 file.
 *
 * Thin wrapper: forwards all arguments unchanged to `askMv2`.
 *
 * @param {string} path - The .mv2 file to query.
 * @param {string} question - Question text.
 * @param {{ model?: string, apiKey?: string, k?: number }} [options] - Forwarded as-is; defaults to `{}`.
 * @returns {Promise<any>} Whatever `askMv2` resolves to.
 */
export async function ask(path, question, options = {}) {
    const answer = askMv2(path, question, options);
    return answer;
}
|
|
175
|
+
/**
 * List documents in an .mv2 file.
 *
 * Thin wrapper: forwards all arguments unchanged to `listMv2`.
 *
 * @param {string} path - The .mv2 file to inspect.
 * @param {{ limit?: number }} [options] - Forwarded as-is; defaults to `{}`.
 * @returns {Promise<any>} Whatever `listMv2` resolves to.
 */
export async function list(path, options = {}) {
    const documents = listMv2(path, options);
    return documents;
}
|
|
181
|
+
/**
 * Export documents from an .mv2 file with full content.
 *
 * Thin wrapper: forwards all arguments unchanged to `exportMv2`.
 *
 * @param {string} path - The .mv2 file to export from.
 * @param {{ limit?: number }} [options] - Forwarded as-is; defaults to `{}`.
 * @returns {Promise<Array<{ title: string, uri: string, content: string }>>}
 *   Whatever `exportMv2` resolves to.
 */
export async function exportDocs(path, options = {}) {
    const exported = exportMv2(path, options);
    return exported;
}
|
|
187
|
+
/**
 * Preview available pages on a site (sitemap discovery).
 *
 * Normalizes the URL, asks `SitemapParser.parseWithMetadata` for the site's
 * pages, and returns the most recently modified ones plus a rough size estimate.
 *
 * Ordering: pages with a parseable `lastmod` come first, newest first; pages
 * whose `lastmod` is missing or unparseable come last.
 *
 * Size estimate: 300 KB per page when the hostname contains "cnn" or "news",
 * otherwise 100 KB per page, converted to MB; estimates under 50 MB are labelled
 * as fitting in the free tier.
 *
 * @param {string} url - Site URL; passed through `normalizeUrl` first.
 * @param {{ limit?: number }} [options] - `limit` caps `recentPages`; a falsy
 *   value (undefined, 0, NaN) falls back to 20.
 * @returns {Promise<object>} A `PreviewResult`; `estimatedSize` is undefined
 *   when no pages were found.
 * @throws {TypeError} If the normalized URL cannot be parsed by `URL`.
 */
export async function preview(url, options = {}) {
    const normalized = normalizeUrl(url);
    const domain = new URL(normalized).hostname;
    const sitemap = new SitemapParser();
    const pages = await sitemap.parseWithMetadata(normalized);
    // NaN marks "no usable date": covers both a missing lastmod and an unparseable one.
    const toTime = (lastmod) => (lastmod ? new Date(lastmod).getTime() : NaN);
    // Sort by lastmod (most recent first) on a copy, leaving the parser's array untouched.
    const sortedPages = [...pages].sort((a, b) => {
        const timeA = toTime(a.lastmod);
        const timeB = toTime(b.lastmod);
        const undatedA = Number.isNaN(timeA);
        const undatedB = Number.isNaN(timeB);
        if (undatedA && undatedB)
            return 0;
        if (undatedA)
            return 1;
        if (undatedB)
            return -1;
        // Both finite here, so the comparator never returns NaN.
        return timeB - timeA;
    });
    const limit = options.limit || 20;
    const recentPages = sortedPages.slice(0, limit).map(p => ({
        url: p.loc,
        lastmod: p.lastmod,
    }));
    // Estimate size (~300KB average per page for news sites, ~100KB for docs)
    const avgPageSize = domain.includes('cnn') || domain.includes('news') ? 300 : 100;
    const estimatedMB = (pages.length * avgPageSize) / 1024;
    const estimatedSize = estimatedMB < 50
        ? `${estimatedMB.toFixed(0)}MB (fits in free tier)`
        : `${estimatedMB.toFixed(0)}MB (needs API key for full crawl)`;
    return {
        domain,
        totalPages: pages.length,
        hasSitemap: pages.length > 0,
        estimatedSize: pages.length > 0 ? estimatedSize : undefined,
        recentPages,
    };
}
|
|
225
|
+
export { Crawler } from './crawler/index.js';
|
|
226
|
+
export { Extractor } from './extractor/index.js';
|
|
227
|
+
export { EngineWaterfall } from './engine/index.js';
|
|
228
|
+
export { createLogger, setLogMode } from './utils/logger.js';
|
|
229
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,OAAO,EAAuC,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACjG,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACtH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC7D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AA4CvE,MAAM,GAAG,GAAG,YAAY,EAAE,CAAC;AAE3B;;;;;GAKG;AACH,SAAS,cAAc,CAAC,GAAW;IACjC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC;QAC7B,yDAAyD;QACzD,gDAAgD;QAChD,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,EAAE;YAAE,OAAO,KAAK,CAAC;QAC9C,IAAI,IAAI,CAAC,KAAK,CAAC,wDAAwD,CAAC;YAAE,OAAO,KAAK,CAAC;QACvF,wBAAwB;QACxB,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,KAAa;IAC/B,OAAO,QAAQ,CAAC,KAAK,CAAC,IAAI,cAAc,CAAC,KAAK,CAAC,CAAC;AAClD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,IAAc,EAAE,OAAmB;IAC3D,UAAU,CAAC,OAAO,CAAC,KAAK,IAAI,KAAK,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK,CAAC,CAAC;IAE7D,mCAAmC;IACnC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAEnD,oCAAoC;IACpC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,MAAM,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACpC,CAAC;IAED,qFAAqF;IACrF,MAAM,gBAAgB,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,cAAc,GAAG,gBAAgB,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,CAAC;IAEvE,4DAA4D;IAC5D,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,cAAc,EAAE,CAAC;YACnB,GAAG,CAAC,IAAI,CAAC,cAAc,SAAS,CAAC,MAAM,QAAQ,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QACvF,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,qBAAqB,OAAO,CAAC,KAAK,IAAI,CAAC,SAAS,OAAO,CAAC,QAAQ,IAAI,GAAG,YAAY,CAAC,CAAC;QAChG,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,OAAO,CAAC;QAC1B,
8DAA8D;QAC9D,KAAK,EAAE,cAAc,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC;QAChD,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;QACtC,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;QACvE,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,EAAE;QAClC,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,KAAK;QACjC,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,cAAc,EAAE,OAAO,CAAC,cAAc;QACtC,UAAU,EAAE,cAAc,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,IAAI,IAAI,CAAC;QACjE,aAAa,EAAE,OAAO,CAAC,aAAa,IAAI,IAAI;QAC5C,WAAW,EAAE,OAAO,CAAC,WAAW;KACjC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,mBAAmB;QACnB,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QAE9C,iDAAiD;QACjD,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;YACzD,IAAI,CAAC;gBAAC,OAAO,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC;gBAAC,OAAO,CAAC,CAAC;YAAC,CAAC;QAC7E,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEb,MAAM,WAAW,GAAG,MAAM,WAAW,CAAC,YAAY,EAAE;YAClD,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,UAAU;YACV,eAAe,EAAE,OAAO,CAAC,eAAe;YACxC,cAAc,EAAE,OAAO,CAAC,cAAc;SACvC,CAAC,CAAC;QAEH,kBAAkB;QAClB,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;QAEnD,OAAO;YACL,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,KAAK,EAAE,WAAW,CAAC,KAAK;YACxB,IAAI,EAAE,QAAQ;YACd,QAAQ,EAAE,WAAW,CAAC,QAAQ;YAC9B,cAAc,EAAE,WAAW,CAAC,cAAc;YAC1C,YAAY,EAAE,WAAW,CAAC,YAAY;YACtC,QAAQ,EAAE,WAAW,CAAC,QAAQ;YAC9B,KAAK,EAAE;gBACL,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,UAAU,EAAE,WAAW,CAAC,UAAU;gBAClC,SAAS,EAAE,WAAW,CAAC,SAAS;gBAChC,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,KAAK,EAAE,WAAW,CAAC,KAAK,IAAI,EAAE,aAAa,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;aAC9E;SACF,CAAC;IACJ,CAAC;YAAS,CAAC;QACT,MAAM,OAAO,CAAC,KAAK,EAAE,CAAC;IACxB,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,MAAM,CAAC,KAAe,EAAE,OAAmB;IACxD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QACnB,GAAG,CAAC,IAAI,CAA
C,aAAa,KAAK,CAAC,MAAM,QAAQ,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAC9E,CAAC;IAED,uCAAuC;IACvC,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;QACrD,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAC7C,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC;IACzD,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEb,qBAAqB;IACrB,MAAM,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,qCAAqC;IAE7E,MAAM,WAAW,GAAG,MAAM,cAAc,CAAC,QAAQ,EAAE;QACjD,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,MAAM;QAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;QAC1B,UAAU;QACV,eAAe,EAAE,OAAO,CAAC,eAAe;QACxC,cAAc,EAAE,OAAO,CAAC,cAAc;KACvC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;IAExC,OAAO;QACL,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,WAAW,CAAC,KAAK;QACxB,IAAI,EAAE,QAAQ;QACd,QAAQ;QACR,cAAc,EAAE,WAAW,CAAC,cAAc;QAC1C,QAAQ,EAAE,WAAW,CAAC,QAAQ;QAC9B,KAAK,EAAE;YACL,KAAK,EAAE,WAAW,CAAC,KAAK;YACxB,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,OAAO,EAAE,CAAC;YACV,KAAK,EAAE,EAAE,aAAa,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,WAAW,CAAC,KAAK,EAAE;SACzE;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,IAAY,EAAE,KAAa,EAAE,UAA0B,EAAE;IAClF,OAAO,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,GAAG,CACvB,IAAY,EACZ,QAAgB,EAChB,UAA2D,EAAE;IAE7D,OAAO,MAAM,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,IAAY,EAAE,UAA8B,EAAE;IACvE,OAAO,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAY,EAAE,UAA8B,EAAE;IAC7E,OAAO,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAClC,CAAC;AAUD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,GAAW,EAAE,UAA8B,EAAE;IACzE,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,
CAAC;IAElC,MAAM,OAAO,GAAG,IAAI,aAAa,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAE1D,sCAAsC;IACtC,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACtC,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC,OAAO;YAAE,OAAO,CAAC,CAAC;QACvC,IAAI,CAAC,CAAC,CAAC,OAAO;YAAE,OAAO,CAAC,CAAC;QACzB,IAAI,CAAC,CAAC,CAAC,OAAO;YAAE,OAAO,CAAC,CAAC,CAAC;QAC1B,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,CAAC;IACvE,CAAC,CAAC,CAAC;IAEH,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACxD,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,OAAO,EAAE,CAAC,CAAC,OAAO;KACnB,CAAC,CAAC,CAAC;IAEJ,0EAA0E;IAC1E,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;IAClF,MAAM,WAAW,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,WAAW,CAAC,GAAG,IAAI,CAAC;IACxD,MAAM,aAAa,GAAG,WAAW,GAAG,EAAE;QACpC,CAAC,CAAC,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,wBAAwB;QACnD,CAAC,CAAC,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,mCAAmC,CAAC;IAEjE,OAAO;QACL,MAAM;QACN,UAAU,EAAE,KAAK,CAAC,MAAM;QACxB,UAAU,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC;QAC5B,aAAa,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS;QAC3D,WAAW;KACZ,CAAC;AACJ,CAAC;AAMD,OAAO,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AACjD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MV2 ingestor - saves crawled content to .mv2 files
|
|
3
|
+
*/
|
|
4
|
+
import type { CrawlResult } from '../crawler/index.js';
|
|
5
|
+
export interface IngestOptions {
|
|
6
|
+
output: string;
|
|
7
|
+
label?: string;
|
|
8
|
+
batchSize?: number;
|
|
9
|
+
maxSizeMB?: number;
|
|
10
|
+
memoryId?: string;
|
|
11
|
+
memoryName?: string;
|
|
12
|
+
enableEmbedding?: boolean;
|
|
13
|
+
embeddingModel?: string;
|
|
14
|
+
}
|
|
15
|
+
export interface IngestStats {
|
|
16
|
+
pages: number;
|
|
17
|
+
bytes: number;
|
|
18
|
+
duration: number;
|
|
19
|
+
stoppedAtLimit?: boolean;
|
|
20
|
+
skippedDupes?: number;
|
|
21
|
+
memoryId?: string;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Ingest crawled results into an MV2 file
|
|
25
|
+
*/
|
|
26
|
+
export declare function ingestToMv2(results: AsyncIterable<CrawlResult>, options: IngestOptions): Promise<IngestStats>;
|
|
27
|
+
export interface GitIngestOptions {
|
|
28
|
+
output: string;
|
|
29
|
+
label?: string;
|
|
30
|
+
memoryId?: string;
|
|
31
|
+
memoryName?: string;
|
|
32
|
+
enableEmbedding?: boolean;
|
|
33
|
+
embeddingModel?: string;
|
|
34
|
+
maxSizeMB?: number;
|
|
35
|
+
}
|
|
36
|
+
export interface GitIngestStats {
|
|
37
|
+
files: number;
|
|
38
|
+
bytes: number;
|
|
39
|
+
duration: number;
|
|
40
|
+
memoryId?: string;
|
|
41
|
+
stoppedAtLimit?: boolean;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Ingest git repo files into an MV2 file
|
|
45
|
+
*/
|
|
46
|
+
export declare function ingestGitToMv2(files: AsyncIterable<{
|
|
47
|
+
path: string;
|
|
48
|
+
content: string;
|
|
49
|
+
language: string;
|
|
50
|
+
size: number;
|
|
51
|
+
}>, options: GitIngestOptions): Promise<GitIngestStats>;
|
|
52
|
+
/**
|
|
53
|
+
* Get file size
|
|
54
|
+
*/
|
|
55
|
+
export declare function getFileSize(path: string): Promise<number>;
|
|
56
|
+
/**
|
|
57
|
+
* Open an existing MV2 file for querying
|
|
58
|
+
*/
|
|
59
|
+
export declare function openMv2(path: string): Promise<any>;
|
|
60
|
+
/**
|
|
61
|
+
* Search in an MV2 file
|
|
62
|
+
* Uses semantic search when OPENAI_API_KEY is set
|
|
63
|
+
*/
|
|
64
|
+
export declare function searchMv2(path: string, query: string, options?: {
|
|
65
|
+
k?: number;
|
|
66
|
+
embeddingModel?: string;
|
|
67
|
+
}): Promise<any>;
|
|
68
|
+
/**
|
|
69
|
+
* Ask a question using an MV2 file
|
|
70
|
+
* Uses semantic search when embeddings are available and OPENAI_API_KEY is set
|
|
71
|
+
*/
|
|
72
|
+
export declare function askMv2(path: string, question: string, options?: {
|
|
73
|
+
model?: string;
|
|
74
|
+
apiKey?: string;
|
|
75
|
+
k?: number;
|
|
76
|
+
embeddingModel?: string;
|
|
77
|
+
}): Promise<any>;
|
|
78
|
+
/**
|
|
79
|
+
* List documents in an MV2 file
|
|
80
|
+
*/
|
|
81
|
+
export declare function listMv2(path: string, options?: {
|
|
82
|
+
limit?: number;
|
|
83
|
+
offset?: number;
|
|
84
|
+
}): Promise<any>;
|
|
85
|
+
/**
|
|
86
|
+
* Export documents from an MV2 file with full content
|
|
87
|
+
*/
|
|
88
|
+
export declare function exportMv2(path: string, options?: {
|
|
89
|
+
limit?: number;
|
|
90
|
+
}): Promise<Array<{
|
|
91
|
+
title: string;
|
|
92
|
+
uri: string;
|
|
93
|
+
content: string;
|
|
94
|
+
}>>;
|
|
95
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/ingestor/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAevD,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,OAAO,EAAE,aAAa,CAAC,WAAW,CAAC,EACnC,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,WAAW,CAAC,CAmLtB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,KAAK,EAAE,aAAa,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,EACvF,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,cAAc,CAAC,CAqLzB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAO/D;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,IAAI,EAAE,MAAM,gBAGzC;AAED;;;GAGG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,MAAM,EACb,OAAO,GAAE;IAAE,CAAC,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAA;CAAO,gBAatD;AAeD;;;GAGG;AACH,wBAAsB,MAAM,CAC1B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,CAAC,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,MAAM,CAAA;CAAO,gBAoBvF;AAED;;GAEG;AACH,wBAAsB,OAAO,CAC3B,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAA
E,MAAM,CAAA;CAAO,gBASlD;AAED;;GAEG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAO,GAC/B,OAAO,CAAC,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAoCjE"}
|