@crawlith/core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +70 -0
- package/dist/analysis/analyze.d.ts +29 -8
- package/dist/analysis/analyze.js +325 -221
- package/dist/analysis/clustering.d.ts +23 -0
- package/dist/analysis/clustering.js +206 -0
- package/dist/analysis/content.d.ts +1 -1
- package/dist/analysis/content.js +11 -5
- package/dist/analysis/duplicate.d.ts +34 -0
- package/dist/analysis/duplicate.js +305 -0
- package/dist/analysis/heading.d.ts +116 -0
- package/dist/analysis/heading.js +356 -0
- package/dist/analysis/images.d.ts +1 -1
- package/dist/analysis/images.js +6 -5
- package/dist/analysis/links.d.ts +1 -1
- package/dist/analysis/links.js +8 -8
- package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
- package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
- package/dist/analysis/scoring.js +4 -1
- package/dist/analysis/seo.d.ts +8 -4
- package/dist/analysis/seo.js +41 -30
- package/dist/analysis/soft404.d.ts +17 -0
- package/dist/analysis/soft404.js +62 -0
- package/dist/analysis/structuredData.d.ts +1 -1
- package/dist/analysis/structuredData.js +5 -4
- package/dist/application/index.d.ts +2 -0
- package/dist/application/index.js +2 -0
- package/dist/application/usecase.d.ts +3 -0
- package/dist/application/usecase.js +1 -0
- package/dist/application/usecases.d.ts +114 -0
- package/dist/application/usecases.js +201 -0
- package/dist/audit/index.js +1 -1
- package/dist/audit/transport.d.ts +1 -1
- package/dist/audit/transport.js +5 -4
- package/dist/audit/types.d.ts +1 -0
- package/dist/constants.d.ts +17 -0
- package/dist/constants.js +23 -0
- package/dist/core/scope/scopeManager.js +3 -0
- package/dist/crawler/crawl.d.ts +2 -2
- package/dist/crawler/crawler.d.ts +17 -5
- package/dist/crawler/crawler.js +259 -94
- package/dist/crawler/fetcher.d.ts +1 -1
- package/dist/crawler/fetcher.js +6 -6
- package/dist/crawler/metricsRunner.d.ts +21 -1
- package/dist/crawler/metricsRunner.js +181 -60
- package/dist/crawler/normalize.d.ts +41 -0
- package/dist/crawler/normalize.js +119 -3
- package/dist/crawler/parser.d.ts +1 -3
- package/dist/crawler/parser.js +2 -49
- package/dist/crawler/resolver.d.ts +11 -0
- package/dist/crawler/resolver.js +67 -0
- package/dist/crawler/sitemap.d.ts +4 -1
- package/dist/crawler/sitemap.js +24 -18
- package/dist/crawler/trap.d.ts +5 -1
- package/dist/crawler/trap.js +23 -2
- package/dist/db/CrawlithDB.d.ts +110 -0
- package/dist/db/CrawlithDB.js +500 -0
- package/dist/db/graphLoader.js +15 -32
- package/dist/db/index.d.ts +9 -1
- package/dist/db/index.js +39 -31
- package/dist/db/migrations.d.ts +2 -0
- package/dist/db/{schema.js → migrations.js} +90 -43
- package/dist/db/pluginRegistry.d.ts +9 -0
- package/dist/db/pluginRegistry.js +19 -0
- package/dist/db/repositories/EdgeRepository.d.ts +5 -0
- package/dist/db/repositories/EdgeRepository.js +7 -0
- package/dist/db/repositories/MetricsRepository.d.ts +13 -8
- package/dist/db/repositories/MetricsRepository.js +14 -6
- package/dist/db/repositories/PageRepository.d.ts +5 -3
- package/dist/db/repositories/PageRepository.js +68 -17
- package/dist/db/repositories/SiteRepository.d.ts +6 -0
- package/dist/db/repositories/SiteRepository.js +4 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
- package/dist/db/repositories/SnapshotRepository.js +48 -10
- package/dist/db/reset.d.ts +9 -0
- package/dist/db/reset.js +32 -0
- package/dist/db/statements.d.ts +12 -0
- package/dist/db/statements.js +40 -0
- package/dist/diff/compare.d.ts +0 -5
- package/dist/diff/compare.js +0 -12
- package/dist/diff/service.d.ts +16 -0
- package/dist/diff/service.js +41 -0
- package/dist/domain/index.d.ts +4 -0
- package/dist/domain/index.js +4 -0
- package/dist/events.d.ts +8 -0
- package/dist/graph/graph.d.ts +20 -42
- package/dist/graph/graph.js +12 -16
- package/dist/graph/hits.d.ts +23 -0
- package/dist/graph/hits.js +111 -0
- package/dist/graph/metrics.d.ts +0 -4
- package/dist/graph/metrics.js +19 -15
- package/dist/graph/pagerank.d.ts +17 -4
- package/dist/graph/pagerank.js +126 -93
- package/dist/index.d.ts +27 -9
- package/dist/index.js +27 -9
- package/dist/lock/lockManager.d.ts +1 -0
- package/dist/lock/lockManager.js +15 -0
- package/dist/plugin-system/plugin-cli.d.ts +10 -0
- package/dist/plugin-system/plugin-cli.js +31 -0
- package/dist/plugin-system/plugin-config.d.ts +16 -0
- package/dist/plugin-system/plugin-config.js +36 -0
- package/dist/plugin-system/plugin-loader.d.ts +17 -0
- package/dist/plugin-system/plugin-loader.js +122 -0
- package/dist/plugin-system/plugin-registry.d.ts +25 -0
- package/dist/plugin-system/plugin-registry.js +167 -0
- package/dist/plugin-system/plugin-types.d.ts +205 -0
- package/dist/plugin-system/plugin-types.js +1 -0
- package/dist/ports/index.d.ts +9 -0
- package/dist/ports/index.js +1 -0
- package/dist/report/export.d.ts +3 -0
- package/dist/report/export.js +81 -0
- package/dist/report/insight.d.ts +27 -0
- package/dist/report/insight.js +103 -0
- package/dist/scoring/health.d.ts +17 -11
- package/dist/scoring/health.js +183 -140
- package/dist/utils/chalk.d.ts +6 -0
- package/dist/utils/chalk.js +41 -0
- package/dist/utils/secureConfig.d.ts +23 -0
- package/dist/utils/secureConfig.js +128 -0
- package/package.json +10 -4
- package/CHANGELOG.md +0 -13
- package/dist/db/schema.d.ts +0 -2
- package/dist/graph/cluster.d.ts +0 -6
- package/dist/graph/cluster.js +0 -221
- package/dist/graph/duplicate.d.ts +0 -10
- package/dist/graph/duplicate.js +0 -302
- package/dist/scoring/hits.d.ts +0 -10
- package/dist/scoring/hits.js +0 -131
- package/scripts/copy-assets.js +0 -37
- package/src/analysis/analysis_list.html +0 -35
- package/src/analysis/analysis_page.html +0 -123
- package/src/analysis/analyze.ts +0 -505
- package/src/analysis/content.ts +0 -62
- package/src/analysis/images.ts +0 -28
- package/src/analysis/links.ts +0 -41
- package/src/analysis/scoring.ts +0 -66
- package/src/analysis/seo.ts +0 -82
- package/src/analysis/structuredData.ts +0 -62
- package/src/analysis/templates.ts +0 -9
- package/src/audit/dns.ts +0 -49
- package/src/audit/headers.ts +0 -98
- package/src/audit/index.ts +0 -66
- package/src/audit/scoring.ts +0 -232
- package/src/audit/transport.ts +0 -258
- package/src/audit/types.ts +0 -102
- package/src/core/network/proxyAdapter.ts +0 -21
- package/src/core/network/rateLimiter.ts +0 -39
- package/src/core/network/redirectController.ts +0 -47
- package/src/core/network/responseLimiter.ts +0 -34
- package/src/core/network/retryPolicy.ts +0 -57
- package/src/core/scope/domainFilter.ts +0 -45
- package/src/core/scope/scopeManager.ts +0 -52
- package/src/core/scope/subdomainPolicy.ts +0 -39
- package/src/core/security/ipGuard.ts +0 -171
- package/src/crawler/crawl.ts +0 -9
- package/src/crawler/crawler.ts +0 -601
- package/src/crawler/extract.ts +0 -39
- package/src/crawler/fetcher.ts +0 -251
- package/src/crawler/metricsRunner.ts +0 -137
- package/src/crawler/normalize.ts +0 -108
- package/src/crawler/parser.ts +0 -190
- package/src/crawler/sitemap.ts +0 -76
- package/src/crawler/trap.ts +0 -96
- package/src/db/graphLoader.ts +0 -135
- package/src/db/index.ts +0 -75
- package/src/db/repositories/EdgeRepository.ts +0 -43
- package/src/db/repositories/MetricsRepository.ts +0 -63
- package/src/db/repositories/PageRepository.ts +0 -228
- package/src/db/repositories/SiteRepository.ts +0 -43
- package/src/db/repositories/SnapshotRepository.ts +0 -99
- package/src/db/schema.ts +0 -177
- package/src/diff/compare.ts +0 -84
- package/src/events.ts +0 -16
- package/src/graph/cluster.ts +0 -246
- package/src/graph/duplicate.ts +0 -350
- package/src/graph/graph.ts +0 -192
- package/src/graph/metrics.ts +0 -125
- package/src/graph/pagerank.ts +0 -126
- package/src/graph/simhash.ts +0 -76
- package/src/index.ts +0 -33
- package/src/lock/hashKey.ts +0 -51
- package/src/lock/lockManager.ts +0 -132
- package/src/lock/pidCheck.ts +0 -13
- package/src/report/crawl.html +0 -879
- package/src/report/crawlExport.ts +0 -58
- package/src/report/crawl_template.ts +0 -9
- package/src/report/html.ts +0 -27
- package/src/scoring/health.ts +0 -241
- package/src/scoring/hits.ts +0 -153
- package/src/scoring/orphanSeverity.ts +0 -176
- package/src/utils/version.ts +0 -18
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
- package/tests/analysis.unit.test.ts +0 -142
- package/tests/analyze.integration.test.ts +0 -133
- package/tests/analyze_markdown.test.ts +0 -98
- package/tests/audit/audit.test.ts +0 -101
- package/tests/audit/dns.test.ts +0 -31
- package/tests/audit/headers.test.ts +0 -45
- package/tests/audit/scoring.test.ts +0 -133
- package/tests/audit/security.test.ts +0 -12
- package/tests/audit/transport.test.ts +0 -111
- package/tests/clustering.test.ts +0 -118
- package/tests/clustering_risk.test.ts +0 -118
- package/tests/crawler.test.ts +0 -364
- package/tests/db/index.test.ts +0 -134
- package/tests/db/repositories.test.ts +0 -115
- package/tests/db.test.ts +0 -159
- package/tests/db_repos.test.ts +0 -72
- package/tests/diff.test.ts +0 -67
- package/tests/duplicate.test.ts +0 -110
- package/tests/extract.test.ts +0 -86
- package/tests/fetcher.test.ts +0 -110
- package/tests/fetcher_safety.test.ts +0 -91
- package/tests/fixtures/analyze-crawl.json +0 -26
- package/tests/graph/graph.test.ts +0 -100
- package/tests/graphLoader.test.ts +0 -124
- package/tests/hits.test.ts +0 -134
- package/tests/html_report.test.ts +0 -59
- package/tests/ipGuard.test.ts +0 -73
- package/tests/lock/lockManager.test.ts +0 -198
- package/tests/metrics.test.ts +0 -196
- package/tests/normalize.test.ts +0 -88
- package/tests/orphanSeverity.test.ts +0 -160
- package/tests/pagerank.test.ts +0 -98
- package/tests/parser.test.ts +0 -117
- package/tests/proxy_safety.test.ts +0 -57
- package/tests/redirect_safety.test.ts +0 -77
- package/tests/renderAnalysisCsv.test.ts +0 -183
- package/tests/safety.test.ts +0 -126
- package/tests/scope.test.ts +0 -84
- package/tests/scoring.test.ts +0 -60
- package/tests/sitemap.test.ts +0 -100
- package/tests/soft404.test.ts +0 -41
- package/tests/ssrf_fix.test.ts +0 -69
- package/tests/trap.test.ts +0 -39
- package/tests/visualization_data.test.ts +0 -46
- package/tsconfig.json +0 -11
package/dist/crawler/sitemap.d.ts
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { EngineContext } from '../events.js';
|
|
2
|
+
import { Fetcher } from './fetcher.js';
|
|
2
3
|
export declare class Sitemap {
|
|
3
4
|
private context?;
|
|
4
|
-
|
|
5
|
+
private fetcher?;
|
|
6
|
+
private userAgent;
|
|
7
|
+
constructor(context?: EngineContext | undefined, fetcher?: Fetcher | undefined, userAgent?: string);
|
|
5
8
|
/**
|
|
6
9
|
* Fetches and parses a sitemap (or sitemap index) to extract URLs.
|
|
7
10
|
* Recursively handles sitemap indexes with loop detection and depth limits.
|
package/dist/crawler/sitemap.js
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
|
-
import { request } from 'undici';
|
|
2
1
|
import * as cheerio from 'cheerio';
|
|
2
|
+
import pLimit from 'p-limit';
|
|
3
3
|
import { normalizeUrl } from './normalize.js';
|
|
4
|
+
import { DEFAULTS } from '../constants.js';
|
|
4
5
|
export class Sitemap {
|
|
5
6
|
context;
|
|
6
|
-
|
|
7
|
+
fetcher;
|
|
8
|
+
userAgent = DEFAULTS.USER_AGENT;
|
|
9
|
+
constructor(context, fetcher, userAgent) {
|
|
7
10
|
this.context = context;
|
|
11
|
+
this.fetcher = fetcher;
|
|
12
|
+
if (userAgent)
|
|
13
|
+
this.userAgent = userAgent;
|
|
14
|
+
else if (fetcher)
|
|
15
|
+
this.userAgent = fetcher.userAgent;
|
|
8
16
|
}
|
|
9
17
|
/**
|
|
10
18
|
* Fetches and parses a sitemap (or sitemap index) to extract URLs.
|
|
@@ -24,14 +32,16 @@ export class Sitemap {
|
|
|
24
32
|
if (visited.size > 50)
|
|
25
33
|
return;
|
|
26
34
|
try {
|
|
27
|
-
const res =
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
const res = this.fetcher
|
|
36
|
+
? await this.fetcher.fetch(url, { maxBytes: 5000000 })
|
|
37
|
+
: await (async () => {
|
|
38
|
+
const { request } = await import('undici');
|
|
39
|
+
const r = await request(url, { headers: { 'User-Agent': this.userAgent } });
|
|
40
|
+
const b = await r.body.text();
|
|
41
|
+
return { status: r.statusCode, body: b };
|
|
42
|
+
})();
|
|
43
|
+
if (typeof res.status === 'number' && res.status >= 200 && res.status < 300) {
|
|
44
|
+
const xml = res.body;
|
|
35
45
|
// Basic validation: must verify it looks like XML
|
|
36
46
|
if (!xml.trim().startsWith('<'))
|
|
37
47
|
return;
|
|
@@ -45,10 +55,9 @@ export class Sitemap {
|
|
|
45
55
|
if (loc)
|
|
46
56
|
childSitemaps.push(loc);
|
|
47
57
|
});
|
|
48
|
-
// Process children
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
58
|
+
// Process children concurrently but with a limit to avoid massive concurrency spike
|
|
59
|
+
const limit = pLimit(10);
|
|
60
|
+
await Promise.all(childSitemaps.map(childUrl => limit(() => this.processSitemap(childUrl, visited, urls))));
|
|
52
61
|
}
|
|
53
62
|
else {
|
|
54
63
|
// It's a URL Set
|
|
@@ -63,12 +72,9 @@ export class Sitemap {
|
|
|
63
72
|
});
|
|
64
73
|
}
|
|
65
74
|
}
|
|
66
|
-
else {
|
|
67
|
-
await res.body.dump();
|
|
68
|
-
}
|
|
69
75
|
}
|
|
70
76
|
catch (e) {
|
|
71
|
-
this.context?.emit({ type: 'warn', message: `Failed to fetch sitemap ${url}`, context: e });
|
|
77
|
+
this.context?.emit({ type: 'warn', message: `Failed to fetch sitemap ${url} (${String(e)})`, context: e });
|
|
72
78
|
}
|
|
73
79
|
}
|
|
74
80
|
}
|
package/dist/crawler/trap.d.ts
CHANGED
|
@@ -16,7 +16,11 @@ export declare class TrapDetector {
|
|
|
16
16
|
/**
|
|
17
17
|
* Checks if a URL represents a potential crawl trap.
|
|
18
18
|
*/
|
|
19
|
-
checkTrap(rawUrl: string, _depth: number): TrapResult;
|
|
19
|
+
checkTrap(rawUrl: string, _depth: number, isInternal?: boolean): TrapResult;
|
|
20
|
+
/**
|
|
21
|
+
* Iterates over all nodes in the graph and flags potential traps.
|
|
22
|
+
*/
|
|
23
|
+
analyze(graph: any): void;
|
|
20
24
|
/**
|
|
21
25
|
* Resets internal state (useful for multi-crawl sessions if needed)
|
|
22
26
|
*/
|
package/dist/crawler/trap.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export class TrapDetector {
|
|
2
2
|
pathCounters = new Map();
|
|
3
3
|
paginationCounters = new Map();
|
|
4
|
-
sessionParams = new Set(['sid', 'session', 'phpsessid', 'sessid', 'token']);
|
|
4
|
+
sessionParams = new Set(['sid', 'session', 'phpsessid', 'sessid', 'token', 'intended']);
|
|
5
5
|
// Configurable thresholds
|
|
6
6
|
PARAM_EXPLOSION_THRESHOLD = 30;
|
|
7
7
|
PAGINATION_THRESHOLD = 50;
|
|
@@ -14,7 +14,12 @@ export class TrapDetector {
|
|
|
14
14
|
/**
|
|
15
15
|
* Checks if a URL represents a potential crawl trap.
|
|
16
16
|
*/
|
|
17
|
-
checkTrap(rawUrl, _depth) {
|
|
17
|
+
checkTrap(rawUrl, _depth, isInternal = true) {
|
|
18
|
+
// If it's not internal (e.g., social sharing links), we don't flag it as a trap
|
|
19
|
+
// that affects our crawl health, even though technically it might have many params.
|
|
20
|
+
if (!isInternal) {
|
|
21
|
+
return { risk: 0, type: null };
|
|
22
|
+
}
|
|
18
23
|
let risk = 0;
|
|
19
24
|
let type = null;
|
|
20
25
|
try {
|
|
@@ -68,6 +73,22 @@ export class TrapDetector {
|
|
|
68
73
|
}
|
|
69
74
|
return { risk, type };
|
|
70
75
|
}
|
|
76
|
+
/**
|
|
77
|
+
* Iterates over all nodes in the graph and flags potential traps.
|
|
78
|
+
*/
|
|
79
|
+
analyze(graph) {
|
|
80
|
+
const nodes = graph.getNodes();
|
|
81
|
+
for (const node of nodes) {
|
|
82
|
+
if (node.status === 200 || node.status === 0) {
|
|
83
|
+
const res = this.checkTrap(node.url, node.depth || 0, !!node.isInternal);
|
|
84
|
+
if (res.risk > 0.4) {
|
|
85
|
+
node.crawlTrapFlag = true;
|
|
86
|
+
node.crawlTrapRisk = res.risk;
|
|
87
|
+
node.trapType = res.type;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
71
92
|
/**
|
|
72
93
|
* Resets internal state (useful for multi-crawl sessions if needed)
|
|
73
94
|
*/
|
|
package/dist/db/CrawlithDB.d.ts
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import type { CrawlithPlugin } from '../plugin-system/plugin-types.js';
|
|
3
|
+
export declare class CrawlithDB {
|
|
4
|
+
private db;
|
|
5
|
+
private statements;
|
|
6
|
+
private registry;
|
|
7
|
+
/**
|
|
8
|
+
* @internal
|
|
9
|
+
* Dangerous: Returns the raw better-sqlite3 instance.
|
|
10
|
+
* Core only. Plugins must never use this.
|
|
11
|
+
*/
|
|
12
|
+
unsafeGetRawDb(): Database.Database;
|
|
13
|
+
private _pluginName?;
|
|
14
|
+
private _snapshotId?;
|
|
15
|
+
/** Whether live fallback is allowed (from --live flag). Core-controlled. */
|
|
16
|
+
private _live;
|
|
17
|
+
/** Whether this plugin makes network calls. Core-controlled via plugin.storage.fetchMode. */
|
|
18
|
+
private _fetchMode;
|
|
19
|
+
constructor(dbPath: string);
|
|
20
|
+
/**
|
|
21
|
+
* Schema API
|
|
22
|
+
*/
|
|
23
|
+
get schema(): {
|
|
24
|
+
define: (columns: Record<string, string>) => void;
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Fluent Data API (URL-scoped rows)
|
|
28
|
+
*/
|
|
29
|
+
get data(): {
|
|
30
|
+
save: <T>(input: {
|
|
31
|
+
url: string;
|
|
32
|
+
data: T;
|
|
33
|
+
}) => void;
|
|
34
|
+
find: <T>(url: string, options?: {
|
|
35
|
+
maxAge?: string | number;
|
|
36
|
+
global?: boolean;
|
|
37
|
+
}) => T | null;
|
|
38
|
+
all: <T>() => T[];
|
|
39
|
+
/**
|
|
40
|
+
* Cache-first with live fallback. Core-enforced pattern:
|
|
41
|
+
* 1. If cached data exists → return it (always, regardless of age)
|
|
42
|
+
* 2. If no cache + fetchMode='network' + live=false → return null (skip)
|
|
43
|
+
* 3. If no cache + (fetchMode='local' OR live=true) → call fetchFn, save, return
|
|
44
|
+
*
|
|
45
|
+
* Plugin authors NEVER touch ctx.live — the core injects it via scope().
|
|
46
|
+
*/
|
|
47
|
+
getOrFetch: <T>(url: string, fetchFn: () => Promise<T>) => Promise<T | null>;
|
|
48
|
+
};
|
|
49
|
+
/**
|
|
50
|
+
* Report API (Global snapshot summary)
|
|
51
|
+
*/
|
|
52
|
+
get report(): {
|
|
53
|
+
save: (summary: any, optionalScores?: {
|
|
54
|
+
totalScore?: number;
|
|
55
|
+
scoreCount?: number;
|
|
56
|
+
scoreWeightSum?: number;
|
|
57
|
+
scoreCalculatedAt?: string;
|
|
58
|
+
}) => void;
|
|
59
|
+
find: <T>() => T | null;
|
|
60
|
+
};
|
|
61
|
+
initialize(): void;
|
|
62
|
+
/**
|
|
63
|
+
* Create a scoped instance for a specific plugin.
|
|
64
|
+
* Also bakes in live + fetchMode so getOrFetch() can enforce the protocol
|
|
65
|
+
* without exposing those controls to the plugin author.
|
|
66
|
+
*/
|
|
67
|
+
scope(pluginName: string, snapshotId?: number | string, options?: {
|
|
68
|
+
live?: boolean;
|
|
69
|
+
fetchMode?: 'local' | 'network';
|
|
70
|
+
}): CrawlithDB;
|
|
71
|
+
registerPluginDataSchema(pluginNameOrColumns: string | Record<string, string>, extraColumns?: Record<string, string>): void;
|
|
72
|
+
/** @deprecated Use registerPluginDataSchema */
|
|
73
|
+
registerPluginMigration(pluginName: string, migrationSQL: string): void;
|
|
74
|
+
getPageIdByUrl(snapshotId: number | string, url: string): number | null;
|
|
75
|
+
insertPluginReport(input: {
|
|
76
|
+
snapshotId?: number | string;
|
|
77
|
+
pluginName?: string;
|
|
78
|
+
summary: unknown;
|
|
79
|
+
totalScore?: number;
|
|
80
|
+
scoreCount?: number;
|
|
81
|
+
scoreWeightSum?: number;
|
|
82
|
+
scoreCalculatedAt?: string;
|
|
83
|
+
}): void;
|
|
84
|
+
insertPluginRow<T>(input: {
|
|
85
|
+
tableName?: string;
|
|
86
|
+
snapshotId?: number | string;
|
|
87
|
+
url: string;
|
|
88
|
+
data: T;
|
|
89
|
+
}): void;
|
|
90
|
+
getPluginReport(snapshotId?: number | string, pluginName?: string): unknown | null;
|
|
91
|
+
getPluginRows<T>(tableName?: string, snapshotId?: number | string): T[];
|
|
92
|
+
getPluginRow<T>(tableNameOrUrl: string, snapshotId?: number | string, url?: string, options?: {
|
|
93
|
+
maxAge?: string | number;
|
|
94
|
+
global?: boolean;
|
|
95
|
+
}): T | null;
|
|
96
|
+
private _parseDuration;
|
|
97
|
+
private _parseRow;
|
|
98
|
+
deleteSnapshotPlugins(snapshotId: number | string): void;
|
|
99
|
+
private _getOrFetch;
|
|
100
|
+
aggregateScoreProviders(snapshotId: number | string, plugins: CrawlithPlugin[]): void;
|
|
101
|
+
runInTransaction(fn: () => void): void;
|
|
102
|
+
private _resolveTableName;
|
|
103
|
+
/** Converts a plugin name to its canonical SQLite table name, sanitizing invalid characters. */
|
|
104
|
+
private _toTableName;
|
|
105
|
+
close(): void;
|
|
106
|
+
private _isMigrationExecuted;
|
|
107
|
+
private _assertSnapshotExists;
|
|
108
|
+
private _assertTableRegistered;
|
|
109
|
+
private _assertOwnership;
|
|
110
|
+
}
|