@crawlith/core 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +70 -0
- package/dist/analysis/analyze.d.ts +29 -8
- package/dist/analysis/analyze.js +325 -221
- package/dist/analysis/clustering.d.ts +23 -0
- package/dist/analysis/clustering.js +206 -0
- package/dist/analysis/content.d.ts +1 -1
- package/dist/analysis/content.js +11 -5
- package/dist/analysis/duplicate.d.ts +34 -0
- package/dist/analysis/duplicate.js +305 -0
- package/dist/analysis/heading.d.ts +116 -0
- package/dist/analysis/heading.js +356 -0
- package/dist/analysis/images.d.ts +1 -1
- package/dist/analysis/images.js +6 -5
- package/dist/analysis/links.d.ts +1 -1
- package/dist/analysis/links.js +8 -8
- package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
- package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
- package/dist/analysis/scoring.js +4 -1
- package/dist/analysis/seo.d.ts +8 -4
- package/dist/analysis/seo.js +41 -30
- package/dist/analysis/soft404.d.ts +17 -0
- package/dist/analysis/soft404.js +62 -0
- package/dist/analysis/structuredData.d.ts +1 -1
- package/dist/analysis/structuredData.js +5 -4
- package/dist/application/index.d.ts +2 -0
- package/dist/application/index.js +2 -0
- package/dist/application/usecase.d.ts +3 -0
- package/dist/application/usecase.js +1 -0
- package/dist/application/usecases.d.ts +114 -0
- package/dist/application/usecases.js +201 -0
- package/dist/audit/index.js +1 -1
- package/dist/audit/transport.d.ts +1 -1
- package/dist/audit/transport.js +5 -4
- package/dist/audit/types.d.ts +1 -0
- package/dist/constants.d.ts +17 -0
- package/dist/constants.js +23 -0
- package/dist/core/scope/scopeManager.js +3 -0
- package/dist/crawler/crawl.d.ts +2 -2
- package/dist/crawler/crawler.d.ts +17 -5
- package/dist/crawler/crawler.js +259 -94
- package/dist/crawler/fetcher.d.ts +1 -1
- package/dist/crawler/fetcher.js +6 -6
- package/dist/crawler/metricsRunner.d.ts +21 -1
- package/dist/crawler/metricsRunner.js +181 -60
- package/dist/crawler/normalize.d.ts +41 -0
- package/dist/crawler/normalize.js +119 -3
- package/dist/crawler/parser.d.ts +1 -3
- package/dist/crawler/parser.js +2 -49
- package/dist/crawler/resolver.d.ts +11 -0
- package/dist/crawler/resolver.js +67 -0
- package/dist/crawler/sitemap.d.ts +4 -1
- package/dist/crawler/sitemap.js +24 -18
- package/dist/crawler/trap.d.ts +5 -1
- package/dist/crawler/trap.js +23 -2
- package/dist/db/CrawlithDB.d.ts +110 -0
- package/dist/db/CrawlithDB.js +500 -0
- package/dist/db/graphLoader.js +15 -32
- package/dist/db/index.d.ts +9 -1
- package/dist/db/index.js +39 -31
- package/dist/db/migrations.d.ts +2 -0
- package/dist/db/{schema.js → migrations.js} +90 -43
- package/dist/db/pluginRegistry.d.ts +9 -0
- package/dist/db/pluginRegistry.js +19 -0
- package/dist/db/repositories/EdgeRepository.d.ts +5 -0
- package/dist/db/repositories/EdgeRepository.js +7 -0
- package/dist/db/repositories/MetricsRepository.d.ts +13 -8
- package/dist/db/repositories/MetricsRepository.js +14 -6
- package/dist/db/repositories/PageRepository.d.ts +5 -3
- package/dist/db/repositories/PageRepository.js +68 -17
- package/dist/db/repositories/SiteRepository.d.ts +6 -0
- package/dist/db/repositories/SiteRepository.js +4 -0
- package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
- package/dist/db/repositories/SnapshotRepository.js +48 -10
- package/dist/db/reset.d.ts +9 -0
- package/dist/db/reset.js +32 -0
- package/dist/db/statements.d.ts +12 -0
- package/dist/db/statements.js +40 -0
- package/dist/diff/compare.d.ts +0 -5
- package/dist/diff/compare.js +0 -12
- package/dist/diff/service.d.ts +16 -0
- package/dist/diff/service.js +41 -0
- package/dist/domain/index.d.ts +4 -0
- package/dist/domain/index.js +4 -0
- package/dist/events.d.ts +8 -0
- package/dist/graph/graph.d.ts +20 -42
- package/dist/graph/graph.js +12 -16
- package/dist/graph/hits.d.ts +23 -0
- package/dist/graph/hits.js +111 -0
- package/dist/graph/metrics.d.ts +0 -4
- package/dist/graph/metrics.js +19 -15
- package/dist/graph/pagerank.d.ts +17 -4
- package/dist/graph/pagerank.js +126 -93
- package/dist/index.d.ts +27 -9
- package/dist/index.js +27 -9
- package/dist/lock/lockManager.d.ts +1 -0
- package/dist/lock/lockManager.js +15 -0
- package/dist/plugin-system/plugin-cli.d.ts +10 -0
- package/dist/plugin-system/plugin-cli.js +31 -0
- package/dist/plugin-system/plugin-config.d.ts +16 -0
- package/dist/plugin-system/plugin-config.js +36 -0
- package/dist/plugin-system/plugin-loader.d.ts +17 -0
- package/dist/plugin-system/plugin-loader.js +122 -0
- package/dist/plugin-system/plugin-registry.d.ts +25 -0
- package/dist/plugin-system/plugin-registry.js +167 -0
- package/dist/plugin-system/plugin-types.d.ts +205 -0
- package/dist/plugin-system/plugin-types.js +1 -0
- package/dist/ports/index.d.ts +9 -0
- package/dist/ports/index.js +1 -0
- package/dist/report/export.d.ts +3 -0
- package/dist/report/export.js +81 -0
- package/dist/report/insight.d.ts +27 -0
- package/dist/report/insight.js +103 -0
- package/dist/scoring/health.d.ts +17 -11
- package/dist/scoring/health.js +183 -140
- package/dist/utils/chalk.d.ts +6 -0
- package/dist/utils/chalk.js +41 -0
- package/dist/utils/secureConfig.d.ts +23 -0
- package/dist/utils/secureConfig.js +128 -0
- package/package.json +10 -4
- package/CHANGELOG.md +0 -13
- package/dist/db/schema.d.ts +0 -2
- package/dist/graph/cluster.d.ts +0 -6
- package/dist/graph/cluster.js +0 -221
- package/dist/graph/duplicate.d.ts +0 -10
- package/dist/graph/duplicate.js +0 -302
- package/dist/scoring/hits.d.ts +0 -10
- package/dist/scoring/hits.js +0 -131
- package/scripts/copy-assets.js +0 -37
- package/src/analysis/analysis_list.html +0 -35
- package/src/analysis/analysis_page.html +0 -123
- package/src/analysis/analyze.ts +0 -505
- package/src/analysis/content.ts +0 -62
- package/src/analysis/images.ts +0 -28
- package/src/analysis/links.ts +0 -41
- package/src/analysis/scoring.ts +0 -66
- package/src/analysis/seo.ts +0 -82
- package/src/analysis/structuredData.ts +0 -62
- package/src/analysis/templates.ts +0 -9
- package/src/audit/dns.ts +0 -49
- package/src/audit/headers.ts +0 -98
- package/src/audit/index.ts +0 -66
- package/src/audit/scoring.ts +0 -232
- package/src/audit/transport.ts +0 -258
- package/src/audit/types.ts +0 -102
- package/src/core/network/proxyAdapter.ts +0 -21
- package/src/core/network/rateLimiter.ts +0 -39
- package/src/core/network/redirectController.ts +0 -47
- package/src/core/network/responseLimiter.ts +0 -34
- package/src/core/network/retryPolicy.ts +0 -57
- package/src/core/scope/domainFilter.ts +0 -45
- package/src/core/scope/scopeManager.ts +0 -52
- package/src/core/scope/subdomainPolicy.ts +0 -39
- package/src/core/security/ipGuard.ts +0 -171
- package/src/crawler/crawl.ts +0 -9
- package/src/crawler/crawler.ts +0 -601
- package/src/crawler/extract.ts +0 -39
- package/src/crawler/fetcher.ts +0 -251
- package/src/crawler/metricsRunner.ts +0 -137
- package/src/crawler/normalize.ts +0 -108
- package/src/crawler/parser.ts +0 -190
- package/src/crawler/sitemap.ts +0 -76
- package/src/crawler/trap.ts +0 -96
- package/src/db/graphLoader.ts +0 -135
- package/src/db/index.ts +0 -75
- package/src/db/repositories/EdgeRepository.ts +0 -43
- package/src/db/repositories/MetricsRepository.ts +0 -63
- package/src/db/repositories/PageRepository.ts +0 -228
- package/src/db/repositories/SiteRepository.ts +0 -43
- package/src/db/repositories/SnapshotRepository.ts +0 -99
- package/src/db/schema.ts +0 -177
- package/src/diff/compare.ts +0 -84
- package/src/events.ts +0 -16
- package/src/graph/cluster.ts +0 -246
- package/src/graph/duplicate.ts +0 -350
- package/src/graph/graph.ts +0 -192
- package/src/graph/metrics.ts +0 -125
- package/src/graph/pagerank.ts +0 -126
- package/src/graph/simhash.ts +0 -76
- package/src/index.ts +0 -33
- package/src/lock/hashKey.ts +0 -51
- package/src/lock/lockManager.ts +0 -132
- package/src/lock/pidCheck.ts +0 -13
- package/src/report/crawl.html +0 -879
- package/src/report/crawlExport.ts +0 -58
- package/src/report/crawl_template.ts +0 -9
- package/src/report/html.ts +0 -27
- package/src/scoring/health.ts +0 -241
- package/src/scoring/hits.ts +0 -153
- package/src/scoring/orphanSeverity.ts +0 -176
- package/src/utils/version.ts +0 -18
- package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
- package/tests/analysis.unit.test.ts +0 -142
- package/tests/analyze.integration.test.ts +0 -133
- package/tests/analyze_markdown.test.ts +0 -98
- package/tests/audit/audit.test.ts +0 -101
- package/tests/audit/dns.test.ts +0 -31
- package/tests/audit/headers.test.ts +0 -45
- package/tests/audit/scoring.test.ts +0 -133
- package/tests/audit/security.test.ts +0 -12
- package/tests/audit/transport.test.ts +0 -111
- package/tests/clustering.test.ts +0 -118
- package/tests/clustering_risk.test.ts +0 -118
- package/tests/crawler.test.ts +0 -364
- package/tests/db/index.test.ts +0 -134
- package/tests/db/repositories.test.ts +0 -115
- package/tests/db.test.ts +0 -159
- package/tests/db_repos.test.ts +0 -72
- package/tests/diff.test.ts +0 -67
- package/tests/duplicate.test.ts +0 -110
- package/tests/extract.test.ts +0 -86
- package/tests/fetcher.test.ts +0 -110
- package/tests/fetcher_safety.test.ts +0 -91
- package/tests/fixtures/analyze-crawl.json +0 -26
- package/tests/graph/graph.test.ts +0 -100
- package/tests/graphLoader.test.ts +0 -124
- package/tests/hits.test.ts +0 -134
- package/tests/html_report.test.ts +0 -59
- package/tests/ipGuard.test.ts +0 -73
- package/tests/lock/lockManager.test.ts +0 -198
- package/tests/metrics.test.ts +0 -196
- package/tests/normalize.test.ts +0 -88
- package/tests/orphanSeverity.test.ts +0 -160
- package/tests/pagerank.test.ts +0 -98
- package/tests/parser.test.ts +0 -117
- package/tests/proxy_safety.test.ts +0 -57
- package/tests/redirect_safety.test.ts +0 -77
- package/tests/renderAnalysisCsv.test.ts +0 -183
- package/tests/safety.test.ts +0 -126
- package/tests/scope.test.ts +0 -84
- package/tests/scoring.test.ts +0 -60
- package/tests/sitemap.test.ts +0 -100
- package/tests/soft404.test.ts +0 -41
- package/tests/ssrf_fix.test.ts +0 -69
- package/tests/trap.test.ts +0 -39
- package/tests/visualization_data.test.ts +0 -46
- package/tsconfig.json +0 -11
package/src/lock/lockManager.ts
DELETED
|
@@ -1,132 +0,0 @@
|
|
|
1
|
-
import fs from 'node:fs/promises';
|
|
2
|
-
import { existsSync, unlinkSync, readFileSync } from 'node:fs';
|
|
3
|
-
import path from 'node:path';
|
|
4
|
-
import os from 'node:os';
|
|
5
|
-
import { generateLockKey } from './hashKey.js';
|
|
6
|
-
import { isPidAlive } from './pidCheck.js';
|
|
7
|
-
import { EngineContext } from '../events.js';
|
|
8
|
-
|
|
9
|
-
/**
 * JSON payload written into a `.lock` file by `LockManager.acquireLock`.
 */
interface LockData {
  /** PID of the process that created the lock (`process.pid` at creation time). */
  pid: number;
  /** Creation timestamp as returned by `Date.now()`. */
  startedAt: number;
  /** Name of the command that holds the lock. */
  command: string;
  /** Target URL the command was started for. */
  target: string;
  /**
   * Options the command was started with. The payload is only serialised,
   * never inspected, so it is typed as `unknown` rather than `any`.
   */
  args: unknown;
}
|
|
16
|
-
|
|
17
|
-
/**
 * File-based advisory lock that prevents the same command from running twice
 * for the same target.
 *
 * Lock files live in `~/.crawlith/locks/<hash>.lock` and contain a JSON
 * `LockData` payload. All state is static: a process tracks at most one lock
 * file path at a time.
 */
export class LockManager {
  private static lockFilePath: string | null = null;
  private static context: EngineContext | null = null;
  /** Guards against installing the process-level cleanup handlers more than once. */
  private static handlersRegistered = false;

  /** Directory that holds all lock files. */
  private static get lockDir(): string {
    return path.join(os.homedir(), '.crawlith', 'locks');
  }

  /**
   * Acquires the lock for the given command/target/options combination.
   *
   * If a lock file already exists and its owner PID is still alive, an error
   * is logged and the process exits with code 1 (unless `force` is set).
   * Stale or corrupted lock files are removed and replaced.
   *
   * @param commandName Name of the command requesting the lock.
   * @param targetUrl Target URL; part of the lock key and of log messages.
   * @param options Command options; part of the lock key and stored in the lock file.
   * @param context Optional engine context used for log output; falls back to `console`.
   * @param force When true, an existing lock is removed even if its owner is alive.
   * @throws Any file-system error other than `EEXIST` raised while creating the lock file.
   */
  static async acquireLock(commandName: string, targetUrl: string, options: any, context?: EngineContext, force: boolean = false): Promise<void> {
    this.context = context || null;
    const lockHash = generateLockKey(commandName, targetUrl, options);

    // Ensure lock directory exists
    await fs.mkdir(this.lockDir, { recursive: true });

    const lockPath = path.join(this.lockDir, `${lockHash}.lock`);

    // Check existing lock
    if (existsSync(lockPath)) {
      // 0 means "unreadable or invalid owner"; such locks are always stale.
      const pid = this.readOwnerPid(lockPath);
      const isStale = pid === 0 || !isPidAlive(pid);

      if (force) {
        this.log('warn', 'Force mode enabled. Overriding existing lock.');
        try { unlinkSync(lockPath); } catch { /* ignore */ }
      } else if (!isStale) {
        this.log('error', `Crawlith: command already running for ${targetUrl} (PID ${pid})`);
        process.exit(1);
      } else {
        this.log('info', 'Detected stale lock. Continuing execution.');
        try { unlinkSync(lockPath); } catch { /* ignore */ }
      }
    }

    // Create new lock
    try {
      const data: LockData = {
        pid: process.pid,
        startedAt: Date.now(),
        command: commandName,
        target: targetUrl,
        args: options
      };

      // 'wx' flag ensures atomic creation, failing if file exists
      await fs.writeFile(lockPath, JSON.stringify(data, null, 2), { flag: 'wx', encoding: 'utf-8' });

      this.lockFilePath = lockPath;
      this.registerHandlers();
    } catch (error: unknown) {
      if (this.errorCode(error) === 'EEXIST') {
        // Another process created the file between our check and our write.
        this.log('error', `Crawlith: command already running for ${targetUrl} (Race condition)`);
        process.exit(1);
      }
      throw error;
    }
  }

  /**
   * Removes the lock file held by this process, if any. Best-effort: errors
   * are swallowed because this runs during process shutdown.
   *
   * The tracked path is cleared when the file was removed or is already gone;
   * it is kept on any other failure so a later cleanup attempt can retry.
   */
  static releaseLock(): void {
    const lockPath = this.lockFilePath;
    if (!lockPath) return;

    try {
      unlinkSync(lockPath);
    } catch (error: unknown) {
      if (this.errorCode(error) !== 'ENOENT') {
        // Ignore errors during cleanup, but keep the path for a possible retry.
        return;
      }
    }
    this.lockFilePath = null;
  }

  /**
   * Reads the owner PID from an existing lock file.
   *
   * @returns The stored PID when it is a positive integer, otherwise 0
   * (unreadable file, invalid JSON, or missing/invalid `pid`).
   */
  private static readOwnerPid(lockPath: string): number {
    try {
      const parsed = JSON.parse(readFileSync(lockPath, 'utf-8')) as { pid?: unknown } | null;
      const candidate = parsed?.pid;
      // Non-positive PIDs address process groups when signalled, so they can
      // never identify a single lock owner.
      if (typeof candidate === 'number' && Number.isInteger(candidate) && candidate > 0) {
        return candidate;
      }
    } catch {
      // Corrupted -> Treat as stale
    }
    return 0;
  }

  /** Extracts a string `code` property (e.g. `EEXIST`) from a caught value, if present. */
  private static errorCode(error: unknown): string | undefined {
    const code = (error as { code?: unknown } | null | undefined)?.code;
    return typeof code === 'string' ? code : undefined;
  }

  /** Emits a log event through the engine context, or falls back to `console`. */
  private static log(type: 'info' | 'warn' | 'error', message: string, error?: unknown) {
    if (this.context) {
      this.context.emit({ type, message, error });
    } else {
      // Fallback for legacy usage or when no context provided
      if (type === 'error') console.error(message, error || '');
      else if (type === 'warn') console.warn(message);
      else console.log(message);
    }
  }

  /**
   * Installs process-level handlers that release the lock on exit, SIGINT,
   * SIGTERM and uncaught exceptions. Handlers are installed at most once per
   * process, no matter how often a lock is acquired.
   */
  private static registerHandlers() {
    if (this.handlersRegistered) return;
    this.handlersRegistered = true;

    const cleanup = () => {
      this.releaseLock();
    };

    process.on('exit', cleanup);

    process.on('SIGINT', () => {
      cleanup();
      process.exit(130);
    });
    process.on('SIGTERM', () => {
      cleanup();
      process.exit(143);
    });
    process.on('uncaughtException', (err) => {
      this.log('error', 'Uncaught Exception', err);
      cleanup();
      process.exit(1);
    });
  }
}
|
package/src/lock/pidCheck.ts
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
/**
 * Reports whether a process with the given PID currently exists.
 *
 * Uses signal 0, which performs the existence/permission check without
 * delivering a signal.
 *
 * @param pid Process id to probe. Must be a positive integer; anything else
 * (0, negatives, NaN, fractions) is reported as not alive, because
 * non-positive values address process groups rather than a single process.
 * @returns `true` if the process exists (including when it exists but may not
 * be signalled by the caller), otherwise `false`.
 */
export function isPidAlive(pid: number): boolean {
  if (!Number.isInteger(pid) || pid <= 0) {
    return false;
  }

  try {
    process.kill(pid, 0);
    return true;
  } catch (error: unknown) {
    const code = (error as { code?: unknown } | null | undefined)?.code;
    // EPERM: process exists but no permission to signal -> Alive.
    // ESRCH or any other error: treated as not alive.
    return code === 'EPERM';
  }
}
|