@crawlith/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. package/CHANGELOG.md +7 -0
  2. package/dist/analysis/analyze.d.ts +70 -0
  3. package/dist/analysis/analyze.js +436 -0
  4. package/dist/analysis/content.d.ts +12 -0
  5. package/dist/analysis/content.js +33 -0
  6. package/dist/analysis/images.d.ts +6 -0
  7. package/dist/analysis/images.js +18 -0
  8. package/dist/analysis/links.d.ts +7 -0
  9. package/dist/analysis/links.js +30 -0
  10. package/dist/analysis/scoring.d.ts +9 -0
  11. package/dist/analysis/scoring.js +42 -0
  12. package/dist/analysis/seo.d.ts +15 -0
  13. package/dist/analysis/seo.js +64 -0
  14. package/dist/analysis/structuredData.d.ts +6 -0
  15. package/dist/analysis/structuredData.js +51 -0
  16. package/dist/audit/dns.d.ts +2 -0
  17. package/dist/audit/dns.js +42 -0
  18. package/dist/audit/headers.d.ts +2 -0
  19. package/dist/audit/headers.js +95 -0
  20. package/dist/audit/index.d.ts +2 -0
  21. package/dist/audit/index.js +50 -0
  22. package/dist/audit/scoring.d.ts +14 -0
  23. package/dist/audit/scoring.js +214 -0
  24. package/dist/audit/transport.d.ts +6 -0
  25. package/dist/audit/transport.js +207 -0
  26. package/dist/audit/types.d.ts +88 -0
  27. package/dist/audit/types.js +1 -0
  28. package/dist/core/network/proxyAdapter.d.ts +6 -0
  29. package/dist/core/network/proxyAdapter.js +19 -0
  30. package/dist/core/network/rateLimiter.d.ts +6 -0
  31. package/dist/core/network/rateLimiter.js +31 -0
  32. package/dist/core/network/redirectController.d.ts +13 -0
  33. package/dist/core/network/redirectController.js +41 -0
  34. package/dist/core/network/responseLimiter.d.ts +4 -0
  35. package/dist/core/network/responseLimiter.js +26 -0
  36. package/dist/core/network/retryPolicy.d.ts +10 -0
  37. package/dist/core/network/retryPolicy.js +41 -0
  38. package/dist/core/scope/domainFilter.d.ts +11 -0
  39. package/dist/core/scope/domainFilter.js +40 -0
  40. package/dist/core/scope/scopeManager.d.ts +14 -0
  41. package/dist/core/scope/scopeManager.js +39 -0
  42. package/dist/core/scope/subdomainPolicy.d.ts +6 -0
  43. package/dist/core/scope/subdomainPolicy.js +35 -0
  44. package/dist/core/security/ipGuard.d.ts +11 -0
  45. package/dist/core/security/ipGuard.js +84 -0
  46. package/dist/crawler/crawl.d.ts +22 -0
  47. package/dist/crawler/crawl.js +336 -0
  48. package/dist/crawler/extract.d.ts +5 -0
  49. package/dist/crawler/extract.js +33 -0
  50. package/dist/crawler/fetcher.d.ts +40 -0
  51. package/dist/crawler/fetcher.js +161 -0
  52. package/dist/crawler/metricsRunner.d.ts +1 -0
  53. package/dist/crawler/metricsRunner.js +108 -0
  54. package/dist/crawler/normalize.d.ts +7 -0
  55. package/dist/crawler/normalize.js +88 -0
  56. package/dist/crawler/parser.d.ts +22 -0
  57. package/dist/crawler/parser.js +158 -0
  58. package/dist/crawler/sitemap.d.ts +8 -0
  59. package/dist/crawler/sitemap.js +70 -0
  60. package/dist/crawler/trap.d.ts +24 -0
  61. package/dist/crawler/trap.js +78 -0
  62. package/dist/db/graphLoader.d.ts +2 -0
  63. package/dist/db/graphLoader.js +96 -0
  64. package/dist/db/index.d.ts +4 -0
  65. package/dist/db/index.js +61 -0
  66. package/dist/db/repositories/EdgeRepository.d.ts +16 -0
  67. package/dist/db/repositories/EdgeRepository.js +17 -0
  68. package/dist/db/repositories/MetricsRepository.d.ts +26 -0
  69. package/dist/db/repositories/MetricsRepository.js +27 -0
  70. package/dist/db/repositories/PageRepository.d.ts +47 -0
  71. package/dist/db/repositories/PageRepository.js +93 -0
  72. package/dist/db/repositories/SiteRepository.d.ts +15 -0
  73. package/dist/db/repositories/SiteRepository.js +22 -0
  74. package/dist/db/repositories/SnapshotRepository.d.ts +22 -0
  75. package/dist/db/repositories/SnapshotRepository.js +55 -0
  76. package/dist/db/schema.d.ts +2 -0
  77. package/dist/db/schema.js +169 -0
  78. package/dist/diff/compare.d.ts +26 -0
  79. package/dist/diff/compare.js +64 -0
  80. package/dist/graph/cluster.d.ts +6 -0
  81. package/dist/graph/cluster.js +173 -0
  82. package/dist/graph/duplicate.d.ts +10 -0
  83. package/dist/graph/duplicate.js +251 -0
  84. package/dist/graph/graph.d.ts +103 -0
  85. package/dist/graph/graph.js +106 -0
  86. package/dist/graph/metrics.d.ts +29 -0
  87. package/dist/graph/metrics.js +74 -0
  88. package/dist/graph/pagerank.d.ts +12 -0
  89. package/dist/graph/pagerank.js +102 -0
  90. package/dist/graph/simhash.d.ts +17 -0
  91. package/dist/graph/simhash.js +56 -0
  92. package/dist/index.d.ts +30 -0
  93. package/dist/index.js +30 -0
  94. package/dist/lock/hashKey.d.ts +1 -0
  95. package/dist/lock/hashKey.js +44 -0
  96. package/dist/lock/lockManager.d.ts +7 -0
  97. package/dist/lock/lockManager.js +112 -0
  98. package/dist/lock/pidCheck.d.ts +1 -0
  99. package/dist/lock/pidCheck.js +14 -0
  100. package/dist/report/html.d.ts +2 -0
  101. package/dist/report/html.js +223 -0
  102. package/dist/report/sitegraphExport.d.ts +3 -0
  103. package/dist/report/sitegraphExport.js +52 -0
  104. package/dist/report/sitegraph_template.d.ts +1 -0
  105. package/dist/report/sitegraph_template.js +630 -0
  106. package/dist/scoring/hits.d.ts +9 -0
  107. package/dist/scoring/hits.js +111 -0
  108. package/dist/scoring/orphanSeverity.d.ts +39 -0
  109. package/dist/scoring/orphanSeverity.js +125 -0
  110. package/dist/utils/version.d.ts +2 -0
  111. package/dist/utils/version.js +15 -0
  112. package/package.json +33 -0
  113. package/src/analysis/analyze.ts +548 -0
  114. package/src/analysis/content.ts +62 -0
  115. package/src/analysis/images.ts +28 -0
  116. package/src/analysis/links.ts +41 -0
  117. package/src/analysis/scoring.ts +59 -0
  118. package/src/analysis/seo.ts +82 -0
  119. package/src/analysis/structuredData.ts +62 -0
  120. package/src/audit/dns.ts +49 -0
  121. package/src/audit/headers.ts +98 -0
  122. package/src/audit/index.ts +66 -0
  123. package/src/audit/scoring.ts +232 -0
  124. package/src/audit/transport.ts +258 -0
  125. package/src/audit/types.ts +102 -0
  126. package/src/core/network/proxyAdapter.ts +21 -0
  127. package/src/core/network/rateLimiter.ts +39 -0
  128. package/src/core/network/redirectController.ts +47 -0
  129. package/src/core/network/responseLimiter.ts +34 -0
  130. package/src/core/network/retryPolicy.ts +57 -0
  131. package/src/core/scope/domainFilter.ts +45 -0
  132. package/src/core/scope/scopeManager.ts +52 -0
  133. package/src/core/scope/subdomainPolicy.ts +39 -0
  134. package/src/core/security/ipGuard.ts +92 -0
  135. package/src/crawler/crawl.ts +382 -0
  136. package/src/crawler/extract.ts +34 -0
  137. package/src/crawler/fetcher.ts +233 -0
  138. package/src/crawler/metricsRunner.ts +124 -0
  139. package/src/crawler/normalize.ts +108 -0
  140. package/src/crawler/parser.ts +190 -0
  141. package/src/crawler/sitemap.ts +73 -0
  142. package/src/crawler/trap.ts +96 -0
  143. package/src/db/graphLoader.ts +105 -0
  144. package/src/db/index.ts +70 -0
  145. package/src/db/repositories/EdgeRepository.ts +29 -0
  146. package/src/db/repositories/MetricsRepository.ts +49 -0
  147. package/src/db/repositories/PageRepository.ts +128 -0
  148. package/src/db/repositories/SiteRepository.ts +32 -0
  149. package/src/db/repositories/SnapshotRepository.ts +74 -0
  150. package/src/db/schema.ts +177 -0
  151. package/src/diff/compare.ts +84 -0
  152. package/src/graph/cluster.ts +192 -0
  153. package/src/graph/duplicate.ts +286 -0
  154. package/src/graph/graph.ts +172 -0
  155. package/src/graph/metrics.ts +110 -0
  156. package/src/graph/pagerank.ts +125 -0
  157. package/src/graph/simhash.ts +61 -0
  158. package/src/index.ts +30 -0
  159. package/src/lock/hashKey.ts +51 -0
  160. package/src/lock/lockManager.ts +124 -0
  161. package/src/lock/pidCheck.ts +13 -0
  162. package/src/report/html.ts +227 -0
  163. package/src/report/sitegraphExport.ts +58 -0
  164. package/src/report/sitegraph_template.ts +630 -0
  165. package/src/scoring/hits.ts +131 -0
  166. package/src/scoring/orphanSeverity.ts +176 -0
  167. package/src/utils/version.ts +18 -0
  168. package/tests/__snapshots__/orphanSeverity.test.ts.snap +49 -0
  169. package/tests/analysis.unit.test.ts +98 -0
  170. package/tests/analyze.integration.test.ts +98 -0
  171. package/tests/audit/dns.test.ts +31 -0
  172. package/tests/audit/headers.test.ts +45 -0
  173. package/tests/audit/scoring.test.ts +133 -0
  174. package/tests/audit/security.test.ts +12 -0
  175. package/tests/audit/transport.test.ts +112 -0
  176. package/tests/clustering.test.ts +118 -0
  177. package/tests/crawler.test.ts +358 -0
  178. package/tests/db.test.ts +159 -0
  179. package/tests/diff.test.ts +67 -0
  180. package/tests/duplicate.test.ts +110 -0
  181. package/tests/fetcher.test.ts +106 -0
  182. package/tests/fetcher_safety.test.ts +85 -0
  183. package/tests/fixtures/analyze-crawl.json +26 -0
  184. package/tests/hits.test.ts +134 -0
  185. package/tests/html_report.test.ts +58 -0
  186. package/tests/lock/lockManager.test.ts +138 -0
  187. package/tests/metrics.test.ts +196 -0
  188. package/tests/normalize.test.ts +101 -0
  189. package/tests/orphanSeverity.test.ts +160 -0
  190. package/tests/pagerank.test.ts +98 -0
  191. package/tests/parser.test.ts +117 -0
  192. package/tests/proxy_safety.test.ts +57 -0
  193. package/tests/redirect_safety.test.ts +73 -0
  194. package/tests/safety.test.ts +114 -0
  195. package/tests/scope.test.ts +66 -0
  196. package/tests/scoring.test.ts +59 -0
  197. package/tests/sitemap.test.ts +88 -0
  198. package/tests/soft404.test.ts +41 -0
  199. package/tests/trap.test.ts +39 -0
  200. package/tests/visualization_data.test.ts +46 -0
  201. package/tsconfig.json +11 -0
@@ -0,0 +1,111 @@
1
/**
 * Computes Hub and Authority scores using the HITS algorithm.
 * Operates purely on the internal link graph.
 *
 * Mutates the eligible nodes in place: each receives `authorityScore`,
 * `hubScore` (both L2-normalised) and a `linkRole` label. Nodes that are not
 * eligible are left untouched.
 *
 * @param graph   Object exposing `getNodes()` and `getEdges()`.
 * @param options `iterations` sets the number of update rounds; a missing or
 *                falsy value (including 0) falls back to 20.
 * @returns Nothing; results are written onto the node objects.
 */
export function computeHITS(graph, options = {}) {
    const iterations = options.iterations || 20;
    const nodes = graph.getNodes();
    // 1. Filter eligible nodes
    // Eligibility: status 200, no redirect chain, not noindex.
    // NOTE(review): external pages are not filtered explicitly here; presumably
    // they never reach this point with status 200 - confirm against the graph builder.
    const eligibleNodes = nodes.filter(n => n.status === 200 &&
        (!n.redirectChain || n.redirectChain.length === 0) &&
        !n.noindex);
    if (eligibleNodes.length === 0)
        return;
    const urlToNode = new Map();
    for (const node of eligibleNodes) {
        urlToNode.set(node.url, node);
        // 2. Initialization
        node.authorityScore = 1.0;
        node.hubScore = 1.0;
    }
    const allEdges = graph.getEdges();
    // Keep only edges whose endpoints are both eligible, and drop self-links.
    const eligibleEdges = allEdges.filter(e => e.source !== e.target &&
        urlToNode.has(e.source) &&
        urlToNode.has(e.target));
    // Group edges by endpoint so each iteration only walks the relevant links.
    const incoming = new Map();
    const outgoing = new Map();
    for (const edge of eligibleEdges) {
        // An edge without a usable numeric weight counts as weight 1 (plain,
        // unweighted HITS). Without this, `score * undefined` yields NaN and
        // poisons every score in the graph.
        const weight = typeof edge.weight === 'number' && Number.isFinite(edge.weight)
            ? edge.weight
            : 1;
        if (!incoming.has(edge.target))
            incoming.set(edge.target, []);
        incoming.get(edge.target).push({ source: edge.source, weight });
        if (!outgoing.has(edge.source))
            outgoing.set(edge.source, []);
        outgoing.get(edge.source).push({ target: edge.target, weight });
    }
    // 3. Iteration
    for (let i = 0; i < iterations; i++) {
        // Update Authorities: weighted sum of the hub scores of linking pages.
        let normAuth = 0;
        for (const node of eligibleNodes) {
            const inLinks = incoming.get(node.url) || [];
            let newAuth = 0;
            for (const link of inLinks) {
                const sourceNode = urlToNode.get(link.source);
                newAuth += (sourceNode.hubScore || 0) * link.weight;
            }
            node.authorityScore = newAuth;
            normAuth += newAuth * newAuth;
        }
        // Normalize Authorities (L2 norm); skipped when every score is zero.
        normAuth = Math.sqrt(normAuth);
        if (normAuth > 0) {
            for (const node of eligibleNodes) {
                node.authorityScore = (node.authorityScore || 0) / normAuth;
            }
        }
        // Update Hubs: weighted sum of the (just updated) authority scores of linked pages.
        let normHub = 0;
        for (const node of eligibleNodes) {
            const outLinks = outgoing.get(node.url) || [];
            let newHub = 0;
            for (const link of outLinks) {
                const targetNode = urlToNode.get(link.target);
                newHub += (targetNode.authorityScore || 0) * link.weight;
            }
            node.hubScore = newHub;
            normHub += newHub * newHub;
        }
        // Normalize Hubs (L2 norm); skipped when every score is zero.
        normHub = Math.sqrt(normHub);
        if (normHub > 0) {
            for (const node of eligibleNodes) {
                node.hubScore = (node.hubScore || 0) / normHub;
            }
        }
    }
    // 4. Classification Logic
    classifyLinkRoles(eligibleNodes);
}
/**
 * Labels each node with a `linkRole` based on how its scores compare with the
 * rest of the given nodes.
 *
 * A score is "high" when it is strictly above the upper median of all scores
 * and above 0.0001. Roles: 'power' (both high), 'authority', 'hub',
 * 'balanced' (neither high but both above 0.0001), otherwise 'peripheral'.
 *
 * @param nodes Nodes carrying `authorityScore` / `hubScore`; mutated in place.
 */
function classifyLinkRoles(nodes) {
    if (nodes.length === 0)
        return;
    const authScores = nodes.map(n => n.authorityScore || 0).sort((a, b) => a - b);
    const hubScores = nodes.map(n => n.hubScore || 0).sort((a, b) => a - b);
    // Threshold is the upper median: the element at floor(n / 2) of the ascending sort.
    const medianAuth = authScores[Math.floor(authScores.length / 2)];
    const medianHub = hubScores[Math.floor(hubScores.length / 2)];
    for (const node of nodes) {
        const auth = node.authorityScore || 0;
        const hub = node.hubScore || 0;
        const isHighAuth = auth > medianAuth && auth > 0.0001;
        const isHighHub = hub > medianHub && hub > 0.0001;
        if (isHighAuth && isHighHub) {
            node.linkRole = 'power';
        }
        else if (isHighAuth) {
            node.linkRole = 'authority';
        }
        else if (isHighHub) {
            node.linkRole = 'hub';
        }
        else if (auth > 0.0001 && hub > 0.0001) {
            node.linkRole = 'balanced';
        }
        else {
            node.linkRole = 'peripheral';
        }
    }
}
@@ -0,0 +1,39 @@
1
+ export type OrphanType = 'hard' | 'near' | 'soft' | 'crawl-only'; // Orphan category set by annotateOrphans: hard = no inbound links and not found via sitemap; crawl-only = no inbound links but found via sitemap; near = inbound count above 0 and at most minInbound; soft = every known inbound source is a low-value page.
2
+ export type ImpactLevel = 'low' | 'medium' | 'high' | 'critical'; // Severity band produced by mapImpactLevel: up to 39 low, up to 69 medium, up to 89 high, anything else critical.
3
+ export interface SitegraphNode { // Input page record consumed by annotateOrphans and calculateOrphanSeverity.
4
+ url: string; // Page URL; matched against edge endpoints and rootUrl, and tested against the low-value URL patterns.
5
+ depth: number; // Depth 0 is treated as the homepage when no rootUrl is given; depth of 2 or less adds 10 to severity.
6
+ inLinks: number; // Inbound link count; summed per canonical URL before orphan classification.
7
+ outLinks: number; // Not read by the orphan annotation functions declared in this file.
8
+ status: number; // Not read by the orphan annotation functions declared in this file.
9
+ discoveredViaSitemap?: boolean; // With zero inbound links, selects crawl-only instead of hard.
10
+ robotsExcluded?: boolean; // When true the node is never flagged as an orphan.
11
+ canonicalUrl?: string; // Grouping key for inbound counts; falls back to url when absent.
12
+ isHomepage?: boolean; // When true the node is never flagged as an orphan.
13
+ wordCount?: number; // Above 800 adds 10 to severity; between 1 and 299 subtracts 20.
14
+ hasStructuredData?: boolean; // Adds 10 to severity.
15
+ pageType?: string; // Compared case-insensitively; pagination, tag, category, filter, search, archive mark a low-value page; archive and pagination also subtract 20 from severity.
16
+ noindex?: boolean; // Marks the page as low-value as a link source and subtracts 20 from severity.
17
+ duplicateContent?: boolean; // Subtracts 20 from severity.
18
+ isProductOrCommercial?: boolean; // Adds 10 to severity.
19
+ } // Severity bonuses are capped at 20 in total, and so are penalties.
20
+ export interface SitegraphEdge { // Directed link between two pages, identified by URL.
21
+ source: string; // URL of the linking page; looked up among the nodes by url during soft-orphan detection.
22
+ target: string; // URL of the linked page; compared with node.url to find a node's inbound sources.
23
+ }
24
+ export interface OrphanScoringOptions { // Switches and thresholds for annotateOrphans.
25
+ enabled: boolean; // When false every node is returned with orphan set to false and nothing else is computed.
26
+ severityEnabled: boolean; // When false orphans get orphanType only, without orphanSeverity or impactLevel.
27
+ includeSoftOrphans: boolean; // Enables the soft check: a linked page whose known inbound sources are all low-value pages.
28
+ minInbound: number; // A consolidated inbound count above 0 and at most this value classifies the node as near.
29
+ rootUrl?: string; // URL treated as the homepage; when omitted, nodes at depth 0 are treated as the homepage.
30
+ }
31
+ export type AnnotatedNode = SitegraphNode & { // Copy of the input node plus the orphan annotations below.
32
+ orphan: boolean; // True when an orphan type was assigned.
33
+ orphanType?: OrphanType; // Present only when orphan is true.
34
+ orphanSeverity?: number; // Integer from 0 to 100; present only for orphans when severityEnabled is set.
35
+ impactLevel?: ImpactLevel; // Band derived from orphanSeverity via mapImpactLevel; present together with orphanSeverity.
36
+ };
37
+ export declare function mapImpactLevel(score: number): ImpactLevel; // Maps a severity score to a band: up to 39 low, up to 69 medium, up to 89 high, otherwise critical.
38
+ export declare function calculateOrphanSeverity(orphanType: OrphanType, node: SitegraphNode): number; // Base score by type (hard 90, crawl-only 80, near 70 or 60, soft 50) plus at most 20 bonus, minus at most 20 penalty, rounded and clamped to 0..100.
39
+ export declare function annotateOrphans(nodes: SitegraphNode[], edges: SitegraphEdge[], options: OrphanScoringOptions): AnnotatedNode[]; // Returns one annotated copy per input node, in input order; homepage and robots-excluded nodes are never orphans.
@@ -0,0 +1,125 @@
1
// URL shapes that mark a page as low-value: paginated listings, tag/category
// paths, query-driven search/filter/sort views, and search endpoints.
const LOW_VALUE_PATTERNS = [
    /[?&](page|p)=\d+/i,
    /\/(page|tag|tags|category|categories)\//i,
    /[?&](q|query|search|filter|sort)=/i,
    /\/search(\/|\?|$)/i
];
/**
 * Decides whether a page is low-value as a link source.
 *
 * A page qualifies when its (case-insensitive) pageType is one of the listing
 * types, when it is noindex, or when its URL matches a low-value pattern.
 *
 * @param node Page record with `url` and optional `pageType` / `noindex`.
 * @returns true for low-value pages, false otherwise.
 */
function isLowValuePage(node) {
    const normalizedType = (node.pageType || '').toLowerCase();
    switch (normalizedType) {
        case 'pagination':
        case 'tag':
        case 'category':
        case 'filter':
        case 'search':
        case 'archive':
            return true;
        default:
            break;
    }
    if (node.noindex) {
        return true;
    }
    for (const urlPattern of LOW_VALUE_PATTERNS) {
        if (urlPattern.test(node.url)) {
            return true;
        }
    }
    return false;
}
17
/**
 * Rounds a raw score to the nearest integer and limits it to the 0..100 range.
 *
 * @param score Raw score, possibly fractional or out of range.
 * @returns Integer between 0 and 100 inclusive.
 */
function clampScore(score) {
    const rounded = Math.round(score);
    const atLeastZero = Math.max(rounded, 0);
    return Math.min(atLeastZero, 100);
}
20
/**
 * Translates a severity score into an impact band.
 *
 * Bands: up to 39 is 'low', up to 69 is 'medium', up to 89 is 'high';
 * every other value is 'critical'.
 *
 * @param score Severity score.
 * @returns The matching impact level.
 */
export function mapImpactLevel(score) {
    return score <= 39
        ? 'low'
        : score <= 69
            ? 'medium'
            : score <= 89
                ? 'high'
                : 'critical';
}
29
/**
 * Scores how severe an orphan is, on a 0..100 integer scale.
 *
 * Starts from a base per orphan type (hard 90, crawl-only 80, near 70 when
 * inLinks is at most 1 else 60, soft 50; any other type starts at 0), adds
 * 10 per positive signal up to 20, and subtracts 20 when at least one
 * negative signal is present.
 *
 * @param orphanType Orphan category of the node.
 * @param node       Page record supplying the signals.
 * @returns Rounded score clamped to 0..100.
 */
export function calculateOrphanSeverity(orphanType, node) {
    const words = node.wordCount || 0;
    const pageKind = (node.pageType || '').toLowerCase();
    let base = 0;
    if (orphanType === 'hard') {
        base = 90;
    }
    else if (orphanType === 'crawl-only') {
        base = 80;
    }
    else if (orphanType === 'near') {
        base = node.inLinks <= 1 ? 70 : 60;
    }
    else if (orphanType === 'soft') {
        base = 50;
    }
    // Each positive signal is worth 10; the total bonus never exceeds 20.
    const positiveSignals = [
        words > 800,
        node.hasStructuredData,
        node.depth <= 2,
        node.isProductOrCommercial
    ].filter(Boolean).length;
    const bonus = Math.min(20, positiveSignals * 10);
    // Each negative signal is worth 20 and the cap is 20, so one is enough.
    const negativeSignals = [
        words > 0 && words < 300,
        node.noindex,
        node.duplicateContent,
        pageKind === 'archive' || pageKind === 'pagination'
    ].filter(Boolean).length;
    const penalty = Math.min(20, negativeSignals * 20);
    return clampScore(base + bonus - penalty);
}
69
/**
 * Sums inbound link counts per canonical URL.
 *
 * Nodes sharing a canonicalUrl pool their inLinks under that URL; a node
 * without a canonicalUrl is keyed by its own url.
 *
 * @param nodes Page records with `url`, `inLinks` and optional `canonicalUrl`.
 * @returns Map from canonical URL to the combined inbound count.
 */
function consolidateInboundByCanonical(nodes) {
    return nodes.reduce((totals, page) => {
        const key = page.canonicalUrl || page.url;
        const soFar = totals.get(key) || 0;
        return totals.set(key, soFar + page.inLinks);
    }, new Map());
}
77
/**
 * Annotates every node with orphan information.
 *
 * Classification uses the inbound count consolidated per canonical URL:
 * zero inbound is 'crawl-only' (found via sitemap) or 'hard'; an inbound
 * count of at most `options.minInbound` is 'near'; with
 * `options.includeSoftOrphans`, a linked page whose known inbound sources
 * are all low-value pages is 'soft'. Homepage and robots-excluded nodes are
 * never orphans.
 *
 * @param nodes   Page records to annotate; not mutated.
 * @param edges   Links between pages, used only for soft-orphan detection.
 * @param options Feature switches and thresholds.
 * @returns One annotated copy per input node, in input order. Orphans carry
 *          `orphanType`, plus `orphanSeverity` and `impactLevel` when
 *          `options.severityEnabled` is set.
 */
export function annotateOrphans(nodes, edges, options) {
    if (!options.enabled) {
        return nodes.map((node) => ({ ...node, orphan: false }));
    }
    const canonicalInbound = consolidateInboundByCanonical(nodes);
    const nodeByUrl = new Map(nodes.map((node) => [node.url, node]));
    // Inbound source nodes grouped by target URL. Built once, on first use,
    // so soft-orphan detection costs O(nodes + edges) instead of rescanning
    // every edge for every node. Sources unknown to `nodes` are skipped, and
    // edge order is preserved within each group.
    let sourcesByTarget;
    const inboundSourcesOf = (url) => {
        if (!sourcesByTarget) {
            sourcesByTarget = new Map();
            for (const edge of edges) {
                const source = nodeByUrl.get(edge.source);
                if (!source)
                    continue;
                const group = sourcesByTarget.get(edge.target);
                if (group) {
                    group.push(source);
                }
                else {
                    sourcesByTarget.set(edge.target, [source]);
                }
            }
        }
        return sourcesByTarget.get(url) || [];
    };
    return nodes.map((node) => {
        const isHomepage = node.isHomepage || (options.rootUrl ? node.url === options.rootUrl : node.depth === 0);
        if (isHomepage || node.robotsExcluded) {
            return { ...node, orphan: false };
        }
        const canonical = node.canonicalUrl || node.url;
        const inbound = canonicalInbound.get(canonical) || 0;
        let orphanType;
        if (inbound === 0) {
            orphanType = node.discoveredViaSitemap ? 'crawl-only' : 'hard';
        }
        else if (inbound <= options.minInbound) {
            orphanType = 'near';
        }
        if (!orphanType && options.includeSoftOrphans && inbound > 0) {
            // Soft orphan: linked, but only from pages that are themselves low-value.
            const inboundSources = inboundSourcesOf(node.url);
            if (inboundSources.length > 0 && inboundSources.every((source) => isLowValuePage(source))) {
                orphanType = 'soft';
            }
        }
        if (!orphanType) {
            return { ...node, orphan: false };
        }
        if (!options.severityEnabled) {
            return {
                ...node,
                orphan: true,
                orphanType
            };
        }
        // Severity is scored against the consolidated inbound count, not the node's own inLinks.
        const orphanSeverity = calculateOrphanSeverity(orphanType, { ...node, inLinks: inbound });
        return {
            ...node,
            orphan: true,
            orphanType,
            orphanSeverity,
            impactLevel: mapImpactLevel(orphanSeverity)
        };
    });
}
@@ -0,0 +1,2 @@
1
+ declare let version: string; // Package version; the implementation starts from the default 0.0.1 and replaces it with the version field read from package.json when that file can be read.
2
+ export { version };
@@ -0,0 +1,15 @@
1
+ import { readFileSync } from 'node:fs';
2
+ import { fileURLToPath } from 'node:url';
3
+ import { dirname, join } from 'node:path';
4
// ES modules have no built-in __filename / __dirname; derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Default reported when package.json cannot be read or carries no usable version.
let version = '0.0.1';
try {
    // Resolved relative to this module: two directories up is expected to be the package root.
    const pkgPath = join(__dirname, '../../package.json');
    const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'));
    // Only accept a non-empty string so `version` never becomes undefined
    // when the manifest lacks (or mangles) its version field.
    if (typeof pkg.version === 'string' && pkg.version.length > 0) {
        version = pkg.version;
    }
}
catch {
    // Fallback to internal default
}
export { version };
package/package.json ADDED
@@ -0,0 +1,33 @@
1
+ {
2
+ "name": "@crawlith/core",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "exports": {
8
+ ".": {
9
+ "import": "./dist/index.js",
10
+ "types": "./dist/index.d.ts",
11
+ "default": "./dist/index.js"
12
+ }
13
+ },
14
+ "dependencies": {
15
+ "better-sqlite3": "^12.6.2",
16
+ "chalk": "^5.3.0",
17
+ "cheerio": "^1.0.0-rc.12",
18
+ "p-limit": "^5.0.0",
19
+ "robots-parser": "^3.0.1",
20
+ "undici": "^6.13.0",
21
+ "vite": "7.3.1"
22
+ },
23
+ "devDependencies": {
24
+ "@types/better-sqlite3": "^7.6.13",
25
+ "@types/node": "^20.12.7",
26
+ "typescript": "^5.4.5",
27
+ "vitest": "^4.0.18"
28
+ },
29
+ "scripts": {
30
+ "build": "tsc",
31
+ "test": "vitest run"
32
+ }
33
+ }