npm - @crawlith/core - Versions diffs - 0.1.1 → 0.1.2 - Mend

@crawlith/core 0.1.1 → 0.1.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (237) hide show

package/LICENSE +201 -0
package/README.md +70 -0
package/dist/analysis/analyze.d.ts +29 -8
package/dist/analysis/analyze.js +325 -221
package/dist/analysis/clustering.d.ts +23 -0
package/dist/analysis/clustering.js +206 -0
package/dist/analysis/content.d.ts +1 -1
package/dist/analysis/content.js +11 -5
package/dist/analysis/duplicate.d.ts +34 -0
package/dist/analysis/duplicate.js +305 -0
package/dist/analysis/heading.d.ts +116 -0
package/dist/analysis/heading.js +356 -0
package/dist/analysis/images.d.ts +1 -1
package/dist/analysis/images.js +6 -5
package/dist/analysis/links.d.ts +1 -1
package/dist/analysis/links.js +8 -8
package/dist/{scoring/orphanSeverity.d.ts → analysis/orphan.d.ts} +12 -23
package/dist/{scoring/orphanSeverity.js → analysis/orphan.js} +9 -3
package/dist/analysis/scoring.js +4 -1
package/dist/analysis/seo.d.ts +8 -4
package/dist/analysis/seo.js +41 -30
package/dist/analysis/soft404.d.ts +17 -0
package/dist/analysis/soft404.js +62 -0
package/dist/analysis/structuredData.d.ts +1 -1
package/dist/analysis/structuredData.js +5 -4
package/dist/application/index.d.ts +2 -0
package/dist/application/index.js +2 -0
package/dist/application/usecase.d.ts +3 -0
package/dist/application/usecase.js +1 -0
package/dist/application/usecases.d.ts +114 -0
package/dist/application/usecases.js +201 -0
package/dist/audit/index.js +1 -1
package/dist/audit/transport.d.ts +1 -1
package/dist/audit/transport.js +5 -4
package/dist/audit/types.d.ts +1 -0
package/dist/constants.d.ts +17 -0
package/dist/constants.js +23 -0
package/dist/core/scope/scopeManager.js +3 -0
package/dist/crawler/crawl.d.ts +2 -2
package/dist/crawler/crawler.d.ts +17 -5
package/dist/crawler/crawler.js +259 -94
package/dist/crawler/fetcher.d.ts +1 -1
package/dist/crawler/fetcher.js +6 -6
package/dist/crawler/metricsRunner.d.ts +21 -1
package/dist/crawler/metricsRunner.js +181 -60
package/dist/crawler/normalize.d.ts +41 -0
package/dist/crawler/normalize.js +119 -3
package/dist/crawler/parser.d.ts +1 -3
package/dist/crawler/parser.js +2 -49
package/dist/crawler/resolver.d.ts +11 -0
package/dist/crawler/resolver.js +67 -0
package/dist/crawler/sitemap.d.ts +4 -1
package/dist/crawler/sitemap.js +24 -18
package/dist/crawler/trap.d.ts +5 -1
package/dist/crawler/trap.js +23 -2
package/dist/db/CrawlithDB.d.ts +110 -0
package/dist/db/CrawlithDB.js +500 -0
package/dist/db/graphLoader.js +15 -32
package/dist/db/index.d.ts +9 -1
package/dist/db/index.js +39 -31
package/dist/db/migrations.d.ts +2 -0
package/dist/db/{schema.js → migrations.js} +90 -43
package/dist/db/pluginRegistry.d.ts +9 -0
package/dist/db/pluginRegistry.js +19 -0
package/dist/db/repositories/EdgeRepository.d.ts +5 -0
package/dist/db/repositories/EdgeRepository.js +7 -0
package/dist/db/repositories/MetricsRepository.d.ts +13 -8
package/dist/db/repositories/MetricsRepository.js +14 -6
package/dist/db/repositories/PageRepository.d.ts +5 -3
package/dist/db/repositories/PageRepository.js +68 -17
package/dist/db/repositories/SiteRepository.d.ts +6 -0
package/dist/db/repositories/SiteRepository.js +4 -0
package/dist/db/repositories/SnapshotRepository.d.ts +12 -5
package/dist/db/repositories/SnapshotRepository.js +48 -10
package/dist/db/reset.d.ts +9 -0
package/dist/db/reset.js +32 -0
package/dist/db/statements.d.ts +12 -0
package/dist/db/statements.js +40 -0
package/dist/diff/compare.d.ts +0 -5
package/dist/diff/compare.js +0 -12
package/dist/diff/service.d.ts +16 -0
package/dist/diff/service.js +41 -0
package/dist/domain/index.d.ts +4 -0
package/dist/domain/index.js +4 -0
package/dist/events.d.ts +8 -0
package/dist/graph/graph.d.ts +20 -42
package/dist/graph/graph.js +12 -16
package/dist/graph/hits.d.ts +23 -0
package/dist/graph/hits.js +111 -0
package/dist/graph/metrics.d.ts +0 -4
package/dist/graph/metrics.js +19 -15
package/dist/graph/pagerank.d.ts +17 -4
package/dist/graph/pagerank.js +126 -93
package/dist/index.d.ts +27 -9
package/dist/index.js +27 -9
package/dist/lock/lockManager.d.ts +1 -0
package/dist/lock/lockManager.js +15 -0
package/dist/plugin-system/plugin-cli.d.ts +10 -0
package/dist/plugin-system/plugin-cli.js +31 -0
package/dist/plugin-system/plugin-config.d.ts +16 -0
package/dist/plugin-system/plugin-config.js +36 -0
package/dist/plugin-system/plugin-loader.d.ts +17 -0
package/dist/plugin-system/plugin-loader.js +122 -0
package/dist/plugin-system/plugin-registry.d.ts +25 -0
package/dist/plugin-system/plugin-registry.js +167 -0
package/dist/plugin-system/plugin-types.d.ts +205 -0
package/dist/plugin-system/plugin-types.js +1 -0
package/dist/ports/index.d.ts +9 -0
package/dist/ports/index.js +1 -0
package/dist/report/export.d.ts +3 -0
package/dist/report/export.js +81 -0
package/dist/report/insight.d.ts +27 -0
package/dist/report/insight.js +103 -0
package/dist/scoring/health.d.ts +17 -11
package/dist/scoring/health.js +183 -140
package/dist/utils/chalk.d.ts +6 -0
package/dist/utils/chalk.js +41 -0
package/dist/utils/secureConfig.d.ts +23 -0
package/dist/utils/secureConfig.js +128 -0
package/package.json +10 -4
package/CHANGELOG.md +0 -13
package/dist/db/schema.d.ts +0 -2
package/dist/graph/cluster.d.ts +0 -6
package/dist/graph/cluster.js +0 -221
package/dist/graph/duplicate.d.ts +0 -10
package/dist/graph/duplicate.js +0 -302
package/dist/scoring/hits.d.ts +0 -10
package/dist/scoring/hits.js +0 -131
package/scripts/copy-assets.js +0 -37
package/src/analysis/analysis_list.html +0 -35
package/src/analysis/analysis_page.html +0 -123
package/src/analysis/analyze.ts +0 -505
package/src/analysis/content.ts +0 -62
package/src/analysis/images.ts +0 -28
package/src/analysis/links.ts +0 -41
package/src/analysis/scoring.ts +0 -66
package/src/analysis/seo.ts +0 -82
package/src/analysis/structuredData.ts +0 -62
package/src/analysis/templates.ts +0 -9
package/src/audit/dns.ts +0 -49
package/src/audit/headers.ts +0 -98
package/src/audit/index.ts +0 -66
package/src/audit/scoring.ts +0 -232
package/src/audit/transport.ts +0 -258
package/src/audit/types.ts +0 -102
package/src/core/network/proxyAdapter.ts +0 -21
package/src/core/network/rateLimiter.ts +0 -39
package/src/core/network/redirectController.ts +0 -47
package/src/core/network/responseLimiter.ts +0 -34
package/src/core/network/retryPolicy.ts +0 -57
package/src/core/scope/domainFilter.ts +0 -45
package/src/core/scope/scopeManager.ts +0 -52
package/src/core/scope/subdomainPolicy.ts +0 -39
package/src/core/security/ipGuard.ts +0 -171
package/src/crawler/crawl.ts +0 -9
package/src/crawler/crawler.ts +0 -601
package/src/crawler/extract.ts +0 -39
package/src/crawler/fetcher.ts +0 -251
package/src/crawler/metricsRunner.ts +0 -137
package/src/crawler/normalize.ts +0 -108
package/src/crawler/parser.ts +0 -190
package/src/crawler/sitemap.ts +0 -76
package/src/crawler/trap.ts +0 -96
package/src/db/graphLoader.ts +0 -135
package/src/db/index.ts +0 -75
package/src/db/repositories/EdgeRepository.ts +0 -43
package/src/db/repositories/MetricsRepository.ts +0 -63
package/src/db/repositories/PageRepository.ts +0 -228
package/src/db/repositories/SiteRepository.ts +0 -43
package/src/db/repositories/SnapshotRepository.ts +0 -99
package/src/db/schema.ts +0 -177
package/src/diff/compare.ts +0 -84
package/src/events.ts +0 -16
package/src/graph/cluster.ts +0 -246
package/src/graph/duplicate.ts +0 -350
package/src/graph/graph.ts +0 -192
package/src/graph/metrics.ts +0 -125
package/src/graph/pagerank.ts +0 -126
package/src/graph/simhash.ts +0 -76
package/src/index.ts +0 -33
package/src/lock/hashKey.ts +0 -51
package/src/lock/lockManager.ts +0 -132
package/src/lock/pidCheck.ts +0 -13
package/src/report/crawl.html +0 -879
package/src/report/crawlExport.ts +0 -58
package/src/report/crawl_template.ts +0 -9
package/src/report/html.ts +0 -27
package/src/scoring/health.ts +0 -241
package/src/scoring/hits.ts +0 -153
package/src/scoring/orphanSeverity.ts +0 -176
package/src/utils/version.ts +0 -18
package/tests/__snapshots__/orphanSeverity.test.ts.snap +0 -49
package/tests/analysis.unit.test.ts +0 -142
package/tests/analyze.integration.test.ts +0 -133
package/tests/analyze_markdown.test.ts +0 -98
package/tests/audit/audit.test.ts +0 -101
package/tests/audit/dns.test.ts +0 -31
package/tests/audit/headers.test.ts +0 -45
package/tests/audit/scoring.test.ts +0 -133
package/tests/audit/security.test.ts +0 -12
package/tests/audit/transport.test.ts +0 -111
package/tests/clustering.test.ts +0 -118
package/tests/clustering_risk.test.ts +0 -118
package/tests/crawler.test.ts +0 -364
package/tests/db/index.test.ts +0 -134
package/tests/db/repositories.test.ts +0 -115
package/tests/db.test.ts +0 -159
package/tests/db_repos.test.ts +0 -72
package/tests/diff.test.ts +0 -67
package/tests/duplicate.test.ts +0 -110
package/tests/extract.test.ts +0 -86
package/tests/fetcher.test.ts +0 -110
package/tests/fetcher_safety.test.ts +0 -91
package/tests/fixtures/analyze-crawl.json +0 -26
package/tests/graph/graph.test.ts +0 -100
package/tests/graphLoader.test.ts +0 -124
package/tests/hits.test.ts +0 -134
package/tests/html_report.test.ts +0 -59
package/tests/ipGuard.test.ts +0 -73
package/tests/lock/lockManager.test.ts +0 -198
package/tests/metrics.test.ts +0 -196
package/tests/normalize.test.ts +0 -88
package/tests/orphanSeverity.test.ts +0 -160
package/tests/pagerank.test.ts +0 -98
package/tests/parser.test.ts +0 -117
package/tests/proxy_safety.test.ts +0 -57
package/tests/redirect_safety.test.ts +0 -77
package/tests/renderAnalysisCsv.test.ts +0 -183
package/tests/safety.test.ts +0 -126
package/tests/scope.test.ts +0 -84
package/tests/scoring.test.ts +0 -60
package/tests/sitemap.test.ts +0 -100
package/tests/soft404.test.ts +0 -41
package/tests/ssrf_fix.test.ts +0 -69
package/tests/trap.test.ts +0 -39
package/tests/visualization_data.test.ts +0 -46
package/tsconfig.json +0 -11

package/tests/pagerank.test.ts DELETED Viewed

@@ -1,98 +0,0 @@
-import { describe, it, expect } from 'vitest';
-import { Graph } from '../src/graph/graph.js';
-import { computePageRank } from '../src/graph/pagerank.js';
-describe('PageRank Engine', () => {
-    it('should calculate identical PageRank for a simple loop', () => {
-        const graph = new Graph();
-        graph.addNode('https://a.com', 0, 200);
-        graph.addNode('https://b.com', 1, 200);
-        graph.addEdge('https://a.com', 'https://b.com');
-        graph.addEdge('https://b.com', 'https://a.com');
-        computePageRank(graph);
-        const nodes = graph.getNodes();
-        expect(nodes[0].pageRank).toBeCloseTo(0.5, 4);
-        expect(nodes[1].pageRank).toBeCloseTo(0.5, 4);
-        expect(nodes[0].pageRankScore).toBe(100);
-        expect(nodes[1].pageRankScore).toBe(100);
-    });
-    it('should identify the center of a star graph as most important', () => {
-        const graph = new Graph();
-        graph.addNode('https://center.com', 0, 200);
-        graph.addNode('https://p1.com', 1, 200);
-        graph.addNode('https://p2.com', 1, 200);
-        graph.addNode('https://p3.com', 1, 200);
-        // Star in: all link to center
-        graph.addEdge('https://p1.com', 'https://center.com');
-        graph.addEdge('https://p2.com', 'https://center.com');
-        graph.addEdge('https://p3.com', 'https://center.com');
-        computePageRank(graph);
-        const nodes = graph.getNodes();
-        const center = nodes.find(n => n.url.includes('center'))!;
-        const leaves = nodes.filter(n => !n.url.includes('center'));
-        expect(center.pageRankScore).toBe(100);
-        leaves.forEach(leaf => {
-            expect(leaf.pageRankScore).toBeLessThan(100);
-            expect(leaf.pageRank!).toBeLessThan(center.pageRank!);
-        });
-    });
-    it('should respect link weights (Body > Nav > Footer)', () => {
-        const graph = new Graph();
-        graph.addNode('https://source.com', 0, 200);
-        graph.addNode('https://body-target.com', 1, 200);
-        graph.addNode('https://footer-target.com', 1, 200);
-        // Body weight 1.0, Footer weight 0.4
-        graph.addEdge('https://source.com', 'https://body-target.com', 1.0);
-        graph.addEdge('https://source.com', 'https://footer-target.com', 0.4);
-        computePageRank(graph);
-        const bodyTarget = graph.nodes.get('https://body-target.com')!;
-        const footerTarget = graph.nodes.get('https://footer-target.com')!;
-        expect(bodyTarget.pageRank!).toBeGreaterThan(footerTarget.pageRank!);
-    });
-    it('should handle sink nodes by redistributing rank', () => {
-        const graph = new Graph();
-        graph.addNode('https://a.com', 0, 200);
-        graph.addNode('https://b.com', 1, 200); // b is a sink
-        graph.addEdge('https://a.com', 'https://b.com');
-        computePageRank(graph);
-        const nodeA = graph.nodes.get('https://a.com')!;
-        const nodeB = graph.nodes.get('https://b.com')!;
-        // Without redistribution, A would lose all rank.
-        // With redistribution, A should still have some rank.
-        expect(nodeA.pageRank).toBeGreaterThan(0);
-        expect(nodeB.pageRank).toBeGreaterThan(nodeA.pageRank!);
-    });
-    it('should exclude noindex pages from receiving or passing rank', () => {
-        const graph = new Graph();
-        graph.addNode('https://a.com', 0, 200);
-        graph.addNode('https://no-index.com', 1, 200);
-        graph.nodes.get('https://no-index.com')!.noindex = true;
-        graph.addEdge('https://a.com', 'https://no-index.com');
-        computePageRank(graph);
-        const nodeA = graph.nodes.get('https://a.com')!;
-        const nodeNoIndex = graph.nodes.get('https://no-index.com')!;
-        expect(nodeNoIndex.pageRank).toBeUndefined();
-        expect(nodeA.pageRank).toBe(1.0); // Only one eligible node
-    });
-});

package/tests/parser.test.ts DELETED Viewed

@@ -1,117 +0,0 @@
-import { test, expect } from 'vitest';
-import { Parser } from '../src/crawler/parser.js';
-const parser = new Parser();
-const baseUrl = 'https://example.com';
-test('extracts links correctly', () => {
-  const html = `
-    <html>
-      <body>
-        <a href="/page1">Page 1</a>
-        <a href="https://other.com">Other</a>
-        <a href="#hash">Hash</a>
-        <a href="javascript:void(0)">JS</a>
-      </body>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  const urls = result.links.map(l => l.url);
-  expect(urls).toContain('https://example.com/page1');
-  expect(urls).toContain('https://other.com/');
-  expect(urls).not.toContain('https://example.com/#hash');
-  // It also extracts the base URL itself from href="#hash"
-  expect(urls).toContain('https://example.com/');
-  expect(result.links.length).toBe(3);
-});
-test('respects nofollow on links', () => {
-  const html = `
-    <html>
-      <body>
-        <a href="/page1" rel="nofollow">Page 1</a>
-        <a href="/page2">Page 2</a>
-      </body>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  const urls = result.links.map(l => l.url);
-  expect(urls).not.toContain('https://example.com/page1');
-  expect(urls).toContain('https://example.com/page2');
-});
-test('respects meta robots nofollow', () => {
-  const html = `
-    <html>
-      <head>
-        <meta name="robots" content="nofollow">
-      </head>
-      <body>
-        <a href="/page1">Page 1</a>
-      </body>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  expect(result.nofollow).toBe(true);
-  expect(result.links.length).toBe(0);
-});
-test('detects canonical', () => {
-  const html = `
-    <html>
-      <head>
-        <link rel="canonical" href="https://example.com/canon">
-      </head>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  expect(result.canonical).toBe('https://example.com/canon');
-});
-test('detects relative canonical', () => {
-  const html = `
-    <html>
-      <head>
-        <link rel="canonical" href="/canon">
-      </head>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  expect(result.canonical).toBe('https://example.com/canon');
-});
-test('detects soft 404', () => {
-  const html = `
-    <html>
-      <head><title>Page Not Found</title></head>
-      <body>Sorry, the page you are looking for does not exist.</body>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  expect(result.soft404Score).toBeGreaterThanOrEqual(0.5);
-});
-test('content hash ignores scripts', () => {
-  const html1 = `
-    <html><body><script>var x=1;</script><p>Hello</p></body></html>
-  `;
-  const html2 = `
-    <html><body><script>var x=2;</script><p>Hello</p></body></html>
-  `;
-  const result1 = parser.parse(html1, baseUrl, 200);
-  const result2 = parser.parse(html2, baseUrl, 200);
-  expect(result1.contentHash).toBe(result2.contentHash);
-});
-test('detects meta robots noindex', () => {
-  const html = `
-    <html>
-      <head>
-        <meta name="robots" content="noindex, nofollow">
-      </head>
-    </html>
-  `;
-  const result = parser.parse(html, baseUrl, 200);
-  expect(result.noindex).toBe(true);
-  expect(result.nofollow).toBe(true);
-});

package/tests/proxy_safety.test.ts DELETED Viewed

@@ -1,57 +0,0 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { Fetcher } from '../src/crawler/fetcher.js';
-import { request, ProxyAgent } from 'undici';
-vi.mock('undici', async (importOriginal) => {
-    const original = await importOriginal<typeof import('undici')>();
-    return {
-        ...original,
-        request: vi.fn(),
-        ProxyAgent: vi.fn(function () {
-            return {
-                request: vi.fn(),
-                close: vi.fn()
-            };
-        })
-    };
-});
-describe('Proxy Integration', () => {
-    beforeEach(() => {
-        vi.clearAllMocks();
-    });
-    it('should use ProxyAgent when proxyUrl is provided', async () => {
-        const fetcher = new Fetcher({ proxyUrl: 'http://proxy.com:8080', rate: 100 });
-        const mockRequest = vi.mocked(request);
-        // Mock the request to return a successful response immediately
-        mockRequest.mockResolvedValueOnce({
-            statusCode: 200,
-            headers: {},
-            body: {
-                on: vi.fn((event, cb) => {
-                    if (event === 'data') {
-                        // Simulate async data chunk
-                        setTimeout(() => cb(Buffer.from('ok')), 0);
-                    }
-                    if (event === 'end') {
-                        // Simulate async end
-                        setTimeout(() => cb(), 0);
-                    }
-                    return { on: vi.fn() }; // chaining
-                }),
-                dump: vi.fn(),
-                text: vi.fn().mockResolvedValue('ok')
-            }
-        } as any);
-        await fetcher.fetch('http://target.com');
-        expect(ProxyAgent).toHaveBeenCalledWith('http://proxy.com:8080');
-    });
-    it('should fail fast on invalid proxy URL', () => {
-        expect(() => new Fetcher({ proxyUrl: 'not-a-url' })).toThrow('Invalid proxy URL');
-    });
-});

package/tests/redirect_safety.test.ts DELETED Viewed

@@ -1,77 +0,0 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-import { RedirectController } from '../src/core/network/redirectController.js';
-import { Fetcher } from '../src/crawler/fetcher.js';
-import { request } from 'undici';
-vi.mock('undici', () => ({
-    request: vi.fn(),
-    ProxyAgent: vi.fn().mockImplementation(() => ({ dispatcher: {} })),
-    Agent: class {
-        dispatch = vi.fn();
-    },
-    Dispatcher: class {}
-}));
-describe('RedirectController', () => {
-    it('should limit hops', () => {
-        const ctrl = new RedirectController(2);
-        expect(ctrl.nextHop('http://b.com')).toBe(null);
-        expect(ctrl.nextHop('http://c.com')).toBe(null);
-        expect(ctrl.nextHop('http://d.com')).toBe('redirect_limit_exceeded');
-    });
-    it('should detect loops', () => {
-        const ctrl = new RedirectController(5);
-        expect(ctrl.nextHop('http://b.com')).toBe(null);
-        expect(ctrl.nextHop('http://a.com')).toBe(null);
-        expect(ctrl.nextHop('http://b.com')).toBe('redirect_loop');
-    });
-});
-describe('Fetcher Redirect Integration', () => {
-    let fetcher: Fetcher;
-    beforeEach(() => {
-        vi.clearAllMocks();
-        fetcher = new Fetcher({ rate: 100, maxRedirects: 2 });
-    });
-    it('should stop at max redirects', async () => {
-        const mockRequest = vi.mocked(request);
-        // Return 301 with unique locations
-        mockRequest
-            .mockResolvedValueOnce({
-                statusCode: 301,
-                headers: { location: 'http://a.com' },
-                body: { dump: vi.fn().mockResolvedValue(undefined) }
-            } as any)
-            .mockResolvedValueOnce({
-                statusCode: 301,
-                headers: { location: 'http://b.com' },
-                body: { dump: vi.fn().mockResolvedValue(undefined) }
-            } as any)
-            .mockResolvedValueOnce({
-                statusCode: 301,
-                headers: { location: 'http://c.com' },
-                body: { dump: vi.fn().mockResolvedValue(undefined) }
-            } as any);
-        const res = await fetcher.fetch('http://start.com');
-        expect(res.status).toBe('redirect_limit_exceeded');
-        expect(res.redirectChain).toHaveLength(2);
-    });
-    it('should detect loops in fetch', async () => {
-        const mockRequest = vi.mocked(request);
-        mockRequest.mockResolvedValue({
-            statusCode: 301,
-            headers: { location: 'http://start.com' },
-            body: { dump: vi.fn().mockResolvedValue(undefined) }
-        } as any);
-        const res = await fetcher.fetch('http://start.com');
-        expect(res.status).toBe('redirect_loop');
-    });
-});

package/tests/renderAnalysisCsv.test.ts DELETED Viewed

@@ -1,183 +0,0 @@
-import { describe, expect, test } from 'vitest';
-import { renderAnalysisCsv, AnalysisResult } from '../src/analysis/analyze.js';
-describe('renderAnalysisCsv', () => {
-    test('renders CSV with headers', () => {
-        const result: AnalysisResult = {
-            pages: [],
-            site_summary: {
-                pages_analyzed: 0,
-                avg_seo_score: 0,
-                thin_pages: 0,
-                duplicate_titles: 0,
-                site_score: 0
-            },
-            site_scores: {} as any,
-            active_modules: {
-                seo: true,
-                content: true,
-                accessibility: true
-            }
-        };
-        const csv = renderAnalysisCsv(result);
-        expect(csv).toContain('URL,SEO Score,Thin Score,HTTP Status,Title,Title Length,Meta Description,Desc Length,Word Count,Internal Links,External Links');
-    });
-    test('renders a single page correctly', () => {
-        const result: AnalysisResult = {
-            pages: [
-                {
-                    url: 'https://example.com',
-                    status: 200,
-                    seoScore: 85,
-                    thinScore: 10,
-                    title: { value: 'Example Domain', length: 14, status: 'ok' },
-                    metaDescription: { value: 'This is an example description.', length: 29, status: 'ok' },
-                    content: { wordCount: 500 } as any,
-                    links: { internalLinks: 5, externalLinks: 2 } as any,
-                    h1: {} as any,
-                    images: {} as any,
-                    structuredData: {} as any,
-                    meta: {}
-                }
-            ],
-            site_summary: {
-                pages_analyzed: 1,
-                avg_seo_score: 85,
-                thin_pages: 0,
-                duplicate_titles: 0,
-                site_score: 85
-            },
-            site_scores: {} as any,
-            active_modules: {
-                seo: true,
-                content: true,
-                accessibility: true
-            }
-        };
-        const csv = renderAnalysisCsv(result);
-        const lines = csv.split('\n');
-        expect(lines.length).toBe(2);
-        expect(lines[1]).toContain('https://example.com,85,10,200,"Example Domain",14,"This is an example description.",29,500,5,2');
-    });
-    test('escapes quotes in title and meta description', () => {
-        const result: AnalysisResult = {
-            pages: [
-                {
-                    url: 'https://example.com/quote',
-                    status: 200,
-                    seoScore: 90,
-                    thinScore: 5,
-                    title: { value: 'Example "Quoted" Domain', length: 23, status: 'ok' },
-                    metaDescription: { value: 'This description contains "quotes" inside.', length: 42, status: 'ok' },
-                    content: { wordCount: 300 } as any,
-                    links: { internalLinks: 3, externalLinks: 1 } as any,
-                    h1: {} as any,
-                    images: {} as any,
-                    structuredData: {} as any,
-                    meta: {}
-                }
-            ],
-            site_summary: {
-                pages_analyzed: 1,
-                avg_seo_score: 90,
-                thin_pages: 0,
-                duplicate_titles: 0,
-                site_score: 90
-            },
-            site_scores: {} as any,
-            active_modules: {
-                seo: true,
-                content: true,
-                accessibility: true
-            }
-        };
-        const csv = renderAnalysisCsv(result);
-        const lines = csv.split('\n');
-        // Expect double quotes to be escaped with double quotes: " -> ""
-        // And the whole field wrapped in quotes
-        expect(lines[1]).toContain('"Example ""Quoted"" Domain"');
-        expect(lines[1]).toContain('"This description contains ""quotes"" inside."');
-    });
-    test('handles Pending/Limit status (status: 0)', () => {
-        const result: AnalysisResult = {
-            pages: [
-                {
-                    url: 'https://example.com/pending',
-                    status: 0,
-                    seoScore: 0,
-                    thinScore: 0,
-                    title: { value: null, length: 0, status: 'missing' },
-                    metaDescription: { value: null, length: 0, status: 'missing' },
-                    content: { wordCount: 0 } as any,
-                    links: { internalLinks: 0, externalLinks: 0 } as any,
-                    h1: {} as any,
-                    images: {} as any,
-                    structuredData: {} as any,
-                    meta: {}
-                }
-            ],
-            site_summary: {
-                pages_analyzed: 1,
-                avg_seo_score: 0,
-                thin_pages: 0,
-                duplicate_titles: 0,
-                site_score: 0
-            },
-            site_scores: {} as any,
-            active_modules: {
-                seo: true,
-                content: true,
-                accessibility: true
-            }
-        };
-        const csv = renderAnalysisCsv(result);
-        const lines = csv.split('\n');
-        expect(lines[1]).toContain('Pending/Limit');
-    });
-    test('handles missing title and description gracefully', () => {
-        const result: AnalysisResult = {
-            pages: [
-                {
-                    url: 'https://example.com/missing',
-                    status: 404,
-                    seoScore: 0,
-                    thinScore: 0,
-                    title: { value: undefined as any, length: 0, status: 'missing' },
-                    metaDescription: { value: null as any, length: 0, status: 'missing' },
-                    content: { wordCount: 0 } as any,
-                    links: { internalLinks: 0, externalLinks: 0 } as any,
-                    h1: {} as any,
-                    images: {} as any,
-                    structuredData: {} as any,
-                    meta: {}
-                }
-            ],
-            site_summary: {
-                pages_analyzed: 1,
-                avg_seo_score: 0,
-                thin_pages: 0,
-                duplicate_titles: 0,
-                site_score: 0
-            },
-            site_scores: {} as any,
-            active_modules: {
-                seo: true,
-                content: true,
-                accessibility: true
-            }
-        };
-        const csv = renderAnalysisCsv(result);
-        const lines = csv.split('\n');
-        // Should produce empty quoted strings ""
-        expect(lines[1]).toContain(',"",0,"",0,0,0,0');
-    });
-});

package/tests/safety.test.ts DELETED Viewed

@@ -1,126 +0,0 @@
-import { describe, it, expect, vi } from 'vitest';
-import { IPGuard } from '../src/core/security/ipGuard.js';
-import { RateLimiter } from '../src/core/network/rateLimiter.js';
-import { RetryPolicy } from '../src/core/network/retryPolicy.js';
-import { ResponseLimiter } from '../src/core/network/responseLimiter.js';
-import { Readable } from 'stream';
-import * as dns from 'dns';
-vi.mock('dns', () => ({
-    resolve4: vi.fn(),
-    resolve6: vi.fn(),
-}));
-describe('IPGuard', () => {
-    it('should block IPv4 internal ranges', () => {
-        expect(IPGuard.isInternal('127.0.0.1')).toBe(true);
-        expect(IPGuard.isInternal('10.0.0.1')).toBe(true);
-        expect(IPGuard.isInternal('192.168.1.1')).toBe(true);
-        expect(IPGuard.isInternal('172.16.0.1')).toBe(true);
-        expect(IPGuard.isInternal('172.31.255.255')).toBe(true);
-        expect(IPGuard.isInternal('169.254.1.1')).toBe(true);
-        expect(IPGuard.isInternal('0.0.0.0')).toBe(true);
-    });
-    it('should allow public IPv4', () => {
-        expect(IPGuard.isInternal('8.8.8.8')).toBe(false);
-        expect(IPGuard.isInternal('1.1.1.1')).toBe(false);
-        expect(IPGuard.isInternal('172.32.0.1')).toBe(false);
-    });
-    it('should block IPv6 internal/local addresses', () => {
-        expect(IPGuard.isInternal('::1')).toBe(true);
-        expect(IPGuard.isInternal('fc00::1')).toBe(true);
-        expect(IPGuard.isInternal('fe80::1')).toBe(true);
-    });
-    it('should block IPv4-mapped IPv6 internal addresses', () => {
-        expect(IPGuard.isInternal('::ffff:127.0.0.1')).toBe(true);
-        expect(IPGuard.isInternal('::ffff:10.0.0.1')).toBe(true);
-        expect(IPGuard.isInternal('::ffff:192.168.1.1')).toBe(true);
-        expect(IPGuard.isInternal('::ffff:169.254.169.254')).toBe(true);
-        expect(IPGuard.isInternal('::ffff:7f00:0001')).toBe(true); // Hex 127.0.0.1
-    });
-    it('should allow IPv4-mapped IPv6 public addresses', () => {
-        expect(IPGuard.isInternal('::ffff:8.8.8.8')).toBe(false);
-    });
-    it('should validate hostname by resolving IPs', async () => {
-        const resolve4Spy = vi.mocked(dns.resolve4);
-        const resolve6Spy = vi.mocked(dns.resolve6);
-        resolve4Spy.mockImplementation((_h: string, cb: any) => cb(null, ['1.1.1.1']));
-        resolve6Spy.mockImplementation((_h: string, cb: any) => cb(null, []));
-        expect(await IPGuard.validateHost('example.com')).toBe(true);
-        resolve4Spy.mockImplementation((_h: string, cb: any) => cb(null, ['127.0.0.1']));
-        expect(await IPGuard.validateHost('localhost')).toBe(false);
-    });
-});
-describe('RateLimiter', () => {
-    it('should enforce rate limits', async () => {
-        const limiter = new RateLimiter(1); // 1 req/sec = 1000ms interval
-        const start = Date.now();
-        await limiter.waitForToken('host1'); // returns immediately, tokens becomes 0
-        await limiter.waitForToken('host1'); // waits for refill (1s)
-        const elapsed = Date.now() - start;
-        expect(elapsed).toBeGreaterThanOrEqual(1000);
-    }, 5000);
-    it('should have separate buckets for hosts', async () => {
-        const limiter = new RateLimiter(1);
-        const start = Date.now();
-        await limiter.waitForToken('host1');
-        await limiter.waitForToken('host2');
-        const elapsed = Date.now() - start;
-        expect(elapsed).toBeLessThan(100);
-    });
-    it('should respect crawlDelay if higher than rate', async () => {
-        const limiter = new RateLimiter(1); // 1000ms interval
-        const start = Date.now();
-        await limiter.waitForToken('host3'); // returns immediately, tokens = 0
-        await limiter.waitForToken('host3', 1); // 1s crawl delay
-        const elapsed = Date.now() - start;
-        expect(elapsed).toBeGreaterThanOrEqual(1000);
-    }, 5000);
-});
-describe('RetryPolicy', () => {
-    it('should retry transient failures', async () => {
-        let calls = 0;
-        const result = await RetryPolicy.execute(
-            async () => {
-                calls++;
-                if (calls < 3) throw new Error('Status 500');
-                return 'success';
-            },
-            (err) => err.message === 'Status 500',
-            { maxRetries: 3, baseDelay: 10 }
-        );
-        expect(result).toBe('success');
-        expect(calls).toBe(3);
-    });
-});
-describe('ResponseLimiter', () => {
-    it('should stream to string', async () => {
-        const stream = Readable.from(['hello ', 'world']);
-        const result = await ResponseLimiter.streamToString(stream, 100);
-        expect(result).toBe('hello world');
-    });
-    it('should abort if limit exceeded', async () => {
-        const stream = Readable.from(['too ', 'large ', 'content']);
-        await expect(ResponseLimiter.streamToString(stream, 5)).rejects.toThrow('Oversized response');
-    });
-});