recker 1.0.28-next.32fe8ef → 1.0.28-next.4354f8c
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/tui/shell.d.ts +1 -0
- package/dist/cli/tui/shell.js +112 -1
- package/dist/scrape/index.d.ts +2 -0
- package/dist/scrape/index.js +1 -0
- package/dist/scrape/spider.d.ts +59 -0
- package/dist/scrape/spider.js +209 -0
- package/dist/seo/analyzer.js +12 -0
- package/dist/seo/rules/accessibility.js +620 -54
- package/dist/seo/rules/best-practices.d.ts +2 -0
- package/dist/seo/rules/best-practices.js +188 -0
- package/dist/seo/rules/crawl.d.ts +2 -0
- package/dist/seo/rules/crawl.js +307 -0
- package/dist/seo/rules/cwv.d.ts +2 -0
- package/dist/seo/rules/cwv.js +337 -0
- package/dist/seo/rules/ecommerce.d.ts +2 -0
- package/dist/seo/rules/ecommerce.js +252 -0
- package/dist/seo/rules/i18n.d.ts +2 -0
- package/dist/seo/rules/i18n.js +222 -0
- package/dist/seo/rules/index.d.ts +32 -0
- package/dist/seo/rules/index.js +71 -0
- package/dist/seo/rules/internal-linking.d.ts +2 -0
- package/dist/seo/rules/internal-linking.js +375 -0
- package/dist/seo/rules/local.d.ts +2 -0
- package/dist/seo/rules/local.js +265 -0
- package/dist/seo/rules/pwa.d.ts +2 -0
- package/dist/seo/rules/pwa.js +302 -0
- package/dist/seo/rules/readability.d.ts +2 -0
- package/dist/seo/rules/readability.js +255 -0
- package/dist/seo/rules/security.js +406 -28
- package/dist/seo/rules/social.d.ts +2 -0
- package/dist/seo/rules/social.js +373 -0
- package/dist/seo/rules/types.d.ts +155 -0
- package/package.json +1 -1
package/dist/cli/tui/shell.d.ts
CHANGED
package/dist/cli/tui/shell.js
CHANGED
@@ -10,6 +10,7 @@ import { inspectTLS } from '../../utils/tls-inspector.js';
 import { getSecurityRecords } from '../../utils/dns-toolkit.js';
 import { rdap } from '../../utils/rdap.js';
 import { ScrapeDocument } from '../../scrape/document.js';
+import { Spider } from '../../scrape/spider.js';
 import colors from '../../utils/colors.js';
 import { getShellSearch } from './shell-search.js';
 import { openSearchPanel } from './search-panel.js';
@@ -94,7 +95,7 @@ export class RekShell {
         'get', 'post', 'put', 'delete', 'patch', 'head', 'options',
         'ws', 'udp', 'load', 'chat', 'ai',
         'whois', 'tls', 'ssl', 'security', 'ip', 'dns', 'dns:propagate', 'dns:email', 'rdap', 'ping',
-        'scrap', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
+        'scrap', 'spider', '$', '$text', '$attr', '$html', '$links', '$images', '$scripts', '$css', '$sourcemaps', '$unmap', '$unmap:view', '$unmap:save', '$beautify', '$beautify:save', '$table',
         '?', 'search', 'suggest', 'example',
         'help', 'clear', 'exit', 'set', 'url', 'vars', 'env'
     ];
@@ -368,6 +369,9 @@ export class RekShell {
            case 'scrap':
                await this.runScrap(parts[1]);
                return;
+            case 'spider':
+                await this.runSpider(parts.slice(1));
+                return;
            case '$':
                await this.runSelect(parts.slice(1).join(' '));
                return;
@@ -1434,6 +1438,105 @@ ${colors.bold('Network:')}
        }
        console.log('');
    }
+    async runSpider(args) {
+        let url = '';
+        let maxDepth = 3;
+        let maxPages = 100;
+        let concurrency = 5;
+        for (let i = 0; i < args.length; i++) {
+            const arg = args[i];
+            if (arg.startsWith('--depth=') || arg.startsWith('-d=')) {
+                maxDepth = parseInt(arg.split('=')[1]) || 3;
+            }
+            else if (arg.startsWith('--limit=') || arg.startsWith('-l=')) {
+                maxPages = parseInt(arg.split('=')[1]) || 100;
+            }
+            else if (arg.startsWith('--concurrency=') || arg.startsWith('-c=')) {
+                concurrency = parseInt(arg.split('=')[1]) || 5;
+            }
+            else if (!arg.startsWith('-')) {
+                url = arg;
+            }
+        }
+        if (!url) {
+            if (!this.baseUrl) {
+                console.log(colors.yellow('Usage: spider <url> [options]'));
+                console.log(colors.gray(' Options:'));
+                console.log(colors.gray('   --depth=3         Max crawl depth'));
+                console.log(colors.gray('   --limit=100       Max pages to crawl'));
+                console.log(colors.gray('   --concurrency=5   Concurrent requests'));
+                console.log(colors.gray(' Examples:'));
+                console.log(colors.gray('   spider https://example.com'));
+                console.log(colors.gray('   spider https://example.com --depth=2 --limit=50'));
+                return;
+            }
+            url = this.baseUrl;
+        }
+        else if (!url.startsWith('http')) {
+            url = `https://${url}`;
+        }
+        console.log(colors.cyan(`\nSpider starting: ${url}`));
+        console.log(colors.gray(`  Depth: ${maxDepth} | Limit: ${maxPages} | Concurrency: ${concurrency}`));
+        console.log('');
+        const spider = new Spider({
+            maxDepth,
+            maxPages,
+            concurrency,
+            sameDomain: true,
+            delay: 100,
+            onProgress: (progress) => {
+                process.stdout.write(`\r${colors.gray('  Crawling:')} ${colors.cyan(progress.crawled.toString())} pages | ${colors.gray('Queue:')} ${progress.queued} | ${colors.gray('Depth:')} ${progress.depth}  `);
+            },
+        });
+        try {
+            const result = await spider.crawl(url);
+            process.stdout.write('\r' + ' '.repeat(80) + '\r');
+            console.log(colors.green(`\n✔ Spider complete`) + colors.gray(` (${(result.duration / 1000).toFixed(1)}s)`));
+            console.log(`  ${colors.cyan('Pages crawled')}: ${result.pages.length}`);
+            console.log(`  ${colors.cyan('Unique URLs')}: ${result.visited.size}`);
+            console.log(`  ${colors.cyan('Errors')}: ${result.errors.length}`);
+            const byDepth = new Map();
+            for (const page of result.pages) {
+                byDepth.set(page.depth, (byDepth.get(page.depth) || 0) + 1);
+            }
+            console.log(colors.bold('\n  Pages by depth:'));
+            for (const [depth, count] of Array.from(byDepth.entries()).sort((a, b) => a[0] - b[0])) {
+                const bar = '█'.repeat(Math.min(count, 40));
+                console.log(`    ${colors.gray(`d${depth}:`)} ${bar} ${count}`);
+            }
+            const topPages = [...result.pages]
+                .filter(p => !p.error)
+                .sort((a, b) => b.links.length - a.links.length)
+                .slice(0, 10);
+            if (topPages.length > 0) {
+                console.log(colors.bold('\n  Top pages by outgoing links:'));
+                for (const page of topPages) {
+                    const title = page.title.slice(0, 40) || new URL(page.url).pathname;
+                    console.log(`    ${colors.cyan(page.links.length.toString().padStart(3))} ${title}`);
+                }
+            }
+            if (result.errors.length > 0 && result.errors.length <= 10) {
+                console.log(colors.bold('\n  Errors:'));
+                for (const err of result.errors) {
+                    const path = new URL(err.url).pathname;
+                    console.log(`    ${colors.red('✗')} ${path.slice(0, 40)} ${colors.gray('→')} ${err.error.slice(0, 30)}`);
+                }
+            }
+            else if (result.errors.length > 10) {
+                console.log(colors.yellow(`\n  ${result.errors.length} errors (showing first 10):`));
+                for (const err of result.errors.slice(0, 10)) {
+                    const path = new URL(err.url).pathname;
+                    console.log(`    ${colors.red('✗')} ${path.slice(0, 40)} ${colors.gray('→')} ${err.error.slice(0, 30)}`);
+                }
+            }
+            this.lastResponse = result;
+            console.log(colors.gray('\n  Result stored in lastResponse. Use $links to explore.'));
+        }
+        catch (error) {
+            console.error(colors.red(`Spider failed: ${error.message}`));
+        }
+        console.log('');
+    }
    async runSelect(selector) {
        if (!this.currentDoc) {
            console.log(colors.yellow('No document loaded. Use "scrap <url>" first.'));
@@ -2358,6 +2461,13 @@ ${colors.bold('Network:')}
 ${colors.green('$beautify:save [f]')} Save beautified code to file.
 ${colors.green('$table <selector>')} Extract table as data.
 
+${colors.bold('Web Crawler:')}
+${colors.green('spider <url>')} Crawl website following internal links.
+${colors.gray('Options:')}
+${colors.white('--depth=3')} ${colors.gray('Maximum depth to crawl')}
+${colors.white('--limit=100')} ${colors.gray('Maximum pages to crawl')}
+${colors.white('--concurrency=5')} ${colors.gray('Parallel requests')}
+
 ${colors.bold('Documentation:')}
 ${colors.green('? <query>')} Search Recker documentation.
 ${colors.green('search <query>')} Alias for ? (hybrid fuzzy+semantic search).
@@ -2375,6 +2485,7 @@ ${colors.bold('Network:')}
 › post /post name="Neo" active:=true role:Admin
 › load /heavy-endpoint users=100 mode=stress
 › chat openai gpt-5.1
+› spider https://example.com --depth=2 --limit=50
 `);
    }
}
package/dist/scrape/index.d.ts
CHANGED
@@ -1,4 +1,6 @@
 export { ScrapeDocument } from './document.js';
 export { ScrapeElement } from './element.js';
+export { Spider, spider } from './spider.js';
+export type { SpiderOptions, SpiderPageResult, SpiderProgress, SpiderResult, } from './spider.js';
 export { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
 export type { ExtractedLink, ExtractedImage, ExtractedMeta, OpenGraphData, TwitterCardData, JsonLdData, ExtractedForm, ExtractedFormField, ExtractedTable, ExtractedScript, ExtractedStyle, ExtractionSchema, ExtractionSchemaField, ScrapeOptions, LinkExtractionOptions, ImageExtractionOptions, } from './types.js';
package/dist/scrape/index.js
CHANGED
@@ -1,3 +1,4 @@
 export { ScrapeDocument } from './document.js';
 export { ScrapeElement } from './element.js';
+export { Spider, spider } from './spider.js';
 export { extractLinks, extractImages, extractMeta, extractOpenGraph, extractTwitterCard, extractJsonLd, extractForms, extractTables, extractScripts, extractStyles, } from './extractors.js';
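For orientation, the new crawler exports sit next to the existing scrape helpers in this entry file. A minimal import sketch follows; the `recker/scrape` subpath specifier is an assumption about the package's export map, which this diff does not show:

    // Hypothetical consumer-side import; adjust the specifier to the package's actual exports.
    import { Spider, spider, ScrapeDocument } from 'recker/scrape';
    import type { SpiderOptions, SpiderResult } from 'recker/scrape';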
package/dist/scrape/spider.d.ts
ADDED
@@ -0,0 +1,59 @@
+import type { ExtractedLink } from './types.js';
+export interface SpiderOptions {
+    maxDepth?: number;
+    maxPages?: number;
+    sameDomain?: boolean;
+    concurrency?: number;
+    timeout?: number;
+    delay?: number;
+    exclude?: RegExp[];
+    include?: RegExp[];
+    userAgent?: string;
+    respectRobotsTxt?: boolean;
+    onPage?: (result: SpiderPageResult) => void;
+    onProgress?: (progress: SpiderProgress) => void;
+}
+export interface SpiderPageResult {
+    url: string;
+    status: number;
+    title: string;
+    depth: number;
+    links: ExtractedLink[];
+    duration: number;
+    error?: string;
+}
+export interface SpiderProgress {
+    crawled: number;
+    queued: number;
+    total: number;
+    currentUrl: string;
+    depth: number;
+}
+export interface SpiderResult {
+    startUrl: string;
+    pages: SpiderPageResult[];
+    visited: Set<string>;
+    duration: number;
+    errors: Array<{
+        url: string;
+        error: string;
+    }>;
+}
+export declare class Spider {
+    private options;
+    private client;
+    private visited;
+    private queue;
+    private results;
+    private errors;
+    private baseHost;
+    private running;
+    private aborted;
+    constructor(options?: SpiderOptions);
+    crawl(startUrl: string): Promise<SpiderResult>;
+    private crawlPage;
+    abort(): void;
+    isRunning(): boolean;
+    getProgress(): SpiderProgress;
+}
+export declare function spider(url: string, options?: SpiderOptions): Promise<SpiderResult>;
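Taken together, these declarations describe the crawler's public surface. The sketch below is a hedged usage example based only on the types above; the relative import path refers to this dist file and may differ from the package's published entry point:

    import { Spider, type SpiderResult } from './spider.js';

    const crawler = new Spider({
        maxDepth: 2,
        maxPages: 50,
        concurrency: 5,
        sameDomain: true,
        exclude: [/\/admin\//],                 // illustrative pattern
        onProgress: (p) => console.log(`crawled ${p.crawled}, queued ${p.queued}`),
    });

    const result: SpiderResult = await crawler.crawl('https://example.com');
    console.log(result.pages.length, 'pages in', result.duration, 'ms');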
package/dist/scrape/spider.js
ADDED
@@ -0,0 +1,209 @@
+import { createClient } from '../core/client.js';
+import { ScrapeDocument } from './document.js';
+function normalizeUrl(urlStr) {
+    try {
+        const url = new URL(urlStr);
+        url.hash = '';
+        url.searchParams.sort();
+        if (url.pathname !== '/' && url.pathname.endsWith('/')) {
+            url.pathname = url.pathname.slice(0, -1);
+        }
+        return url.toString();
+    }
+    catch {
+        return urlStr;
+    }
+}
+function shouldCrawl(url, baseHost, options) {
+    try {
+        const parsed = new URL(url);
+        if (!['http:', 'https:'].includes(parsed.protocol)) {
+            return false;
+        }
+        if (options.sameDomain !== false && parsed.hostname !== baseHost) {
+            return false;
+        }
+        const skipExtensions = [
+            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico',
+            '.pdf', '.zip', '.tar', '.gz', '.rar',
+            '.mp3', '.mp4', '.avi', '.mov', '.webm',
+            '.css', '.js', '.json', '.xml', '.rss',
+            '.woff', '.woff2', '.ttf', '.eot',
+        ];
+        const pathname = parsed.pathname.toLowerCase();
+        if (skipExtensions.some(ext => pathname.endsWith(ext))) {
+            return false;
+        }
+        if (options.exclude?.some(pattern => pattern.test(url))) {
+            return false;
+        }
+        if (options.include?.length) {
+            if (!options.include.some(pattern => pattern.test(url))) {
+                return false;
+            }
+        }
+        return true;
+    }
+    catch {
+        return false;
+    }
+}
+function sleep(ms) {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
+export class Spider {
+    options;
+    client;
+    visited = new Set();
+    queue = [];
+    results = [];
+    errors = [];
+    baseHost = '';
+    running = false;
+    aborted = false;
+    constructor(options = {}) {
+        this.options = {
+            maxDepth: options.maxDepth ?? 3,
+            maxPages: options.maxPages ?? 100,
+            sameDomain: options.sameDomain ?? true,
+            concurrency: options.concurrency ?? 5,
+            timeout: options.timeout ?? 10000,
+            delay: options.delay ?? 100,
+            userAgent: options.userAgent ?? 'Recker Spider/1.0',
+            respectRobotsTxt: options.respectRobotsTxt ?? true,
+            exclude: options.exclude,
+            include: options.include,
+            onPage: options.onPage,
+            onProgress: options.onProgress,
+        };
+        this.client = createClient({
+            baseUrl: 'http://localhost',
+            timeout: this.options.timeout,
+            headers: {
+                'User-Agent': this.options.userAgent,
+            },
+        });
+    }
+    async crawl(startUrl) {
+        const startTime = performance.now();
+        const normalizedStart = normalizeUrl(startUrl);
+        this.baseHost = new URL(normalizedStart).hostname;
+        this.visited.clear();
+        this.queue = [{ url: normalizedStart, depth: 0 }];
+        this.results = [];
+        this.errors = [];
+        this.running = true;
+        this.aborted = false;
+        while (this.queue.length > 0 && !this.aborted) {
+            if (this.results.length >= this.options.maxPages) {
+                break;
+            }
+            const batch = [];
+            while (batch.length < this.options.concurrency && this.queue.length > 0) {
+                const item = this.queue.shift();
+                const normalized = normalizeUrl(item.url);
+                if (this.visited.has(normalized)) {
+                    continue;
+                }
+                if (item.depth > this.options.maxDepth) {
+                    continue;
+                }
+                this.visited.add(normalized);
+                batch.push({ ...item, url: normalized });
+            }
+            if (batch.length === 0) {
+                continue;
+            }
+            await Promise.all(batch.map(item => this.crawlPage(item)));
+            if (this.options.delay > 0 && this.queue.length > 0) {
+                await sleep(this.options.delay);
+            }
+        }
+        this.running = false;
+        return {
+            startUrl: normalizedStart,
+            pages: this.results,
+            visited: this.visited,
+            duration: Math.round(performance.now() - startTime),
+            errors: this.errors,
+        };
+    }
+    async crawlPage(item) {
+        const startTime = performance.now();
+        this.options.onProgress?.({
+            crawled: this.results.length,
+            queued: this.queue.length,
+            total: this.visited.size,
+            currentUrl: item.url,
+            depth: item.depth,
+        });
+        try {
+            const response = await this.client.get(item.url);
+            const status = response.status;
+            const contentType = response.headers.get('content-type') || '';
+            if (!contentType.includes('text/html')) {
+                return;
+            }
+            const html = await response.text();
+            const doc = await ScrapeDocument.create(html, { baseUrl: item.url });
+            const title = doc.selectFirst('title').text() || '';
+            const links = doc.links({ absolute: true });
+            const result = {
+                url: item.url,
+                status,
+                title,
+                depth: item.depth,
+                links,
+                duration: Math.round(performance.now() - startTime),
+            };
+            this.results.push(result);
+            this.options.onPage?.(result);
+            for (const link of links) {
+                if (!link.href)
+                    continue;
+                const normalized = normalizeUrl(link.href);
+                if (this.visited.has(normalized))
+                    continue;
+                if (!shouldCrawl(normalized, this.baseHost, this.options))
+                    continue;
+                this.queue.push({
+                    url: normalized,
+                    depth: item.depth + 1,
+                });
+            }
+        }
+        catch (error) {
+            const errorResult = {
+                url: item.url,
+                status: 0,
+                title: '',
+                depth: item.depth,
+                links: [],
+                duration: Math.round(performance.now() - startTime),
+                error: error.message,
+            };
+            this.results.push(errorResult);
+            this.errors.push({ url: item.url, error: error.message });
+            this.options.onPage?.(errorResult);
+        }
+    }
+    abort() {
+        this.aborted = true;
+    }
+    isRunning() {
+        return this.running;
+    }
+    getProgress() {
+        return {
+            crawled: this.results.length,
+            queued: this.queue.length,
+            total: this.visited.size,
+            currentUrl: '',
+            depth: 0,
+        };
+    }
+}
+export async function spider(url, options) {
+    const s = new Spider(options);
+    return s.crawl(url);
+}
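The crawl loop above is breadth-first in batches: up to `concurrency` URLs are dequeued, fetched in parallel with `Promise.all`, an optional `delay` separates batches, and non-HTML responses, already-visited URLs, and over-depth URLs are skipped. A short hedged sketch of the `spider()` convenience helper with a per-page callback (the URL and patterns are illustrative):

    import { spider } from './spider.js';

    const result = await spider('https://example.com', {
        maxDepth: 2,
        include: [/\/docs\//],           // only follow documentation URLs (illustrative)
        onPage: (page) => {
            if (page.error) console.warn('failed:', page.url, page.error);
            else console.log(page.status, page.url, `${page.links.length} links`);
        },
    });
    console.log(`visited ${result.visited.size} unique URLs`);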
package/dist/seo/analyzer.js
CHANGED
@@ -72,6 +72,16 @@ export class SeoAnalyzer {
    buildRuleContext(data) {
        const { meta, og, twitter, jsonLd, headings, content, linkAnalysis, imageAnalysis, links } = data;
        const htmlLang = this.$('html').attr('lang');
+        const hreflangTags = [];
+        this.$('link[rel="alternate"][hreflang]').each((_, el) => {
+            const $el = this.$(el);
+            const lang = $el.attr('hreflang');
+            const href = $el.attr('href');
+            if (lang && href) {
+                hreflangTags.push({ lang, href });
+            }
+        });
+        const ogLocale = this.$('meta[property="og:locale"]').attr('content');
        const genericTexts = SEO_THRESHOLDS.links.genericTexts;
        const genericTextLinks = links.filter((l) => {
            const text = l.text?.toLowerCase().trim();
@@ -196,6 +206,8 @@ export class SeoAnalyzer {
            titleMatchesH1: meta.title && h1Text ? meta.title.toLowerCase().trim() === h1Text.toLowerCase().trim() : undefined,
            ...this.analyzeUrlQuality(),
            ...this.analyzeJsRendering(content),
+            hreflangTags: hreflangTags.length > 0 ? hreflangTags : undefined,
+            ogLocale,
        };
    }
    analyzeUrlQuality() {
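For reference, the analyzer change above collects `<link rel="alternate" hreflang="...">` pairs and the `og:locale` meta value into the rule context that the new i18n rules consume. A hedged sketch of the resulting shape, with illustrative values:

    // Given markup such as:
    //   <link rel="alternate" hreflang="en" href="https://example.com/en/">
    //   <link rel="alternate" hreflang="de" href="https://example.com/de/">
    //   <meta property="og:locale" content="en_US">
    // buildRuleContext() would expose roughly:
    const hreflangTags = [
        { lang: 'en', href: 'https://example.com/en/' },
        { lang: 'de', href: 'https://example.com/de/' },
    ];
    const ogLocale = 'en_US';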