khoji 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +136 -0
  2. package/dist/ai/GeminiAdapter.d.ts +7 -0
  3. package/dist/ai/GeminiAdapter.d.ts.map +1 -0
  4. package/dist/ai/GeminiAdapter.js +40 -0
  5. package/dist/ai/GeminiAdapter.js.map +1 -0
  6. package/dist/browser/BrowserManager.d.ts +17 -0
  7. package/dist/browser/BrowserManager.d.ts.map +1 -0
  8. package/dist/browser/BrowserManager.js +61 -0
  9. package/dist/browser/BrowserManager.js.map +1 -0
  10. package/dist/browser/PageLoader.d.ts +21 -0
  11. package/dist/browser/PageLoader.d.ts.map +1 -0
  12. package/dist/browser/PageLoader.js +116 -0
  13. package/dist/browser/PageLoader.js.map +1 -0
  14. package/dist/cli/index.d.ts +3 -0
  15. package/dist/cli/index.d.ts.map +1 -0
  16. package/dist/cli/index.js +98 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/extractors/AnimationExtractor.d.ts +12 -0
  19. package/dist/extractors/AnimationExtractor.d.ts.map +1 -0
  20. package/dist/extractors/AnimationExtractor.js +247 -0
  21. package/dist/extractors/AnimationExtractor.js.map +1 -0
  22. package/dist/extractors/AssetExtractor.d.ts +11 -0
  23. package/dist/extractors/AssetExtractor.d.ts.map +1 -0
  24. package/dist/extractors/AssetExtractor.js +124 -0
  25. package/dist/extractors/AssetExtractor.js.map +1 -0
  26. package/dist/extractors/ContentExtractor.d.ts +13 -0
  27. package/dist/extractors/ContentExtractor.d.ts.map +1 -0
  28. package/dist/extractors/ContentExtractor.js +60 -0
  29. package/dist/extractors/ContentExtractor.js.map +1 -0
  30. package/dist/extractors/DomExtractor.d.ts +11 -0
  31. package/dist/extractors/DomExtractor.d.ts.map +1 -0
  32. package/dist/extractors/DomExtractor.js +68 -0
  33. package/dist/extractors/DomExtractor.js.map +1 -0
  34. package/dist/extractors/InteractionExtractor.d.ts +10 -0
  35. package/dist/extractors/InteractionExtractor.d.ts.map +1 -0
  36. package/dist/extractors/InteractionExtractor.js +64 -0
  37. package/dist/extractors/InteractionExtractor.js.map +1 -0
  38. package/dist/extractors/MetaExtractor.d.ts +8 -0
  39. package/dist/extractors/MetaExtractor.d.ts.map +1 -0
  40. package/dist/extractors/MetaExtractor.js +33 -0
  41. package/dist/extractors/MetaExtractor.js.map +1 -0
  42. package/dist/extractors/StyleExtractor.d.ts +10 -0
  43. package/dist/extractors/StyleExtractor.d.ts.map +1 -0
  44. package/dist/extractors/StyleExtractor.js +87 -0
  45. package/dist/extractors/StyleExtractor.js.map +1 -0
  46. package/dist/index.d.ts +6 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +6 -0
  49. package/dist/index.js.map +1 -0
  50. package/dist/output/Writer.d.ts +5 -0
  51. package/dist/output/Writer.d.ts.map +1 -0
  52. package/dist/output/Writer.js +13 -0
  53. package/dist/output/Writer.js.map +1 -0
  54. package/dist/pipeline/Cleaner.d.ts +12 -0
  55. package/dist/pipeline/Cleaner.d.ts.map +1 -0
  56. package/dist/pipeline/Cleaner.js +41 -0
  57. package/dist/pipeline/Cleaner.js.map +1 -0
  58. package/dist/pipeline/ComponentDetector.d.ts +8 -0
  59. package/dist/pipeline/ComponentDetector.d.ts.map +1 -0
  60. package/dist/pipeline/ComponentDetector.js +43 -0
  61. package/dist/pipeline/ComponentDetector.js.map +1 -0
  62. package/dist/pipeline/runner.d.ts +3 -0
  63. package/dist/pipeline/runner.d.ts.map +1 -0
  64. package/dist/pipeline/runner.js +182 -0
  65. package/dist/pipeline/runner.js.map +1 -0
  66. package/dist/prompting/PromptGenerator.d.ts +5 -0
  67. package/dist/prompting/PromptGenerator.d.ts.map +1 -0
  68. package/dist/prompting/PromptGenerator.js +30 -0
  69. package/dist/prompting/PromptGenerator.js.map +1 -0
  70. package/dist/serializer/JsonSerializer.d.ts +6 -0
  71. package/dist/serializer/JsonSerializer.d.ts.map +1 -0
  72. package/dist/serializer/JsonSerializer.js +7 -0
  73. package/dist/serializer/JsonSerializer.js.map +1 -0
  74. package/dist/serializer/MarkdownSerializer.d.ts +7 -0
  75. package/dist/serializer/MarkdownSerializer.d.ts.map +1 -0
  76. package/dist/serializer/MarkdownSerializer.js +143 -0
  77. package/dist/serializer/MarkdownSerializer.js.map +1 -0
  78. package/dist/types/KhojContext.d.ts +141 -0
  79. package/dist/types/KhojContext.d.ts.map +1 -0
  80. package/dist/types/KhojContext.js +6 -0
  81. package/dist/types/KhojContext.js.map +1 -0
  82. package/dist/utils/logger.d.ts +15 -0
  83. package/dist/utils/logger.d.ts.map +1 -0
  84. package/dist/utils/logger.js +70 -0
  85. package/dist/utils/logger.js.map +1 -0
  86. package/dist/utils/text.d.ts +2 -0
  87. package/dist/utils/text.d.ts.map +1 -0
  88. package/dist/utils/text.js +6 -0
  89. package/dist/utils/text.js.map +1 -0
  90. package/dist/utils/tokenEstimator.d.ts +10 -0
  91. package/dist/utils/tokenEstimator.d.ts.map +1 -0
  92. package/dist/utils/tokenEstimator.js +17 -0
  93. package/dist/utils/tokenEstimator.js.map +1 -0
  94. package/khoj-context.schema.json +48 -0
  95. package/package.json +75 -0
package/README.md ADDED
@@ -0,0 +1,136 @@
1
+ # Khoj (खोज)
2
+
3
+ [![npm version](https://img.shields.io/npm/v/khoji.svg)](https://npmjs.org/package/khoji)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ **Khoj** is a focused, open-source CLI tool and Node.js package that visits any public URL and extracts only the meaningful layers of the website — outputting a compact `khoj-context.json` specifically designed for AI agents (like Gemini, Claude, and GPT-4).
7
+
8
+ Raw HTML is noisy and wastes LLM tokens. Khoj solves the **token bloat problem** by stripping the noise and feeding your agent exactly what it needs to understand the page.
9
+
10
+ ## Features
11
+
12
+ - **Token Efficient**: Reduces raw HTML token size by up to 90%.
13
+ - **Design Tokens**: Automatically extracts CSS custom properties (colors, spacing, fonts).
14
+ - **Animation Aware**: Detects CSS animations, GSAP timelines, Framer Motion, AOS, and infers the purpose of GIFs.
15
+ - **Semantic DOM**: Provides a clean, depth-capped, text-truncated structural tree.
16
+ - **Component Detection**: Automatically flags repeating patterns (e.g., Cards, ListItems).
17
+ - **Interactive Map**: Extracts forms, fields, and navigation menus.
18
+ - **Clone Mode**: Extracts a full-page PNG screenshot, the raw HTML, and a concatenated CSS file for pixel-perfect AI reproduction.
19
+ - **Gemini Native**: Built-in `--send-to-gemini` flag to pipe context straight to an LLM.
20
+
21
+ ## Installation & Usage
22
+
23
+ Khoj can be run instantly, or installed either globally or locally to suit your workflow.
24
+
25
+ ### 1. Run Instantly (No Install)
26
+ If you don't want to install anything, you can run Khoj directly using `npx`:
27
+ ```bash
28
+ npx khoji https://example.com --send-to-gemini --prompt "Identify all primary call-to-action buttons."
29
+ ```
30
+
31
+ ### 3. Install as a Dev Dependency
32
+ If you are building an AI project and want Khoj locally:
33
+ ```bash
34
+ npm install -D khoji
35
+ npx khoji https://example.com
36
+ ```
37
+
38
+ ### Bypassing "Click to Enter" Preloaders
39
+ Many high-end award-winning sites hide their entire layout behind an initial "Click to Enter" or "Start Experience" overlay screen. If you extract the site normally, you will only capture the loader screen.
40
+
41
+ To bypass this natively, inspect the website to find the CSS Selector of the start button (e.g., `#enter-button` or `.preloader-enter`), and pass it to Khoj using the `--click` flag:
42
+
43
+ ```bash
44
+ npx khoji https://dich-fashion.webflow.io/ --clone --click ".preloader-enter"
45
+ ```
46
+ Khoj will automatically navigate to the site, wait for the overlay button, click it, wait for the intro animations to clear, and *then* run the full clone extraction of the underlying page!
47
+
48
+ ### 2. Install Globally
49
+ If you plan to use Khoj frequently from your terminal:
50
+ ```bash
51
+ npm install -g khoji
52
+ ```
53
+ Once installed globally, you can drop the `npx` prefix and just type:
54
+ ```bash
55
+ khoj https://example.com
56
+ ```
57
+
58
+ > **Tip:** If you just type `khoj` or `npx khoji` in your terminal without any URL, it will print out the full help menu and list all available options.
59
+
60
+ ### What happens next?
61
+ Whichever way you run it, Khoj will create an `output/` folder in your **current working directory**. Inside that folder, you will find a subdirectory named after the website (e.g., `output/example.com/`).
62
+
63
+ You can then manually drag and drop these generated files (`khoj-context.json` or `khoj-context.md`) into ChatGPT, Claude, Cursor, or any other AI coding agent as highly-efficient context!
64
+
65
+ ### Options
66
+
67
+ ```bash
68
+ Usage: khoj <url> [options]
69
+
70
+ Extract token-efficient website context for AI agents
71
+
72
+ Arguments:
73
+ url Target URL to extract context from
74
+
75
+ Options:
76
+ -o, --output <dir> Output directory (default: "./output")
77
+ -f, --format <type> Output format: json | markdown | both (default: "both")
78
+ -t, --timeout <ms> Page load timeout in milliseconds (default: "30000")
79
+ --fast Fast mode: skip image loading (reduces extraction time)
80
+ --clone Clone mode: Extract full-page screenshot, raw HTML, and CSS
81
+ --send-to-gemini Send output to Gemini API after extraction
82
+ --prompt <text> Custom instruction to send to Gemini along with context
83
+ -V, --version output the version number
84
+ -h, --help display help for command
85
+ ```
86
+
87
+ ## Output Structure
88
+
89
+ All extracted data is automatically placed in a subdirectory named after the target domain (e.g., `./output/stripe.com/`).
90
+
91
+ ### Programmatic API
92
+
93
+ You can use Khoj within your own Node.js or TypeScript projects:
94
+
95
+ ```bash
96
+ npm install khoji playwright
97
+ ```
98
+
99
+ ```typescript
100
+ import { runExtraction } from 'khoji';
101
+
102
+ await runExtraction({
103
+ url: 'https://example.com',
104
+ outputDir: './context',
105
+ format: 'json',
106
+ timeout: 30000,
107
+ fast: false
108
+ });
109
+ ```
110
+
111
+ ## Output Structure
112
+
113
+ Khoj produces a structured JSON output (and an optional Markdown summary). See the [JSON Schema definition](./khoj-context.schema.json).
114
+
115
+ Key sections in `khoj-context.json`:
116
+ - `meta`: Page title, OpenGraph image, theme-color, JSON-LD
117
+ - `structure`: Cleaned semantic tree
118
+ - `designTokens`: Colors, fonts, typography, spacing, breakpoints
119
+ - `components`: Detected repeating UI patterns
120
+ - `assets`: Images, isolated GIFs, fonts, icons, external scripts
121
+ - `content`: Extracted headings, buttons, and text blocks
122
+ - `interactions`: Actionable forms and nav menus
123
+ - `animations`: CSS keyframes, transitions, JS libraries (GSAP, Framer), and GIF intents.
124
+
125
+ ### Clone Mode Artifacts
126
+ When using the `--clone` flag, three additional raw files are saved directly into the domain folder:
127
+ - **`khoj-clone-YYYY-MM-DD.png`**: A full-page visual screenshot captured by Playwright.
128
+ - **`khoj-clone-YYYY-MM-DD.html`**: The fully hydrated, raw HTML source code.
129
+ - **`khoj-clone-YYYY-MM-DD.css`**: All styling rules needed for pixel-perfect cloning (combines inline `<style>` and external `<link rel="stylesheet">` tags).
130
+
131
+ ## Requirements
132
+ - Node.js >= 18
133
+ - Playwright (installed automatically)
134
+
135
+ ## License
136
+ MIT
@@ -0,0 +1,7 @@
1
+ import type { KhojContext } from '../types/KhojContext.js';
2
+ /**
3
+ * Sends the extracted KhojContext to Google Gemini with an optional user prompt.
4
+ * Streams the model response to stdout.
5
+ */
6
+ export declare function sendToGemini(ctx: KhojContext, prompt?: string): Promise<void>;
7
+ //# sourceMappingURL=GeminiAdapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GeminiAdapter.d.ts","sourceRoot":"","sources":["../../src/ai/GeminiAdapter.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAI3D;;;GAGG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,WAAW,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAmCnF"}
@@ -0,0 +1,40 @@
1
+ import { GoogleGenerativeAI } from '@google/generative-ai';
2
+ import { logger } from '../utils/logger.js';
3
+ const DEFAULT_MODEL = 'gemini-1.5-flash';
4
+ /**
5
+ * Sends the extracted KhojContext (serialised as pretty-printed JSON) to Google Gemini with an optional user prompt. The model name comes from the GEMINI_MODEL environment variable, falling back to DEFAULT_MODEL.
6
+ * Streams the model response to stdout. Logs an error and returns early when GEMINI_API_KEY is unset; API request failures are caught and logged rather than rethrown.
7
+ */
8
+ export async function sendToGemini(ctx, prompt) {
9
+ const apiKey = process.env['GEMINI_API_KEY'];
10
+ if (!apiKey) {
11
+ logger.error('GEMINI_API_KEY is not set. Add it to your .env file.');
12
+ return;
13
+ }
14
+ const model = process.env['GEMINI_MODEL'] ?? DEFAULT_MODEL;
15
+ const genAI = new GoogleGenerativeAI(apiKey);
16
+ const geminiModel = genAI.getGenerativeModel({ model });
17
+ const systemContext = JSON.stringify(ctx, null, 2);
18
+ const userPrompt = prompt ?? 'Summarise this website and suggest how to replicate it with modern web technologies.';
19
+ const fullPrompt = `You are a professional web developer assistant. Below is a structured JSON snapshot of a website, extracted by the Khoj tool. Use this to answer the user's request.
20
+
21
+ <site-context>
22
+ ${systemContext}
23
+ </site-context>
24
+
25
+ User request: ${userPrompt}`;
26
+ logger.step('🤖', `Sending to Gemini (${model})...`);
27
+ try {
28
+ const result = await geminiModel.generateContentStream(fullPrompt);
29
+ process.stdout.write('\n');
30
+ for await (const chunk of result.stream) {
31
+ process.stdout.write(chunk.text());
32
+ }
33
+ process.stdout.write('\n\n');
34
+ logger.success('Gemini response complete');
35
+ }
36
+ catch (err) {
37
+ logger.error('Gemini API request failed', err);
38
+ }
39
+ }
40
+ //# sourceMappingURL=GeminiAdapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"GeminiAdapter.js","sourceRoot":"","sources":["../../src/ai/GeminiAdapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,MAAM,aAAa,GAAG,kBAAkB,CAAC;AAEzC;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,GAAgB,EAAE,MAAe;IAChE,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC7C,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,CAAC,KAAK,CAAC,sDAAsD,CAAC,CAAC;QACrE,OAAO;IACX,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,aAAa,CAAC;IAC3D,MAAM,KAAK,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,KAAK,CAAC,kBAAkB,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;IAExD,MAAM,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;IACnD,MAAM,UAAU,GAAG,MAAM,IAAI,sFAAsF,CAAC;IAEpH,MAAM,UAAU,GAAG;;;EAGrB,aAAa;;;gBAGC,UAAU,EAAE,CAAC;IAEzB,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,sBAAsB,KAAK,MAAM,CAAC,CAAC;IAErD,IAAI,CAAC;QACD,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACnE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC3B,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YACtC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QACvC,CAAC;QACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC7B,MAAM,CAAC,OAAO,CAAC,0BAA0B,CAAC,CAAC;IAC/C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,GAAG,CAAC,CAAC;IACnD,CAAC;AACL,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { type Browser, type BrowserContext } from 'playwright';
2
+ export type LoadMode = 'full' | 'fast';
3
+ export interface BrowserSession {
4
+ browser: Browser;
5
+ context: BrowserContext;
6
+ }
7
+ /**
8
+ * Manages the Playwright browser lifecycle for a single extraction run.
9
+ * Each run gets an isolated browser context — no shared cookies, storage, or state.
10
+ */
11
+ export declare class BrowserManager {
12
+ private browser;
13
+ private context;
14
+ launch(mode?: LoadMode): Promise<BrowserContext>;
15
+ close(): Promise<void>;
16
+ }
17
+ //# sourceMappingURL=BrowserManager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BrowserManager.d.ts","sourceRoot":"","sources":["../../src/browser/BrowserManager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAY,KAAK,OAAO,EAAE,KAAK,cAAc,EAAE,MAAM,YAAY,CAAC;AAGzE,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAEvC,MAAM,WAAW,cAAc;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,cAAc,CAAC;CAC3B;AAED;;;GAGG;AACH,qBAAa,cAAc;IACvB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA+B;IAExC,MAAM,CAAC,IAAI,GAAE,QAAiB,GAAG,OAAO,CAAC,cAAc,CAAC;IA8CxD,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAW/B"}
@@ -0,0 +1,61 @@
1
+ import { chromium } from 'playwright';
2
+ import { logger } from '../utils/logger.js';
3
+ /**
4
+ * Manages the Playwright browser lifecycle for a single extraction run: launch() starts headless Chromium and creates one context, close() tears both down.
5
+ * Each run gets an isolated browser context — no shared cookies, storage, or state.
6
+ */
7
+ export class BrowserManager {
8
+ browser = null;
9
+ context = null;
10
+ async launch(mode = 'fast') {
11
+ logger.step('🌐', 'Launching browser...');
12
+ this.browser = await chromium.launch({
13
+ headless: true,
14
+ args: [
15
+ '--no-sandbox',
16
+ '--disable-setuid-sandbox',
17
+ '--disable-dev-shm-usage',
18
+ '--disable-gpu',
19
+ '--no-zygote',
20
+ ],
21
+ });
22
+ this.context = await this.browser.newContext({
23
+ userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
24
+ locale: 'en-US',
25
+ timezoneId: 'America/New_York',
26
+ viewport: { width: 1440, height: 900 },
27
+ javaScriptEnabled: true,
28
+ ignoreHTTPSErrors: true,
29
+ extraHTTPHeaders: {
30
+ 'Accept-Language': 'en-US,en;q=0.9',
31
+ Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
32
+ },
33
+ });
34
+ // Block heavy resources in fast mode (the default when no mode is passed) to speed up load
35
+ if (mode === 'fast') {
36
+ await this.context.route('**/*', (route) => {
37
+ const resourceType = route.request().resourceType();
38
+ // Abort only media, font, websocket, and eventsource requests.
39
+ // Every other resource type (documents, scripts, stylesheets, images, ...) is allowed through.
40
+ if (['media', 'font', 'websocket', 'eventsource'].includes(resourceType)) {
41
+ return route.abort();
42
+ }
43
+ return route.continue();
44
+ });
45
+ }
46
+ logger.success('Browser ready');
47
+ return this.context;
48
+ }
49
+ async close() {
50
+ if (this.context) {
51
+ await this.context.close().catch(() => undefined);
52
+ this.context = null;
53
+ }
54
+ if (this.browser) {
55
+ await this.browser.close().catch(() => undefined);
56
+ this.browser = null;
57
+ }
58
+ logger.dim('Browser closed');
59
+ }
60
+ }
61
+ //# sourceMappingURL=BrowserManager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"BrowserManager.js","sourceRoot":"","sources":["../../src/browser/BrowserManager.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAqC,MAAM,YAAY,CAAC;AACzE,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAS5C;;;GAGG;AACH,MAAM,OAAO,cAAc;IACf,OAAO,GAAmB,IAAI,CAAC;IAC/B,OAAO,GAA0B,IAAI,CAAC;IAE9C,KAAK,CAAC,MAAM,CAAC,OAAiB,MAAM;QAChC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,sBAAsB,CAAC,CAAC;QAE1C,IAAI,CAAC,OAAO,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC;YACjC,QAAQ,EAAE,IAAI;YACd,IAAI,EAAE;gBACF,cAAc;gBACd,0BAA0B;gBAC1B,yBAAyB;gBACzB,eAAe;gBACf,aAAa;aAChB;SACJ,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC;YACzC,SAAS,EACL,uHAAuH;YAC3H,MAAM,EAAE,OAAO;YACf,UAAU,EAAE,kBAAkB;YAC9B,QAAQ,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE;YACtC,iBAAiB,EAAE,IAAI;YACvB,iBAAiB,EAAE,IAAI;YACvB,gBAAgB,EAAE;gBACd,iBAAiB,EAAE,gBAAgB;gBACnC,MAAM,EACF,uFAAuF;aAC9F;SACJ,CAAC,CAAC;QAEH,sDAAsD;QACtD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YAClB,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;gBACvC,MAAM,YAAY,GAAG,KAAK,CAAC,OAAO,EAAE,CAAC,YAAY,EAAE,CAAC;gBACpD,+DAA+D;gBAC/D,gDAAgD;gBAChD,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,aAAa,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC;oBACvE,OAAO,KAAK,CAAC,KAAK,EAAE,CAAC;gBACzB,CAAC;gBACD,OAAO,KAAK,CAAC,QAAQ,EAAE,CAAC;YAC5B,CAAC,CAAC,CAAC;QACP,CAAC;QAED,MAAM,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC;QAChC,OAAO,IAAI,CAAC,OAAO,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,KAAK;QACP,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAClD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACxB,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,MAAM,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YAClD,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACxB,CAAC;QACD,MAAM,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IACjC,CAAC;CACJ"}
@@ -0,0 +1,21 @@
1
+ import type { BrowserContext, Page } from 'playwright';
2
+ export interface LoadResult {
3
+ page: Page;
4
+ finalUrl: string;
5
+ statusCode: number | null;
6
+ loadTime: number;
7
+ }
8
+ /**
9
+ * Loads a URL into a new Playwright page with:
10
+ * - networkidle wait strategy (all network activity settles)
11
+ * - configurable timeout
12
+ * - redirect tracking
13
+ * - graceful failure with partial result
14
+ */
15
+ export declare class PageLoader {
16
+ private readonly context;
17
+ private readonly timeoutMs;
18
+ constructor(context: BrowserContext, timeoutMs?: number);
19
+ load(url: string, clickSelector?: string): Promise<LoadResult>;
20
+ }
21
+ //# sourceMappingURL=PageLoader.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PageLoader.d.ts","sourceRoot":"","sources":["../../src/browser/PageLoader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAGvD,MAAM,WAAW,UAAU;IACvB,IAAI,EAAE,IAAI,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,QAAQ,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;GAMG;AACH,qBAAa,UAAU;IAEf,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS;gBADT,OAAO,EAAE,cAAc,EACvB,SAAS,GAAE,MAAe;IAGzC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,aAAa,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC;CAgHvE"}
@@ -0,0 +1,116 @@
1
+ import { logger } from '../utils/logger.js';
2
+ /**
3
+ * Loads a URL into a new Playwright page with:
4
+ * - networkidle wait strategy (all network activity settles)
5
+ * - configurable timeout (defaults to 30 000 ms)
6
+ * - redirect tracking (the final URL is read back from page.url())
7
+ * - graceful failure with partial result on timeout only; any other load error is logged and rethrown
8
+ */
9
+ export class PageLoader {
10
+ context;
11
+ timeoutMs;
12
+ constructor(context, timeoutMs = 30_000) {
13
+ this.context = context;
14
+ this.timeoutMs = timeoutMs;
15
+ }
16
+ async load(url, clickSelector) {
17
+ const page = await this.context.newPage();
18
+ const start = Date.now();
19
+ // Track GIF responses (content-type contains image/gif, or URL ends with .gif) for the asset extractor
20
+ page.on('response', (response) => {
21
+ const contentType = response.headers()['content-type'] ?? '';
22
+ const reqUrl = response.url();
23
+ if (contentType.includes('image/gif') || reqUrl.endsWith('.gif')) {
24
+ // Append the URL to window.__khoj_gifs__ in the page (presumably read later by AssetExtractor); evaluate failures are ignored
25
+ page.evaluate((u) => {
26
+ window.__khoj_gifs__ = [
27
+ ...(window.__khoj_gifs__ ?? []),
28
+ u,
29
+ ];
30
+ }, reqUrl).catch(() => undefined);
31
+ }
32
+ });
33
+ let statusCode = null;
34
+ try {
35
+ const response = await page.goto(url, {
36
+ waitUntil: 'networkidle',
37
+ timeout: this.timeoutMs,
38
+ });
39
+ statusCode = response?.status() ?? null;
40
+ if (statusCode !== null && statusCode >= 400) {
41
+ logger.warn(`Server responded with HTTP ${statusCode} for ${url}`);
42
+ }
43
+ // Handle Click-to-Enter Preloaders (a failed wait/click is only warned about; extraction continues)
44
+ if (clickSelector) {
45
+ logger.step('🖱️', `Found --click flag. Waiting for and clicking: ${clickSelector}`);
46
+ try {
47
+ await page.waitForSelector(clickSelector, { timeout: 10000 });
48
+ await page.click(clickSelector);
49
+ // Wait 3 seconds for intro animations/overlays to fade out
50
+ await page.waitForTimeout(3000);
51
+ }
52
+ catch (e) {
53
+ logger.warn(`Failed to click selector "${clickSelector}". Proceeding anyway.`);
54
+ }
55
+ }
56
+ // Auto-scroll the page to trigger lazy-loaded images and intersection observers (scroll animations)
57
+ // We use native mouse.wheel events here instead of window.scrollBy because award-winning
58
+ // websites often use Virtual Scroll libraries (Locomotive, Lenis) that ONLY respond to real WheelEvents.
59
+ logger.step('⏬', 'Scrolling page to trigger GSAP/Virtual-Scroll animations & lazy-loading...');
60
+ // Move mouse to center of screen to ensure wheel events are captured by the main body
61
+ const viewport = page.viewportSize();
62
+ if (viewport) {
63
+ await page.mouse.move(viewport.width / 2, viewport.height / 2);
64
+ }
65
+ let previousScrollY = -1;
66
+ let unchangedCount = 0;
67
+ const maxScrolls = 50;
68
+ for (let i = 0; i < maxScrolls; i++) {
69
+ await page.mouse.wheel(0, 400); // 400px per scroll tick
70
+ // Wait 150ms for smooth scroll momentum (Lenis/Locomotive) AND animations to render
71
+ await page.waitForTimeout(150);
72
+ const scrollData = await page.evaluate(() => {
73
+ return {
74
+ scrollY: window.scrollY,
75
+ scrollHeight: document.body.scrollHeight,
76
+ innerHeight: window.innerHeight
77
+ };
78
+ });
79
+ if (scrollData.scrollY === previousScrollY) {
80
+ unchangedCount++;
81
+ // If scrollY hasn't changed for more than 5 consecutive ticks, we hit bottom or scroll is fully hijacked
82
+ if (unchangedCount > 5)
83
+ break;
84
+ }
85
+ else {
86
+ unchangedCount = 0;
87
+ }
88
+ previousScrollY = scrollData.scrollY;
89
+ // Break if we natively hit the bottom bounds (within 10px of document.body.scrollHeight)
90
+ if (scrollData.scrollY + scrollData.innerHeight >= scrollData.scrollHeight - 10) {
91
+ break;
92
+ }
93
+ }
94
+ // Scroll instantly back to top so screenshot looks normal
95
+ await page.evaluate(() => window.scrollTo(0, 0));
96
+ // Extra settle time for single-page apps running animations or deferred renders
97
+ await page.waitForTimeout(1000);
98
+ const loadTime = Date.now() - start;
99
+ const finalUrl = page.url();
100
+ logger.success(`Page loaded in ${(loadTime / 1000).toFixed(2)}s → ${finalUrl}`);
101
+ return { page, finalUrl, statusCode, loadTime };
102
+ }
103
+ catch (err) {
104
+ const loadTime = Date.now() - start;
105
+ if (err instanceof Error && err.message.includes('timeout')) {
106
+ logger.warn(`Page load timed out after ${this.timeoutMs / 1000}s — continuing with partial content`);
107
+ }
108
+ else {
109
+ logger.error('Failed to load page', err);
110
+ throw err;
111
+ }
112
+ return { page, finalUrl: page.url() || url, statusCode, loadTime };
113
+ }
114
+ }
115
+ }
116
+ //# sourceMappingURL=PageLoader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PageLoader.js","sourceRoot":"","sources":["../../src/browser/PageLoader.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAS5C;;;;;;GAMG;AACH,MAAM,OAAO,UAAU;IAEE;IACA;IAFrB,YACqB,OAAuB,EACvB,YAAoB,MAAM;QAD1B,YAAO,GAAP,OAAO,CAAgB;QACvB,cAAS,GAAT,SAAS,CAAiB;IAC3C,CAAC;IAEL,KAAK,CAAC,IAAI,CAAC,GAAW,EAAE,aAAsB;QAC1C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,oDAAoD;QACpD,IAAI,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,QAAQ,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,QAAQ,CAAC,OAAO,EAAE,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC;YAC7D,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,EAAE,CAAC;YAC9B,IAAI,WAAW,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/D,sDAAsD;gBACtD,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE;oBACf,MAAgD,CAAC,aAAa,GAAG;wBAC9D,GAAG,CAAE,MAAgD,CAAC,aAAa,IAAI,EAAE,CAAC;wBAC1E,CAAC;qBACJ,CAAC;gBACN,CAAC,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC;YACtC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,IAAI,UAAU,GAAkB,IAAI,CAAC;QAErC,IAAI,CAAC;YACD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE;gBAClC,SAAS,EAAE,aAAa;gBACxB,OAAO,EAAE,IAAI,CAAC,SAAS;aAC1B,CAAC,CAAC;YAEH,UAAU,GAAG,QAAQ,EAAE,MAAM,EAAE,IAAI,IAAI,CAAC;YAExC,IAAI,UAAU,KAAK,IAAI,IAAI,UAAU,IAAI,GAAG,EAAE,CAAC;gBAC3C,MAAM,CAAC,IAAI,CAAC,8BAA8B,UAAU,QAAQ,GAAG,EAAE,CAAC,CAAC;YACvE,CAAC;YAED,mCAAmC;YACnC,IAAI,aAAa,EAAE,CAAC;gBAChB,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,iDAAiD,aAAa,EAAE,CAAC,CAAC;gBACrF,IAAI,CAAC;oBACD,MAAM,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC9D,MAAM,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;oBAChC,2DAA2D;oBAC3D,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;gBACpC,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACT,MAAM,CAAC,IAAI,CAAC,6BAA6B,aAAa,uBAAuB,CAAC,CAAC;gBACnF,CAAC;YACL,CAAC;YAED,oGAAoG;YACpG,0FAA0F;YAC1F,yGAAyG;YACzG,MAAM,CAAC,IAAI,CAAC,GAAG,EAAE,4EAA4E,CAAC,CAAC;YAE/F,sFAAsF;YACtF,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;YACrC,IAAI,QAAQ,EAAE,CAAC;gBACX,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,GAAG,
CAAC,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACnE,CAAC;YAED,IAAI,eAAe,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,cAAc,GAAG,CAAC,CAAC;YACvB,MAAM,UAAU,GAAG,EAAE,CAAC;YAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,wBAAwB;gBACxD,oFAAoF;gBACpF,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;gBAE/B,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;oBACxC,OAAO;wBACH,OAAO,EAAE,MAAM,CAAC,OAAO;wBACvB,YAAY,EAAE,QAAQ,CAAC,IAAI,CAAC,YAAY;wBACxC,WAAW,EAAE,MAAM,CAAC,WAAW;qBAClC,CAAC;gBACN,CAAC,CAAC,CAAC;gBAEH,IAAI,UAAU,CAAC,OAAO,KAAK,eAAe,EAAE,CAAC;oBACzC,cAAc,EAAE,CAAC;oBACjB,kFAAkF;oBAClF,IAAI,cAAc,GAAG,CAAC;wBAAE,MAAM;gBAClC,CAAC;qBAAM,CAAC;oBACJ,cAAc,GAAG,CAAC,CAAC;gBACvB,CAAC;gBACD,eAAe,GAAG,UAAU,CAAC,OAAO,CAAC;gBAErC,6CAA6C;gBAC7C,IAAI,UAAU,CAAC,OAAO,GAAG,UAAU,CAAC,WAAW,IAAI,UAAU,CAAC,YAAY,GAAG,EAAE,EAAE,CAAC;oBAC9E,MAAM;gBACV,CAAC;YACL,CAAC;YAED,0DAA0D;YAC1D,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAEjD,gFAAgF;YAChF,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAEhC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAE5B,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,QAAQ,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,QAAQ,EAAE,CAAC,CAAC;YAEhF,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;YACpC,IAAI,GAAG,YAAY,KAAK,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC1D,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,SAAS,GAAG,IAAI,qCAAqC,CAAC,CAAC;YACzG,CAAC;iBAAM,CAAC;gBACJ,MAAM,CAAC,KAAK,CAAC,qBAAqB,EAAE,GAAG,CAAC,CAAC;gBACzC,MAAM,GAAG,CAAC;YACd,CAAC;YACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,GAAG,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC;QACvE,CAAC;IACL,CAAC;CACJ"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ import 'dotenv/config';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC"}
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env node
2
+ import 'dotenv/config';
3
+ import { Command } from 'commander';
4
+ import chalk from 'chalk';
5
+ import { confirm, checkbox } from '@inquirer/prompts';
6
+ import { logger } from '../utils/logger.js';
7
+ import { runExtraction } from '../pipeline/runner.js';
8
+ const program = new Command();
9
+ program
10
+ .name('khoj')
11
+ .description('Extract token-efficient website context for AI agents')
12
+ .version('2.1.4')
13
+ .argument('<url>', 'Target URL to extract context from')
14
+ .option('-o, --output <dir>', 'Output directory', './output')
15
+ .option('-f, --format <type>', 'Output format: json | markdown | both', 'both')
16
+ .option('-t, --timeout <ms>', 'Page load timeout in milliseconds', '30000')
17
+ .option('--send-to-gemini', 'Send output to Gemini API after extraction')
18
+ .option('--prompt <text>', 'Custom instruction to send to Gemini along with context')
19
+ .option('--fast', 'Fast mode: skip image loading (reduces extraction time)')
20
+ .option('--clone', 'Clone mode: Extract full-page screenshot, raw HTML, and CSS')
21
+ .option('--click <selector>', 'CSS selector of an element to click before extraction (useful for "Enter Site" preloaders)')
22
+ .action(async (url, options) => {
23
+ logger.banner();
24
+ // Validate the URL argument — it must parse as an absolute URL (scheme included)
25
+ try {
26
+ new URL(url);
27
+ }
28
+ catch {
29
+ logger.error(`Invalid URL provided: ${url}`);
30
+ logger.error(`Please ensure the URL includes http:// or https:// (e.g., https://${url})`);
31
+ process.exit(1);
32
+ }
33
+ logger.step('🚀', `Starting Khoj extraction for ${chalk.cyan(url)}`);
34
+ let cloneSkills = undefined;
35
+ if (options.clone) {
36
+ const wantsPrompt = await confirm({
37
+ message: 'Do you want to generate a custom AI instruction prompt for this clone?',
38
+ default: true
39
+ });
40
+ if (wantsPrompt) {
41
+ const selections = await checkbox({
42
+ message: 'Select the guidelines the AI should follow when rebuilding this site (Press <space> to select):',
43
+ choices: [
44
+ { name: '★ All of the above', value: 'all' },
45
+ { name: 'Frontend Design (Avoid cliché AI traits)', value: 'frontend-design' },
46
+ { name: 'SEO Best Practices', value: 'seo-audit' },
47
+ { name: 'Web Design Guidelines (a11y, contrast)', value: 'web-design-guidelines' },
48
+ { name: 'Award-Winning Site (3D, GSAP, etc.)', value: 'award-winning-website' }
49
+ ]
50
+ });
51
+ if (selections.includes('all')) {
52
+ cloneSkills = ['frontend-design', 'seo-audit', 'web-design-guidelines', 'award-winning-website'];
53
+ }
54
+ else {
55
+ // 'all' was not selected, so every remaining selection is already a concrete skill id
56
+ cloneSkills = selections;
57
+ }
58
+ }
59
+ }
60
+ const extractOpts = {
61
+ url,
62
+ outputDir: options.output,
63
+ format: options.format, // Type assertion handled by options type
64
+ timeout: parseInt(options.timeout, 10),
65
+ fast: options.fast ?? false, // Ensure fast is boolean
66
+ clone: options.clone,
67
+ cloneSkills: cloneSkills,
68
+ sendToGemini: options.sendToGemini,
69
+ prompt: options.prompt,
70
+ clickSelector: options.click,
71
+ };
72
+ // Validate format option
73
+ if (!['json', 'markdown', 'both'].includes(extractOpts.format)) {
74
+ logger.error('--format must be one of: json, markdown, both');
75
+ process.exit(1);
76
+ }
77
+ // Validate timeout option
78
+ if (isNaN(extractOpts.timeout) || extractOpts.timeout < 1000) {
79
+ logger.error('--timeout must be a number >= 1000 (ms)');
80
+ process.exit(1);
81
+ }
82
+ logger.step('🔎', `Analysing: ${url}`);
83
+ logger.divider();
84
+ try {
85
+ // Run the extraction pipeline; any failure is logged below and exits with code 1
86
+ await runExtraction(extractOpts);
87
+ }
88
+ catch (err) {
89
+ logger.error('Extraction failed', err);
90
+ process.exit(1);
91
+ }
92
+ });
93
+ if (!process.argv.slice(2).length) {
94
+ program.outputHelp();
95
+ process.exit(0);
96
+ }
97
+ program.parseAsync(process.argv);
98
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AACtD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAItD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACF,IAAI,CAAC,MAAM,CAAC;KACZ,WAAW,CAAC,uDAAuD,CAAC;KACpE,OAAO,CAAC,OAAO,CAAC;KAChB,QAAQ,CAAC,OAAO,EAAE,oCAAoC,CAAC;KACvD,MAAM,CAAC,oBAAoB,EAAE,kBAAkB,EAAE,UAAU,CAAC;KAC5D,MAAM,CAAC,qBAAqB,EAAE,uCAAuC,EAAE,MAAM,CAAC;KAC9E,MAAM,CAAC,oBAAoB,EAAE,mCAAmC,EAAE,OAAO,CAAC;KAC1E,MAAM,CAAC,kBAAkB,EAAE,4CAA4C,CAAC;KACxE,MAAM,CAAC,iBAAiB,EAAE,yDAAyD,CAAC;KACpF,MAAM,CAAC,QAAQ,EAAE,yDAAyD,CAAC;KAC3E,MAAM,CAAC,SAAS,EAAE,6DAA6D,CAAC;KAChF,MAAM,CAAC,oBAAoB,EAAE,4FAA4F,CAAC;KAC1H,MAAM,CAAC,KAAK,EAAE,GAAW,EAAE,OAS3B,EAAE,EAAE;IACD,MAAM,CAAC,MAAM,EAAE,CAAC;IAEhB,kCAAkC;IAClC,IAAI,CAAC;QACD,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IAAC,MAAM,CAAC;QACL,MAAM,CAAC,KAAK,CAAC,yBAAyB,GAAG,EAAE,CAAC,CAAC;QAC7C,MAAM,CAAC,KAAK,CAAC,qEAAqE,GAAG,GAAG,CAAC,CAAC;QAC1F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,gCAAgC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAErE,IAAI,WAAW,GAA6B,SAAS,CAAC;IAEtD,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;QAChB,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC;YAC9B,OAAO,EAAE,wEAAwE;YACjF,OAAO,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,IAAI,WAAW,EAAE,CAAC;YACd,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC;gBAC9B,OAAO,EAAE,iGAAiG;gBAC1G,OAAO,EAAE;oBACL,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,KAAK,EAAE;oBAC5C,EAAE,IAAI,EAAE,0CAA0C,EAAE,KAAK,EAAE,iBAAiB,EAAE;oBAC9E,EAAE,IAAI,EAAE,oBAAoB,EAAE,KAAK,EAAE,WAAW,EAAE;oBAClD,EAAE,IAAI,EAAE,wCAAwC,EAAE,KAAK,EAAE,uBAAuB,EAAE;oBAClF,EAAE,IAAI,EAAE,qCAAqC,EAAE,KAAK,EAAE,uBAAuB,EAAE;iBAClF;aACJ,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7B,WAAW,GAAG,CAAC,iBAAiB,EAAE,WAAW,EAAE,uBAAuB,EAAE,uBAAuB,CAAC,CAAC;YACrG,CAAC;iBAAM,CAAC;gBACJ,0DAA0D;gBAC1D,WA
AW,GAAG,UAA0B,CAAC;YAC7C,CAAC;QACL,CAAC;IACL,CAAC;IAED,MAAM,WAAW,GAAsB;QACnC,GAAG;QACH,SAAS,EAAE,OAAO,CAAC,MAAM;QACzB,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,yCAAyC;QACjE,OAAO,EAAE,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;QACtC,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,KAAK,EAAE,yBAAyB;QACtD,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,WAAW,EAAE,WAAW;QACxB,YAAY,EAAE,OAAO,CAAC,YAAY;QAClC,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,aAAa,EAAE,OAAO,CAAC,KAAK;KAC/B,CAAC;IAEF,yBAAyB;IACzB,IAAI,CAAC,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;QAC7D,MAAM,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,0BAA0B;IAC1B,IAAI,KAAK,CAAC,WAAW,CAAC,OAAO,CAAC,IAAI,WAAW,CAAC,OAAO,GAAG,IAAI,EAAE,CAAC;QAC3D,MAAM,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;QACxD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,cAAc,GAAG,EAAE,CAAC,CAAC;IACvC,MAAM,CAAC,OAAO,EAAE,CAAC;IAEjB,IAAI,CAAC;QACD,0DAA0D;QAC1D,MAAM,aAAa,CAAC,WAAW,CAAC,CAAC;IACrC,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,MAAM,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;QACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;AACL,CAAC,CAAC,CAAC;AAEP,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;IAChC,OAAO,CAAC,UAAU,EAAE,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC;AAED,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { Page } from 'playwright';
2
+ import type { AnimationMap, GifAnimationPurpose, ImageAsset } from '../types/KhojContext.js';
3
+ /**
4
+ * AnimationExtractor — 3-pass animation intelligence:
5
+ *
6
+ * Pass 1: CSS @keyframes + transitions (from document.styleSheets)
7
+ * Pass 2: JS animation library detection (GSAP, Framer Motion, AOS, Lottie, etc.)
8
+ * Pass 3: GIF purpose inference from context
9
+ */
10
+ export declare function extractAnimations(page: Page, gifs: ImageAsset[]): Promise<AnimationMap>;
11
+ export type { GifAnimationPurpose };
12
+ //# sourceMappingURL=AnimationExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AnimationExtractor.d.ts","sourceRoot":"","sources":["../../src/extractors/AnimationExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EACR,YAAY,EAMZ,mBAAmB,EACnB,UAAU,EACb,MAAM,yBAAyB,CAAC;AAIjC;;;;;;GAMG;AACH,wBAAsB,iBAAiB,CACnC,IAAI,EAAE,IAAI,EACV,IAAI,EAAE,UAAU,EAAE,GACnB,OAAO,CAAC,YAAY,CAAC,CAwBvB;AAmQD,YAAY,EAAE,mBAAmB,EAAE,CAAC"}