khoji 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95):
  1. package/README.md +136 -0
  2. package/dist/ai/GeminiAdapter.d.ts +7 -0
  3. package/dist/ai/GeminiAdapter.d.ts.map +1 -0
  4. package/dist/ai/GeminiAdapter.js +40 -0
  5. package/dist/ai/GeminiAdapter.js.map +1 -0
  6. package/dist/browser/BrowserManager.d.ts +17 -0
  7. package/dist/browser/BrowserManager.d.ts.map +1 -0
  8. package/dist/browser/BrowserManager.js +61 -0
  9. package/dist/browser/BrowserManager.js.map +1 -0
  10. package/dist/browser/PageLoader.d.ts +21 -0
  11. package/dist/browser/PageLoader.d.ts.map +1 -0
  12. package/dist/browser/PageLoader.js +116 -0
  13. package/dist/browser/PageLoader.js.map +1 -0
  14. package/dist/cli/index.d.ts +3 -0
  15. package/dist/cli/index.d.ts.map +1 -0
  16. package/dist/cli/index.js +98 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/extractors/AnimationExtractor.d.ts +12 -0
  19. package/dist/extractors/AnimationExtractor.d.ts.map +1 -0
  20. package/dist/extractors/AnimationExtractor.js +247 -0
  21. package/dist/extractors/AnimationExtractor.js.map +1 -0
  22. package/dist/extractors/AssetExtractor.d.ts +11 -0
  23. package/dist/extractors/AssetExtractor.d.ts.map +1 -0
  24. package/dist/extractors/AssetExtractor.js +124 -0
  25. package/dist/extractors/AssetExtractor.js.map +1 -0
  26. package/dist/extractors/ContentExtractor.d.ts +13 -0
  27. package/dist/extractors/ContentExtractor.d.ts.map +1 -0
  28. package/dist/extractors/ContentExtractor.js +60 -0
  29. package/dist/extractors/ContentExtractor.js.map +1 -0
  30. package/dist/extractors/DomExtractor.d.ts +11 -0
  31. package/dist/extractors/DomExtractor.d.ts.map +1 -0
  32. package/dist/extractors/DomExtractor.js +68 -0
  33. package/dist/extractors/DomExtractor.js.map +1 -0
  34. package/dist/extractors/InteractionExtractor.d.ts +10 -0
  35. package/dist/extractors/InteractionExtractor.d.ts.map +1 -0
  36. package/dist/extractors/InteractionExtractor.js +64 -0
  37. package/dist/extractors/InteractionExtractor.js.map +1 -0
  38. package/dist/extractors/MetaExtractor.d.ts +8 -0
  39. package/dist/extractors/MetaExtractor.d.ts.map +1 -0
  40. package/dist/extractors/MetaExtractor.js +33 -0
  41. package/dist/extractors/MetaExtractor.js.map +1 -0
  42. package/dist/extractors/StyleExtractor.d.ts +10 -0
  43. package/dist/extractors/StyleExtractor.d.ts.map +1 -0
  44. package/dist/extractors/StyleExtractor.js +87 -0
  45. package/dist/extractors/StyleExtractor.js.map +1 -0
  46. package/dist/index.d.ts +6 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +6 -0
  49. package/dist/index.js.map +1 -0
  50. package/dist/output/Writer.d.ts +5 -0
  51. package/dist/output/Writer.d.ts.map +1 -0
  52. package/dist/output/Writer.js +13 -0
  53. package/dist/output/Writer.js.map +1 -0
  54. package/dist/pipeline/Cleaner.d.ts +12 -0
  55. package/dist/pipeline/Cleaner.d.ts.map +1 -0
  56. package/dist/pipeline/Cleaner.js +41 -0
  57. package/dist/pipeline/Cleaner.js.map +1 -0
  58. package/dist/pipeline/ComponentDetector.d.ts +8 -0
  59. package/dist/pipeline/ComponentDetector.d.ts.map +1 -0
  60. package/dist/pipeline/ComponentDetector.js +43 -0
  61. package/dist/pipeline/ComponentDetector.js.map +1 -0
  62. package/dist/pipeline/runner.d.ts +3 -0
  63. package/dist/pipeline/runner.d.ts.map +1 -0
  64. package/dist/pipeline/runner.js +182 -0
  65. package/dist/pipeline/runner.js.map +1 -0
  66. package/dist/prompting/PromptGenerator.d.ts +5 -0
  67. package/dist/prompting/PromptGenerator.d.ts.map +1 -0
  68. package/dist/prompting/PromptGenerator.js +30 -0
  69. package/dist/prompting/PromptGenerator.js.map +1 -0
  70. package/dist/serializer/JsonSerializer.d.ts +6 -0
  71. package/dist/serializer/JsonSerializer.d.ts.map +1 -0
  72. package/dist/serializer/JsonSerializer.js +7 -0
  73. package/dist/serializer/JsonSerializer.js.map +1 -0
  74. package/dist/serializer/MarkdownSerializer.d.ts +7 -0
  75. package/dist/serializer/MarkdownSerializer.d.ts.map +1 -0
  76. package/dist/serializer/MarkdownSerializer.js +143 -0
  77. package/dist/serializer/MarkdownSerializer.js.map +1 -0
  78. package/dist/types/KhojContext.d.ts +141 -0
  79. package/dist/types/KhojContext.d.ts.map +1 -0
  80. package/dist/types/KhojContext.js +6 -0
  81. package/dist/types/KhojContext.js.map +1 -0
  82. package/dist/utils/logger.d.ts +15 -0
  83. package/dist/utils/logger.d.ts.map +1 -0
  84. package/dist/utils/logger.js +70 -0
  85. package/dist/utils/logger.js.map +1 -0
  86. package/dist/utils/text.d.ts +2 -0
  87. package/dist/utils/text.d.ts.map +1 -0
  88. package/dist/utils/text.js +6 -0
  89. package/dist/utils/text.js.map +1 -0
  90. package/dist/utils/tokenEstimator.d.ts +10 -0
  91. package/dist/utils/tokenEstimator.d.ts.map +1 -0
  92. package/dist/utils/tokenEstimator.js +17 -0
  93. package/dist/utils/tokenEstimator.js.map +1 -0
  94. package/khoj-context.schema.json +48 -0
  95. package/package.json +75 -0
@@ -0,0 +1,247 @@
1
/**
 * AnimationExtractor — animation intelligence gathered in three passes:
 *
 *   Pass 1: CSS @keyframes + transitions (read from document.styleSheets)
 *   Pass 2: JS animation library detection (GSAP, Framer Motion, AOS, Lottie, etc.)
 *   Pass 3: GIF purpose inference from context
 *
 * @param page  Object exposing `evaluate(fn)`; passes 1 and 2 run through it.
 * @param gifs  GIF descriptors handed to pass 3.
 * @returns Resolves to the results of all three passes plus a one-line `summary`.
 */
export async function extractAnimations(page, gifs) {
    // Kick off both in-page passes before awaiting either, so they overlap.
    const cssPass = extractCSSAnimations(page);
    const jsPass = extractJSAnimations(page);
    const [css, js] = await Promise.all([cssPass, jsPass]);

    // Pass 3 is synchronous and only needs the GIF list.
    const gifAnimations = inferGifPurpose(gifs);

    const { animations: cssAnimations, transitions: cssTransitions } = css;
    const { jsAnimations, scrollAnimations } = js;

    return {
        cssAnimations,
        cssTransitions,
        jsAnimations,
        scrollAnimations,
        gifAnimations,
        summary: buildSummary(cssAnimations, cssTransitions, jsAnimations, scrollAnimations, gifAnimations),
    };
}
24
+ // ─── Pass 1: CSS ──────────────────────────────────────────────────────────────
25
/**
 * Pass 1 — reads CSS animations and transitions out of the page's style sheets.
 *
 * Everything runs inside `page.evaluate`, so all helpers are declared inside the
 * callback. Only the top-level rules of each sheet are visited. A sheet whose
 * `cssRules` cannot be read (typically a cross-origin sheet) contributes nothing.
 *
 * @param page  Object exposing `evaluate(fn)`.
 * @returns Resolves to `{ animations, transitions }`, each capped at 50 entries.
 *          One entry is recorded per distinct selector text for each list.
 */
async function extractCSSAnimations(page) {
    return page.evaluate(() => {
        const MAX_RESULTS = 50;
        const MAX_SELECTOR_LENGTH = 120;

        // Rules of one sheet, or [] when the browser refuses access to them.
        const rulesOf = (sheet) => {
            try {
                return Array.from(sheet.cssRules);
            }
            catch {
                return [];
            }
        };

        // Split a comma-separated CSS list without breaking on commas that sit
        // inside parentheses, e.g. `cubic-bezier(0.4, 0, 0.2, 1)`.
        const splitTopLevel = (value) => value.split(/,(?![^(]*\))/);

        const allRules = Array.from(document.styleSheets).flatMap(rulesOf);

        // Collect every @keyframes first: name → short description of its steps.
        const keyframeMap = new Map();
        for (const rule of allRules) {
            if (!(rule instanceof CSSKeyframesRule))
                continue;
            const steps = Array.from(rule.cssRules)
                .map((r) => `${r.keyText}: ${r.style.cssText.slice(0, 80)}`)
                .join('; ');
            keyframeMap.set(rule.name, steps);
        }

        // Then map animation/transition declarations on style rules.
        const animationRules = [];
        const transitionRules = [];
        const processedSelectors = new Set();

        for (const rule of allRules) {
            if (!(rule instanceof CSSStyleRule))
                continue;
            const { style, selectorText } = rule;
            const selector = selectorText.slice(0, MAX_SELECTOR_LENGTH);

            // CSS animations
            const animName = style.getPropertyValue('animation-name') ||
                style.getPropertyValue('animation')?.split(' ')[0] || '';
            const animKey = `anim:${selectorText}`;
            if (animName && animName !== 'none' && !processedSelectors.has(animKey)) {
                processedSelectors.add(animKey);
                const keyframeDesc = keyframeMap.get(animName) ?? '';
                let trigger = 'page-load';
                if (selectorText.includes(':hover'))
                    trigger = 'hover';
                else if (selectorText.includes(':focus'))
                    trigger = 'focus';
                animationRules.push({
                    name: animName,
                    selector,
                    duration: style.getPropertyValue('animation-duration') || '1s',
                    timingFunction: style.getPropertyValue('animation-timing-function') || 'ease',
                    iterationCount: style.getPropertyValue('animation-iteration-count') || '1',
                    delay: style.getPropertyValue('animation-delay') || '0s',
                    trigger,
                    description: keyframeDesc
                        ? `"${animName}" — ${keyframeDesc.slice(0, 150)}`
                        : `Animation "${animName}" applied`,
                });
            }

            // CSS transitions
            const transitionProp = style.getPropertyValue('transition');
            const transKey = `trans:${selectorText}`;
            if (transitionProp && transitionProp !== 'none' && !processedSelectors.has(transKey)) {
                processedSelectors.add(transKey);
                let trigger = 'unknown';
                if (selectorText.includes(':hover'))
                    trigger = 'hover';
                else if (selectorText.includes(':focus'))
                    trigger = 'focus';
                else if (selectorText.includes(':active'))
                    trigger = 'active';
                // First token of each top-level list item is the transitioned property.
                const props = splitTopLevel(transitionProp).map((t) => t.trim().split(/\s+/)[0]);
                transitionRules.push({
                    selector,
                    properties: props,
                    // Accept both second and millisecond units.
                    duration: transitionProp.match(/[\d.]+m?s/)?.[0] ?? '0.3s',
                    timingFunction: transitionProp.match(/ease[a-z-]*|linear|cubic-bezier\([^)]+\)/)?.[0] ?? 'ease',
                    trigger,
                    description: `Transition on ${props.join(', ')} — ${trigger}`,
                });
            }
        }

        return {
            animations: animationRules.slice(0, MAX_RESULTS),
            transitions: transitionRules.slice(0, MAX_RESULTS),
        };
    });
}
110
+ // ─── Pass 2: JS Libraries ─────────────────────────────────────────────────────
111
/**
 * Pass 2 — detects JavaScript animation libraries and scroll-driven animations.
 *
 * Detection is presence-based only: it looks for well-known globals on `window`
 * and marker attributes/classes in the DOM. No running animation is inspected.
 * Runs inside `page.evaluate`, so all helpers are declared inside the callback.
 *
 * @param page  Object exposing `evaluate(fn)`.
 * @returns Resolves to `{ jsAnimations, scrollAnimations }`. Each scroll library
 *          contributes at most 20 elements.
 */
async function extractJSAnimations(page) {
    return page.evaluate(() => {
        const MAX_SCROLL_ELEMENTS = 20;
        const win = window;
        const has = (selector) => Boolean(document.querySelector(selector));
        const firstN = (selector) => Array.from(document.querySelectorAll(selector)).slice(0, MAX_SCROLL_ELEMENTS);

        // `.firstClass` suffix for an element, or '' when it has no class.
        // Reads the attribute (not `className`) so SVG elements work, and trims
        // so a leading space does not produce an empty class name.
        const firstClassSuffix = (el) => {
            const first = (el.getAttribute('class') ?? '').trim().split(/\s+/)[0];
            return first ? `.${first}` : '';
        };

        // Library detectors, evaluated in this order.
        const detectors = [
            {
                library: 'gsap',
                selector: 'document',
                description: 'GSAP detected — timeline-based animation library in use',
                present: () => Boolean(win['gsap']),
            },
            {
                library: 'framer-motion',
                selector: '[data-framer-motion]',
                description: 'Framer Motion detected — declarative React animation library',
                present: () => Boolean(win['Motion'] || win['__framer_motion__']) || has('[data-framer-motion]'),
            },
            {
                library: 'animejs',
                selector: 'document',
                description: 'Anime.js detected — lightweight JavaScript animation library',
                present: () => Boolean(win['anime']),
            },
            {
                library: 'lottie',
                selector: 'lottie-player, [data-lottie]',
                description: 'Lottie detected — JSON-based vector animations in use',
                present: () => Boolean(win['lottie'] || win['Lottie']) || has('lottie-player, [data-lottie]'),
            },
            {
                // Heuristic: the API existing is not enough, so also require
                // elements carrying a conventional marker attribute/class.
                library: 'web-animations-api',
                selector: '[data-animate], .animate',
                description: 'Web Animations API in use',
                present: () => typeof Element.prototype.animate === 'function' &&
                    document.querySelectorAll('[data-animate], .animate').length > 0,
            },
        ];

        const jsAnimations = detectors
            .filter((d) => d.present())
            .map(({ library, selector, description }) => ({
                library,
                selector,
                description,
                trigger: 'page-load',
            }));

        const scrollAnimations = [];

        // AOS (scroll animations)
        if (win['AOS'] || has('[data-aos]')) {
            for (const el of firstN('[data-aos]')) {
                const animationType = el.getAttribute('data-aos') ?? 'fade';
                scrollAnimations.push({
                    selector: el.tagName.toLowerCase() + firstClassSuffix(el),
                    library: 'aos',
                    animationType,
                    description: `AOS scroll animation: ${animationType} — triggers when element enters viewport`,
                });
            }
        }

        // GSAP ScrollTrigger heuristic
        if (win['ScrollTrigger'] || (win['gsap'] && has('[data-scroll]'))) {
            for (const el of firstN('[data-scroll], [data-scroll-trigger]')) {
                scrollAnimations.push({
                    selector: el.tagName.toLowerCase(),
                    library: 'gsap-scrolltrigger',
                    animationType: el.getAttribute('data-scroll') ?? 'scroll-driven',
                    description: 'GSAP ScrollTrigger — animation tied to scroll position',
                });
            }
        }

        return { jsAnimations, scrollAnimations };
    });
}
191
+ // ─── Pass 3: GIF Purpose Inference ───────────────────────────────────────────
192
/**
 * Pass 3 — guesses what each GIF is for from its alt text, selector and URL.
 *
 * The three fields are joined and lower-cased, then tested against keyword
 * groups in priority order: loading spinner, demo/tutorial, decorative.
 * Matching is by substring, so this is a heuristic rather than a classifier.
 *
 * @param gifs  Array of `{ url, selector, alt }`; missing fields count as ''.
 * @returns One `{ url, selector, alt, purpose }` per input, in input order.
 *          `purpose` is 'loading-spinner', 'tutorial', 'product-demo',
 *          'decorative' or 'unknown'.
 */
function inferGifPurpose(gifs) {
    const LOADING = /load|spin|wait|progress|preload|buffer/;
    const DEMO = /demo|preview|product|feature|how|tutorial|guide|walkthrough|example/;
    const DECORATIVE = /bg|background|decoration|hero|banner|pattern|abstract/;

    return gifs.map((gif) => {
        const combined = `${gif.alt ?? ''} ${gif.selector ?? ''} ${gif.url ?? ''}`.toLowerCase();

        let purpose = 'unknown';
        if (LOADING.test(combined)) {
            purpose = 'loading-spinner';
        }
        else if (DEMO.test(combined)) {
            // Look for "tutorial" in the same text that triggered this branch,
            // so a match in the URL or a mixed-case selector is honoured too.
            purpose = combined.includes('tutorial') ? 'tutorial' : 'product-demo';
        }
        else if (DECORATIVE.test(combined)) {
            purpose = 'decorative';
        }

        return {
            url: gif.url,
            selector: gif.selector,
            alt: gif.alt,
            purpose,
        };
    });
}
218
+ // ─── Summary Builder ──────────────────────────────────────────────────────────
219
/**
 * Condenses the results of all passes into one human-readable line.
 *
 * Each non-empty category contributes one sentence fragment; fragments are
 * joined with '. ' and terminated with '.'. When every category is empty the
 * fixed text 'No animations detected.' is returned.
 *
 * @param cssAnimations     Entries with a `name`.
 * @param cssTransitions    Entries with a `trigger`.
 * @param jsAnimations      Entries with a `library`.
 * @param scrollAnimations  Entries with a `library` (only the first is named).
 * @param gifAnimations     Entries with a `purpose`.
 * @returns The summary sentence.
 */
function buildSummary(cssAnimations, cssTransitions, jsAnimations, scrollAnimations, gifAnimations) {
    const countWhere = (items, predicate) => items.reduce((total, item) => (predicate(item) ? total + 1 : total), 0);
    const distinct = (values) => Array.from(new Set(values));
    const sentences = [];

    if (cssAnimations.length > 0) {
        // Name at most three distinct animations, but report the full count.
        const shownNames = distinct(cssAnimations.map((anim) => anim.name)).slice(0, 3);
        sentences.push(`CSS animations: ${shownNames.join(', ')} (${cssAnimations.length} total)`);
    }

    if (cssTransitions.length > 0) {
        const onHover = countWhere(cssTransitions, (tr) => tr.trigger === 'hover');
        const hoverNote = onHover > 0 ? ` (${onHover} on hover)` : '';
        sentences.push(`${cssTransitions.length} CSS transitions${hoverNote}`);
    }

    if (jsAnimations.length > 0) {
        sentences.push(`JS: ${distinct(jsAnimations.map((entry) => entry.library)).join(', ')}`);
    }

    if (scrollAnimations.length > 0) {
        const firstLibrary = scrollAnimations[0]?.library ?? 'scroll';
        sentences.push(`${scrollAnimations.length} scroll animations (${firstLibrary})`);
    }

    if (gifAnimations.length > 0) {
        const spinnerCount = countWhere(gifAnimations, (g) => g.purpose === 'loading-spinner');
        const demoCount = countWhere(gifAnimations, (g) => g.purpose === 'product-demo');
        const notes = [];
        if (spinnerCount > 0)
            notes.push(`${spinnerCount} spinner`);
        if (demoCount > 0)
            notes.push(`${demoCount} demo`);
        const plural = gifAnimations.length > 1 ? 's' : '';
        const breakdown = notes.length > 0 ? ` (${notes.join(', ')})` : '';
        sentences.push(`${gifAnimations.length} GIF${plural}${breakdown}`);
    }

    if (sentences.length === 0) {
        return 'No animations detected.';
    }
    return sentences.join('. ') + '.';
}
247
+ //# sourceMappingURL=AnimationExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AnimationExtractor.js","sourceRoot":"","sources":["../../src/extractors/AnimationExtractor.ts"],"names":[],"mappings":"AAcA;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACnC,IAAU,EACV,IAAkB;IAElB,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC5C,oBAAoB,CAAC,IAAI,CAAC;QAC1B,mBAAmB,CAAC,IAAI,CAAC;KAC5B,CAAC,CAAC;IAEH,MAAM,aAAa,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;IAE5C,MAAM,OAAO,GAAG,YAAY,CACxB,SAAS,CAAC,UAAU,EACpB,SAAS,CAAC,WAAW,EACrB,QAAQ,CAAC,YAAY,EACrB,QAAQ,CAAC,gBAAgB,EACzB,aAAa,CAChB,CAAC;IAEF,OAAO;QACH,aAAa,EAAE,SAAS,CAAC,UAAU;QACnC,cAAc,EAAE,SAAS,CAAC,WAAW;QACrC,YAAY,EAAE,QAAQ,CAAC,YAAY;QACnC,gBAAgB,EAAE,QAAQ,CAAC,gBAAgB;QAC3C,aAAa;QACb,OAAO;KACV,CAAC;AACN,CAAC;AAED,iFAAiF;AAEjF,KAAK,UAAU,oBAAoB,CAC/B,IAAU;IAEV,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAiE,EAAE;QACpF,MAAM,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAG,8BAA8B;QAC/E,MAAM,cAAc,GAAmB,EAAE,CAAC;QAC1C,MAAM,eAAe,GAAoB,EAAE,CAAC;QAE5C,+BAA+B;QAC/B,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,IAAI,KAAkB,CAAC;YACvB,IAAI,CAAC;gBAAC,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC;gBAAC,SAAS;YAAC,CAAC;YAEnD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACnC,IAAI,IAAI,YAAY,gBAAgB,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;yBAClC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAI,CAAqB,CAAC,OAAO,KAAM,CAAqB,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;yBACrG,IAAI,CAAC,IAAI,CAAC,CAAC;oBAChB,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBACtC,CAAC;YACL,CAAC;QACL,CAAC;QAED,uDAAuD;QACvD,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAU,CAAC;QAE7C,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,IAAI,KAAkB,CAAC;YACvB,IAAI,CAAC;gBAAC,KAAK,GAAG,KAAK,CAAC,QAAQ,CAAC;YAAC,CAAC;YAAC,MAAM,CAAC;gBAAC,SAAS;YAAC,CAAC;YAEnD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACnC,IAAI,CAAC,CAAC,IAAI,YAAY,YAAY,CAAC;oBAAE,SAAS;gBAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;gBAEzB,iBAAiB;gBACjB,MAAM,QAAQ,GAAG,
KAAK,CAAC,gBAAgB,CAAC,gBAAgB,CAAC;oBACrD,KAAK,CAAC,gBAAgB,CAAC,WAAW,CAAC,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAE7D,IAAI,QAAQ,IAAI,QAAQ,KAAK,MAAM,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC;oBAC1F,kBAAkB,CAAC,GAAG,CAAC,QAAQ,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;oBACpD,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;oBACrD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO;wBAC1D,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO;4BAC5C,CAAC,CAAC,WAAW,CAAC;oBAEtB,cAAc,CAAC,IAAI,CAAC;wBAChB,IAAI,EAAE,QAAQ;wBACd,QAAQ,EAAE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;wBACzC,QAAQ,EAAE,KAAK,CAAC,gBAAgB,CAAC,oBAAoB,CAAC,IAAI,IAAI;wBAC9D,cAAc,EAAE,KAAK,CAAC,gBAAgB,CAAC,2BAA2B,CAAC,IAAI,MAAM;wBAC7E,cAAc,EAAE,KAAK,CAAC,gBAAgB,CAAC,2BAA2B,CAAC,IAAI,GAAG;wBAC1E,KAAK,EAAE,KAAK,CAAC,gBAAgB,CAAC,iBAAiB,CAAC,IAAI,IAAI;wBACxD,OAAO;wBACP,WAAW,EAAE,YAAY;4BACrB,CAAC,CAAC,IAAI,QAAQ,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;4BACjD,CAAC,CAAC,cAAc,QAAQ,WAAW;qBAC1C,CAAC,CAAC;gBACP,CAAC;gBAED,kBAAkB;gBAClB,MAAM,cAAc,GAAG,KAAK,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC;gBAC5D,IAAI,cAAc,IAAI,cAAc,KAAK,MAAM,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,YAAY,EAAE,CAAC,EAAE,CAAC;oBACvG,kBAAkB,CAAC,GAAG,CAAC,SAAS,IAAI,CAAC,YAAY,EAAE,CAAC,CAAC;oBACrD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO;wBAC1D,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO;4BAC5C,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,QAAQ;gCAC9C,CAAC,CAAC,SAAS,CAAC;oBAExB,MAAM,KAAK,GAAG,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAE3E,eAAe,CAAC,IAAI,CAAC;wBACjB,QAAQ,EAAE,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;wBACzC,UAAU,EAAE,KAAK;wBACjB,QAAQ,EAAE,cAAc,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,MAAM;wBACxD,cAAc,EAAE,cAAc,CAAC,KAAK,CAAC,0CAA0C,CAAC,EAAE,
CAAC,CAAC,CAAC,IAAI,MAAM;wBAC/F,OAAO,EAAE,OAAmC;wBAC5C,WAAW,EAAE,iBAAiB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,OAAO,EAAE;qBAChE,CAAC,CAAC;gBACP,CAAC;YACL,CAAC;QACL,CAAC;QAED,OAAO;YACH,UAAU,EAAE,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;YACvC,WAAW,EAAE,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;SAC5C,CAAC;IACN,CAAC,CAAC,CAAC;AACP,CAAC;AAED,iFAAiF;AAEjF,KAAK,UAAU,mBAAmB,CAC9B,IAAU;IAEV,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAyE,EAAE;QAC5F,MAAM,GAAG,GAAG,MAA4C,CAAC;QACzD,MAAM,YAAY,GAAkB,EAAE,CAAC;QACvC,MAAM,gBAAgB,GAAsB,EAAE,CAAC;QAE/C,OAAO;QACP,IAAI,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACd,YAAY,CAAC,IAAI,CAAC;gBACd,OAAO,EAAE,MAAM;gBACf,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,yDAAyD;gBACtE,OAAO,EAAE,WAAW;aACvB,CAAC,CAAC;QACP,CAAC;QAED,gBAAgB;QAChB,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,mBAAmB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,sBAAsB,CAAC,EAAE,CAAC;YAC9F,YAAY,CAAC,IAAI,CAAC;gBACd,OAAO,EAAE,eAAe;gBACxB,QAAQ,EAAE,sBAAsB;gBAChC,WAAW,EAAE,8DAA8D;gBAC3E,OAAO,EAAE,WAAW;aACvB,CAAC,CAAC;QACP,CAAC;QAED,WAAW;QACX,IAAI,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACf,YAAY,CAAC,IAAI,CAAC;gBACd,OAAO,EAAE,SAAS;gBAClB,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,8DAA8D;gBAC3E,OAAO,EAAE,WAAW;aACvB,CAAC,CAAC;QACP,CAAC;QAED,SAAS;QACT,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,8BAA8B,CAAC,EAAE,CAAC;YAC3F,YAAY,CAAC,IAAI,CAAC;gBACd,OAAO,EAAE,QAAQ;gBACjB,QAAQ,EAAE,8BAA8B;gBACxC,WAAW,EAAE,uDAAuD;gBACpE,OAAO,EAAE,WAAW;aACvB,CAAC,CAAC;QACP,CAAC;QAED,qBAAqB;QACrB,IAAI,OAAO,OAAO,CAAC,SAAS,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;YAClD,kGAAkG;YAClG,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,0BAA0B,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAClF,IAAI,QAAQ,EAAE,CAAC;gBACX,YAAY,CAAC,IAAI,CAAC;oBACd,OAAO,EAAE,oBAAoB;oBAC7B,QAAQ,EAAE,0BAA0B;oBACpC,WAAW,EAAE,2BAA2B;oBACxC,OAAO,EAAE,WAAW;iBACvB,CAAC,CAAC;YACP,CAAC;QACL,CAAC;QAED,0BAA0B;QAC1B,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC,EAAE,CAAC;YACrD,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAChF,MAAM,CAAC,OAAO,CAAC,CAAC,EAAE,EAA
E,EAAE;gBAClB,gBAAgB,CAAC,IAAI,CAAC;oBAClB,QAAQ,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACtG,OAAO,EAAE,KAAK;oBACd,aAAa,EAAE,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,MAAM;oBACpD,WAAW,EAAE,yBAAyB,EAAE,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,MAAM,0CAA0C;iBACxH,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;QACP,CAAC;QAED,+BAA+B;QAC/B,IAAI,GAAG,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,eAAe,CAAC,CAAC,EAAE,CAAC;YACnF,QAAQ,CAAC,gBAAgB,CAAC,sCAAsC,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;gBAC7E,gBAAgB,CAAC,IAAI,CAAC;oBAClB,QAAQ,EAAE,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;oBAClC,OAAO,EAAE,oBAAoB;oBAC7B,aAAa,EAAE,EAAE,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,eAAe;oBAChE,WAAW,EAAE,wDAAwD;iBACxE,CAAC,CAAC;YACP,CAAC,CAAC,CAAC;QACP,CAAC;QAED,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC9C,CAAC,CAAC,CAAC;AACP,CAAC;AAED,gFAAgF;AAEhF,SAAS,eAAe,CAAC,IAAkB;IACvC,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QACpB,MAAM,QAAQ,GAAG,GAAG,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;QAEvE,IAAI,OAAO,GAAe,SAAS,CAAC;QAEpC,IAAI,wCAAwC,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1D,OAAO,GAAG,iBAAiB,CAAC;QAChC,CAAC;aAAM,IAAI,qEAAqE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9F,OAAO,GAAG,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC;gBACrF,CAAC,CAAC,UAAU;gBACZ,CAAC,CAAC,cAAc,CAAC;QACzB,CAAC;aAAM,IAAI,uDAAuD,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChF,OAAO,GAAG,YAAY,CAAC;QAC3B,CAAC;aAAM,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7D,OAAO,GAAG,YAAY,CAAC;QAC3B,CAAC;QAED,OAAO;YACH,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,QAAQ,EAAE,GAAG,CAAC,QAAQ;YACtB,GAAG,EAAE,GAAG,CAAC,GAAG;YACZ,OAAO;SACV,CAAC;IACN,CAAC,CAAC,CAAC;AACP,CAAC;AAED,iFAAiF;AAEjF,SAAS,YAAY,CACjB,aAA6B,EAC7B,cAA+B,EAC/B,YAA2B,EAC3B,gBAAmC,EACnC,aAA6B;IAE7B,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,aAAa,CAAC,MAAM,GAAG,CAA
C,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpF,KAAK,CAAC,IAAI,CAAC,mBAAmB,KAAK,KAAK,aAAa,CAAC,MAAM,SAAS,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;QAC9E,KAAK,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,MAAM,mBAAmB,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,UAAU,YAAY,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC/G,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,gBAAgB,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,QAAQ,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,MAAM,uBAAuB,GAAG,GAAG,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,iBAAiB,CAAC,CAAC,MAAM,CAAC;QACrF,MAAM,KAAK,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;QAC/E,MAAM,MAAM,GAAG,CAAC,QAAQ,GAAG,CAAC,IAAI,GAAG,QAAQ,UAAU,EAAE,KAAK,GAAG,CAAC,IAAI,GAAG,KAAK,OAAO,CAAC;aAC/E,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,IAAI,CAAC,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,MAAM,OAAO,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IACnH,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,yBAAyB,CAAC;AACjF,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { Page } from 'playwright';
2
+ import type { AssetMap } from '../types/KhojContext.js';
3
+ /**
4
+ * Collects all assets from the page (relative URLs are resolved against `baseUrl`):
5
+ * - Images and GIFs with metadata (alt, dimensions, lazy-load flag, CSS selector)
6
+ * - External scripts (those whose `src` does not start with the page origin)
7
+ * - Favicon and icon links
8
+ * - Font URLs from @font-face rules
9
+ */
10
+ export declare function extractAssets(page: Page, baseUrl: string): Promise<AssetMap>;
11
+ //# sourceMappingURL=AssetExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AssetExtractor.d.ts","sourceRoot":"","sources":["../../src/extractors/AssetExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,QAAQ,EAAc,MAAM,yBAAyB,CAAC;AAEpE;;;;;;GAMG;AACH,wBAAsB,aAAa,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CA2HlF"}
@@ -0,0 +1,124 @@
1
/**
 * Collects all assets from the page:
 * - Images and GIFs with metadata (alt, dimensions, lazy-load flag, CSS selector)
 * - External (cross-origin) scripts
 * - Favicon and icon links
 * - Font URLs from @font-face rules
 *
 * GIF URLs found on `window.__khoj_gifs__` are merged in as GIF entries with
 * empty metadata when no `<img>` already reported the same URL.
 *
 * @param page     Object exposing `evaluate(fn, arg)`.
 * @param baseUrl  Base against which relative page URLs are resolved.
 * @returns Resolves to `{ images, gifs, fonts, icons, scripts }`; the three URL
 *          lists are de-duplicated.
 */
export async function extractAssets(page, baseUrl) {
    const result = await page.evaluate(({ base }) => {
        // Resolve `url` against `against`; data: URLs and unparsable input pass through.
        const toAbsolute = (url, against = base) => {
            if (!url || url.startsWith('data:'))
                return url;
            try {
                return new URL(url, against).href;
            }
            catch {
                return url;
            }
        };

        // `#id` when available, otherwise up to four `tag.firstClass` steps below <body>.
        const getCssSelector = (el) => {
            if (el.id)
                return `#${el.id}`;
            const parts = [];
            let current = el;
            while (current && current !== document.body && parts.length < 4) {
                let selector = current.tagName.toLowerCase();
                if (current.className) {
                    const firstClass = current.className.toString().trim().split(/\s+/)[0];
                    if (firstClass)
                        selector += `.${firstClass}`;
                }
                parts.unshift(selector);
                current = current.parentElement;
            }
            return parts.join(' > ');
        };

        // `.gif` must close the URL or be followed by a query/fragment, so a
        // host such as `example.gifts.com` is not mistaken for a GIF.
        const isGif = (url) => /\.gif(?:[?#]|$)/i.test(url);

        const isRealUrl = (value) => Boolean(value) && !value.startsWith('data:');

        // A string-prefix test would treat `https://a.com.evil.com` as same-origin.
        const isCrossOrigin = (absoluteUrl) => {
            try {
                return new URL(absoluteUrl).origin !== window.location.origin;
            }
            catch {
                return true;
            }
        };

        // ── Images ────────────────────────────────────────────────────────────
        const rawImages = [];
        document.querySelectorAll('img').forEach((img) => {
            const lazySrc = img.getAttribute('data-src');
            // Lazy loaders often leave an empty or data: placeholder in `src`
            // and keep the real URL in `data-src`, so fall back to it.
            const src = [img.getAttribute('src'), lazySrc].find(isRealUrl);
            if (!src)
                return;
            const absoluteSrc = toAbsolute(src);
            rawImages.push({
                url: absoluteSrc,
                alt: img.getAttribute('alt') ?? '',
                type: isGif(absoluteSrc) ? 'gif' : 'image',
                width: img.naturalWidth || parseInt(img.getAttribute('width') ?? '0', 10) || null,
                height: img.naturalHeight || parseInt(img.getAttribute('height') ?? '0', 10) || null,
                isLazy: img.getAttribute('loading') === 'lazy' || Boolean(lazySrc),
                selector: getCssSelector(img),
            });
        });

        // ── Scripts ───────────────────────────────────────────────────────────
        const scripts = [];
        document.querySelectorAll('script[src]').forEach((s) => {
            const src = s.src;
            if (src && isCrossOrigin(src)) {
                scripts.push(toAbsolute(src));
            }
        });

        // ── Icons / Favicons ──────────────────────────────────────────────────
        const icons = [];
        document.querySelectorAll('link[rel~="icon"], link[rel~="apple-touch-icon"], link[rel~="shortcut"]').forEach((l) => {
            const href = l.href;
            if (href)
                icons.push(toAbsolute(href));
        });

        // ── Fonts ─────────────────────────────────────────────────────────────
        const fonts = [];
        for (const sheet of Array.from(document.styleSheets)) {
            try {
                // URLs inside a style sheet are relative to that sheet; sheets
                // without an href (inline <style>) fall back to the page base.
                const sheetBase = sheet.href || base;
                for (const rule of Array.from(sheet.cssRules)) {
                    if (!(rule instanceof CSSFontFaceRule))
                        continue;
                    const src = rule.style.getPropertyValue('src');
                    for (const match of src.matchAll(/url\(['"]?([^'")\s]+)['"]?\)/g)) {
                        fonts.push(toAbsolute(match[1], sheetBase));
                    }
                }
            }
            catch {
                // Rules of this sheet are not readable; skip it.
                continue;
            }
        }

        return {
            rawImages,
            scripts: [...new Set(scripts)],
            icons: [...new Set(icons)],
            fonts: [...new Set(fonts)],
        };
    }, { base: baseUrl });

    // Retrieve GIFs intercepted from network responses
    const networkGifs = await page.evaluate(() => {
        return window['__khoj_gifs__'] ?? [];
    });

    const allImages = result.rawImages;

    // Merge network-discovered GIFs that weren't in <img> tags
    const knownUrls = new Set(allImages.map((i) => i.url));
    for (const gifUrl of networkGifs) {
        if (knownUrls.has(gifUrl))
            continue;
        // Remember it so a URL repeated in the network list is added only once.
        knownUrls.add(gifUrl);
        allImages.push({
            url: gifUrl,
            alt: '',
            type: 'gif',
            width: null,
            height: null,
            isLazy: false,
            selector: '',
        });
    }

    return {
        images: allImages.filter((i) => i.type === 'image'),
        gifs: allImages.filter((i) => i.type === 'gif'),
        fonts: result.fonts,
        icons: result.icons,
        scripts: result.scripts,
    };
}
124
+ //# sourceMappingURL=AssetExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"AssetExtractor.js","sourceRoot":"","sources":["../../src/extractors/AssetExtractor.ts"],"names":[],"mappings":"AAGA;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,IAAU,EAAE,OAAe;IAC3D,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAC9B,CAAC,EAAE,IAAI,EAAoB,EAAwD,EAAE;QACjF,SAAS,UAAU,CAAC,GAAW;YAC3B,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO,GAAG,CAAC;YAChD,IAAI,CAAC;gBACD,OAAO,IAAI,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC;YACnC,CAAC;YAAC,MAAM,CAAC;gBACL,OAAO,GAAG,CAAC;YACf,CAAC;QACL,CAAC;QAED,SAAS,cAAc,CAAC,EAAW;YAC/B,IAAI,EAAE,CAAC,EAAE;gBAAE,OAAO,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAa,EAAE,CAAC;YAC3B,IAAI,OAAO,GAAmB,EAAE,CAAC;YACjC,OAAO,OAAO,IAAI,OAAO,KAAK,QAAQ,CAAC,IAAI,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9D,IAAI,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;gBAC7C,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;oBACpB,MAAM,UAAU,GAAG,OAAO,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;oBACvE,IAAI,UAAU;wBAAE,QAAQ,IAAI,IAAI,UAAU,EAAE,CAAC;gBACjD,CAAC;gBACD,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;gBACxB,OAAO,GAAG,OAAO,CAAC,aAAa,CAAC;YACpC,CAAC;YACD,OAAO,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,CAAC;QAED,SAAS,KAAK,CAAC,GAAW;YACtB,OAAO,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpF,CAAC;QAED,yEAAyE;QACzE,MAAM,SAAS,GAAiB,EAAE,CAAC;QACnC,QAAQ,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;YAC7C,MAAM,GAAG,GAAG,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,YAAY,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC;YAC1E,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO;YAE5C,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YACpC,SAAS,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,WAAW;gBAChB,GAAG,EAAE,GAAG,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE;gBAClC,IAAI,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO;gBAC1C,KAAK,EAAE,GAAG,CAAC,YAAY,IAAI,QAAQ,CAAC,GAAG,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC,IAAI,IAAI;gBAC7E,MAAM,EAAE,GAAG,CAAC,aAAa,IAAI,QAAQ,CAAC,GAAG,CAAC,
YAAY,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,IAAI,IAAI;gBAChF,MAAM,EAAE,GAAG,CAAC,YAAY,CAAC,SAAS,CAAC,KAAK,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,YAAY,CAAC,UAAU,CAAC;gBAChF,QAAQ,EAAE,cAAc,CAAC,GAAG,CAAC;aAChC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;QAEH,yEAAyE;QACzE,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YACnD,MAAM,GAAG,GAAI,CAAuB,CAAC,GAAG,CAAC;YACzC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACjD,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;YAClC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,yEAAyE;QACzE,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,QAAQ,CAAC,gBAAgB,CAAC,yEAAyE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YAC/G,MAAM,IAAI,GAAI,CAAqB,CAAC,IAAI,CAAC;YACzC,IAAI,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,yEAAyE;QACzE,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,IAAI,CAAC;gBACD,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;oBAC5C,IAAI,IAAI,YAAY,eAAe,EAAE,CAAC;wBAClC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;wBAC/C,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,+BAA+B,CAAC,IAAI,EAAE,CAAC;wBACjE,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;4BAClB,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;4BACzD,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC;gCAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;wBAC3D,CAAC,CAAC,CAAC;oBACP,CAAC;gBACL,CAAC;YACL,CAAC;YAAC,MAAM,CAAC;gBACL,SAAS;YACb,CAAC;QACL,CAAC;QAED,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;IAC7H,CAAC,EACD,EAAE,IAAI,EAAE,OAAO,EAAE,CACpB,CAAC;IAEF,mDAAmD;IACnD,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAa,EAAE;QACnD,OAAQ,MAAgD,CAAC,aAAa,IAAI,EAAE,CAAC;IACjF,CAAC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;IAEnC,2DAA2D;IAC3D,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,SAAS,CAA
C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACvD,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QAC/B,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACzB,SAAS,CAAC,IAAI,CAAC;gBACX,GAAG,EAAE,MAAM;gBACX,GAAG,EAAE,EAAE;gBACP,IAAI,EAAE,KAAK;gBACX,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,EAAE;aACf,CAAC,CAAC;QACP,CAAC;IACL,CAAC;IAED,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;IAC3D,MAAM,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC;IAEvD,OAAO;QACH,MAAM;QACN,IAAI;QACJ,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,KAAK,EAAE,MAAM,CAAC,KAAK;QACnB,OAAO,EAAE,MAAM,CAAC,OAAO;KAC1B,CAAC;AACN,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { Page } from 'playwright';
2
+ import type { ContentBlock } from '../types/KhojContext.js';
3
+ /**
4
+ * Extracts structured content blocks:
5
+ * - Headings h1–h6 with level
6
+ * - Paragraphs of at least three words (truncated to 300 chars)
7
+ * - Button labels
8
+ * - Anchor links with href
9
+ * - Form labels
10
+ * Deduplicates by block type and text.
11
+ */
12
+ export declare function extractContent(page: Page): Promise<ContentBlock[]>;
13
+ //# sourceMappingURL=ContentExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ContentExtractor.d.ts","sourceRoot":"","sources":["../../src/extractors/ContentExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAI5D;;;;;;;;GAQG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAuDxE"}
@@ -0,0 +1,60 @@
1
const MAX_PARAGRAPH_LENGTH = 300;
/**
 * Extracts structured content blocks from the page:
 * - Headings h1–h6 with level
 * - Paragraphs of at least three words (truncated to 300 chars)
 * - Button labels (text content, or the `value` of `<input>` buttons)
 * - Anchor links with href (`javascript:` URLs are skipped)
 * - Form labels
 * Deduplicates by block type and text; blocks with empty text are never emitted.
 *
 * @param page Playwright page; only `page.evaluate` is used.
 * @returns Blocks in extraction order: headings (grouped by level), paragraphs,
 *          buttons, links, labels.
 */
export async function extractContent(page) {
    return page.evaluate(({ maxParaLen }) => {
        // This callback is handed to page.evaluate, so every helper it needs is
        // declared inside it rather than at module scope.
        const blocks = [];
        const seen = new Set();

        // Records a block unless its text is empty or the same type/text pair was already seen.
        function add(block) {
            const key = `${block.type}:${block.text}`;
            if (block.text.length === 0 || seen.has(key)) {
                return;
            }
            seen.add(key);
            blocks.push(block);
        }

        // Trimmed text content of an element ('' when there is none).
        function trimmedText(el) {
            return el.textContent?.trim() ?? '';
        }

        // Visible label of a button-like element.
        function buttonLabel(el) {
            const text = trimmedText(el);
            if (text) {
                return text;
            }
            // <input type="submit|button"> has no text content (textContent is '',
            // not null), so its label has to come from the value property.
            if (el.tagName === 'INPUT' && typeof el.value === 'string') {
                return el.value.trim();
            }
            return '';
        }

        // String href of an anchor, or '' when it is not a plain string.
        function linkTarget(el) {
            // SVG <a> elements expose href as an SVGAnimatedString object; calling
            // string methods on it would throw and abort the whole extraction.
            return typeof el.href === 'string' ? el.href : '';
        }

        // Headings
        for (let level = 1; level <= 6; level++) {
            document.querySelectorAll(`h${level}`).forEach((el) => {
                const text = trimmedText(el);
                if (text) {
                    add({ type: 'heading', text, level });
                }
            });
        }

        // Paragraphs
        document.querySelectorAll('p').forEach((el) => {
            const text = trimmedText(el).slice(0, maxParaLen);
            // Count words on any whitespace so newlines, tabs and repeated spaces
            // neither hide nor inflate the word count.
            const wordCount = text.split(/\s+/).filter(Boolean).length;
            if (wordCount >= 3) {
                add({ type: 'paragraph', text });
            }
        });

        // Buttons
        document
            .querySelectorAll('button, [role="button"], input[type="submit"], input[type="button"]')
            .forEach((el) => {
                const text = buttonLabel(el);
                if (text) {
                    add({ type: 'button', text });
                }
            });

        // Links
        document.querySelectorAll('a[href]').forEach((el) => {
            const text = trimmedText(el);
            const href = linkTarget(el);
            if (text && href && !href.startsWith('javascript:')) {
                add({ type: 'link', text, href });
            }
        });

        // Labels
        document.querySelectorAll('label').forEach((el) => {
            const text = trimmedText(el);
            if (text) {
                add({ type: 'label', text });
            }
        });

        return blocks;
    }, { maxParaLen: MAX_PARAGRAPH_LENGTH });
}
60
+ //# sourceMappingURL=ContentExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ContentExtractor.js","sourceRoot":"","sources":["../../src/extractors/ContentExtractor.ts"],"names":[],"mappings":"AAGA,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,IAAU;IAC3C,OAAO,IAAI,CAAC,QAAQ,CAChB,CAAC,EAAE,UAAU,EAA0B,EAAkB,EAAE;QACvD,MAAM,MAAM,GAAmB,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,SAAS,GAAG,CAAC,KAAmB;YAC5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1C,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACd,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACvB,CAAC;QACL,CAAC;QAED,WAAW;QACX,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YACtC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;gBAClD,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBAC1C,IAAI,IAAI;oBAAE,GAAG,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;YACpD,CAAC,CAAC,CAAC;QACP,CAAC;QAED,aAAa;QACb,QAAQ,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;YAC1C,MAAM,IAAI,GAAG,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACjE,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBAC9B,GAAG,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,UAAU;QACV,QAAQ,CAAC,gBAAgB,CAAC,qEAAqE,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;YAC5G,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAK,EAAuB,CAAC,KAAK,IAAI,EAAE,CAAC;YAC5E,IAAI,IAAI;gBAAE,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,QAAQ;QACR,QAAQ,CAAC,gBAAgB,CAAC,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;YAChD,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC1C,MAAM,IAAI,GAAI,EAAwB,CAAC,IAAI,CAAC;YAC5C,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBAClD,GAAG,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YACtC,CAAC;QACL,CAA
C,CAAC,CAAC;QAEH,SAAS;QACT,QAAQ,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;YAC9C,MAAM,IAAI,GAAG,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAC1C,IAAI,IAAI;gBAAE,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAClB,CAAC,EACD,EAAE,UAAU,EAAE,oBAAoB,EAAE,CACvC,CAAC;AACN,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { Page } from 'playwright';
2
+ import type { DomNode } from '../types/KhojContext.js';
3
+ /**
4
+ * Extracts a semantic DOM tree from the page body.
5
+ * - Skips noise tags (script, style, svg, etc.)
6
+ * - Caps depth at 10 levels
7
+ * - Trims text content to 200 characters
8
+ * - Captures: tag, id, classes, role, text
9
+ */
10
+ export declare function extractDom(page: Page): Promise<DomNode[]>;
11
+ //# sourceMappingURL=DomExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DomExtractor.d.ts","sourceRoot":"","sources":["../../src/extractors/DomExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAYvD;;;;;;GAMG;AACH,wBAAsB,UAAU,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,CA4D/D"}
@@ -0,0 +1,68 @@
1
+ // Tags to skip entirely — they add noise, not signal
2
+ const NOISE_TAGS = new Set([
3
+ 'script', 'style', 'noscript', 'svg', 'path', 'defs',
4
+ 'symbol', 'use', 'clippath', 'head', 'meta', 'link',
5
+ 'br', 'hr', 'wbr', 'template', 'iframe',
6
+ ]);
7
+ const MAX_DEPTH = 10;
8
+ const MAX_TEXT_LENGTH = 200;
9
+ /**
10
+ * Extracts a semantic DOM tree from the page body.
11
+ * - Skips noise tags (script, style, svg, etc.)
12
+ * - Caps depth at 10 levels
13
+ * - Trims text content to 200 characters
14
+ * - Captures: tag, id, classes, role, text
15
+ */
16
+ export async function extractDom(page) {
17
+ return page.evaluate(({ noiseTags, maxDepth, maxTextLen }) => {
18
+ const noiseSet = new Set(noiseTags);
19
+ function walk(el, depth) {
20
+ if (depth > maxDepth)
21
+ return null;
22
+ const tag = el.tagName.toLowerCase();
23
+ if (noiseSet.has(tag))
24
+ return null;
25
+ const children = [];
26
+ for (const child of Array.from(el.children)) {
27
+ const node = walk(child, depth + 1);
28
+ if (node)
29
+ children.push(node);
30
+ }
31
+ // Get direct text (not from children)
32
+ let text;
33
+ const directText = Array.from(el.childNodes)
34
+ .filter((n) => n.nodeType === Node.TEXT_NODE)
35
+ .map((n) => n.textContent?.trim() ?? '')
36
+ .join(' ')
37
+ .trim();
38
+ if (directText.length > 0) {
39
+ text = directText.slice(0, maxTextLen);
40
+ }
41
+ const node = {
42
+ tag,
43
+ classes: Array.from(el.classList),
44
+ children: children.length > 0 ? children : undefined,
45
+ };
46
+ const id = el.getAttribute('id');
47
+ if (id)
48
+ node.id = id;
49
+ const role = el.getAttribute('role');
50
+ if (role)
51
+ node.role = role;
52
+ if (text)
53
+ node.text = text;
54
+ // Prune leaf nodes with no info
55
+ if (!text && !node.id && node.classes.length === 0 && !node.role && !children.length) {
56
+ return null;
57
+ }
58
+ return node;
59
+ }
60
+ const body = document.body;
61
+ if (!body)
62
+ return [];
63
+ return Array.from(body.children)
64
+ .map((child) => walk(child, 1))
65
+ .filter((n) => n !== null);
66
+ }, { noiseTags: Array.from(NOISE_TAGS), maxDepth: MAX_DEPTH, maxTextLen: MAX_TEXT_LENGTH });
67
+ }
68
+ //# sourceMappingURL=DomExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DomExtractor.js","sourceRoot":"","sources":["../../src/extractors/DomExtractor.ts"],"names":[],"mappings":"AAGA,qDAAqD;AACrD,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC;IACvB,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;IACpD,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM;IACnD,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,QAAQ;CAC1C,CAAC,CAAC;AAEH,MAAM,SAAS,GAAG,EAAE,CAAC;AACrB,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAU;IACvC,OAAO,IAAI,CAAC,QAAQ,CAChB,CAAC,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAiE,EAAa,EAAE;QAC9G,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QAEpC,SAAS,IAAI,CAAC,EAAW,EAAE,KAAa;YACpC,IAAI,KAAK,GAAG,QAAQ;gBAAE,OAAO,IAAI,CAAC;YAElC,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;YACrC,IAAI,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO,IAAI,CAAC;YAEnC,MAAM,QAAQ,GAAc,EAAE,CAAC;YAC/B,KAAK,MAAM,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI;oBAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAClC,CAAC;YAED,sCAAsC;YACtC,IAAI,IAAwB,CAAC;YAC7B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC;iBACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,IAAI,CAAC,SAAS,CAAC;iBAC5C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;iBACvC,IAAI,CAAC,GAAG,CAAC;iBACT,IAAI,EAAE,CAAC;YAEZ,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACxB,IAAI,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YAC3C,CAAC;YAED,MAAM,IAAI,GAAY;gBAClB,GAAG;gBACH,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,SAAS,CAAC;gBACjC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;aACvD,CAAC;YAEF,MAAM,EAAE,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;YACjC,IAAI,EAAE;gBAAE,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC;YAErB,MAAM,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YACrC,IAAI,IAAI;gBAAE,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAE3B,IAAI,IAAI;gBAAE,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;YAE3B,gCAAgC;YAChC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,I
AAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;gBACnF,OAAO,IAAI,CAAC;YAChB,CAAC;YAED,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;QAC3B,IAAI,CAAC,IAAI;YAAE,OAAO,EAAE,CAAC;QAErB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC;aAC3B,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;aAC9B,MAAM,CAAC,CAAC,CAAC,EAAgB,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IACjD,CAAC,EACD,EAAE,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,EAAE,CAC1F,CAAC;AACN,CAAC"}
@@ -0,0 +1,10 @@
1
+ import type { Page } from 'playwright';
2
+ import type { Interaction } from '../types/KhojContext.js';
3
+ /**
4
+ * Maps interactive regions of the page:
5
+ * - Forms with all their input fields
6
+ * - Navigation elements
7
+ * - Interactive button-like elements
8
+ */
9
+ export declare function extractInteractions(page: Page): Promise<Interaction[]>;
10
+ //# sourceMappingURL=InteractionExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"InteractionExtractor.d.ts","sourceRoot":"","sources":["../../src/extractors/InteractionExtractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAa,MAAM,yBAAyB,CAAC;AAEtE;;;;;GAKG;AACH,wBAAsB,mBAAmB,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAmE5E"}