@shadowcoderr/context-graph 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +439 -88
- package/dist/analyzers/a11y-extractor.d.ts +19 -5
- package/dist/analyzers/a11y-extractor.d.ts.map +1 -1
- package/dist/analyzers/a11y-extractor.js +274 -104
- package/dist/analyzers/a11y-extractor.js.map +1 -1
- package/dist/analyzers/network-logger.d.ts +20 -2
- package/dist/analyzers/network-logger.d.ts.map +1 -1
- package/dist/analyzers/network-logger.js +122 -42
- package/dist/analyzers/network-logger.js.map +1 -1
- package/dist/analyzers/network-patterns.d.ts +73 -0
- package/dist/analyzers/network-patterns.d.ts.map +1 -0
- package/dist/analyzers/network-patterns.js +316 -0
- package/dist/analyzers/network-patterns.js.map +1 -0
- package/dist/analyzers/page-notifier.d.ts +40 -0
- package/dist/analyzers/page-notifier.d.ts.map +1 -0
- package/dist/analyzers/page-notifier.js +198 -0
- package/dist/analyzers/page-notifier.js.map +1 -0
- package/dist/analyzers/screenshot-capturer.d.ts +73 -0
- package/dist/analyzers/screenshot-capturer.d.ts.map +1 -0
- package/dist/analyzers/screenshot-capturer.js +190 -0
- package/dist/analyzers/screenshot-capturer.js.map +1 -0
- package/dist/cli/index.js +15 -6
- package/dist/cli/index.js.map +1 -1
- package/dist/config/defaults.d.ts.map +1 -1
- package/dist/config/defaults.js +3 -1
- package/dist/config/defaults.js.map +1 -1
- package/dist/config/schema.d.ts +8 -3
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +7 -2
- package/dist/config/schema.js.map +1 -1
- package/dist/core/browser-adapter.d.ts.map +1 -1
- package/dist/core/browser-adapter.js +0 -2
- package/dist/core/browser-adapter.js.map +1 -1
- package/dist/core/capture-engine.d.ts +30 -25
- package/dist/core/capture-engine.d.ts.map +1 -1
- package/dist/core/capture-engine.js +290 -276
- package/dist/core/capture-engine.js.map +1 -1
- package/dist/core/runtime.d.ts +1 -0
- package/dist/core/runtime.d.ts.map +1 -1
- package/dist/core/runtime.js +21 -0
- package/dist/core/runtime.js.map +1 -1
- package/dist/exporters/ai-context-bundler.d.ts +88 -0
- package/dist/exporters/ai-context-bundler.d.ts.map +1 -0
- package/dist/exporters/ai-context-bundler.js +380 -0
- package/dist/exporters/ai-context-bundler.js.map +1 -0
- package/dist/security/redactor.d.ts +16 -0
- package/dist/security/redactor.d.ts.map +1 -1
- package/dist/security/redactor.js +127 -57
- package/dist/security/redactor.js.map +1 -1
- package/dist/storage/engine.d.ts +24 -21
- package/dist/storage/engine.d.ts.map +1 -1
- package/dist/storage/engine.js +208 -175
- package/dist/storage/engine.js.map +1 -1
- package/dist/types/config.d.ts +4 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/notifications.d.ts +37 -0
- package/dist/types/notifications.d.ts.map +1 -0
- package/dist/types/notifications.js +4 -0
- package/dist/types/notifications.js.map +1 -0
- package/package.json +71 -70
package/dist/storage/engine.js
CHANGED
|
@@ -37,13 +37,14 @@ exports.StorageEngine = void 0;
|
|
|
37
37
|
// Developer: Shadow Coderr, Architect
|
|
38
38
|
const fs = __importStar(require("fs-extra"));
|
|
39
39
|
const path = __importStar(require("path"));
|
|
40
|
+
const crypto = __importStar(require("crypto"));
|
|
40
41
|
const version_1 = require("../utils/version");
|
|
41
42
|
const logger_1 = require("../utils/logger");
|
|
42
43
|
class StorageEngine {
|
|
43
44
|
outputDir;
|
|
44
45
|
prettyJson;
|
|
45
46
|
scriptsDir;
|
|
46
|
-
contentHashHistory = new Map(); // pageName
|
|
47
|
+
contentHashHistory = new Map(); // pageName → array of hashes
|
|
47
48
|
forceCapture = false;
|
|
48
49
|
constructor(outputDir, prettyJson = true, forceCapture = false) {
|
|
49
50
|
this.outputDir = path.resolve(outputDir);
|
|
@@ -51,44 +52,47 @@ class StorageEngine {
|
|
|
51
52
|
this.scriptsDir = path.join(this.outputDir, 'scripts');
|
|
52
53
|
this.forceCapture = forceCapture;
|
|
53
54
|
}
|
|
54
|
-
|
|
55
|
-
* Set force capture mode - always write artifacts even if content hash unchanged
|
|
56
|
-
*/
|
|
55
|
+
// ── Configuration ────────────────────────────────────────────────────────────
|
|
57
56
|
setForceCapture(force) {
|
|
58
57
|
this.forceCapture = force;
|
|
59
58
|
}
|
|
60
|
-
|
|
61
|
-
* Check if content has changed since last capture
|
|
62
|
-
*/
|
|
59
|
+
// ── Change detection ─────────────────────────────────────────────────────────
|
|
63
60
|
hasContentChanged(pageName, contentHash) {
|
|
64
61
|
const history = this.contentHashHistory.get(pageName);
|
|
65
62
|
if (!history || history.length === 0)
|
|
66
63
|
return true;
|
|
67
64
|
return !history.includes(contentHash);
|
|
68
65
|
}
|
|
69
|
-
/**
|
|
70
|
-
* Record content hash for a page
|
|
71
|
-
*/
|
|
72
66
|
recordContentHash(pageName, contentHash) {
|
|
73
67
|
const history = this.contentHashHistory.get(pageName) || [];
|
|
74
68
|
history.push(contentHash);
|
|
75
|
-
// Keep last 10 hashes per page
|
|
76
69
|
if (history.length > 10)
|
|
77
70
|
history.shift();
|
|
78
71
|
this.contentHashHistory.set(pageName, history);
|
|
79
72
|
}
|
|
73
|
+
// ── Initialisation ────────────────────────────────────────────────────────────
|
|
80
74
|
async initialize() {
|
|
81
|
-
// Create the root output directory and scripts directory
|
|
82
75
|
await fs.ensureDir(this.outputDir);
|
|
83
76
|
await fs.ensureDir(this.scriptsDir);
|
|
84
77
|
}
|
|
78
|
+
// ── Page directory resolution ─────────────────────────────────────────────────
|
|
79
|
+
/**
|
|
80
|
+
* Resolve the on-disk directory for a given page's metadata.
|
|
81
|
+
* Mirrors the same logic used inside savePageSnapshot so callers can
|
|
82
|
+
* compute the directory without needing to save first.
|
|
83
|
+
*/
|
|
84
|
+
resolvePageDir(metadata) {
|
|
85
|
+
const domainName = this.extractDomainName(metadata.domain);
|
|
86
|
+
const pageName = metadata.pageName || 'page';
|
|
87
|
+
return path.join(this.outputDir, domainName, 'pages', pageName);
|
|
88
|
+
}
|
|
89
|
+
// ── Script management ─────────────────────────────────────────────────────────
|
|
85
90
|
async getUniqueScriptPath(url) {
|
|
86
91
|
try {
|
|
87
92
|
const urlObj = new URL(url);
|
|
88
|
-
const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9\-_.]/g, '_');
|
|
93
|
+
const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9\-_.]/g, '_');
|
|
89
94
|
let scriptPath = path.join(this.scriptsDir, `${hostname}.spec.ts`);
|
|
90
95
|
let counter = 1;
|
|
91
|
-
// Check if file exists and increment counter if needed
|
|
92
96
|
while (await fs.pathExists(scriptPath)) {
|
|
93
97
|
scriptPath = path.join(this.scriptsDir, `${hostname}_${counter}.spec.ts`);
|
|
94
98
|
counter++;
|
|
@@ -97,9 +101,7 @@ class StorageEngine {
|
|
|
97
101
|
}
|
|
98
102
|
catch (error) {
|
|
99
103
|
logger_1.logger.error(`Error generating unique script path: ${error.message}`);
|
|
100
|
-
|
|
101
|
-
const timestamp = Date.now();
|
|
102
|
-
return path.join(this.scriptsDir, `recording_${timestamp}.spec.ts`);
|
|
104
|
+
return path.join(this.scriptsDir, `recording_${Date.now()}.spec.ts`);
|
|
103
105
|
}
|
|
104
106
|
}
|
|
105
107
|
async mergeRecordedScript(url, recordedScriptPath) {
|
|
@@ -110,7 +112,9 @@ class StorageEngine {
|
|
|
110
112
|
throw new Error(`Recorded script not found: ${recordedScriptPath}`);
|
|
111
113
|
}
|
|
112
114
|
const incoming = await fs.readFile(recordedScriptPath, 'utf8');
|
|
113
|
-
const existing = (await fs.pathExists(mergedPath))
|
|
115
|
+
const existing = (await fs.pathExists(mergedPath))
|
|
116
|
+
? await fs.readFile(mergedPath, 'utf8')
|
|
117
|
+
: '';
|
|
114
118
|
const merged = this.mergePlaywrightSpec(existing, incoming);
|
|
115
119
|
await fs.writeFile(mergedPath, merged);
|
|
116
120
|
if (path.resolve(recordedScriptPath) !== path.resolve(mergedPath)) {
|
|
@@ -131,11 +135,7 @@ class StorageEngine {
|
|
|
131
135
|
const takeHeader = (lines) => {
|
|
132
136
|
for (const line of lines) {
|
|
133
137
|
const trimmed = line.trim();
|
|
134
|
-
if (!trimmed)
|
|
135
|
-
continue;
|
|
136
|
-
if (!/^import\s/.test(trimmed))
|
|
137
|
-
continue;
|
|
138
|
-
if (seenHeader.has(trimmed))
|
|
138
|
+
if (!trimmed || !/^import\s/.test(trimmed) || seenHeader.has(trimmed))
|
|
139
139
|
continue;
|
|
140
140
|
seenHeader.add(trimmed);
|
|
141
141
|
header.push(trimmed);
|
|
@@ -151,18 +151,14 @@ class StorageEngine {
|
|
|
151
151
|
let inTest = false;
|
|
152
152
|
for (const line of lines) {
|
|
153
153
|
if (!inTest) {
|
|
154
|
-
if (/^\s*test\(/.test(line))
|
|
154
|
+
if (/^\s*test\(/.test(line))
|
|
155
155
|
inTest = true;
|
|
156
|
-
}
|
|
157
156
|
continue;
|
|
158
157
|
}
|
|
159
|
-
if (/^\s*\}\);\s*$/.test(line))
|
|
158
|
+
if (/^\s*\}\);\s*$/.test(line))
|
|
160
159
|
break;
|
|
161
|
-
}
|
|
162
160
|
const trimmed = line.trimEnd();
|
|
163
|
-
if (!trimmed.trim())
|
|
164
|
-
continue;
|
|
165
|
-
if (/^\s*\/\//.test(trimmed))
|
|
161
|
+
if (!trimmed.trim() || /^\s*\/\//.test(trimmed))
|
|
166
162
|
continue;
|
|
167
163
|
steps.push(trimmed);
|
|
168
164
|
}
|
|
@@ -193,124 +189,128 @@ class StorageEngine {
|
|
|
193
189
|
'',
|
|
194
190
|
].join('\n');
|
|
195
191
|
}
|
|
192
|
+
// ── Snapshot persistence ──────────────────────────────────────────────────────
|
|
196
193
|
async savePageSnapshot(snapshot) {
|
|
197
194
|
const domain = snapshot.metadata.domain;
|
|
198
|
-
|
|
195
|
+
let pageName = snapshot.metadata.pageName || 'page';
|
|
199
196
|
const contentHash = snapshot.metadata.contentHash || '';
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
const parts = domain.split('.');
|
|
203
|
-
// Remove 'www' prefix if present
|
|
204
|
-
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
205
|
-
// Get the main domain name (second-to-last part, or first if only one part)
|
|
206
|
-
if (filtered.length >= 2) {
|
|
207
|
-
return filtered[filtered.length - 2];
|
|
208
|
-
}
|
|
209
|
-
return filtered[0] || parts[0];
|
|
210
|
-
})();
|
|
211
|
-
const domainDir = path.join(this.outputDir, domainName);
|
|
212
|
-
const pageDir = path.join(domainDir, 'pages', pageName);
|
|
213
|
-
// Check if we should save (force capture or content changed)
|
|
197
|
+
const domainName = this.extractDomainName(domain);
|
|
198
|
+
let pageDir = path.join(this.outputDir, domainName, 'pages', pageName);
|
|
214
199
|
const shouldSave = this.forceCapture || this.hasContentChanged(pageName, contentHash);
|
|
215
200
|
if (!shouldSave) {
|
|
216
201
|
logger_1.logger.info(`Skipping save for ${pageName}: content unchanged (hash: ${contentHash})`);
|
|
217
202
|
return;
|
|
218
203
|
}
|
|
219
|
-
logger_1.logger.info(`Saving page snapshot: domain=${domainName}, page=${pageName}, url=${snapshot.metadata.url}
|
|
220
|
-
|
|
221
|
-
|
|
204
|
+
logger_1.logger.info(`Saving page snapshot: domain=${domainName}, page=${pageName}, url=${snapshot.metadata.url}`);
|
|
205
|
+
try {
|
|
206
|
+
await fs.ensureDir(pageDir);
|
|
207
|
+
}
|
|
208
|
+
catch (error) {
|
|
209
|
+
const msg = error.message;
|
|
210
|
+
logger_1.logger.warn(`Failed to create directory '${pageDir}': ${msg}. Attempting fallback...`);
|
|
211
|
+
// Fallback: Use a shorter, safe name with a hash if the original name fails (likely due to length or invalid chars)
|
|
212
|
+
const safeHash = crypto.createHash('md5').update(snapshot.metadata.url).digest('hex').substring(0, 8);
|
|
213
|
+
pageName = `page-${safeHash}`;
|
|
214
|
+
pageDir = path.join(this.outputDir, domainName, 'pages', pageName);
|
|
215
|
+
// Update metadata so it's consistent with the actual folder name
|
|
216
|
+
snapshot.metadata.pageName = pageName;
|
|
217
|
+
logger_1.logger.info(`Using fallback directory: ${pageDir}`);
|
|
218
|
+
await fs.ensureDir(pageDir);
|
|
219
|
+
}
|
|
222
220
|
if (contentHash) {
|
|
223
221
|
this.recordContentHash(pageName, contentHash);
|
|
224
222
|
}
|
|
225
|
-
//
|
|
223
|
+
// ── Core files ──
|
|
226
224
|
await this.writeJson(path.join(pageDir, 'metadata.json'), snapshot.metadata);
|
|
227
|
-
|
|
228
|
-
const beautifiedDOM = this.beautifyHTML(snapshot.domSnapshot);
|
|
229
|
-
await fs.writeFile(path.join(pageDir, 'DOM'), beautifiedDOM);
|
|
230
|
-
// Save accessibility tree
|
|
225
|
+
await fs.writeFile(path.join(pageDir, 'DOM'), this.beautifyHTML(snapshot.domSnapshot));
|
|
231
226
|
await this.writeJson(path.join(pageDir, 'a11y_tree.json'), snapshot.a11yTree);
|
|
232
|
-
// Save locators
|
|
233
227
|
await this.writeJson(path.join(pageDir, 'locators.json'), snapshot.locators);
|
|
234
|
-
//
|
|
228
|
+
// ── Frames ──
|
|
229
|
+
await this.saveFrames(snapshot, pageDir);
|
|
230
|
+
// ── Console errors / warnings ──
|
|
231
|
+
const consoleErrors = {
|
|
232
|
+
errors: snapshot.consoleMessages
|
|
233
|
+
.filter((m) => m.type === 'error')
|
|
234
|
+
.map((m) => ({
|
|
235
|
+
timestamp: m.timestamp,
|
|
236
|
+
message: m.message,
|
|
237
|
+
source: `${m.location?.url || ''}:${m.location?.lineNumber || 0}:${m.location?.columnNumber || 0}`,
|
|
238
|
+
stack: m.stack,
|
|
239
|
+
})),
|
|
240
|
+
warnings: snapshot.consoleMessages
|
|
241
|
+
.filter((m) => m.type === 'warn')
|
|
242
|
+
.map((m) => ({
|
|
243
|
+
timestamp: m.timestamp,
|
|
244
|
+
message: m.message,
|
|
245
|
+
source: `${m.location?.url || ''}:${m.location?.lineNumber || 0}:${m.location?.columnNumber || 0}`,
|
|
246
|
+
})),
|
|
247
|
+
};
|
|
248
|
+
if (consoleErrors.errors.length > 0 || consoleErrors.warnings.length > 0) {
|
|
249
|
+
await this.writeJson(path.join(pageDir, 'console_errors.json'), consoleErrors);
|
|
250
|
+
}
|
|
251
|
+
// ── Screenshot paths (written into metadata after screenshot capture) ──
|
|
252
|
+
if (snapshot.screenshotPaths.length > 0) {
|
|
253
|
+
await this.writeJson(path.join(pageDir, 'screenshot_manifest.json'), {
|
|
254
|
+
capturedAt: new Date().toISOString(),
|
|
255
|
+
paths: snapshot.screenshotPaths,
|
|
256
|
+
count: snapshot.screenshotPaths.length,
|
|
257
|
+
});
|
|
258
|
+
}
|
|
259
|
+
// ── Network events (save summary) ──
|
|
260
|
+
if (snapshot.networkEvents.length > 0) {
|
|
261
|
+
const networkDir = path.join(this.outputDir, domainName, 'network');
|
|
262
|
+
await fs.ensureDir(networkDir);
|
|
263
|
+
const trafficLog = path.join(networkDir, 'traffic_log.jsonl');
|
|
264
|
+
const lines = snapshot.networkEvents.map(e => JSON.stringify(e)).join('\n') + '\n';
|
|
265
|
+
await fs.appendFile(trafficLog, lines);
|
|
266
|
+
}
|
|
267
|
+
logger_1.logger.info(`Saved page snapshot: ${pageName}`);
|
|
268
|
+
}
|
|
269
|
+
async saveFrames(snapshot, pageDir) {
|
|
235
270
|
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
236
|
-
// Save serialized frame contents as separate files where present
|
|
237
271
|
const framesDir = path.join(pageDir, 'frames');
|
|
238
272
|
await fs.ensureDir(framesDir);
|
|
239
273
|
let frameCounter = 0;
|
|
240
274
|
const writeFrame = async (frame) => {
|
|
241
275
|
const id = `frame_${String(frameCounter++).padStart(3, '0')}`;
|
|
242
|
-
if (frame
|
|
276
|
+
if (frame?.content) {
|
|
243
277
|
const filename = path.join(framesDir, `${id}_${(frame.name || 'main').replace(/[^a-z0-9\-_.]/gi, '_')}.html`);
|
|
244
278
|
await fs.writeFile(filename, frame.content);
|
|
245
|
-
// Replace content with relative path reference to keep JSON small
|
|
246
279
|
frame.contentFile = path.relative(pageDir, filename).replace(/\\/g, '/');
|
|
247
280
|
}
|
|
248
|
-
if (frame
|
|
249
|
-
for (const child of frame.children)
|
|
281
|
+
if (frame?.children?.length) {
|
|
282
|
+
for (const child of frame.children)
|
|
250
283
|
await writeFrame(child);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
};
|
|
254
|
-
try {
|
|
255
|
-
if (snapshot.frames) {
|
|
256
|
-
await writeFrame(snapshot.frames);
|
|
257
|
-
// Rewrite frames.json to include contentFile references
|
|
258
|
-
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
259
284
|
}
|
|
260
|
-
}
|
|
261
|
-
catch (err) {
|
|
262
|
-
logger_1.logger.warn('Failed to write frame contents: ' + err.message);
|
|
263
|
-
}
|
|
264
|
-
// Save console errors and warnings (separate file)
|
|
265
|
-
const consoleErrors = {
|
|
266
|
-
errors: snapshot.consoleMessages
|
|
267
|
-
.filter((msg) => msg.type === 'error')
|
|
268
|
-
.map((msg) => ({
|
|
269
|
-
timestamp: msg.timestamp,
|
|
270
|
-
message: msg.message,
|
|
271
|
-
source: `${msg.location?.url || ''}:${msg.location?.lineNumber || 0}:${msg.location?.columnNumber || 0}`,
|
|
272
|
-
stack: msg.stack,
|
|
273
|
-
})),
|
|
274
|
-
warnings: snapshot.consoleMessages
|
|
275
|
-
.filter((msg) => msg.type === 'warn')
|
|
276
|
-
.map((msg) => ({
|
|
277
|
-
timestamp: msg.timestamp,
|
|
278
|
-
message: msg.message,
|
|
279
|
-
source: `${msg.location?.url || ''}:${msg.location?.lineNumber || 0}:${msg.location?.columnNumber || 0}`,
|
|
280
|
-
})),
|
|
281
285
|
};
|
|
282
|
-
if (
|
|
283
|
-
await
|
|
286
|
+
if (snapshot.frames) {
|
|
287
|
+
await writeFrame(snapshot.frames).catch(err => logger_1.logger.warn('Failed to write frame contents: ' + err.message));
|
|
288
|
+
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
284
289
|
}
|
|
285
|
-
logger_1.logger.info(`Saved page snapshot: ${pageName}`);
|
|
286
290
|
}
|
|
287
|
-
|
|
288
|
-
* Save the components registry and update manifest reference
|
|
289
|
-
*/
|
|
291
|
+
// ── Components registry ────────────────────────────────────────────────────────
|
|
290
292
|
async saveComponentsRegistry(registry, domainName) {
|
|
291
293
|
const registryPath = path.join(this.outputDir, domainName, 'components_registry.json');
|
|
292
294
|
await fs.ensureDir(path.dirname(registryPath));
|
|
293
295
|
await this.writeJson(registryPath, registry);
|
|
294
296
|
logger_1.logger.info(`Saved components registry to ${registryPath}`);
|
|
295
297
|
}
|
|
296
|
-
/**
|
|
297
|
-
* Update manifest with components registry reference
|
|
298
|
-
*/
|
|
299
298
|
async updateManifestWithComponents(domainName, totalComponents) {
|
|
300
299
|
const manifestPath = path.join(this.outputDir, 'global_manifest.json');
|
|
301
|
-
if (await fs.pathExists(manifestPath))
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
300
|
+
if (!(await fs.pathExists(manifestPath)))
|
|
301
|
+
return;
|
|
302
|
+
const manifest = await fs.readJson(manifestPath);
|
|
303
|
+
manifest.componentsRegistry = {
|
|
304
|
+
path: `${domainName}/components_registry.json`,
|
|
305
|
+
totalComponents,
|
|
306
|
+
lastUpdated: new Date().toISOString(),
|
|
307
|
+
};
|
|
308
|
+
manifest.statistics.totalComponents = totalComponents;
|
|
309
|
+
manifest.lastUpdated = new Date().toISOString();
|
|
310
|
+
await this.writeJson(manifestPath, manifest);
|
|
312
311
|
}
|
|
313
|
-
|
|
312
|
+
// ── Global manifest ────────────────────────────────────────────────────────────
|
|
313
|
+
async updateGlobalManifest(entry, extras = {}) {
|
|
314
314
|
const manifestPath = path.join(this.outputDir, 'global_manifest.json');
|
|
315
315
|
let manifest;
|
|
316
316
|
if (await fs.pathExists(manifestPath)) {
|
|
@@ -329,11 +329,11 @@ class StorageEngine {
|
|
|
329
329
|
totalPages: 0,
|
|
330
330
|
totalNetworkRequests: 0,
|
|
331
331
|
totalScreenshots: 0,
|
|
332
|
-
storageSize: '0
|
|
332
|
+
storageSize: '0 KB',
|
|
333
333
|
},
|
|
334
334
|
};
|
|
335
335
|
}
|
|
336
|
-
// Update domains
|
|
336
|
+
// ── Update domains ──
|
|
337
337
|
let domainEntry = manifest.domains.find(d => d.domain === entry.domain);
|
|
338
338
|
if (!domainEntry) {
|
|
339
339
|
domainEntry = {
|
|
@@ -348,82 +348,119 @@ class StorageEngine {
|
|
|
348
348
|
domainEntry.lastVisited = entry.timestamp;
|
|
349
349
|
domainEntry.totalVisits++;
|
|
350
350
|
domainEntry.pages.push(entry);
|
|
351
|
-
// Update statistics
|
|
351
|
+
// ── Update statistics ──
|
|
352
|
+
manifest.statistics.totalDomains = manifest.domains.length;
|
|
352
353
|
manifest.statistics.totalPages = manifest.domains.reduce((sum, d) => sum + d.pages.length, 0);
|
|
354
|
+
manifest.statistics.totalNetworkRequests =
|
|
355
|
+
(manifest.statistics.totalNetworkRequests || 0) + (extras.networkRequests ?? 0);
|
|
356
|
+
manifest.statistics.totalScreenshots =
|
|
357
|
+
(manifest.statistics.totalScreenshots || 0) + (extras.screenshots ?? 0);
|
|
358
|
+
// Compute actual storage size (non-blocking — best effort)
|
|
359
|
+
try {
|
|
360
|
+
const bytes = await this.computeDirectorySizeBytes(this.outputDir);
|
|
361
|
+
manifest.statistics.storageSize = this.formatBytes(bytes);
|
|
362
|
+
}
|
|
363
|
+
catch {
|
|
364
|
+
// Leave existing value if directory walk fails
|
|
365
|
+
}
|
|
353
366
|
manifest.lastUpdated = new Date().toISOString();
|
|
354
367
|
await this.writeJson(manifestPath, manifest);
|
|
355
368
|
}
|
|
356
|
-
|
|
357
|
-
const jsonString = this.prettyJson
|
|
358
|
-
? JSON.stringify(data, null, 2)
|
|
359
|
-
: JSON.stringify(data);
|
|
360
|
-
await fs.writeFile(filePath, jsonString);
|
|
361
|
-
}
|
|
362
|
-
/**
|
|
363
|
-
* Beautify HTML by adding proper indentation and line breaks
|
|
364
|
-
* This makes the DOM more readable for RAG/context analysis
|
|
365
|
-
* Properly handles self-closing tags and preserves HTML structure
|
|
366
|
-
*/
|
|
369
|
+
// ── User interactions ──────────────────────────────────────────────────────────
|
|
367
370
|
async saveUserInteractions(url, interactions) {
|
|
368
371
|
try {
|
|
369
372
|
const urlObj = new URL(url);
|
|
370
|
-
const
|
|
371
|
-
// Derive logical domain name
|
|
372
|
-
const domainName = (() => {
|
|
373
|
-
const parts = domain.split('.');
|
|
374
|
-
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
375
|
-
if (filtered.length >= 2) {
|
|
376
|
-
return filtered[filtered.length - 2];
|
|
377
|
-
}
|
|
378
|
-
return filtered[0] || parts[0];
|
|
379
|
-
})();
|
|
373
|
+
const domainName = this.extractDomainName(urlObj.hostname);
|
|
380
374
|
const domainDir = path.join(this.outputDir, domainName);
|
|
381
375
|
const interactionsFile = path.join(domainDir, 'user_interactions.json');
|
|
382
376
|
await fs.ensureDir(domainDir);
|
|
383
|
-
|
|
384
|
-
let existingInteractions = [];
|
|
377
|
+
let existing = [];
|
|
385
378
|
if (await fs.pathExists(interactionsFile)) {
|
|
386
|
-
|
|
379
|
+
existing = await fs.readJson(interactionsFile);
|
|
387
380
|
}
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
...interaction,
|
|
381
|
+
const stamped = interactions.map(i => ({
|
|
382
|
+
...i,
|
|
391
383
|
pageUrl: url,
|
|
392
|
-
recordedAt: new Date().toISOString()
|
|
384
|
+
recordedAt: new Date().toISOString(),
|
|
393
385
|
}));
|
|
394
|
-
|
|
395
|
-
const allInteractions = [...existingInteractions, ...interactionsWithUrl];
|
|
396
|
-
// Save all interactions
|
|
397
|
-
await this.writeJson(interactionsFile, allInteractions);
|
|
386
|
+
await this.writeJson(interactionsFile, [...existing, ...stamped]);
|
|
398
387
|
logger_1.logger.info(`Saved ${interactions.length} user interactions to ${interactionsFile}`);
|
|
399
388
|
}
|
|
400
389
|
catch (error) {
|
|
401
390
|
logger_1.logger.error(`Failed to save user interactions: ${error.message}`);
|
|
402
391
|
}
|
|
403
392
|
}
|
|
393
|
+
// ── Private helpers ────────────────────────────────────────────────────────────
|
|
394
|
+
extractDomainName(domain) {
|
|
395
|
+
const parts = domain.split('.');
|
|
396
|
+
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
397
|
+
if (filtered.length >= 2)
|
|
398
|
+
return filtered[filtered.length - 2];
|
|
399
|
+
return filtered[0] || parts[0];
|
|
400
|
+
}
|
|
401
|
+
async writeJson(filePath, data) {
|
|
402
|
+
const jsonString = this.prettyJson
|
|
403
|
+
? JSON.stringify(data, null, 2)
|
|
404
|
+
: JSON.stringify(data);
|
|
405
|
+
await fs.writeFile(filePath, jsonString);
|
|
406
|
+
}
|
|
407
|
+
/**
|
|
408
|
+
* Recursively walk a directory and sum all file sizes in bytes.
|
|
409
|
+
* Returns 0 if the directory does not exist.
|
|
410
|
+
*/
|
|
411
|
+
async computeDirectorySizeBytes(dir) {
|
|
412
|
+
if (!(await fs.pathExists(dir)))
|
|
413
|
+
return 0;
|
|
414
|
+
let total = 0;
|
|
415
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
416
|
+
for (const entry of entries) {
|
|
417
|
+
const full = path.join(dir, entry.name);
|
|
418
|
+
if (entry.isDirectory()) {
|
|
419
|
+
total += await this.computeDirectorySizeBytes(full);
|
|
420
|
+
}
|
|
421
|
+
else if (entry.isFile()) {
|
|
422
|
+
try {
|
|
423
|
+
const stat = await fs.stat(full);
|
|
424
|
+
total += stat.size;
|
|
425
|
+
}
|
|
426
|
+
catch {
|
|
427
|
+
// File may have been removed during iteration
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
return total;
|
|
432
|
+
}
|
|
433
|
+
formatBytes(bytes) {
|
|
434
|
+
if (bytes < 1024)
|
|
435
|
+
return `${bytes} B`;
|
|
436
|
+
if (bytes < 1024 * 1024)
|
|
437
|
+
return `${(bytes / 1024).toFixed(1)} KB`;
|
|
438
|
+
if (bytes < 1024 * 1024 * 1024)
|
|
439
|
+
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
440
|
+
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Beautify HTML by adding proper indentation and line breaks.
|
|
444
|
+
* Self-closing / void elements are handled correctly so the indent
|
|
445
|
+
* level is never corrupted by unclosed tags.
|
|
446
|
+
*/
|
|
404
447
|
beautifyHTML(html) {
|
|
405
448
|
try {
|
|
406
|
-
if (!html
|
|
449
|
+
if (!html?.trim())
|
|
407
450
|
return html;
|
|
408
|
-
}
|
|
409
|
-
// Use a proper HTML parser approach - format with indentation
|
|
410
|
-
// Split by tags while preserving them
|
|
411
451
|
const parts = [];
|
|
412
452
|
const tagRegex = /(<[^>]+>)/g;
|
|
413
453
|
let lastIndex = 0;
|
|
414
454
|
let match;
|
|
415
455
|
while ((match = tagRegex.exec(html)) !== null) {
|
|
416
|
-
// Add text before tag
|
|
417
456
|
if (match.index > lastIndex) {
|
|
418
457
|
const text = html.substring(lastIndex, match.index).trim();
|
|
419
458
|
if (text)
|
|
420
459
|
parts.push(text);
|
|
421
460
|
}
|
|
422
|
-
// Add tag
|
|
423
461
|
parts.push(match[0]);
|
|
424
462
|
lastIndex = match.index + match[0].length;
|
|
425
463
|
}
|
|
426
|
-
// Add remaining text
|
|
427
464
|
if (lastIndex < html.length) {
|
|
428
465
|
const text = html.substring(lastIndex).trim();
|
|
429
466
|
if (text)
|
|
@@ -432,45 +469,41 @@ class StorageEngine {
|
|
|
432
469
|
const indentSize = 2;
|
|
433
470
|
let indent = 0;
|
|
434
471
|
const result = [];
|
|
435
|
-
const voidElements = new Set([
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
472
|
+
const voidElements = new Set([
|
|
473
|
+
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
|
|
474
|
+
'link', 'meta', 'param', 'source', 'track', 'wbr', 'noscript',
|
|
475
|
+
]);
|
|
476
|
+
for (const part of parts) {
|
|
477
|
+
const p = part.trim();
|
|
478
|
+
if (!p)
|
|
439
479
|
continue;
|
|
440
|
-
if (
|
|
441
|
-
result.push(
|
|
480
|
+
if (p.startsWith('<!DOCTYPE')) {
|
|
481
|
+
result.push(p);
|
|
442
482
|
}
|
|
443
|
-
else if (
|
|
444
|
-
result.push(' '.repeat(indent) +
|
|
483
|
+
else if (p.startsWith('<!--')) {
|
|
484
|
+
result.push(' '.repeat(indent) + p);
|
|
445
485
|
}
|
|
446
|
-
else if (
|
|
447
|
-
// Closing tag
|
|
486
|
+
else if (p.startsWith('</')) {
|
|
448
487
|
indent = Math.max(0, indent - indentSize);
|
|
449
|
-
result.push(' '.repeat(indent) +
|
|
488
|
+
result.push(' '.repeat(indent) + p);
|
|
450
489
|
}
|
|
451
|
-
else if (
|
|
452
|
-
|
|
453
|
-
const
|
|
454
|
-
|
|
455
|
-
result.push(' '.repeat(indent) + part);
|
|
490
|
+
else if (p.startsWith('<')) {
|
|
491
|
+
const tagName = p.match(/^<(\w+)/)?.[1]?.toLowerCase();
|
|
492
|
+
const isSelfClosing = p.endsWith('/>') || (tagName && voidElements.has(tagName));
|
|
493
|
+
result.push(' '.repeat(indent) + p);
|
|
456
494
|
if (!isSelfClosing && tagName !== 'script' && tagName !== 'style') {
|
|
457
495
|
indent += indentSize;
|
|
458
496
|
}
|
|
459
497
|
}
|
|
460
498
|
else {
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
for (const line of lines) {
|
|
464
|
-
if (line.trim()) {
|
|
465
|
-
result.push(' '.repeat(indent) + line.trim());
|
|
466
|
-
}
|
|
499
|
+
for (const line of p.split(/\n/).filter(l => l.trim())) {
|
|
500
|
+
result.push(' '.repeat(indent) + line.trim());
|
|
467
501
|
}
|
|
468
502
|
}
|
|
469
503
|
}
|
|
470
504
|
return result.join('\n');
|
|
471
505
|
}
|
|
472
506
|
catch (error) {
|
|
473
|
-
// If beautification fails, return original
|
|
474
507
|
logger_1.logger.warn('HTML beautification failed, using original: ' + error.message);
|
|
475
508
|
return html;
|
|
476
509
|
}
|