@shadowcoderr/context-graph 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +448 -88
- package/dist/analyzers/a11y-extractor.d.ts +19 -5
- package/dist/analyzers/a11y-extractor.d.ts.map +1 -1
- package/dist/analyzers/a11y-extractor.js +274 -104
- package/dist/analyzers/a11y-extractor.js.map +1 -1
- package/dist/analyzers/network-logger.d.ts +20 -2
- package/dist/analyzers/network-logger.d.ts.map +1 -1
- package/dist/analyzers/network-logger.js +122 -42
- package/dist/analyzers/network-logger.js.map +1 -1
- package/dist/analyzers/network-patterns.d.ts +73 -0
- package/dist/analyzers/network-patterns.d.ts.map +1 -0
- package/dist/analyzers/network-patterns.js +316 -0
- package/dist/analyzers/network-patterns.js.map +1 -0
- package/dist/analyzers/screenshot-capturer.d.ts +73 -0
- package/dist/analyzers/screenshot-capturer.d.ts.map +1 -0
- package/dist/analyzers/screenshot-capturer.js +190 -0
- package/dist/analyzers/screenshot-capturer.js.map +1 -0
- package/dist/cli/index.js +17 -7
- package/dist/cli/index.js.map +1 -1
- package/dist/core/capture-engine.d.ts +30 -25
- package/dist/core/capture-engine.d.ts.map +1 -1
- package/dist/core/capture-engine.js +290 -276
- package/dist/core/capture-engine.js.map +1 -1
- package/dist/exporters/ai-context-bundler.d.ts +88 -0
- package/dist/exporters/ai-context-bundler.d.ts.map +1 -0
- package/dist/exporters/ai-context-bundler.js +380 -0
- package/dist/exporters/ai-context-bundler.js.map +1 -0
- package/dist/security/redactor.d.ts +16 -0
- package/dist/security/redactor.d.ts.map +1 -1
- package/dist/security/redactor.js +127 -57
- package/dist/security/redactor.js.map +1 -1
- package/dist/storage/engine.d.ts +24 -21
- package/dist/storage/engine.d.ts.map +1 -1
- package/dist/storage/engine.js +210 -176
- package/dist/storage/engine.js.map +1 -1
- package/dist/storage/manifest.d.ts.map +1 -1
- package/dist/storage/manifest.js +4 -3
- package/dist/storage/manifest.js.map +1 -1
- package/dist/utils/version.d.ts +5 -0
- package/dist/utils/version.d.ts.map +1 -0
- package/dist/utils/version.js +53 -0
- package/dist/utils/version.js.map +1 -0
- package/package.json +3 -3
package/dist/storage/engine.js
CHANGED
|
@@ -37,12 +37,14 @@ exports.StorageEngine = void 0;
|
|
|
37
37
|
// Developer: Shadow Coderr, Architect
|
|
38
38
|
const fs = __importStar(require("fs-extra"));
|
|
39
39
|
const path = __importStar(require("path"));
|
|
40
|
+
const crypto = __importStar(require("crypto"));
|
|
41
|
+
const version_1 = require("../utils/version");
|
|
40
42
|
const logger_1 = require("../utils/logger");
|
|
41
43
|
class StorageEngine {
|
|
42
44
|
outputDir;
|
|
43
45
|
prettyJson;
|
|
44
46
|
scriptsDir;
|
|
45
|
-
contentHashHistory = new Map(); // pageName
|
|
47
|
+
contentHashHistory = new Map(); // pageName → array of hashes
|
|
46
48
|
forceCapture = false;
|
|
47
49
|
constructor(outputDir, prettyJson = true, forceCapture = false) {
|
|
48
50
|
this.outputDir = path.resolve(outputDir);
|
|
@@ -50,44 +52,47 @@ class StorageEngine {
|
|
|
50
52
|
this.scriptsDir = path.join(this.outputDir, 'scripts');
|
|
51
53
|
this.forceCapture = forceCapture;
|
|
52
54
|
}
|
|
53
|
-
|
|
54
|
-
* Set force capture mode - always write artifacts even if content hash unchanged
|
|
55
|
-
*/
|
|
55
|
+
// ── Configuration ────────────────────────────────────────────────────────────
|
|
56
56
|
setForceCapture(force) {
|
|
57
57
|
this.forceCapture = force;
|
|
58
58
|
}
|
|
59
|
-
|
|
60
|
-
* Check if content has changed since last capture
|
|
61
|
-
*/
|
|
59
|
+
// ── Change detection ─────────────────────────────────────────────────────────
|
|
62
60
|
hasContentChanged(pageName, contentHash) {
|
|
63
61
|
const history = this.contentHashHistory.get(pageName);
|
|
64
62
|
if (!history || history.length === 0)
|
|
65
63
|
return true;
|
|
66
64
|
return !history.includes(contentHash);
|
|
67
65
|
}
|
|
68
|
-
/**
|
|
69
|
-
* Record content hash for a page
|
|
70
|
-
*/
|
|
71
66
|
recordContentHash(pageName, contentHash) {
|
|
72
67
|
const history = this.contentHashHistory.get(pageName) || [];
|
|
73
68
|
history.push(contentHash);
|
|
74
|
-
// Keep last 10 hashes per page
|
|
75
69
|
if (history.length > 10)
|
|
76
70
|
history.shift();
|
|
77
71
|
this.contentHashHistory.set(pageName, history);
|
|
78
72
|
}
|
|
73
|
+
// ── Initialisation ────────────────────────────────────────────────────────────
|
|
79
74
|
async initialize() {
|
|
80
|
-
// Create the root output directory and scripts directory
|
|
81
75
|
await fs.ensureDir(this.outputDir);
|
|
82
76
|
await fs.ensureDir(this.scriptsDir);
|
|
83
77
|
}
|
|
78
|
+
// ── Page directory resolution ─────────────────────────────────────────────────
|
|
79
|
+
/**
|
|
80
|
+
* Resolve the on-disk directory for a given page's metadata.
|
|
81
|
+
* Mirrors the same logic used inside savePageSnapshot so callers can
|
|
82
|
+
* compute the directory without needing to save first.
|
|
83
|
+
*/
|
|
84
|
+
resolvePageDir(metadata) {
|
|
85
|
+
const domainName = this.extractDomainName(metadata.domain);
|
|
86
|
+
const pageName = metadata.pageName || 'page';
|
|
87
|
+
return path.join(this.outputDir, domainName, 'pages', pageName);
|
|
88
|
+
}
|
|
89
|
+
// ── Script management ─────────────────────────────────────────────────────────
|
|
84
90
|
async getUniqueScriptPath(url) {
|
|
85
91
|
try {
|
|
86
92
|
const urlObj = new URL(url);
|
|
87
|
-
const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9\-_.]/g, '_');
|
|
93
|
+
const hostname = urlObj.hostname.replace(/[^a-zA-Z0-9\-_.]/g, '_');
|
|
88
94
|
let scriptPath = path.join(this.scriptsDir, `${hostname}.spec.ts`);
|
|
89
95
|
let counter = 1;
|
|
90
|
-
// Check if file exists and increment counter if needed
|
|
91
96
|
while (await fs.pathExists(scriptPath)) {
|
|
92
97
|
scriptPath = path.join(this.scriptsDir, `${hostname}_${counter}.spec.ts`);
|
|
93
98
|
counter++;
|
|
@@ -96,9 +101,7 @@ class StorageEngine {
|
|
|
96
101
|
}
|
|
97
102
|
catch (error) {
|
|
98
103
|
logger_1.logger.error(`Error generating unique script path: ${error.message}`);
|
|
99
|
-
|
|
100
|
-
const timestamp = Date.now();
|
|
101
|
-
return path.join(this.scriptsDir, `recording_${timestamp}.spec.ts`);
|
|
104
|
+
return path.join(this.scriptsDir, `recording_${Date.now()}.spec.ts`);
|
|
102
105
|
}
|
|
103
106
|
}
|
|
104
107
|
async mergeRecordedScript(url, recordedScriptPath) {
|
|
@@ -109,7 +112,9 @@ class StorageEngine {
|
|
|
109
112
|
throw new Error(`Recorded script not found: ${recordedScriptPath}`);
|
|
110
113
|
}
|
|
111
114
|
const incoming = await fs.readFile(recordedScriptPath, 'utf8');
|
|
112
|
-
const existing = (await fs.pathExists(mergedPath))
|
|
115
|
+
const existing = (await fs.pathExists(mergedPath))
|
|
116
|
+
? await fs.readFile(mergedPath, 'utf8')
|
|
117
|
+
: '';
|
|
113
118
|
const merged = this.mergePlaywrightSpec(existing, incoming);
|
|
114
119
|
await fs.writeFile(mergedPath, merged);
|
|
115
120
|
if (path.resolve(recordedScriptPath) !== path.resolve(mergedPath)) {
|
|
@@ -130,11 +135,7 @@ class StorageEngine {
|
|
|
130
135
|
const takeHeader = (lines) => {
|
|
131
136
|
for (const line of lines) {
|
|
132
137
|
const trimmed = line.trim();
|
|
133
|
-
if (!trimmed)
|
|
134
|
-
continue;
|
|
135
|
-
if (!/^import\s/.test(trimmed))
|
|
136
|
-
continue;
|
|
137
|
-
if (seenHeader.has(trimmed))
|
|
138
|
+
if (!trimmed || !/^import\s/.test(trimmed) || seenHeader.has(trimmed))
|
|
138
139
|
continue;
|
|
139
140
|
seenHeader.add(trimmed);
|
|
140
141
|
header.push(trimmed);
|
|
@@ -150,18 +151,14 @@ class StorageEngine {
|
|
|
150
151
|
let inTest = false;
|
|
151
152
|
for (const line of lines) {
|
|
152
153
|
if (!inTest) {
|
|
153
|
-
if (/^\s*test\(/.test(line))
|
|
154
|
+
if (/^\s*test\(/.test(line))
|
|
154
155
|
inTest = true;
|
|
155
|
-
}
|
|
156
156
|
continue;
|
|
157
157
|
}
|
|
158
|
-
if (/^\s*\}\);\s*$/.test(line))
|
|
158
|
+
if (/^\s*\}\);\s*$/.test(line))
|
|
159
159
|
break;
|
|
160
|
-
}
|
|
161
160
|
const trimmed = line.trimEnd();
|
|
162
|
-
if (!trimmed.trim())
|
|
163
|
-
continue;
|
|
164
|
-
if (/^\s*\/\//.test(trimmed))
|
|
161
|
+
if (!trimmed.trim() || /^\s*\/\//.test(trimmed))
|
|
165
162
|
continue;
|
|
166
163
|
steps.push(trimmed);
|
|
167
164
|
}
|
|
@@ -192,124 +189,128 @@ class StorageEngine {
|
|
|
192
189
|
'',
|
|
193
190
|
].join('\n');
|
|
194
191
|
}
|
|
192
|
+
// ── Snapshot persistence ──────────────────────────────────────────────────────
|
|
195
193
|
async savePageSnapshot(snapshot) {
|
|
196
194
|
const domain = snapshot.metadata.domain;
|
|
197
|
-
|
|
195
|
+
let pageName = snapshot.metadata.pageName || 'page';
|
|
198
196
|
const contentHash = snapshot.metadata.contentHash || '';
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
const parts = domain.split('.');
|
|
202
|
-
// Remove 'www' prefix if present
|
|
203
|
-
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
204
|
-
// Get the main domain name (second-to-last part, or first if only one part)
|
|
205
|
-
if (filtered.length >= 2) {
|
|
206
|
-
return filtered[filtered.length - 2];
|
|
207
|
-
}
|
|
208
|
-
return filtered[0] || parts[0];
|
|
209
|
-
})();
|
|
210
|
-
const domainDir = path.join(this.outputDir, domainName);
|
|
211
|
-
const pageDir = path.join(domainDir, 'pages', pageName);
|
|
212
|
-
// Check if we should save (force capture or content changed)
|
|
197
|
+
const domainName = this.extractDomainName(domain);
|
|
198
|
+
let pageDir = path.join(this.outputDir, domainName, 'pages', pageName);
|
|
213
199
|
const shouldSave = this.forceCapture || this.hasContentChanged(pageName, contentHash);
|
|
214
200
|
if (!shouldSave) {
|
|
215
201
|
logger_1.logger.info(`Skipping save for ${pageName}: content unchanged (hash: ${contentHash})`);
|
|
216
202
|
return;
|
|
217
203
|
}
|
|
218
|
-
logger_1.logger.info(`Saving page snapshot: domain=${domainName}, page=${pageName}, url=${snapshot.metadata.url}
|
|
219
|
-
|
|
220
|
-
|
|
204
|
+
logger_1.logger.info(`Saving page snapshot: domain=${domainName}, page=${pageName}, url=${snapshot.metadata.url}`);
|
|
205
|
+
try {
|
|
206
|
+
await fs.ensureDir(pageDir);
|
|
207
|
+
}
|
|
208
|
+
catch (error) {
|
|
209
|
+
const msg = error.message;
|
|
210
|
+
logger_1.logger.warn(`Failed to create directory '${pageDir}': ${msg}. Attempting fallback...`);
|
|
211
|
+
// Fallback: Use a shorter, safe name with a hash if the original name fails (likely due to length or invalid chars)
|
|
212
|
+
const safeHash = crypto.createHash('md5').update(snapshot.metadata.url).digest('hex').substring(0, 8);
|
|
213
|
+
pageName = `page-${safeHash}`;
|
|
214
|
+
pageDir = path.join(this.outputDir, domainName, 'pages', pageName);
|
|
215
|
+
// Update metadata so it's consistent with the actual folder name
|
|
216
|
+
snapshot.metadata.pageName = pageName;
|
|
217
|
+
logger_1.logger.info(`Using fallback directory: ${pageDir}`);
|
|
218
|
+
await fs.ensureDir(pageDir);
|
|
219
|
+
}
|
|
221
220
|
if (contentHash) {
|
|
222
221
|
this.recordContentHash(pageName, contentHash);
|
|
223
222
|
}
|
|
224
|
-
//
|
|
223
|
+
// ── Core files ──
|
|
225
224
|
await this.writeJson(path.join(pageDir, 'metadata.json'), snapshot.metadata);
|
|
226
|
-
|
|
227
|
-
const beautifiedDOM = this.beautifyHTML(snapshot.domSnapshot);
|
|
228
|
-
await fs.writeFile(path.join(pageDir, 'DOM'), beautifiedDOM);
|
|
229
|
-
// Save accessibility tree
|
|
225
|
+
await fs.writeFile(path.join(pageDir, 'DOM'), this.beautifyHTML(snapshot.domSnapshot));
|
|
230
226
|
await this.writeJson(path.join(pageDir, 'a11y_tree.json'), snapshot.a11yTree);
|
|
231
|
-
// Save locators
|
|
232
227
|
await this.writeJson(path.join(pageDir, 'locators.json'), snapshot.locators);
|
|
233
|
-
//
|
|
228
|
+
// ── Frames ──
|
|
229
|
+
await this.saveFrames(snapshot, pageDir);
|
|
230
|
+
// ── Console errors / warnings ──
|
|
231
|
+
const consoleErrors = {
|
|
232
|
+
errors: snapshot.consoleMessages
|
|
233
|
+
.filter((m) => m.type === 'error')
|
|
234
|
+
.map((m) => ({
|
|
235
|
+
timestamp: m.timestamp,
|
|
236
|
+
message: m.message,
|
|
237
|
+
source: `${m.location?.url || ''}:${m.location?.lineNumber || 0}:${m.location?.columnNumber || 0}`,
|
|
238
|
+
stack: m.stack,
|
|
239
|
+
})),
|
|
240
|
+
warnings: snapshot.consoleMessages
|
|
241
|
+
.filter((m) => m.type === 'warn')
|
|
242
|
+
.map((m) => ({
|
|
243
|
+
timestamp: m.timestamp,
|
|
244
|
+
message: m.message,
|
|
245
|
+
source: `${m.location?.url || ''}:${m.location?.lineNumber || 0}:${m.location?.columnNumber || 0}`,
|
|
246
|
+
})),
|
|
247
|
+
};
|
|
248
|
+
if (consoleErrors.errors.length > 0 || consoleErrors.warnings.length > 0) {
|
|
249
|
+
await this.writeJson(path.join(pageDir, 'console_errors.json'), consoleErrors);
|
|
250
|
+
}
|
|
251
|
+
// ── Screenshot paths (written into metadata after screenshot capture) ──
|
|
252
|
+
if (snapshot.screenshotPaths.length > 0) {
|
|
253
|
+
await this.writeJson(path.join(pageDir, 'screenshot_manifest.json'), {
|
|
254
|
+
capturedAt: new Date().toISOString(),
|
|
255
|
+
paths: snapshot.screenshotPaths,
|
|
256
|
+
count: snapshot.screenshotPaths.length,
|
|
257
|
+
});
|
|
258
|
+
}
|
|
259
|
+
// ── Network events (save summary) ──
|
|
260
|
+
if (snapshot.networkEvents.length > 0) {
|
|
261
|
+
const networkDir = path.join(this.outputDir, domainName, 'network');
|
|
262
|
+
await fs.ensureDir(networkDir);
|
|
263
|
+
const trafficLog = path.join(networkDir, 'traffic_log.jsonl');
|
|
264
|
+
const lines = snapshot.networkEvents.map(e => JSON.stringify(e)).join('\n') + '\n';
|
|
265
|
+
await fs.appendFile(trafficLog, lines);
|
|
266
|
+
}
|
|
267
|
+
logger_1.logger.info(`Saved page snapshot: ${pageName}`);
|
|
268
|
+
}
|
|
269
|
+
async saveFrames(snapshot, pageDir) {
|
|
234
270
|
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
235
|
-
// Save serialized frame contents as separate files where present
|
|
236
271
|
const framesDir = path.join(pageDir, 'frames');
|
|
237
272
|
await fs.ensureDir(framesDir);
|
|
238
273
|
let frameCounter = 0;
|
|
239
274
|
const writeFrame = async (frame) => {
|
|
240
275
|
const id = `frame_${String(frameCounter++).padStart(3, '0')}`;
|
|
241
|
-
if (frame
|
|
276
|
+
if (frame?.content) {
|
|
242
277
|
const filename = path.join(framesDir, `${id}_${(frame.name || 'main').replace(/[^a-z0-9\-_.]/gi, '_')}.html`);
|
|
243
278
|
await fs.writeFile(filename, frame.content);
|
|
244
|
-
// Replace content with relative path reference to keep JSON small
|
|
245
279
|
frame.contentFile = path.relative(pageDir, filename).replace(/\\/g, '/');
|
|
246
280
|
}
|
|
247
|
-
if (frame
|
|
248
|
-
for (const child of frame.children)
|
|
281
|
+
if (frame?.children?.length) {
|
|
282
|
+
for (const child of frame.children)
|
|
249
283
|
await writeFrame(child);
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
};
|
|
253
|
-
try {
|
|
254
|
-
if (snapshot.frames) {
|
|
255
|
-
await writeFrame(snapshot.frames);
|
|
256
|
-
// Rewrite frames.json to include contentFile references
|
|
257
|
-
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
258
284
|
}
|
|
259
|
-
}
|
|
260
|
-
catch (err) {
|
|
261
|
-
logger_1.logger.warn('Failed to write frame contents: ' + err.message);
|
|
262
|
-
}
|
|
263
|
-
// Save console errors and warnings (separate file)
|
|
264
|
-
const consoleErrors = {
|
|
265
|
-
errors: snapshot.consoleMessages
|
|
266
|
-
.filter((msg) => msg.type === 'error')
|
|
267
|
-
.map((msg) => ({
|
|
268
|
-
timestamp: msg.timestamp,
|
|
269
|
-
message: msg.message,
|
|
270
|
-
source: `${msg.location?.url || ''}:${msg.location?.lineNumber || 0}:${msg.location?.columnNumber || 0}`,
|
|
271
|
-
stack: msg.stack,
|
|
272
|
-
})),
|
|
273
|
-
warnings: snapshot.consoleMessages
|
|
274
|
-
.filter((msg) => msg.type === 'warn')
|
|
275
|
-
.map((msg) => ({
|
|
276
|
-
timestamp: msg.timestamp,
|
|
277
|
-
message: msg.message,
|
|
278
|
-
source: `${msg.location?.url || ''}:${msg.location?.lineNumber || 0}:${msg.location?.columnNumber || 0}`,
|
|
279
|
-
})),
|
|
280
285
|
};
|
|
281
|
-
if (
|
|
282
|
-
await
|
|
286
|
+
if (snapshot.frames) {
|
|
287
|
+
await writeFrame(snapshot.frames).catch(err => logger_1.logger.warn('Failed to write frame contents: ' + err.message));
|
|
288
|
+
await this.writeJson(path.join(pageDir, 'frames.json'), snapshot.frames);
|
|
283
289
|
}
|
|
284
|
-
logger_1.logger.info(`Saved page snapshot: ${pageName}`);
|
|
285
290
|
}
|
|
286
|
-
|
|
287
|
-
* Save the components registry and update manifest reference
|
|
288
|
-
*/
|
|
291
|
+
// ── Components registry ────────────────────────────────────────────────────────
|
|
289
292
|
async saveComponentsRegistry(registry, domainName) {
|
|
290
293
|
const registryPath = path.join(this.outputDir, domainName, 'components_registry.json');
|
|
291
294
|
await fs.ensureDir(path.dirname(registryPath));
|
|
292
295
|
await this.writeJson(registryPath, registry);
|
|
293
296
|
logger_1.logger.info(`Saved components registry to ${registryPath}`);
|
|
294
297
|
}
|
|
295
|
-
/**
|
|
296
|
-
* Update manifest with components registry reference
|
|
297
|
-
*/
|
|
298
298
|
async updateManifestWithComponents(domainName, totalComponents) {
|
|
299
299
|
const manifestPath = path.join(this.outputDir, 'global_manifest.json');
|
|
300
|
-
if (await fs.pathExists(manifestPath))
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
300
|
+
if (!(await fs.pathExists(manifestPath)))
|
|
301
|
+
return;
|
|
302
|
+
const manifest = await fs.readJson(manifestPath);
|
|
303
|
+
manifest.componentsRegistry = {
|
|
304
|
+
path: `${domainName}/components_registry.json`,
|
|
305
|
+
totalComponents,
|
|
306
|
+
lastUpdated: new Date().toISOString(),
|
|
307
|
+
};
|
|
308
|
+
manifest.statistics.totalComponents = totalComponents;
|
|
309
|
+
manifest.lastUpdated = new Date().toISOString();
|
|
310
|
+
await this.writeJson(manifestPath, manifest);
|
|
311
311
|
}
|
|
312
|
-
|
|
312
|
+
// ── Global manifest ────────────────────────────────────────────────────────────
|
|
313
|
+
async updateGlobalManifest(entry, extras = {}) {
|
|
313
314
|
const manifestPath = path.join(this.outputDir, 'global_manifest.json');
|
|
314
315
|
let manifest;
|
|
315
316
|
if (await fs.pathExists(manifestPath)) {
|
|
@@ -317,7 +318,7 @@ class StorageEngine {
|
|
|
317
318
|
}
|
|
318
319
|
else {
|
|
319
320
|
manifest = {
|
|
320
|
-
version:
|
|
321
|
+
version: (0, version_1.getVersion)(),
|
|
321
322
|
createdAt: new Date().toISOString(),
|
|
322
323
|
lastUpdated: new Date().toISOString(),
|
|
323
324
|
sessions: [],
|
|
@@ -328,11 +329,11 @@ class StorageEngine {
|
|
|
328
329
|
totalPages: 0,
|
|
329
330
|
totalNetworkRequests: 0,
|
|
330
331
|
totalScreenshots: 0,
|
|
331
|
-
storageSize: '0
|
|
332
|
+
storageSize: '0 KB',
|
|
332
333
|
},
|
|
333
334
|
};
|
|
334
335
|
}
|
|
335
|
-
// Update domains
|
|
336
|
+
// ── Update domains ──
|
|
336
337
|
let domainEntry = manifest.domains.find(d => d.domain === entry.domain);
|
|
337
338
|
if (!domainEntry) {
|
|
338
339
|
domainEntry = {
|
|
@@ -347,82 +348,119 @@ class StorageEngine {
|
|
|
347
348
|
domainEntry.lastVisited = entry.timestamp;
|
|
348
349
|
domainEntry.totalVisits++;
|
|
349
350
|
domainEntry.pages.push(entry);
|
|
350
|
-
// Update statistics
|
|
351
|
+
// ── Update statistics ──
|
|
352
|
+
manifest.statistics.totalDomains = manifest.domains.length;
|
|
351
353
|
manifest.statistics.totalPages = manifest.domains.reduce((sum, d) => sum + d.pages.length, 0);
|
|
354
|
+
manifest.statistics.totalNetworkRequests =
|
|
355
|
+
(manifest.statistics.totalNetworkRequests || 0) + (extras.networkRequests ?? 0);
|
|
356
|
+
manifest.statistics.totalScreenshots =
|
|
357
|
+
(manifest.statistics.totalScreenshots || 0) + (extras.screenshots ?? 0);
|
|
358
|
+
// Compute actual storage size (non-blocking — best effort)
|
|
359
|
+
try {
|
|
360
|
+
const bytes = await this.computeDirectorySizeBytes(this.outputDir);
|
|
361
|
+
manifest.statistics.storageSize = this.formatBytes(bytes);
|
|
362
|
+
}
|
|
363
|
+
catch {
|
|
364
|
+
// Leave existing value if directory walk fails
|
|
365
|
+
}
|
|
352
366
|
manifest.lastUpdated = new Date().toISOString();
|
|
353
367
|
await this.writeJson(manifestPath, manifest);
|
|
354
368
|
}
|
|
355
|
-
|
|
356
|
-
const jsonString = this.prettyJson
|
|
357
|
-
? JSON.stringify(data, null, 2)
|
|
358
|
-
: JSON.stringify(data);
|
|
359
|
-
await fs.writeFile(filePath, jsonString);
|
|
360
|
-
}
|
|
361
|
-
/**
|
|
362
|
-
* Beautify HTML by adding proper indentation and line breaks
|
|
363
|
-
* This makes the DOM more readable for RAG/context analysis
|
|
364
|
-
* Properly handles self-closing tags and preserves HTML structure
|
|
365
|
-
*/
|
|
369
|
+
// ── User interactions ──────────────────────────────────────────────────────────
|
|
366
370
|
async saveUserInteractions(url, interactions) {
|
|
367
371
|
try {
|
|
368
372
|
const urlObj = new URL(url);
|
|
369
|
-
const
|
|
370
|
-
// Derive logical domain name
|
|
371
|
-
const domainName = (() => {
|
|
372
|
-
const parts = domain.split('.');
|
|
373
|
-
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
374
|
-
if (filtered.length >= 2) {
|
|
375
|
-
return filtered[filtered.length - 2];
|
|
376
|
-
}
|
|
377
|
-
return filtered[0] || parts[0];
|
|
378
|
-
})();
|
|
373
|
+
const domainName = this.extractDomainName(urlObj.hostname);
|
|
379
374
|
const domainDir = path.join(this.outputDir, domainName);
|
|
380
375
|
const interactionsFile = path.join(domainDir, 'user_interactions.json');
|
|
381
376
|
await fs.ensureDir(domainDir);
|
|
382
|
-
|
|
383
|
-
let existingInteractions = [];
|
|
377
|
+
let existing = [];
|
|
384
378
|
if (await fs.pathExists(interactionsFile)) {
|
|
385
|
-
|
|
379
|
+
existing = await fs.readJson(interactionsFile);
|
|
386
380
|
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
...interaction,
|
|
381
|
+
const stamped = interactions.map(i => ({
|
|
382
|
+
...i,
|
|
390
383
|
pageUrl: url,
|
|
391
|
-
recordedAt: new Date().toISOString()
|
|
384
|
+
recordedAt: new Date().toISOString(),
|
|
392
385
|
}));
|
|
393
|
-
|
|
394
|
-
const allInteractions = [...existingInteractions, ...interactionsWithUrl];
|
|
395
|
-
// Save all interactions
|
|
396
|
-
await this.writeJson(interactionsFile, allInteractions);
|
|
386
|
+
await this.writeJson(interactionsFile, [...existing, ...stamped]);
|
|
397
387
|
logger_1.logger.info(`Saved ${interactions.length} user interactions to ${interactionsFile}`);
|
|
398
388
|
}
|
|
399
389
|
catch (error) {
|
|
400
390
|
logger_1.logger.error(`Failed to save user interactions: ${error.message}`);
|
|
401
391
|
}
|
|
402
392
|
}
|
|
393
|
+
// ── Private helpers ────────────────────────────────────────────────────────────
|
|
394
|
+
extractDomainName(domain) {
|
|
395
|
+
const parts = domain.split('.');
|
|
396
|
+
const filtered = parts.filter(p => p.toLowerCase() !== 'www');
|
|
397
|
+
if (filtered.length >= 2)
|
|
398
|
+
return filtered[filtered.length - 2];
|
|
399
|
+
return filtered[0] || parts[0];
|
|
400
|
+
}
|
|
401
|
+
async writeJson(filePath, data) {
|
|
402
|
+
const jsonString = this.prettyJson
|
|
403
|
+
? JSON.stringify(data, null, 2)
|
|
404
|
+
: JSON.stringify(data);
|
|
405
|
+
await fs.writeFile(filePath, jsonString);
|
|
406
|
+
}
|
|
407
|
+
/**
|
|
408
|
+
* Recursively walk a directory and sum all file sizes in bytes.
|
|
409
|
+
* Returns 0 if the directory does not exist.
|
|
410
|
+
*/
|
|
411
|
+
async computeDirectorySizeBytes(dir) {
|
|
412
|
+
if (!(await fs.pathExists(dir)))
|
|
413
|
+
return 0;
|
|
414
|
+
let total = 0;
|
|
415
|
+
const entries = await fs.readdir(dir, { withFileTypes: true });
|
|
416
|
+
for (const entry of entries) {
|
|
417
|
+
const full = path.join(dir, entry.name);
|
|
418
|
+
if (entry.isDirectory()) {
|
|
419
|
+
total += await this.computeDirectorySizeBytes(full);
|
|
420
|
+
}
|
|
421
|
+
else if (entry.isFile()) {
|
|
422
|
+
try {
|
|
423
|
+
const stat = await fs.stat(full);
|
|
424
|
+
total += stat.size;
|
|
425
|
+
}
|
|
426
|
+
catch {
|
|
427
|
+
// File may have been removed during iteration
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
return total;
|
|
432
|
+
}
|
|
433
|
+
formatBytes(bytes) {
|
|
434
|
+
if (bytes < 1024)
|
|
435
|
+
return `${bytes} B`;
|
|
436
|
+
if (bytes < 1024 * 1024)
|
|
437
|
+
return `${(bytes / 1024).toFixed(1)} KB`;
|
|
438
|
+
if (bytes < 1024 * 1024 * 1024)
|
|
439
|
+
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
440
|
+
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Beautify HTML by adding proper indentation and line breaks.
|
|
444
|
+
* Self-closing / void elements are handled correctly so the indent
|
|
445
|
+
* level is never corrupted by unclosed tags.
|
|
446
|
+
*/
|
|
403
447
|
beautifyHTML(html) {
|
|
404
448
|
try {
|
|
405
|
-
if (!html
|
|
449
|
+
if (!html?.trim())
|
|
406
450
|
return html;
|
|
407
|
-
}
|
|
408
|
-
// Use a proper HTML parser approach - format with indentation
|
|
409
|
-
// Split by tags while preserving them
|
|
410
451
|
const parts = [];
|
|
411
452
|
const tagRegex = /(<[^>]+>)/g;
|
|
412
453
|
let lastIndex = 0;
|
|
413
454
|
let match;
|
|
414
455
|
while ((match = tagRegex.exec(html)) !== null) {
|
|
415
|
-
// Add text before tag
|
|
416
456
|
if (match.index > lastIndex) {
|
|
417
457
|
const text = html.substring(lastIndex, match.index).trim();
|
|
418
458
|
if (text)
|
|
419
459
|
parts.push(text);
|
|
420
460
|
}
|
|
421
|
-
// Add tag
|
|
422
461
|
parts.push(match[0]);
|
|
423
462
|
lastIndex = match.index + match[0].length;
|
|
424
463
|
}
|
|
425
|
-
// Add remaining text
|
|
426
464
|
if (lastIndex < html.length) {
|
|
427
465
|
const text = html.substring(lastIndex).trim();
|
|
428
466
|
if (text)
|
|
@@ -431,45 +469,41 @@ class StorageEngine {
|
|
|
431
469
|
const indentSize = 2;
|
|
432
470
|
let indent = 0;
|
|
433
471
|
const result = [];
|
|
434
|
-
const voidElements = new Set([
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
472
|
+
const voidElements = new Set([
|
|
473
|
+
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
|
|
474
|
+
'link', 'meta', 'param', 'source', 'track', 'wbr', 'noscript',
|
|
475
|
+
]);
|
|
476
|
+
for (const part of parts) {
|
|
477
|
+
const p = part.trim();
|
|
478
|
+
if (!p)
|
|
438
479
|
continue;
|
|
439
|
-
if (
|
|
440
|
-
result.push(
|
|
480
|
+
if (p.startsWith('<!DOCTYPE')) {
|
|
481
|
+
result.push(p);
|
|
441
482
|
}
|
|
442
|
-
else if (
|
|
443
|
-
result.push(' '.repeat(indent) +
|
|
483
|
+
else if (p.startsWith('<!--')) {
|
|
484
|
+
result.push(' '.repeat(indent) + p);
|
|
444
485
|
}
|
|
445
|
-
else if (
|
|
446
|
-
// Closing tag
|
|
486
|
+
else if (p.startsWith('</')) {
|
|
447
487
|
indent = Math.max(0, indent - indentSize);
|
|
448
|
-
result.push(' '.repeat(indent) +
|
|
488
|
+
result.push(' '.repeat(indent) + p);
|
|
449
489
|
}
|
|
450
|
-
else if (
|
|
451
|
-
|
|
452
|
-
const
|
|
453
|
-
|
|
454
|
-
result.push(' '.repeat(indent) + part);
|
|
490
|
+
else if (p.startsWith('<')) {
|
|
491
|
+
const tagName = p.match(/^<(\w+)/)?.[1]?.toLowerCase();
|
|
492
|
+
const isSelfClosing = p.endsWith('/>') || (tagName && voidElements.has(tagName));
|
|
493
|
+
result.push(' '.repeat(indent) + p);
|
|
455
494
|
if (!isSelfClosing && tagName !== 'script' && tagName !== 'style') {
|
|
456
495
|
indent += indentSize;
|
|
457
496
|
}
|
|
458
497
|
}
|
|
459
498
|
else {
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
for (const line of lines) {
|
|
463
|
-
if (line.trim()) {
|
|
464
|
-
result.push(' '.repeat(indent) + line.trim());
|
|
465
|
-
}
|
|
499
|
+
for (const line of p.split(/\n/).filter(l => l.trim())) {
|
|
500
|
+
result.push(' '.repeat(indent) + line.trim());
|
|
466
501
|
}
|
|
467
502
|
}
|
|
468
503
|
}
|
|
469
504
|
return result.join('\n');
|
|
470
505
|
}
|
|
471
506
|
catch (error) {
|
|
472
|
-
// If beautification fails, return original
|
|
473
507
|
logger_1.logger.warn('HTML beautification failed, using original: ' + error.message);
|
|
474
508
|
return html;
|
|
475
509
|
}
|