@xelth/eck-snapshot 6.6.0 → 6.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/setup.json +34 -0
- package/src/cli/cli.js +4 -4
- package/src/cli/commands/createSnapshot.js +11 -7
- package/src/cli/commands/recon.js +26 -17
- package/src/cli/commands/updateSnapshot.js +5 -5
- package/src/utils/fileUtils.js +99 -2
- package/src/utils/projectDetector.js +60 -0
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# 📸 eckSnapshot v6.
|
|
1
|
+
# 📸 eckSnapshot v6.7.0 (AI-Native Edition)
|
|
2
2
|
|
|
3
3
|
A specialized, AI-native CLI tool that creates single-file text snapshots of entire Git repositories and feeds them directly into LLM context windows. Instead of letting AI agents guess which files to read, eckSnapshot force-feeds the complete project into the model's context — giving it a "university degree" in your codebase from the very first prompt.
|
|
4
4
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xelth/eck-snapshot",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.7.0",
|
|
4
4
|
"description": "A powerful CLI tool to create and restore single-file text snapshots of Git repositories. Optimized for AI context, LLM workflows, and multi-agent Swarm coordination.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
package/setup.json
CHANGED
|
@@ -164,6 +164,19 @@
|
|
|
164
164
|
],
|
|
165
165
|
"priority": 7
|
|
166
166
|
},
|
|
167
|
+
"esp-idf": {
|
|
168
|
+
"files": [
|
|
169
|
+
"sdkconfig.defaults",
|
|
170
|
+
"partitions.csv"
|
|
171
|
+
],
|
|
172
|
+
"directories": [
|
|
173
|
+
"managed_components"
|
|
174
|
+
],
|
|
175
|
+
"manifestFiles": [
|
|
176
|
+
"idf_component.yml"
|
|
177
|
+
],
|
|
178
|
+
"priority": 8
|
|
179
|
+
},
|
|
167
180
|
"c": {
|
|
168
181
|
"files": [
|
|
169
182
|
"Makefile",
|
|
@@ -518,6 +531,27 @@
|
|
|
518
531
|
"packages/"
|
|
519
532
|
]
|
|
520
533
|
},
|
|
534
|
+
"esp-idf": {
|
|
535
|
+
"filesToIgnore": [
|
|
536
|
+
"sdkconfig.old",
|
|
537
|
+
"dependencies.lock"
|
|
538
|
+
],
|
|
539
|
+
"dirsToIgnore": [
|
|
540
|
+
"managed_components/",
|
|
541
|
+
"build/",
|
|
542
|
+
"managed_components/*/examples/",
|
|
543
|
+
"managed_components/*/test/",
|
|
544
|
+
"managed_components/*/test_apps/",
|
|
545
|
+
"managed_components/*/docs/",
|
|
546
|
+
"managed_components/*/hw/"
|
|
547
|
+
],
|
|
548
|
+
"extensionsToIgnore": [
|
|
549
|
+
".bin",
|
|
550
|
+
".elf",
|
|
551
|
+
".map",
|
|
552
|
+
".flash"
|
|
553
|
+
]
|
|
554
|
+
},
|
|
521
555
|
"c": {
|
|
522
556
|
"filesToIgnore": [
|
|
523
557
|
"*.o",
|
package/src/cli/cli.js
CHANGED
|
@@ -49,17 +49,17 @@ const LEGACY_COMMANDS = {
|
|
|
49
49
|
const base = baseIdx !== -1 && args[baseIdx + 1] ? args[baseIdx + 1] : undefined;
|
|
50
50
|
return { name: 'eck_update_auto', arguments: { fail: args.includes('--fail') || args.includes('-f'), base } };
|
|
51
51
|
},
|
|
52
|
-
'snapshot': () => ({ name: 'eck_snapshot', arguments: {} }),
|
|
52
|
+
'snapshot': (args) => ({ name: 'eck_snapshot', arguments: { ml: args.includes('--ml') } }),
|
|
53
53
|
'update': (args) => {
|
|
54
54
|
const baseIdx = args.indexOf('--base');
|
|
55
55
|
const base = baseIdx !== -1 && args[baseIdx + 1] ? args[baseIdx + 1] : undefined;
|
|
56
|
-
return { name: 'eck_update', arguments: { fail: args.includes('--fail') || args.includes('-f'), base } };
|
|
56
|
+
return { name: 'eck_update', arguments: { fail: args.includes('--fail') || args.includes('-f'), base, ml: args.includes('--ml') } };
|
|
57
57
|
},
|
|
58
58
|
'setup-mcp': (args) => ({ name: 'eck_setup_mcp', arguments: { opencode: args.includes('--opencode'), both: args.includes('--both') } }),
|
|
59
59
|
'detect': () => ({ name: 'eck_detect', arguments: {} }),
|
|
60
60
|
'doctor': () => ({ name: 'eck_doctor', arguments: {} }),
|
|
61
|
-
'scout': (args) => ({ name: 'eck_scout', arguments: { depth: args[0] !== undefined ? parseInt(args[0], 10) : 0 } }),
|
|
62
|
-
'fetch': (args) => ({ name: 'eck_fetch', arguments: { patterns: args } }),
|
|
61
|
+
'scout': (args) => ({ name: 'eck_scout', arguments: { depth: args[0] !== undefined ? parseInt(args[0], 10) : 0, ml: args.includes('--ml') } }),
|
|
62
|
+
'fetch': (args) => ({ name: 'eck_fetch', arguments: { patterns: args.filter(a => a !== '--ml'), ml: args.includes('--ml') } }),
|
|
63
63
|
'link': (args) => ({ name: 'eck_snapshot', arguments: { isLinkedProject: true, linkDepth: args[0] !== undefined ? parseInt(args[0], 10) : 0 } }),
|
|
64
64
|
'profile': (args) => args[0] ? ({ name: 'eck_snapshot', arguments: { profile: args.join(',') } }) : ({ name: 'eck_snapshot', arguments: { profile: true } }),
|
|
65
65
|
'booklm': () => ({ name: 'eck_snapshot', arguments: { notebooklm: 'scout' } }),
|
|
@@ -3,7 +3,6 @@ import path from 'path';
|
|
|
3
3
|
import { execa } from 'execa';
|
|
4
4
|
import pLimit from 'p-limit';
|
|
5
5
|
import { SingleBar, Presets } from 'cli-progress';
|
|
6
|
-
import isBinaryPath from 'is-binary-path';
|
|
7
6
|
import zlib from 'zlib';
|
|
8
7
|
import { promisify } from 'util';
|
|
9
8
|
import ora from 'ora';
|
|
@@ -15,7 +14,8 @@ import {
|
|
|
15
14
|
scanDirectoryRecursively, loadGitignore, readFileWithSizeCheck,
|
|
16
15
|
generateDirectoryTree, loadConfig, displayProjectInfo, loadProjectEckManifest,
|
|
17
16
|
ensureSnapshotsInGitignore, initializeEckManifest, generateTimestamp,
|
|
18
|
-
getShortRepoName, SecretScanner, getProjectFiles, readMlModelMetadata
|
|
17
|
+
getShortRepoName, SecretScanner, getProjectFiles, readMlModelMetadata,
|
|
18
|
+
isBinaryFile
|
|
19
19
|
} from '../../utils/fileUtils.js';
|
|
20
20
|
import { detectProjectType, getProjectSpecificFiltering, getAllDetectedTypes } from '../../utils/projectDetector.js';
|
|
21
21
|
import { estimateTokensWithPolynomial, generateTrainingCommand } from '../../utils/tokenEstimator.js';
|
|
@@ -252,7 +252,7 @@ async function estimateProjectTokens(projectPath, config, projectTypes = null) {
|
|
|
252
252
|
continue;
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
if (
|
|
255
|
+
if (await isBinaryFile(path.join(projectPath, file))) {
|
|
256
256
|
continue;
|
|
257
257
|
}
|
|
258
258
|
|
|
@@ -396,10 +396,14 @@ async function processProjectFiles(repoPath, options, config, projectTypes = nul
|
|
|
396
396
|
|
|
397
397
|
const mlExt = path.extname(filePath).toLowerCase();
|
|
398
398
|
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
//
|
|
402
|
-
|
|
399
|
+
// ML peek is opt-in via `arguments.ml: true`. Default: ML files treated as plain binaries.
|
|
400
|
+
// This prevents false-positives where `.bin` raw dumps (mitm captures, sniffer output)
|
|
401
|
+
// get included via readMlModelMetadata when no real model is present.
|
|
402
|
+
const isMlModel = !!options?.ml && ML_EXTENSIONS.includes(mlExt);
|
|
403
|
+
|
|
404
|
+
// Content-aware binary check (catches extensionless ELFs, SQLite DBs, archives).
|
|
405
|
+
// ML models bypass to allow header metadata extraction below.
|
|
406
|
+
if (!isMlModel && await isBinaryFile(path.join(repoPath, filePath))) {
|
|
403
407
|
stats.binaryFiles++;
|
|
404
408
|
trackSkippedFile(normalizedPath, 'Binary files');
|
|
405
409
|
return null;
|
|
@@ -2,7 +2,6 @@ import fs from 'fs/promises';
|
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import chalk from 'chalk';
|
|
4
4
|
import micromatch from 'micromatch';
|
|
5
|
-
import isBinaryPath from 'is-binary-path';
|
|
6
5
|
import {
|
|
7
6
|
generateDirectoryTree,
|
|
8
7
|
generateTimestamp,
|
|
@@ -12,7 +11,8 @@ import {
|
|
|
12
11
|
getProjectFiles,
|
|
13
12
|
matchesPattern,
|
|
14
13
|
ensureSnapshotsInGitignore,
|
|
15
|
-
readMlModelMetadata
|
|
14
|
+
readMlModelMetadata,
|
|
15
|
+
isBinaryFile
|
|
16
16
|
} from '../../utils/fileUtils.js';
|
|
17
17
|
import { detectProjectType, getProjectSpecificFiltering, getAllDetectedTypes } from '../../utils/projectDetector.js';
|
|
18
18
|
import { loadSetupConfig } from '../../config.js';
|
|
@@ -22,20 +22,21 @@ import { skeletonize } from '../../core/skeletonizer.js';
|
|
|
22
22
|
export async function runReconTool(payload) {
|
|
23
23
|
const toolName = payload.name;
|
|
24
24
|
const args = payload.arguments || {};
|
|
25
|
+
const ml = !!args.ml;
|
|
25
26
|
|
|
26
27
|
if (toolName === 'eck_scout') {
|
|
27
28
|
const depth = args.depth !== undefined ? parseInt(args.depth, 10) : 0;
|
|
28
|
-
await runScout(depth);
|
|
29
|
+
await runScout(depth, { ml });
|
|
29
30
|
} else if (toolName === 'eck_fetch') {
|
|
30
31
|
if (!args.patterns || !Array.isArray(args.patterns)) {
|
|
31
32
|
console.log(chalk.red('❌ Error: eck_fetch requires an array of "patterns" in arguments.'));
|
|
32
33
|
return;
|
|
33
34
|
}
|
|
34
|
-
await runFetch(args.patterns);
|
|
35
|
+
await runFetch(args.patterns, { ml });
|
|
35
36
|
}
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
async function runScout(depth = 0) {
|
|
39
|
+
async function runScout(depth = 0, opts = {}) {
|
|
39
40
|
const depthCfg = getDepthConfig(depth);
|
|
40
41
|
const depthInfo = DEPTH_SCALE[depth] || DEPTH_SCALE[0];
|
|
41
42
|
console.log(chalk.blue(`🕵️ Scouting repository (depth ${depth}: ${depthInfo.mode})...`));
|
|
@@ -65,16 +66,21 @@ async function runScout(depth = 0) {
|
|
|
65
66
|
let allFiles = await getProjectFiles(repoPath, config);
|
|
66
67
|
const gitignore = await loadGitignore(repoPath);
|
|
67
68
|
|
|
68
|
-
// Filter binaries, gitignore/eckignore, and file-level ignores
|
|
69
|
-
|
|
69
|
+
// Filter binaries, gitignore/eckignore, and file-level ignores.
|
|
70
|
+
// Binary check is content-aware (magic-bytes) — needed for extensionless firmware/DB files.
|
|
71
|
+
// ML peek is opt-in (opts.ml === true) — otherwise ML extensions go through normal binary skip.
|
|
72
|
+
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
73
|
+
const mlPeek = !!opts.ml;
|
|
74
|
+
const keepFlags = await Promise.all(allFiles.map(async (f) => {
|
|
70
75
|
const normalized = f.replace(/\\/g, '/');
|
|
71
76
|
const mlExt = path.extname(f).toLowerCase();
|
|
72
|
-
const
|
|
73
|
-
if (isBinaryPath(f) && !ML_EXTENSIONS.includes(mlExt)) return false;
|
|
77
|
+
const isMlModel = mlPeek && ML_EXTENSIONS.includes(mlExt);
|
|
74
78
|
if (gitignore.ignores(normalized)) return false;
|
|
75
79
|
if (config.filesToIgnore && matchesPattern(normalized, config.filesToIgnore)) return false;
|
|
80
|
+
if (!isMlModel && await isBinaryFile(path.join(repoPath, f))) return false;
|
|
76
81
|
return true;
|
|
77
|
-
});
|
|
82
|
+
}));
|
|
83
|
+
allFiles = allFiles.filter((_, i) => keepFlags[i]);
|
|
78
84
|
|
|
79
85
|
const directoryTree = await generateDirectoryTree(repoPath, '', allFiles, 0, config.maxDepth, config);
|
|
80
86
|
|
|
@@ -91,7 +97,7 @@ async function runScout(depth = 0) {
|
|
|
91
97
|
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
92
98
|
|
|
93
99
|
let content;
|
|
94
|
-
if (ML_EXTENSIONS.includes(mlExt)) {
|
|
100
|
+
if (mlPeek && ML_EXTENSIONS.includes(mlExt)) {
|
|
95
101
|
content = await readMlModelMetadata(fullPath);
|
|
96
102
|
} else {
|
|
97
103
|
content = await readFileWithSizeCheck(fullPath, maxFileSize);
|
|
@@ -188,7 +194,7 @@ ${directoryTree}
|
|
|
188
194
|
}
|
|
189
195
|
}
|
|
190
196
|
|
|
191
|
-
async function runFetch(patterns) {
|
|
197
|
+
async function runFetch(patterns, opts = {}) {
|
|
192
198
|
console.log(chalk.blue(`🚚 Fetching files matching patterns: ${patterns.join(', ')}...`));
|
|
193
199
|
try {
|
|
194
200
|
const repoPath = process.cwd();
|
|
@@ -213,15 +219,18 @@ async function runFetch(patterns) {
|
|
|
213
219
|
let allFiles = await getProjectFiles(repoPath, config);
|
|
214
220
|
const gitignore = await loadGitignore(repoPath);
|
|
215
221
|
|
|
216
|
-
|
|
222
|
+
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
223
|
+
const mlPeek = !!opts.ml;
|
|
224
|
+
const keepFlags = await Promise.all(allFiles.map(async (f) => {
|
|
217
225
|
const normalized = f.replace(/\\/g, '/');
|
|
218
226
|
const mlExt = path.extname(f).toLowerCase();
|
|
219
|
-
const
|
|
220
|
-
if (isBinaryPath(f) && !ML_EXTENSIONS.includes(mlExt)) return false;
|
|
227
|
+
const isMlModel = mlPeek && ML_EXTENSIONS.includes(mlExt);
|
|
221
228
|
if (gitignore.ignores(normalized)) return false;
|
|
222
229
|
if (config.filesToIgnore && matchesPattern(normalized, config.filesToIgnore)) return false;
|
|
230
|
+
if (!isMlModel && await isBinaryFile(path.join(repoPath, f))) return false;
|
|
223
231
|
return true;
|
|
224
|
-
});
|
|
232
|
+
}));
|
|
233
|
+
allFiles = allFiles.filter((_, i) => keepFlags[i]);
|
|
225
234
|
|
|
226
235
|
// Normalize patterns: strip absolute cwd prefix, convert backslashes,
|
|
227
236
|
// and auto-wrap bare filenames with **/ for convenience
|
|
@@ -262,7 +271,7 @@ async function runFetch(patterns) {
|
|
|
262
271
|
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
263
272
|
|
|
264
273
|
let content;
|
|
265
|
-
if (ML_EXTENSIONS.includes(mlExt)) {
|
|
274
|
+
if (mlPeek && ML_EXTENSIONS.includes(mlExt)) {
|
|
266
275
|
content = await readMlModelMetadata(fullPath);
|
|
267
276
|
} else {
|
|
268
277
|
content = await readFileWithSizeCheck(fullPath, maxFileSize);
|
|
@@ -2,10 +2,9 @@ import fs from 'fs/promises';
|
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import ora from 'ora';
|
|
4
4
|
import chalk from 'chalk';
|
|
5
|
-
import isBinaryPath from 'is-binary-path';
|
|
6
5
|
import { getGitAnchor, getChangedFiles } from '../../utils/gitUtils.js';
|
|
7
6
|
import { loadSetupConfig } from '../../config.js';
|
|
8
|
-
import { readFileWithSizeCheck, parseSize, formatSize, matchesPattern, loadGitignore, generateTimestamp, getShortRepoName, ensureSnapshotsInGitignore, readMlModelMetadata } from '../../utils/fileUtils.js';
|
|
7
|
+
import { readFileWithSizeCheck, parseSize, formatSize, matchesPattern, loadGitignore, generateTimestamp, getShortRepoName, ensureSnapshotsInGitignore, readMlModelMetadata, isBinaryFile } from '../../utils/fileUtils.js';
|
|
9
8
|
import { detectProjectType, getProjectSpecificFiltering } from '../../utils/projectDetector.js';
|
|
10
9
|
import { execa } from 'execa';
|
|
11
10
|
import { fileURLToPath } from 'url';
|
|
@@ -105,10 +104,11 @@ async function generateSnapshotContent(repoPath, changedFiles, anchor, config, g
|
|
|
105
104
|
|
|
106
105
|
const mlExt = path.extname(filePath).toLowerCase();
|
|
107
106
|
const ML_EXTENSIONS = ['.safetensors', '.onnx', '.pt', '.pth', '.h5', '.pb', '.bin', '.ckpt', '.gguf'];
|
|
108
|
-
|
|
107
|
+
// ML peek is opt-in (`arguments.ml: true` flows into config via options spread upstream).
|
|
108
|
+
const isMlModel = !!config?.ml && ML_EXTENSIONS.includes(mlExt);
|
|
109
109
|
|
|
110
|
-
// Skip binary files — mirrors createSnapshot.js
|
|
111
|
-
if (
|
|
110
|
+
// Skip binary files — mirrors createSnapshot.js (content-aware: catches extensionless ELFs/DBs)
|
|
111
|
+
if (!isMlModel && await isBinaryFile(path.join(repoPath, filePath))) continue;
|
|
112
112
|
|
|
113
113
|
const pathParts = normalizedPath.split('/');
|
|
114
114
|
let isIgnoredDir = false;
|
package/src/utils/fileUtils.js
CHANGED
|
@@ -1,6 +1,84 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import { execa } from 'execa';
|
|
4
|
+
import isBinaryPath from 'is-binary-path';
|
|
5
|
+
|
|
6
|
+
// Magic-byte signatures for content-based binary detection.
|
|
7
|
+
// Used by isBinaryFile() when an extension-only check (is-binary-path) is inconclusive —
|
|
8
|
+
// catches extensionless binaries like ARM ELF firmware (`xlt_agent`), SQLite dumps without
|
|
9
|
+
// `.db`/`.sqlite` suffix (`inbody270DB`), and similar files common in embedded/firmware repos.
|
|
10
|
+
const BINARY_MAGIC_NUMBERS = [
|
|
11
|
+
Buffer.from([0x7F, 0x45, 0x4C, 0x46]), // ELF (Linux/Android executables, .so)
|
|
12
|
+
Buffer.from([0x4D, 0x5A]), // PE/EXE/DLL (Windows)
|
|
13
|
+
Buffer.from('SQLite format 3\0', 'binary'), // SQLite 3
|
|
14
|
+
Buffer.from([0xCA, 0xFE, 0xBA, 0xBE]), // Java .class / Mach-O fat
|
|
15
|
+
Buffer.from([0xFE, 0xED, 0xFA, 0xCE]), // Mach-O 32-bit
|
|
16
|
+
Buffer.from([0xFE, 0xED, 0xFA, 0xCF]), // Mach-O 64-bit
|
|
17
|
+
Buffer.from([0xCF, 0xFA, 0xED, 0xFE]), // Mach-O 64-bit LE
|
|
18
|
+
Buffer.from([0xCE, 0xFA, 0xED, 0xFE]), // Mach-O 32-bit LE
|
|
19
|
+
Buffer.from([0x00, 0x61, 0x73, 0x6D]), // WebAssembly
|
|
20
|
+
Buffer.from([0x50, 0x4B, 0x03, 0x04]), // ZIP / JAR / APK / docx / xlsx
|
|
21
|
+
Buffer.from([0x50, 0x4B, 0x05, 0x06]), // ZIP (empty)
|
|
22
|
+
Buffer.from([0x50, 0x4B, 0x07, 0x08]), // ZIP (spanned)
|
|
23
|
+
Buffer.from([0x1F, 0x8B]), // GZIP
|
|
24
|
+
Buffer.from([0x42, 0x5A, 0x68]), // BZIP2
|
|
25
|
+
Buffer.from([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00]), // XZ
|
|
26
|
+
Buffer.from([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C]), // 7-zip
|
|
27
|
+
Buffer.from([0x52, 0x61, 0x72, 0x21, 0x1A, 0x07]), // RAR
|
|
28
|
+
Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]), // MS Compound (old .doc/.xls/.ppt/.msi)
|
|
29
|
+
Buffer.from('%PDF-', 'binary'), // PDF
|
|
30
|
+
Buffer.from([0x89, 0x50, 0x4E, 0x47]), // PNG
|
|
31
|
+
Buffer.from([0xFF, 0xD8, 0xFF]), // JPEG
|
|
32
|
+
Buffer.from('GIF8', 'binary'), // GIF
|
|
33
|
+
Buffer.from('RIFF', 'binary'), // RIFF (AVI/WAV/WebP)
|
|
34
|
+
Buffer.from('OggS', 'binary'), // OGG
|
|
35
|
+
Buffer.from('fLaC', 'binary'), // FLAC
|
|
36
|
+
Buffer.from('ID3', 'binary'), // MP3 (with ID3)
|
|
37
|
+
];
|
|
38
|
+
|
|
39
|
+
/**
 * Decides whether a file should be treated as binary.
 *
 * Two tiers are tried in order:
 *   1. Extension lookup through `is-binary-path` — no I/O at all.
 *   2. Content sniffing — up to the first 8192 bytes are read and checked against
 *      `BINARY_MAGIC_NUMBERS`, then scanned for a NUL byte.
 *
 * An empty file counts as text. A file that cannot be opened or read also counts
 * as text, so that the caller's own read attempt is the one that reports the error.
 *
 * @param {string} absolutePath - Absolute path to the file
 * @returns {Promise<boolean>} True if the file is binary
 */
export async function isBinaryFile(absolutePath) {
  if (isBinaryPath(absolutePath)) {
    return true;
  }

  const SAMPLE_BYTES = 8192;
  let handle;
  try {
    handle = await fs.open(absolutePath, 'r');
    const scratch = Buffer.alloc(SAMPLE_BYTES);
    const { bytesRead } = await handle.read(scratch, 0, SAMPLE_BYTES, 0);
    if (bytesRead === 0) {
      // Nothing to sniff: an empty file is harmless to include as text.
      return false;
    }

    const head = scratch.subarray(0, bytesRead);

    // Known file signatures are compared against the very start of the file only.
    const startsWithKnownSignature = BINARY_MAGIC_NUMBERS.some(
      (signature) => head.length >= signature.length
        && head.subarray(0, signature.length).equals(signature)
    );
    if (startsWithKnownSignature) {
      return true;
    }

    // No recognised header: fall back to looking for a NUL byte anywhere in the sample.
    return head.includes(0);
  } catch {
    // Unreadable → report "not binary" and let downstream surface the real error.
    return false;
  } finally {
    if (handle) {
      await handle.close();
    }
  }
}
|
|
4
82
|
|
|
5
83
|
/**
|
|
6
84
|
* Safely extracts metadata headers from Large ML models without loading them into memory.
|
|
@@ -38,6 +116,24 @@ import { minimatch } from 'minimatch';
|
|
|
38
116
|
// Global hard-ignore patterns (shared between git-based and scan-based file collection)
|
|
39
117
|
const GLOBAL_HARD_IGNORE_DIRS = ['node_modules', '.git', '.idea', '.vscode', '.gradle', 'build', '__pycache__'];
|
|
40
118
|
const GLOBAL_HARD_IGNORE_FILES = ['package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'go.sum'];
|
|
119
|
+
// Glob-based hard-ignore — for patterns that exact-name matching can't express.
|
|
120
|
+
// Primary target: rotated logs (logrotate/journald) and core dumps.
|
|
121
|
+
// `*.log` is widely conventional to ignore; including it here hardens the default for projects
|
|
122
|
+
// without their own .eckignore. Caller can still opt-in to logs via explicit profile/include.
|
|
123
|
+
const GLOBAL_HARD_IGNORE_GLOBS = [
|
|
124
|
+
'*.log', // standard log files
|
|
125
|
+
'*.log.[0-9]*', // logrotate numeric suffix: app.log.0, app.log.1
|
|
126
|
+
'*.log.gz', // compressed rotated log
|
|
127
|
+
'*.log.*.gz', // app.log.0.gz
|
|
128
|
+
'*.log.bz2',
|
|
129
|
+
'*.log.xz',
|
|
130
|
+
'core.[0-9]*', // Linux core dumps: core.12345
|
|
131
|
+
'*.swp', '*.swo', // editor swap files
|
|
132
|
+
];
|
|
133
|
+
|
|
134
|
+
/**
 * Tests a file name against every pattern in GLOBAL_HARD_IGNORE_GLOBS.
 *
 * Matching is case-insensitive (`nocase`). `dot: true` is set because minimatch does
 * not let a leading `*` match a name that starts with a period unless asked to, and
 * editor swap files are normally hidden (e.g. `.main.c.swp`) — without it the
 * `*.swp` / `*.swo` / `*.log` patterns would silently skip dot-prefixed names.
 *
 * @param {string} fileName - File name to test (callers pass the base name, not a path)
 * @returns {boolean} True if the name matches at least one hard-ignore glob
 */
function matchesGlobalHardIgnoreGlob(fileName) {
  return GLOBAL_HARD_IGNORE_GLOBS.some(p => minimatch(fileName, p, { nocase: true, dot: true }));
}
|
|
41
137
|
|
|
42
138
|
/**
|
|
43
139
|
* Scanner for detecting and redacting secrets (API keys, tokens)
|
|
@@ -234,7 +330,7 @@ export async function scanDirectoryRecursively(dirPath, config, relativeTo = dir
|
|
|
234
330
|
// --- GLOBAL HARD IGNORES (Zero-Config Safety) ---
|
|
235
331
|
if (entry.isDirectory() && GLOBAL_HARD_IGNORE_DIRS.includes(entry.name)) {
|
|
236
332
|
continue;
|
|
237
|
-
} else if (!entry.isDirectory() && GLOBAL_HARD_IGNORE_FILES.includes(entry.name)) {
|
|
333
|
+
} else if (!entry.isDirectory() && (GLOBAL_HARD_IGNORE_FILES.includes(entry.name) || matchesGlobalHardIgnoreGlob(entry.name))) {
|
|
238
334
|
continue;
|
|
239
335
|
}
|
|
240
336
|
// -----------------------------------------------
|
|
@@ -340,7 +436,7 @@ export async function generateDirectoryTree(dir, prefix = '', allFiles, depth =
|
|
|
340
436
|
for (const entry of sortedEntries) {
|
|
341
437
|
// --- GLOBAL HARD IGNORES ---
|
|
342
438
|
if (entry.isDirectory() && GLOBAL_HARD_IGNORE_DIRS.includes(entry.name)) continue;
|
|
343
|
-
if (!entry.isDirectory() && GLOBAL_HARD_IGNORE_FILES.includes(entry.name)) continue;
|
|
439
|
+
if (!entry.isDirectory() && (GLOBAL_HARD_IGNORE_FILES.includes(entry.name) || matchesGlobalHardIgnoreGlob(entry.name))) continue;
|
|
344
440
|
// ---------------------------
|
|
345
441
|
|
|
346
442
|
// Skip hidden directories and files (starting with '.')
|
|
@@ -1075,6 +1171,7 @@ export async function getProjectFiles(projectPath, config) {
|
|
|
1075
1171
|
if (dirsToIgnore.includes(pathParts[i])) return false;
|
|
1076
1172
|
}
|
|
1077
1173
|
if (filesToIgnore.includes(fileName)) return false;
|
|
1174
|
+
if (matchesGlobalHardIgnoreGlob(fileName)) return false;
|
|
1078
1175
|
if (fileExt && extensionsToIgnore.includes(fileExt)) return false;
|
|
1079
1176
|
return true;
|
|
1080
1177
|
});
|
|
@@ -207,6 +207,8 @@ async function getProjectDetails(projectPath, type) {
|
|
|
207
207
|
return await getGoDetails(projectPath);
|
|
208
208
|
case 'dotnet':
|
|
209
209
|
return await getDotnetDetails(projectPath);
|
|
210
|
+
case 'esp-idf':
|
|
211
|
+
return await getEspIdfDetails(projectPath);
|
|
210
212
|
default:
|
|
211
213
|
return details;
|
|
212
214
|
}
|
|
@@ -641,6 +643,64 @@ async function getDotnetDetails(projectPath) {
|
|
|
641
643
|
return details;
|
|
642
644
|
}
|
|
643
645
|
|
|
646
|
+
/**
 * Extracts the names listed under the top-level `dependencies:` key of an
 * `idf_component.yml` manifest.
 *
 * Only keys indented by exactly two spaces/tabs inside that section are collected, so
 * two-space keys of other top-level sections are not mistaken for dependencies.
 * This is a line scan, not a YAML parser: flow-style mappings are not understood.
 *
 * @param {string} ymlContent - Raw manifest text
 * @returns {string[]} Dependency names in file order
 */
function parseIdfComponentDependencies(ymlContent) {
  const names = [];
  let inDependencies = false;

  for (const line of ymlContent.split(/\r?\n/)) {
    // A non-indented, non-comment line opens a new top-level section.
    if (/^[^\s#]/.test(line)) {
      inDependencies = /^dependencies\s*:/.test(line);
      continue;
    }
    if (!inDependencies) continue;

    const entry = line.match(/^[ \t]{2}([^\s#]\S*):/);
    if (entry && entry[1] !== 'version') names.push(entry[1]);
  }

  return names;
}

/**
 * Collects ESP-IDF specific metadata for the project rooted at `projectPath`.
 *
 * Every probe is best-effort: a missing file leaves the corresponding field unset,
 * and an unexpected failure is reported through `console.warn` while whatever was
 * gathered so far is still returned.
 *
 * @param {string} projectPath - Root directory of the project
 * @returns {Promise<object>} Always contains `type: 'esp-idf'`; may also contain
 *   `projectName`, `usesIdfPath`, `targetChip`, `flashSize`, `hasComponentManifest`,
 *   `dependencies`, `hasPartitions` and `mainFiles`.
 */
async function getEspIdfDetails(projectPath) {
  const details = { type: 'esp-idf' };

  try {
    const cmakePath = path.join(projectPath, 'CMakeLists.txt');
    if (await fileExists(cmakePath)) {
      const cmakeContent = await fs.readFile(cmakePath, 'utf-8');
      // Anchored to the start of a line so a commented-out `# project(...)` is ignored.
      // Accepts optional whitespace around the parenthesis and names containing `-`, `.`, `+`.
      const projectMatch = cmakeContent.match(/^[ \t]*project\s*\(\s*([\w.+-]+)/im);
      if (projectMatch) details.projectName = projectMatch[1];
      if (cmakeContent.includes('IDF_PATH')) details.usesIdfPath = true;
    }

    const sdkDefaultsPath = path.join(projectPath, 'sdkconfig.defaults');
    if (await fileExists(sdkDefaultsPath)) {
      const sdkContent = await fs.readFile(sdkDefaultsPath, 'utf-8');
      // Line-anchored so `# CONFIG_...` comment lines are never picked up.
      const chipMatch = sdkContent.match(/^[ \t]*CONFIG_IDF_TARGET="(\w+)"/m);
      if (chipMatch) details.targetChip = chipMatch[1];
      // Only an enabled choice (`..._4MB=y`) counts; `# ..._2MB is not set` must not match.
      // The size token has to start with a digit so unrelated FLASHSIZE_* switches are skipped.
      const flashSizeMatch = sdkContent.match(/^[ \t]*CONFIG_ESPTOOLPY_FLASHSIZE_(\d\w*)=y/m);
      if (flashSizeMatch) details.flashSize = flashSizeMatch[1];
    }

    const componentYmlPath = path.join(projectPath, 'main', 'idf_component.yml');
    if (await fileExists(componentYmlPath)) {
      details.hasComponentManifest = true;
      try {
        const ymlContent = await fs.readFile(componentYmlPath, 'utf-8');
        const deps = parseIdfComponentDependencies(ymlContent);
        if (deps.length > 0) details.dependencies = deps;
      } catch {
        // Best-effort: an unreadable manifest just leaves `dependencies` unset.
      }
    }

    const partitionsPath = path.join(projectPath, 'partitions.csv');
    if (await fileExists(partitionsPath)) {
      details.hasPartitions = true;
    }

    const mainDir = path.join(projectPath, 'main');
    if (await directoryExists(mainDir)) {
      try {
        const mainFiles = await fs.readdir(mainDir);
        details.mainFiles = mainFiles.filter(f =>
          f.endsWith('.c') || f.endsWith('.h') || f.endsWith('.cpp')
        );
      } catch {
        // Best-effort: an unreadable directory just leaves `mainFiles` unset.
      }
    }
  } catch (error) {
    console.warn('Error getting ESP-IDF project details:', error.message);
  }

  return details;
}
|
|
703
|
+
|
|
644
704
|
// Utility functions
|
|
645
705
|
async function fileExists(filePath) {
|
|
646
706
|
try {
|