codesynapt 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -0
- package/LICENSE +686 -0
- package/LICENSES.md +141 -0
- package/README.md +331 -0
- package/electron/main.cjs +2849 -0
- package/electron/plugin-loader.cjs +184 -0
- package/electron/preload.cjs +108 -0
- package/package.json +216 -0
- package/packages/core/bin/codesynapt-mcp.cjs +611 -0
- package/packages/core/bin/codesynapt.cjs +1933 -0
- package/packages/core/legacy.js +300 -0
- package/packages/core/lib/control-server.cjs +1539 -0
- package/packages/core/lib/embedding.cjs +89 -0
- package/packages/core/lib/logger.cjs +63 -0
- package/packages/core/lib/search-cache.cjs +140 -0
- package/packages/core/lib/search-worker.cjs +255 -0
- package/packages/core/lib/search.cjs +211 -0
- package/packages/core/lib/symbol-graph.cjs +402 -0
- package/packages/core/lib/symbol-parser-js.cjs +542 -0
- package/packages/core/lib/symbol-parser-misc.cjs +394 -0
- package/packages/core/lib/symbol-parser-py.cjs +215 -0
- package/packages/core/lib/symbol-parser-treesitter.cjs +658 -0
- package/packages/core/lib/symbol-parser-tsc.cjs +332 -0
- package/packages/core/monorepo.js +310 -0
- package/packages/core/parser.js +2234 -0
- package/packages/core/scanner.js +623 -0
- package/plugin-api/LICENSE +21 -0
- package/plugin-api/README.md +114 -0
- package/plugin-api/docs/01-getting-started.md +197 -0
- package/plugin-api/docs/02-concepts.md +269 -0
- package/plugin-api/docs/api-reference.md +463 -0
- package/plugin-api/docs/troubleshooting.md +332 -0
- package/plugin-api/docs/types/exporter.md +377 -0
- package/plugin-api/docs/types/theme.md +312 -0
- package/plugin-api/examples/hello-world-plugin/README.md +70 -0
- package/plugin-api/examples/hello-world-plugin/main.js +36 -0
- package/plugin-api/examples/hello-world-plugin/manifest.json +12 -0
- package/plugin-api/examples/mermaid-exporter/README.md +125 -0
- package/plugin-api/examples/mermaid-exporter/main.js +58 -0
- package/plugin-api/examples/mermaid-exporter/manifest.json +12 -0
- package/plugin-api/examples/rust-parser/README.md +71 -0
- package/plugin-api/examples/rust-parser/main.js +123 -0
- package/plugin-api/examples/rust-parser/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/README.md +95 -0
- package/plugin-api/examples/sunset-theme/manifest.json +12 -0
- package/plugin-api/examples/sunset-theme/theme.css +31 -0
- package/plugin-api/package.json +20 -0
- package/plugin-api/types.d.ts +395 -0
- package/public/app.js +6837 -0
- package/public/backend.js +285 -0
- package/public/index.html +647 -0
- package/public/plugin-host.js +321 -0
- package/public/style.css +4359 -0
- package/public/vendor/three.module.js +53044 -0
- package/scripts/competitor-watch.mjs +144 -0
- package/scripts/copy-vendor.js +21 -0
- package/scripts/download-bundled-node.cjs +53 -0
- package/scripts/fuses-after-pack.cjs +34 -0
- package/scripts/license-check.js +119 -0
- package/scripts/perf-test.js +200 -0
- package/server.js +132 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Semantic embeddings via @xenova/transformers (MiniLM-L6-v2, ~25 MB
|
|
2
|
+
// quantized ONNX model). Runs entirely locally — no network call at
|
|
3
|
+
// query time, no telemetry. The model itself is fetched once on
|
|
4
|
+
// first load and cached under the OS HF cache directory.
|
|
5
|
+
//
|
|
6
|
+
// Used by /symbol/explore to give "auth" ↔ "login" / "signIn" /
|
|
7
|
+
// "verifyJWT" the synonym match that keyword-only scoring misses.
|
|
8
|
+
//
|
|
9
|
+
// All exports are lazy: until the first call, this module doesn't
|
|
10
|
+
// even import @xenova/transformers — so the 45 MB dep cost only
|
|
11
|
+
// hits processes that actually opt into embedding mode.
|
|
12
|
+
|
|
13
|
+
'use strict'
|
|
14
|
+
|
|
15
|
+
// Loader state shared by every export of this module:
//   _pipelinePromise — memoized in-flight / completed load (null when none)
//   _ready           — flips to true once the pipeline has been created
//   _failed          — flips to true after a failed init; later calls skip retrying
let _pipelinePromise = null
let _ready = false
let _failed = false

// Lazily build the feature-extraction pipeline, at most once per process.
// Resolves to the pipeline function, or to null when initialization failed
// (the failure is logged and remembered, so callers can fall back to
// keyword-only scoring). Concurrent callers share the same promise.
async function loadPipeline() {
  if (_pipelinePromise) return _pipelinePromise
  if (_failed) return null

  const init = async () => {
    try {
      // Dynamic import keeps the dependency out of processes that never embed.
      const mod = await import('@xenova/transformers')
      const extractor = await mod.pipeline(
        'feature-extraction',
        'Xenova/all-MiniLM-L6-v2',
        { quantized: true }
      )
      _ready = true
      return extractor
    } catch (err) {
      console.warn('[embedding] pipeline init failed — falling back to keyword-only:', err.message)
      _failed = true
      _pipelinePromise = null
      return null
    }
  }

  _pipelinePromise = init()
  return _pipelinePromise
}
|
|
41
|
+
|
|
42
|
+
// Single text → 384-d normalized vector, returned as a plain Array of
// numbers. Caller should await loadPipeline at least once before calling
// embed in a tight loop.
//
// Returns null when the pipeline is unavailable or when inference throws.
// Inference failures are logged (same style as embedBatch) instead of being
// swallowed silently, so a broken model shows up in the logs.
async function embed(text) {
  const p = await loadPipeline()
  if (!p) return null
  try {
    const out = await p(text, { pooling: 'mean', normalize: true })
    return Array.from(out.data)
  } catch (e) {
    console.warn('[embedding] embed failed:', e?.message ?? e)
    return null
  }
}
|
|
52
|
+
|
|
53
|
+
// Batch of N texts → N vectors (plain Arrays), in input order. Faster than
// N single calls because each pipeline invocation processes several texts.
//
// The input is fed to the pipeline in slices of `chunkSize` texts so peak
// memory stays bounded regardless of N. Pooling and normalization are
// requested per text, so chunking should not change the vectors beyond
// floating-point noise — NOTE(review): confirm against the pipeline docs.
//
// texts      array of strings (a single string is treated as a batch of one)
// chunkSize  max texts per pipeline call; invalid values fall back to 64
//
// Returns null when the pipeline is unavailable or inference throws
// (logged), [] for an empty input.
async function embedBatch(texts, chunkSize = 64) {
  const p = await loadPipeline()
  if (!p) return null
  const list = Array.isArray(texts) ? texts : [texts]
  if (list.length === 0) return []
  const step = Number.isInteger(chunkSize) && chunkSize > 0 ? chunkSize : 64
  try {
    const result = []
    for (let start = 0; start < list.length; start += step) {
      const out = await p(list.slice(start, start + step), { pooling: 'mean', normalize: true })
      // out.data is one flat buffer of N rows × D columns.
      const [N, D] = out.dims
      for (let i = 0; i < N; i++) {
        result.push(Array.from(out.data.slice(i * D, (i + 1) * D)))
      }
    }
    return result
  } catch (e) {
    console.warn('[embedding] batch failed:', e?.message ?? e)
    return null
  }
}
|
|
72
|
+
|
|
73
|
+
// Dot product of two equal-length vectors. The pipeline hands back
// L2-normalized vectors, for which the dot product equals the cosine
// similarity in [-1, 1]. Missing inputs or mismatched lengths score 0.
function cosineSim(a, b) {
  const comparable = Boolean(a) && Boolean(b) && a.length === b.length
  if (!comparable) return 0

  let sum = 0
  let i = 0
  while (i < a.length) {
    sum += a[i] * b[i]
    i += 1
  }
  return sum
}
|
|
81
|
+
|
|
82
|
+
module.exports = {
|
|
83
|
+
loadPipeline,
|
|
84
|
+
embed,
|
|
85
|
+
embedBatch,
|
|
86
|
+
cosineSim,
|
|
87
|
+
isReady: () => _ready,
|
|
88
|
+
isFailed: () => _failed,
|
|
89
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// logger.cjs — minimal structured logger (NDJSON file + stderr).
|
|
2
|
+
//
|
|
3
|
+
// Why not pino: pino adds a runtime dep (we currently have 3 deps total —
|
|
4
|
+
// keeping it lean matters). This is a 50-line equivalent for our actual
|
|
5
|
+
// needs: ts/level/module/msg as one JSON line per entry, stderr fallback
|
|
6
|
+
// for errors, no rotation (callers pick the file path).
|
|
7
|
+
//
|
|
8
|
+
// Usage:
|
|
9
|
+
// const { createLogger } = require('./logger.cjs')
|
|
10
|
+
// const log = createLogger({ file: '/var/log/cs.jsonl', module: 'search' })
|
|
11
|
+
// log.info('search started', { q: 'foo' })
|
|
12
|
+
// log.warn('cache eviction', { freed: 12345 })
|
|
13
|
+
// log.error('disk full', { errno: 28 })
|
|
14
|
+
|
|
15
|
+
const fs = require('fs')
|
|
16
|
+
const path = require('path')
|
|
17
|
+
|
|
18
|
+
// Numeric severity for each level name; higher is more severe.
const LEVELS = { trace: 10, debug: 20, info: 30, warn: 40, error: 50, fatal: 60 }

// Build a logger that writes one JSON object per line.
//
// opts.file        path of the NDJSON file to append to (omit for no file)
// opts.module      value of the `module` field on every entry (default 'cs')
// opts.level       minimum level name to emit (default 'info'; unknown
//                  names also fall back to 'info')
// opts.echoStderr  'never' | 'warn' | 'always' — when to mirror a short
//                  human-readable line to stderr (default 'warn', i.e. warn
//                  and above)
//
// Returns { trace, debug, info, warn, error, fatal, child }. Each level
// method takes (msg, meta); meta's own enumerable properties are merged
// into the entry. Logging never throws: file append failures are reported
// on stderr and unserializable meta is replaced by a `metaError` field.
function createLogger(opts = {}) {
  const file = opts.file || null
  const module_ = opts.module || 'cs'
  const minLevel = LEVELS[opts.level || 'info'] || 30
  const echoStderr = opts.echoStderr ?? 'warn' // 'never' | 'warn' | 'always'

  // Ensure log directory exists once
  if (file) {
    try { fs.mkdirSync(path.dirname(file), { recursive: true }) } catch { /* the append below reports the failure */ }
  }

  function emit(level, msg, meta) {
    if (LEVELS[level] < minLevel) return
    const ts = new Date().toISOString()

    // JSON.stringify throws on circular structures and BigInt values; a
    // logger must not take its caller down, so fall back to an entry
    // without meta that records why the meta was dropped.
    let line
    try {
      line = JSON.stringify({ ts, level, module: module_, msg, ...(meta || {}) })
    } catch (e) {
      line = JSON.stringify({ ts, level, module: module_, msg: String(msg), metaError: e.message })
    }
    line += '\n'

    if (file) {
      try { fs.appendFileSync(file, line) }
      catch (e) { process.stderr.write(`[logger] append failed: ${e.message}\n`) }
    }
    const shouldEcho = echoStderr === 'always'
      || (echoStderr === 'warn' && LEVELS[level] >= LEVELS.warn)
    if (shouldEcho) {
      process.stderr.write(`[${module_}] ${level.toUpperCase()} ${msg}\n`)
    }
  }

  return {
    trace: (m, x) => emit('trace', m, x),
    debug: (m, x) => emit('debug', m, x),
    info: (m, x) => emit('info', m, x),
    warn: (m, x) => emit('warn', m, x),
    error: (m, x) => emit('error', m, x),
    fatal: (m, x) => emit('fatal', m, x),
    // Same options, optionally a different module tag. `extra` may be omitted.
    child: (extra) => createLogger({ ...opts, module: extra?.module || module_ }),
  }
}
|
|
62
|
+
|
|
63
|
+
module.exports = { createLogger, LEVELS }
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// search-cache.cjs — LRU file content cache for full-text search.
|
|
2
|
+
//
|
|
3
|
+
// Design goals:
|
|
4
|
+
// 1) Zero misses: scanner emits add/change/remove → cache reacts.
|
|
5
|
+
// mtime is also checked on every read as a belt-and-suspenders guard.
|
|
6
|
+
// 2) Fast: second search re-reads ~0 files; everything's in memory.
|
|
7
|
+
// 3) Bounded: LRU evicts least-recent files when over MAX_BYTES.
|
|
8
|
+
//
|
|
9
|
+
// Usage:
|
|
10
|
+
// const cache = createSearchCache(scanner, { maxBytes: 100 * 1024 * 1024 })
|
|
11
|
+
// const buf = await cache.read(id, absPath) // returns Buffer
|
|
12
|
+
// cache.invalidate(id) // manual nuke
|
|
13
|
+
// cache.clear() // wipe all
|
|
14
|
+
//   cache.stats()                           // { entries, bytes, bytesMb, maxBytes, hits, misses, stales, evictions, hitRate }
|
|
15
|
+
|
|
16
|
+
const fs = require('fs')
|
|
17
|
+
|
|
18
|
+
const DEFAULT_MAX_BYTES = 100 * 1024 * 1024 // 100 MB
const DEFAULT_MAX_FILE_BYTES = 2 * 1024 * 1024 // skip caching files > 2 MB

// Create an LRU cache of file contents keyed by a caller-chosen id.
//
// scanner            optional event source; when it has an `on` method the
//                    cache drops an entry on 'file-changed' / 'file-removed'
//                    events (payload `{ id }`)
// opts.maxBytes      total buffer bytes kept before evicting (default 100 MB)
// opts.maxFileBytes  files larger than this are read but never cached
//                    (default 2 MB)
//
// Returns { read, getText, invalidate, clear, stats }.
// Only buffer bytes count toward the budget; decoded text kept by getText
// is not included in `bytes`.
function createSearchCache(scanner, opts = {}) {
  const maxBytes = opts.maxBytes || DEFAULT_MAX_BYTES
  const maxFileBytes = opts.maxFileBytes || DEFAULT_MAX_FILE_BYTES

  // id → { mtime, buf, lastAccess, text? }
  // Map insertion order doubles as recency order (oldest first).
  const cache = new Map()
  let bytesUsed = 0
  const stats = { hits: 0, misses: 0, evictions: 0, stales: 0 }

  // Hook scanner events for proactive invalidation
  if (scanner && typeof scanner.on === 'function') {
    scanner.on('file-changed', ({ id }) => invalidate(id))
    scanner.on('file-removed', ({ id }) => invalidate(id))
    // file-added: no action — next read will populate
  }

  // Drop one entry (no-op when absent) and give its bytes back.
  function invalidate(id) {
    const entry = cache.get(id)
    if (entry) {
      bytesUsed -= entry.buf.length
      cache.delete(id)
    }
  }

  // Drop everything.
  function clear() {
    cache.clear()
    bytesUsed = 0
  }

  function touch(id, entry) {
    // Move to end (most recently used) by re-inserting
    cache.delete(id)
    entry.lastAccess = Date.now()
    cache.set(id, entry)
  }

  // Evict least-recently-used entries until `needed` more bytes fit.
  function evictUntilFits(needed) {
    // Map iteration order = insertion order; oldest is at the front
    for (const [id, entry] of cache) {
      if (bytesUsed + needed <= maxBytes) break
      bytesUsed -= entry.buf.length
      cache.delete(id)
      stats.evictions++
    }
  }

  // Return the file's bytes as a Buffer, from cache when the mtime matches.
  // Rejects with the underlying fs error (after dropping any entry for `id`)
  // when the file cannot be stat'ed or read.
  async function read(id, absPath) {
    // 1) stat for mtime
    let stat
    try { stat = await fs.promises.stat(absPath) }
    catch (e) { invalidate(id); throw e }

    // 2) cache hit if mtime unchanged
    const cached = cache.get(id)
    if (cached && cached.mtime === stat.mtimeMs) {
      stats.hits++
      touch(id, cached)
      return cached.buf
    }
    if (cached) stats.stales++ // mtime differed → stale

    // 3) read fresh
    let buf
    try { buf = await fs.promises.readFile(absPath) }
    catch (e) { invalidate(id); throw e }
    stats.misses++

    // 4) replace whatever is stored for this id NOW. The map may have
    //    changed while readFile was pending (a concurrent read of the same
    //    id, or a scanner invalidation), so `cached` from step 2 must not be
    //    used for byte accounting — that would double-count or underflow
    //    bytesUsed. invalidate() looks at the current map state.
    invalidate(id)

    // 5) cache the new content if it fits the per-file and total limits
    if (buf.length <= maxFileBytes && buf.length <= maxBytes) {
      evictUntilFits(buf.length)
      cache.set(id, { mtime: stat.mtimeMs, buf, lastAccess: Date.now() })
      bytesUsed += buf.length
    }

    return buf
  }

  function getStats() {
    const lookups = stats.hits + stats.misses
    return {
      entries: cache.size,
      bytes: bytesUsed,
      bytesMb: +(bytesUsed / 1024 / 1024).toFixed(1),
      maxBytes,
      hits: stats.hits,
      misses: stats.misses,
      stales: stats.stales,
      evictions: stats.evictions,
      hitRate: lookups === 0 ? null : +(stats.hits / lookups).toFixed(3),
    }
  }

  // getText — like read, but returns the utf-8 decoded string. Caches the
  // decoded text on the entry so repeat searches skip the (CPU-bound) decode.
  // mtime invalidation also drops the text since the entry is replaced.
  async function getText(id, absPath) {
    const buf = await read(id, absPath)
    const entry = cache.get(id)
    // Only memoize on the entry that actually holds this buffer; a
    // concurrent read may have swapped in a different one.
    if (entry && entry.buf === buf) {
      // `=== undefined` (not falsiness) so an empty file is decoded once.
      if (entry.text === undefined) entry.text = buf.toString('utf8')
      return entry.text
    }
    // Not cached (too large, or replaced meanwhile) — decode without storing
    return buf.toString('utf8')
  }

  return { read, getText, invalidate, clear, stats: getStats }
}
|
|
139
|
+
|
|
140
|
+
module.exports = { createSearchCache }
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
// search-worker.cjs — runs in a Node worker_thread, isolated from the main
|
|
2
|
+
// thread (which is busy with scanner / chokidar / babel parse).
|
|
3
|
+
//
|
|
4
|
+
// Protocol (messages from main):
|
|
5
|
+
// { type: 'search', id, files: [{id, absPath}], q, regex, caseSensitive, max, maxPerFile, concurrency, fileTimeoutMs }
|
|
6
|
+
// { type: 'invalidate', id } — drop one cache entry
|
|
7
|
+
// { type: 'clear' } — drop all cache
|
|
8
|
+
//
|
|
9
|
+
// Reply (messages to main):
|
|
10
|
+
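//   { type: 'ready' }                       — sent once at startup, after the message handler is installed
//   { type: 'result', id, payload: { query, regex, caseSensitive, totalFiles, filesScanned, filesMatched, matches, skipped, truncated, ms, cacheStats } }
//   { type: 'error', id, error: string }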
|
|
12
|
+
|
|
13
|
+
const { parentPort, workerData } = require('worker_threads')
|
|
14
|
+
const fs = require('fs')
|
|
15
|
+
|
|
16
|
+
if (!parentPort) {
|
|
17
|
+
throw new Error('search-worker.cjs must be loaded as a worker_thread')
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Optional debug tracing, configured through workerData:
//   reqId     — tag printed on every trace line ('?' when absent)
//   tracePath — file to append to; tracing is off when absent
const _reqId = workerData?.reqId ?? '?'
const _tracePath = workerData?.tracePath

// Append one timestamped line to the trace file. Best-effort: any failure
// to build or write the line is ignored so tracing can never break a search.
function _trace(msg) {
  if (_tracePath) {
    try {
      fs.appendFileSync(_tracePath, `${new Date().toISOString()} [#${_reqId}] [worker] ${msg}\n`)
    } catch {
      // tracing is best-effort
    }
  }
}

_trace('boot')
|
|
27
|
+
|
|
28
|
+
// Defaults applied by doSearch when the request omits the field.
const DEFAULT_MAX = 100
// 8 is the sweet spot with the size-gate in place (large files are skipped
// without holding threads). Going higher gives diminishing returns.
const DEFAULT_CONCURRENCY = 8
const DEFAULT_FILE_TIMEOUT_MS = 5000

// Characters of context kept on each side of a match in its snippet.
const SNIPPET_CONTEXT = 50

// Cache budget: total buffer bytes, and the largest single file that is cached.
const MAX_CACHE_BYTES = 100 * 1024 ** 2
const MAX_FILE_BYTES = 2 * 1024 ** 2

// Files larger than this are SKIPPED entirely (not even attempted).
// Reason: AI model tokenizer JSONs (tens of MB) read slowly enough that
// they hold a libuv thread for seconds, blocking every other concurrent
// task. Better to skip them honestly than hang the whole search.
const MAX_SEARCH_BYTES = 5 * 1024 ** 2 // 5 MB

// Cache: id → { mtime, buf, text? }. Map insertion order is recency order.
const cache = new Map()
let bytesUsed = 0
const cacheStats = { hits: 0, misses: 0, stales: 0, evictions: 0 }
|
|
46
|
+
|
|
47
|
+
// Remove the cache entry for `id`, if any, and release its buffer bytes
// from the running total.
function invalidate(id) {
  const stale = cache.get(id)
  if (!stale) return
  bytesUsed -= stale.buf.length
  cache.delete(id)
}
|
|
54
|
+
// Forget every cached file and reset the byte counter.
function clearCache() {
  bytesUsed = 0
  cache.clear()
}
|
|
55
|
+
|
|
56
|
+
// Evict entries, oldest first (Map iteration follows insertion order),
// until `needed` additional bytes fit under MAX_CACHE_BYTES or the cache
// is empty. Every eviction is counted in cacheStats.evictions.
function evictUntilFits(needed) {
  const oldestFirst = cache.entries()
  while (!(bytesUsed + needed <= MAX_CACHE_BYTES)) {
    const step = oldestFirst.next()
    if (step.done) return
    const [id, entry] = step.value
    bytesUsed -= entry.buf.length
    cache.delete(id)
    cacheStats.evictions++
  }
}
|
|
64
|
+
|
|
65
|
+
// Return the utf-8 text of a file, from cache when possible.
//
// id       cache key for the file
// absPath  path handed to fs.promises.stat / readFile
//
// Rejects with the fs error when stat/read fails (any entry for `id` is
// dropped first), or with an Error whose message starts with 'too-large'
// when the file exceeds MAX_SEARCH_BYTES. Files up to MAX_FILE_BYTES are
// cached; larger ones are returned without being stored.
async function getText(id, absPath) {
  // Fast path: cache hit, no stat call. Chokidar invalidation messages
  // from main keep us honest. Skipping stat saves ~0.5ms × N files.
  const cached = cache.get(id)
  if (cached) {
    cacheStats.hits++
    cache.delete(id); cache.set(id, cached) // LRU touch
    // `== null` rather than falsiness so an empty file is decoded only once.
    if (cached.text == null) cached.text = cached.buf.toString('utf8')
    return cached.text
  }

  let stat
  try { stat = await fs.promises.stat(absPath) }
  catch (e) { invalidate(id); throw e }

  // Hard size gate — prevents tokenizer.json (50+ MB) from stalling pool.
  if (stat.size > MAX_SEARCH_BYTES) {
    throw new Error(`too-large: ${stat.size} bytes (cap ${MAX_SEARCH_BYTES})`)
  }

  let buf
  try { buf = await fs.promises.readFile(absPath) }
  catch (e) { invalidate(id); throw e }
  cacheStats.misses++

  if (buf.length <= MAX_FILE_BYTES) {
    // Another getText for the same id may have populated the cache while
    // the awaits above were pending (`cached` from the top is always empty
    // here). Drop whatever is stored now so bytesUsed is not double-counted.
    invalidate(id)
    evictUntilFits(buf.length)
    cache.set(id, { mtime: stat.mtimeMs, buf, text: null })
    bytesUsed += buf.length
  }
  return buf.toString('utf8')
}
|
|
98
|
+
|
|
99
|
+
// Settle like `promise`, unless `ms` milliseconds pass first — then reject
// with Error(`timeout <ms>ms: <label>`). The timer is always cleared once
// the race settles. The underlying operation is not cancelled; only its
// result is ignored.
async function withTimeout(promise, ms, label) {
  let timer
  const expiry = new Promise((_resolve, reject) => {
    timer = setTimeout(() => reject(new Error(`timeout ${ms}ms: ${label}`)), ms)
  })
  try {
    return await Promise.race([promise, expiry])
  } finally {
    clearTimeout(timer)
  }
}
|
|
106
|
+
|
|
107
|
+
// Escape regex metacharacters so a literal query can be compiled as a RegExp.
function _escapeRegExp(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

// Build a function mapping a character offset to { line, col } (both
// 1-based). Offsets MUST be passed in ascending order: the locator only
// walks forward, so a whole scan counts each newline once.
function _lineLocator(text) {
  let line = 1
  let lineStart = 0
  let nextNl = text.indexOf('\n')
  return (idx) => {
    while (nextNl !== -1 && nextNl < idx) {
      line++
      lineStart = nextNl + 1
      nextNl = text.indexOf('\n', lineStart)
    }
    return { line, col: idx - lineStart + 1 }
  }
}

// Up to SNIPPET_CONTEXT characters either side of the match, on one line.
function _snippetAt(text, idx, matchLen) {
  const sStart = Math.max(0, idx - SNIPPET_CONTEXT)
  const sEnd = Math.min(text.length, idx + matchLen + SNIPPET_CONTEXT)
  return text.slice(sStart, sEnd).replace(/\r?\n/g, ' ')
}

// Collect matches of a global RegExp against `text`.
function _scanRegex(text, re, maxPerFile) {
  const matches = []
  const locate = _lineLocator(text)
  let m
  while ((m = re.exec(text)) !== null) {
    const { line, col } = locate(m.index)
    matches.push({ line, col, snippet: _snippetAt(text, m.index, m[0].length) })
    if (matches.length >= maxPerFile) break
    // Zero-length match: step forward manually or exec() would loop forever.
    if (m.index === re.lastIndex) re.lastIndex++
  }
  return matches
}

// Collect non-overlapping occurrences of `needle` in `hay`. `hay` must have
// the same length as `text` so offsets found in one are valid in the other.
function _scanLiteral(text, hay, needle, maxPerFile) {
  const matches = []
  if (needle === '') return matches // indexOf('') matches everywhere
  const locate = _lineLocator(hay)
  let from = 0
  while (true) {
    const idx = hay.indexOf(needle, from)
    if (idx === -1) break
    const { line, col } = locate(idx)
    matches.push({ line, col, snippet: _snippetAt(text, idx, needle.length) })
    if (matches.length >= maxPerFile) break
    from = idx + needle.length
  }
  return matches
}

// Find occurrences of `q` in `text`.
//
// text        file content
// q           query: a regex source when opts.regex is truthy, else a literal
// opts        { regex, caseSensitive } — both treated as booleans
// maxPerFile  stop after this many matches (checked after each match is added)
//
// Returns an array of { line, col, snippet } in file order; line and col
// are 1-based. An invalid regex yields [].
//
// Each search is a single forward pass: the next occurrence is located with
// one indexOf/exec from the previous match, and line numbers are tracked
// incrementally rather than recounted per line or per match.
function scanContent(text, q, opts, maxPerFile) {
  if (opts.regex) {
    const flags = opts.caseSensitive ? 'g' : 'gi'
    let re
    try { re = new RegExp(q, flags) } catch { return [] }
    return _scanRegex(text, re, maxPerFile)
  }

  if (opts.caseSensitive) return _scanLiteral(text, text, q, maxPerFile)

  const hay = text.toLowerCase()
  if (hay.length !== text.length) {
    // Lowercasing changed the length (e.g. 'İ' becomes two code units), so
    // offsets in `hay` no longer line up with `text`. Search the original
    // text with a case-insensitive regex instead.
    return _scanRegex(text, new RegExp(_escapeRegExp(q), 'gi'), maxPerFile)
  }
  return _scanLiteral(text, hay, q.toLowerCase(), maxPerFile)
}
|
|
150
|
+
|
|
151
|
+
// Run `tasks` (an array of zero-argument async functions) with at most
// `concurrency` of them in flight at once.
//
// concurrency  desired parallelism; values that are not a number ≥ 1
//              (0, negative, NaN, undefined) are treated as 1 so the tasks
//              always run instead of being silently skipped
// shouldStop   optional predicate, polled before each task is started;
//              once it returns true no further tasks are started
//
// Resolves when every started task has finished; rejects as soon as any
// task rejects.
async function runConcurrent(tasks, concurrency, shouldStop) {
  const requested = Math.floor(Number(concurrency))
  const lanes = Math.min(requested >= 1 ? requested : 1, tasks.length)

  let next = 0
  async function lane() {
    while (next < tasks.length) {
      if (shouldStop && shouldStop()) return
      const idx = next++ // claim the slot before awaiting so lanes never share a task
      await tasks[idx]()
    }
  }

  await Promise.all(Array.from({ length: lanes }, () => lane()))
}
|
|
163
|
+
|
|
164
|
+
// Execute one search request.
//
// req fields:
//   files          array of { id, absPath } to scan (required)
//   q              query string (required, non-empty)
//   regex          treat q as a regex source (default false)
//   caseSensitive  default false
//   max            stop once this many matches are collected (default DEFAULT_MAX)
//   maxPerFile     cap per file (default 10)
//   concurrency    files read in parallel (default DEFAULT_CONCURRENCY)
//   fileTimeoutMs  per-file read timeout (default DEFAULT_FILE_TIMEOUT_MS)
//
// Resolves to { query, regex, caseSensitive, totalFiles, filesScanned,
// filesMatched, matches, skipped, truncated, ms, cacheStats }.
// Files that cannot be read or scanned are reported in `skipped` with a
// reason ('timeout' | 'too-large' | 'read-error' | 'scan-error') instead of
// failing the search. Throws an Error for an invalid request.
async function doSearch(req) {
  const t0 = Date.now()
  const {
    files, q,
    regex = false, caseSensitive = false,
    max = DEFAULT_MAX, maxPerFile = 10,
    concurrency = DEFAULT_CONCURRENCY, fileTimeoutMs = DEFAULT_FILE_TIMEOUT_MS,
  } = req

  // Validate up front so a bad request produces one clear error instead of
  // an opaque TypeError or a 'scan-error' entry for every file.
  if (!q) throw new Error('q (query) is required')
  if (typeof q !== 'string') throw new Error('q (query) must be a string')
  if (!Array.isArray(files)) throw new Error('files (array of { id, absPath }) is required')
  if (regex) { try { new RegExp(q) } catch (e) { throw new Error(`invalid regex: ${e.message}`) } }

  const totalFiles = files.length
  const matches = []
  const skipped = []
  let filesScanned = 0, filesMatched = 0, stopFlag = false
  let _completed = 0
  _trace(`doSearch begin: q=${q} totalFiles=${totalFiles} concurrency=${concurrency}`)

  const tasks = files.map((f, taskIdx) => async () => {
    if (stopFlag) return
    let text
    const tRead = Date.now()
    try {
      text = await withTimeout(getText(f.id, f.absPath), fileTimeoutMs, f.id)
    } catch (e) {
      // Rejections are normally Errors, but do not assume so.
      const message = String(e?.message ?? e)
      _trace(`  SKIP task[${taskIdx}] ${f.id} after ${Date.now()-tRead}ms — ${message}`)
      let reason = 'read-error'
      if (message.startsWith('timeout')) reason = 'timeout'
      else if (message.startsWith('too-large')) reason = 'too-large'
      skipped.push({ id: f.id, reason })
      return
    }
    const rDur = Date.now() - tRead
    if (rDur > 500) _trace(`  SLOW task[${taskIdx}] ${f.id} read took ${rDur}ms`)
    // The limit may have been reached while this file was being read.
    if (stopFlag) return
    filesScanned++
    let fileMatches
    try {
      fileMatches = scanContent(text, q, { regex, caseSensitive }, maxPerFile)
    } catch {
      skipped.push({ id: f.id, reason: 'scan-error' })
      return
    }
    _completed++
    if (_completed % 200 === 0) _trace(`  progress: ${_completed}/${totalFiles} matches=${matches.length}`)
    if (fileMatches.length === 0) return
    filesMatched++
    for (const m of fileMatches) {
      matches.push({ id: f.id, line: m.line, col: m.col, snippet: m.snippet, totalInFile: fileMatches.length })
      if (matches.length >= max) { stopFlag = true; break }
    }
  })

  _trace(`runConcurrent starting`)
  await runConcurrent(tasks, concurrency, () => stopFlag)
  _trace(`runConcurrent finished: completed=${_completed} matches=${matches.length} skipped=${skipped.length}`)

  return {
    query: q, regex, caseSensitive,
    totalFiles, filesScanned, filesMatched,
    matches, skipped, truncated: stopFlag,
    ms: Date.now() - t0,
    cacheStats: { ...cacheStats, entries: cache.size, bytesMb: +(bytesUsed/1024/1024).toFixed(1) },
  }
}
|
|
230
|
+
|
|
231
|
+
// Message loop. Handles 'search', 'invalidate' and 'clear' requests from
// the main thread; anything else gets an 'error' reply. Every failure —
// including a malformed message — is turned into an 'error' reply rather
// than an unhandled rejection inside the worker.
parentPort.on('message', async (msg) => {
  try {
    if (msg === null || typeof msg !== 'object') {
      throw new Error(`malformed message: ${msg === null ? 'null' : typeof msg}`)
    }
    _trace(`recv ${msg.type}${msg.q ? ` q=${msg.q}`:''}${msg.files ? ` files=${msg.files.length}` : ''}`)
    if (msg.type === 'search') {
      const t = Date.now()
      const payload = await doSearch(msg)
      _trace(`doSearch returned in ${Date.now() - t}ms — matches=${payload.matches.length} scanned=${payload.filesScanned}`)
      parentPort.postMessage({ type: 'result', id: msg.id, payload })
      _trace('postMessage result done')
    } else if (msg.type === 'invalidate') {
      invalidate(msg.id)
    } else if (msg.type === 'clear') {
      clearCache()
    } else {
      parentPort.postMessage({ type: 'error', id: msg.id, error: `unknown msg type: ${msg.type}` })
    }
  } catch (e) {
    // `msg` and `e` may be anything here, so dereference defensively.
    const error = e?.message || String(e)
    _trace(`error: ${error}`)
    parentPort.postMessage({ type: 'error', id: msg?.id, error })
  }
})

// Tell the main thread the handler is installed and requests can be sent.
_trace('parentPort handler set, sending ready')
parentPort.postMessage({ type: 'ready' })
_trace('ready sent')
|