sweet-search 0.0.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/NOTICE +23 -0
- package/core/cli.js +51 -0
- package/core/config.js +27 -0
- package/core/embedding/embedding-cache.js +467 -0
- package/core/embedding/embedding-local-model.js +845 -0
- package/core/embedding/embedding-remote.js +492 -0
- package/core/embedding/embedding-service.js +712 -0
- package/core/embedding/embedding-telemetry.js +219 -0
- package/core/embedding/index.js +40 -0
- package/core/graph/community-detector.js +294 -0
- package/core/graph/graph-expansion.js +839 -0
- package/core/graph/graph-extractor.js +2304 -0
- package/core/graph/graph-search.js +2148 -0
- package/core/graph/hcgs-generator.js +666 -0
- package/core/graph/index.js +16 -0
- package/core/graph/leiden-algorithm.js +547 -0
- package/core/graph/relationship-resolver.js +366 -0
- package/core/graph/repo-map.js +408 -0
- package/core/graph/summary-manager.js +549 -0
- package/core/indexing/artifact-builder.js +1054 -0
- package/core/indexing/ast-chunker.js +709 -0
- package/core/indexing/chunking/chunk-builder.js +170 -0
- package/core/indexing/chunking/markdown-chunker.js +503 -0
- package/core/indexing/chunking/plaintext-chunker.js +104 -0
- package/core/indexing/dedup/dedup-phase.js +159 -0
- package/core/indexing/dedup/exemplar-selector.js +65 -0
- package/core/indexing/document-chunker.js +56 -0
- package/core/indexing/incremental-parser.js +390 -0
- package/core/indexing/incremental-tracker.js +761 -0
- package/core/indexing/index-codebase-v21.js +472 -0
- package/core/indexing/index-maintainer.mjs +1674 -0
- package/core/indexing/index.js +90 -0
- package/core/indexing/indexer-ann.js +1077 -0
- package/core/indexing/indexer-build.js +742 -0
- package/core/indexing/indexer-phases.js +800 -0
- package/core/indexing/indexer-pool.js +764 -0
- package/core/indexing/indexer-sparse-gram.js +98 -0
- package/core/indexing/indexer-utils.js +536 -0
- package/core/indexing/indexer-worker.js +148 -0
- package/core/indexing/li-skip-policy.js +225 -0
- package/core/indexing/merkle-tracker.js +244 -0
- package/core/indexing/model-pool.js +166 -0
- package/core/infrastructure/code-graph-repository.js +120 -0
- package/core/infrastructure/codebase-repository.js +131 -0
- package/core/infrastructure/config/dedup.js +54 -0
- package/core/infrastructure/config/embedding.js +298 -0
- package/core/infrastructure/config/graph.js +80 -0
- package/core/infrastructure/config/index.js +82 -0
- package/core/infrastructure/config/indexing.js +8 -0
- package/core/infrastructure/config/platform.js +254 -0
- package/core/infrastructure/config/ranking.js +221 -0
- package/core/infrastructure/config/search.js +396 -0
- package/core/infrastructure/config/translation.js +89 -0
- package/core/infrastructure/config/vector-store.js +114 -0
- package/core/infrastructure/constants.js +86 -0
- package/core/infrastructure/coreml-cascade.js +909 -0
- package/core/infrastructure/coreml-cascade.json +46 -0
- package/core/infrastructure/coreml-provider.js +81 -0
- package/core/infrastructure/db-utils.js +69 -0
- package/core/infrastructure/dedup-hashing.js +83 -0
- package/core/infrastructure/hardware-capability.js +332 -0
- package/core/infrastructure/index.js +104 -0
- package/core/infrastructure/language-patterns/maps.js +121 -0
- package/core/infrastructure/language-patterns/registry-core.js +323 -0
- package/core/infrastructure/language-patterns/registry-data-query.js +155 -0
- package/core/infrastructure/language-patterns/registry-object-oriented.js +285 -0
- package/core/infrastructure/language-patterns/registry-tooling.js +240 -0
- package/core/infrastructure/language-patterns/registry-web-style.js +143 -0
- package/core/infrastructure/language-patterns/registry.js +19 -0
- package/core/infrastructure/language-patterns.js +141 -0
- package/core/infrastructure/llm-provider.js +733 -0
- package/core/infrastructure/manifest.json +46 -0
- package/core/infrastructure/maxsim.wasm +0 -0
- package/core/infrastructure/model-fetcher.js +423 -0
- package/core/infrastructure/model-registry.js +214 -0
- package/core/infrastructure/native-inference.js +587 -0
- package/core/infrastructure/native-resolver.js +187 -0
- package/core/infrastructure/native-sparse-gram.js +257 -0
- package/core/infrastructure/native-tokenizer.js +160 -0
- package/core/infrastructure/onnx-mutex.js +45 -0
- package/core/infrastructure/onnx-session-utils.js +261 -0
- package/core/infrastructure/ort-pipeline.js +111 -0
- package/core/infrastructure/project-detector.js +102 -0
- package/core/infrastructure/quantization.js +410 -0
- package/core/infrastructure/simd-distance.js +502 -0
- package/core/infrastructure/simd-distance.wasm +0 -0
- package/core/infrastructure/tree-sitter-provider.js +665 -0
- package/core/infrastructure/webgpu-maxsim.js +222 -0
- package/core/query/index.js +35 -0
- package/core/query/intent-detector.js +201 -0
- package/core/query/intent-router.js +156 -0
- package/core/query/query-router-catboost.js +222 -0
- package/core/query/query-router-ml.js +266 -0
- package/core/query/query-router.js +213 -0
- package/core/ranking/cascaded-scorer.js +379 -0
- package/core/ranking/flashrank.js +810 -0
- package/core/ranking/index.js +49 -0
- package/core/ranking/late-interaction-index.js +2383 -0
- package/core/ranking/late-interaction-model.js +812 -0
- package/core/ranking/local-reranker.js +374 -0
- package/core/ranking/mmr.js +379 -0
- package/core/ranking/quality-scorer.js +363 -0
- package/core/search/context-expander.js +1167 -0
- package/core/search/dedup/sibling-expander.js +327 -0
- package/core/search/index.js +16 -0
- package/core/search/search-boost.js +259 -0
- package/core/search/search-cli.js +544 -0
- package/core/search/search-format.js +282 -0
- package/core/search/search-fusion.js +327 -0
- package/core/search/search-hybrid.js +204 -0
- package/core/search/search-pattern-chunks.js +337 -0
- package/core/search/search-pattern-planner.js +439 -0
- package/core/search/search-pattern-prefilter.js +412 -0
- package/core/search/search-pattern-ripgrep.js +663 -0
- package/core/search/search-pattern.js +463 -0
- package/core/search/search-postprocess.js +452 -0
- package/core/search/search-semantic.js +706 -0
- package/core/search/search-server.js +554 -0
- package/core/search/session-daemon-prewarm.mjs +164 -0
- package/core/search/session-warmup.js +595 -0
- package/core/search/sweet-search.js +632 -0
- package/core/search/warmup-metrics.js +532 -0
- package/core/start-server.js +6 -0
- package/core/training/query-router/features/extractor.js +762 -0
- package/core/training/query-router/features/multilingual-patterns.js +431 -0
- package/core/training/query-router/features/text-segmenter.js +303 -0
- package/core/training/query-router/features/unicode-utils.js +383 -0
- package/core/training/query-router/output/v45_router_d4.js +11521 -0
- package/core/training/query-router/output/v46_router_d4.js +11498 -0
- package/core/vector-store/binary-heap.js +227 -0
- package/core/vector-store/binary-hnsw-index.js +1004 -0
- package/core/vector-store/float-vector-store.js +234 -0
- package/core/vector-store/hnsw-index.js +580 -0
- package/core/vector-store/index.js +39 -0
- package/core/vector-store/seismic-index.js +498 -0
- package/core/vocabulary/index.js +84 -0
- package/core/vocabulary/vocab-constants.js +20 -0
- package/core/vocabulary/vocab-miner-extractors.js +375 -0
- package/core/vocabulary/vocab-miner-nl.js +404 -0
- package/core/vocabulary/vocab-miner-utils.js +146 -0
- package/core/vocabulary/vocab-miner.js +574 -0
- package/core/vocabulary/vocab-prewarm-cli.js +110 -0
- package/core/vocabulary/vocab-ranker.js +492 -0
- package/core/vocabulary/vocab-warmer.js +523 -0
- package/core/vocabulary/vocab-warmup-orchestrator.js +425 -0
- package/core/vocabulary/vocabulary-utils.js +704 -0
- package/crates/wasm-router/pkg/package.json +13 -0
- package/crates/wasm-router/pkg/query_router_wasm.d.ts +36 -0
- package/crates/wasm-router/pkg/query_router_wasm.js +271 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm +0 -0
- package/crates/wasm-router/pkg/query_router_wasm_bg.wasm.d.ts +19 -0
- package/mcp/config-gen.js +121 -0
- package/mcp/server.js +335 -0
- package/mcp/tool-handlers.js +476 -0
- package/package.json +131 -9
- package/scripts/benchmark-harness.js +794 -0
- package/scripts/init.js +1058 -0
- package/scripts/smoke-test.js +435 -0
- package/scripts/uninstall.js +478 -0
- package/scripts/verify-runtime.js +176 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vocabulary Miner Extractors
|
|
3
|
+
*
|
|
4
|
+
* Dependency-extraction functions for imports, exports, definitions,
|
|
5
|
+
* constants, and package manifest parsing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { splitIdentifier, addTerm, STOP_WORDS } from './vocab-miner-utils.js';
|
|
9
|
+
|
|
10
|
+
// All-caps tokens that the constant extractor must never record as vocabulary
// terms: they are annotation markers or generic abbreviations rather than
// project-specific names.
const NON_TERM_CONSTANTS = new Set([
  // Comment annotation markers
  'TODO',
  'FIXME',
  'NOTE',
  'HACK',
  'XXX',
  'BUG',
  // Severity / log-level words
  'WARN',
  'INFO',
  'DEBUG',
  'ERROR',
  // Literal-value spellings
  'TRUE',
  'FALSE',
  'NULL',
  'NONE',
  // Generic language words
  'SELF',
  'THIS',
  'VOID',
  'ENUM',
  'TYPE',
  // Short primitive type abbreviations
  'CHAR',
  'BYTE',
  'INT8',
  'UINT',
]);
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Import Extraction
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * Extract import statements and add imported names as vocabulary terms.
 *
 * The syntax that is scanned is chosen from `ext`:
 * - JS/TS (`.js .jsx .ts .tsx .mjs .cjs`): ES `import` forms (named, default,
 *   namespace, default + named) and CommonJS `require` (plain and destructured).
 * - Python (`.py .pyi`): `from X import a, b` and `import X`, including
 *   parenthesised (optionally multi-line) name lists.
 * - Go (`.go`): single and grouped `import` declarations (last path segment).
 * - Java/Kotlin (`.java .kt .kts`): `import a.b.C` / `import static a.b.C.m`.
 * - Rust (`.rs`): `use a::b::C` and brace groups `use a::{B, c::D as E}`.
 *
 * Any other extension leaves `terms` untouched. When an import is aliased, the
 * original (pre-alias) name is the one recorded.
 *
 * @param {string} content - Source file content to parse
 * @param {string} ext - File extension including dot (e.g. '.js', '.py')
 * @param {Map<string, {score: number, source: string}>} terms - Accumulator map for discovered terms
 * @returns {void}
 */
export function extractImports(content, ext, terms) {
  // JS/TS: import { X, Y } from 'module'; import X from 'module'
  if (['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'].includes(ext)) {
    // Record the imported identifier (0.6) plus its meaningful sub-words (0.4).
    const addImportedName = (name) => {
      if (!name || name.length <= 1) return;
      addTerm(terms, name, 0.6, 'import');
      for (const part of splitIdentifier(name)) {
        if (part.length > 2 && !STOP_WORDS.has(part)) {
          addTerm(terms, part, 0.4, 'import-part');
        }
      }
    };
    // Record a package specifier; relative paths are skipped and a leading
    // "@scope/" prefix is removed.
    const addModuleTerm = (modulePath) => {
      if (!modulePath || modulePath.startsWith('.')) return;
      const modName = modulePath.replace(/^@[^/]+\//, '');
      addTerm(terms, modName, 0.4, 'import-module');
    };
    // Split "A, B as C" (or "A, B: C" with a custom separator) into names,
    // keeping either the first or the last side of each alias pair.
    const parseImportedList = (names, aliasSep = /\s+as\s+/, aliasPart = 'first') =>
      names
        .split(',')
        .map((n) => n.trim())
        .filter(Boolean)
        .map((n) => {
          const parts = n.split(aliasSep).map((p) => p.trim()).filter(Boolean);
          if (parts.length === 0) return '';
          return aliasPart === 'last' ? parts[parts.length - 1] : parts[0];
        })
        .filter(Boolean);

    const importRe = /import\s+(?:{([^}]+)}|(\w+))\s+from\s+['"]([^'"]+)['"]/g;
    let match;
    while ((match = importRe.exec(content))) {
      const names = match[1] || match[2];
      const modulePath = match[3];
      if (names) {
        for (const name of parseImportedList(names)) addImportedName(name);
      }
      addModuleTerm(modulePath);
    }

    // Namespace imports: import * as X from 'Y'
    const namespaceRe = /import\s+\*\s+as\s+(\w+)\s+from\s+['"]([^'"]+)['"]/g;
    while ((match = namespaceRe.exec(content))) {
      addImportedName(match[1]);
      addModuleTerm(match[2]);
    }

    // Default + named combo imports: import X, { Y, Z } from 'W'
    const comboRe = /import\s+(\w+)\s*,\s*\{([^}]+)\}\s+from\s+['"]([^'"]+)['"]/g;
    while ((match = comboRe.exec(content))) {
      addImportedName(match[1]);
      for (const name of parseImportedList(match[2])) addImportedName(name);
      addModuleTerm(match[3]);
    }

    // CommonJS requires:
    //   const X = require('mod')
    //   const { A, B: C } = require('mod')
    const requireDefaultRe = /\b(?:const|let|var)\s+(\w+)\s*=\s*require\(\s*['"]([^'"]+)['"]\s*\)/g;
    while ((match = requireDefaultRe.exec(content))) {
      addImportedName(match[1]);
      addModuleTerm(match[2]);
    }

    const requireDestructuredRe = /\b(?:const|let|var)\s+\{([^}]+)\}\s*=\s*require\(\s*['"]([^'"]+)['"]\s*\)/g;
    while ((match = requireDestructuredRe.exec(content))) {
      for (const name of parseImportedList(match[1], /\s*:\s*/)) addImportedName(name);
      addModuleTerm(match[2]);
    }

    return;
  }

  // Python: from X import Y; import X
  if (['.py', '.pyi'].includes(ext)) {
    // The name list is either a parenthesised group (which may span several
    // lines) or the remainder of the line up to a comment.
    const pyImportRe = /(?:from\s+([\w.]+)\s+import\s+(\([^)]*\)|[^#\n]+)|import\s+([\w.]+))/g;
    let match;
    while ((match = pyImportRe.exec(content))) {
      const module = match[1] || match[3];
      const names = match[2];
      if (module) {
        for (const part of module.split('.')) {
          if (part.length > 2) addTerm(terms, part, 0.5, 'import');
        }
      }
      if (names) {
        // Drop per-line comments, grouping parentheses and line-continuation
        // backslashes so they never end up glued to a name.
        const cleaned = names.replace(/#.*$/gm, '').replace(/[()\\]/g, ' ');
        for (const entry of cleaned.split(',')) {
          const name = entry.trim().split(/\s+as\s+/)[0].trim();
          if (name && name.length > 1 && name !== '*') {
            addTerm(terms, name, 0.6, 'import');
          }
        }
      }
    }
    return;
  }

  // Go: import "pkg" or import ( "pkg" )
  if (ext === '.go') {
    const goImportRe = /import\s+(?:\(\s*([\s\S]*?)\)|"([^"]+)")/g;
    let match;
    while ((match = goImportRe.exec(content))) {
      const block = match[1] || `"${match[2]}"`;
      const pkgRe = /"([^"]+)"/g;
      let pkgMatch;
      while ((pkgMatch = pkgRe.exec(block))) {
        const lastPart = pkgMatch[1].split('/').pop();
        if (lastPart && lastPart.length > 1) {
          addTerm(terms, lastPart, 0.5, 'import');
        }
      }
    }
    return;
  }

  // Java/Kotlin: import com.example.Foo;
  if (['.java', '.kt', '.kts'].includes(ext)) {
    const javaImportRe = /import\s+(?:static\s+)?([\w.]+)/g;
    let match;
    while ((match = javaImportRe.exec(content))) {
      const parts = match[1].split('.');
      const className = parts[parts.length - 1];
      // Wildcard imports end in "." here, giving an empty last segment.
      if (className && className !== '*' && className.length > 1) {
        addTerm(terms, className, 0.6, 'import');
      }
    }
    return;
  }

  // Rust: use std::collections::HashMap; use std::io::{self, Read};
  if (ext === '.rs') {
    // The path prefix is matched as whole "segment::" units so that it cannot
    // swallow the "::" in front of a brace group; a greedy `[\w:]+` would do
    // exactly that and silently discard every `use a::{B, C}` import.
    // `\b` keeps words that merely end in "use" (e.g. "because") from matching,
    // and `(?!::)` leaves glob imports (`use a::*`) unrecorded.
    const rustUseRe = /\buse\s+(?:::)?(?:\w+::)*(?:\{([^}]+)\}|(\w+)\b(?!::))/g;
    let match;
    while ((match = rustUseRe.exec(content))) {
      const braceList = match[1];
      if (braceList !== undefined) {
        for (const entry of braceList.split(',')) {
          // "io::Read as R" -> "io::Read" -> "Read"
          const original = entry.split(/\s+as\s+/)[0];
          const name = original.match(/(\w+)\W*$/)?.[1];
          if (name && name.length > 1 && name !== 'self') {
            addTerm(terms, name, 0.6, 'import');
          }
        }
      } else {
        const last = match[2];
        if (last && last.length > 1) addTerm(terms, last, 0.5, 'import');
      }
    }
  }
}
|
|
188
|
+
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// Export Extraction
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
|
|
193
|
+
/**
 * Extract exported names from JS/TS source and add them as vocabulary terms.
 *
 * Two export shapes are recognised:
 * - declarations: `export [default] [declare] [abstract] [async]
 *   class|function|function*|const|let|var|interface|type|enum Name`
 * - export lists: `export { a, b as c }` / `export type { T }`, where the
 *   publicly visible name (the alias, when present) is recorded and a bare
 *   `default` entry is ignored.
 *
 * Each name is added with score 0.8 (`'export'`) and each of its sub-words that
 * is longer than two characters and not a stop word with score 0.5
 * (`'export-part'`). Files with any other extension are ignored.
 *
 * @param {string} content - Source file content to parse
 * @param {string} ext - File extension including dot (e.g. '.js', '.ts')
 * @param {Map<string, {score: number, source: string}>} terms - Accumulator map for discovered terms
 * @returns {void}
 */
export function extractExports(content, ext, terms) {
  if (!['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'].includes(ext)) return;

  const addExportedName = (name) => {
    addTerm(terms, name, 0.8, 'export');
    for (const part of splitIdentifier(name)) {
      if (part.length > 2 && !STOP_WORDS.has(part)) {
        addTerm(terms, part, 0.5, 'export-part');
      }
    }
  };

  // Declarations: export class X; export async function X; export function* X
  const declarationRe =
    /export\s+(?:default\s+)?(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(?:(?:class|function|const|let|var|interface|type|enum)\s+|function\s*\*\s*)(\w+)/g;
  for (const match of content.matchAll(declarationRe)) {
    addExportedName(match[1]);
  }

  // Export lists: export { X, Y as Z } [from 'mod']
  const listRe = /export\s+(?:type\s+)?\{([^}]*)\}/g;
  for (const match of content.matchAll(listRe)) {
    for (const rawEntry of match[1].split(',')) {
      // Drop an inline TypeScript "type" modifier, then keep the exported alias.
      const entry = rawEntry.trim().replace(/^type\s+/, '');
      if (!entry) continue;
      const exportedName = entry.split(/\s+as\s+/).pop().trim();
      if (exportedName === 'default' || !/^[\w$]+$/.test(exportedName)) continue;
      addExportedName(exportedName);
    }
  }
}
|
|
216
|
+
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Definition Extraction
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
// Statement keywords that the indented "name(args) {" method pattern also
// matches (e.g. "  if (x) {"); they are never definitions.
const DEFINITION_KEYWORD_BLOCKLIST = new Set([
  'if', 'for', 'while', 'switch', 'catch', 'with', 'function',
]);

/**
 * Extract class, function, and method definitions as vocabulary terms.
 *
 * The same patterns are applied to every file; `ext` is accepted for signature
 * parity with the other extractors but is not consulted. Each definition name
 * is added with score 0.5 (`'definition'`) and each of its sub-words that is
 * longer than two characters and not a stop word with score 0.3
 * (`'definition-part'`). Names whose lower-cased form is a stop word, and
 * control-flow keywords picked up by the method pattern, are skipped.
 *
 * @param {string} content - Source file content to parse
 * @param {string} ext - File extension including dot (e.g. '.js', '.py'); currently unused
 * @param {Map<string, {score: number, source: string}>} terms - Accumulator map for discovered terms
 * @returns {void}
 */
export function extractDefinitions(content, ext, terms) {
  const defPatterns = [
    // Type-like declarations; the name must start with an upper-case letter.
    /(?:class|interface|enum|type)\s+([A-Z]\w+)/g,
    // Named functions.
    /(?:function)\s+([a-zA-Z_$]\w+)/g,
    // NOTE(review): every match of this pattern is also matched by the plain
    // `function` pattern above, so addTerm runs twice for async functions.
    // Kept as-is because addTerm's accumulation semantics are not visible here.
    /(?:async\s+function)\s+([a-zA-Z_$]\w+)/g,
    // Indented method definitions: name(params) {  /  name(params): Type {
    /^\s+(?:async\s+)?([a-zA-Z_$]\w+)\s*\([^)]*\)\s*(?::\s*\w+)?\s*\{/gm,
  ];

  for (const pattern of defPatterns) {
    for (const match of content.matchAll(pattern)) {
      const name = match[1];
      if (!name || name.length <= 1) continue;
      if (DEFINITION_KEYWORD_BLOCKLIST.has(name)) continue;
      if (STOP_WORDS.has(name.toLowerCase())) continue;

      addTerm(terms, name, 0.5, 'definition');
      for (const part of splitIdentifier(name)) {
        if (part.length > 2 && !STOP_WORDS.has(part)) {
          addTerm(terms, part, 0.3, 'definition-part');
        }
      }
    }
  }
}
|
|
256
|
+
|
|
257
|
+
// ---------------------------------------------------------------------------
|
|
258
|
+
// Constant Extraction
|
|
259
|
+
// ---------------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
/**
 * Collect SCREAMING_SNAKE_CASE identifiers from `content` as vocabulary terms.
 *
 * A candidate is an upper-case letter followed by at least three characters
 * from `A-Z`, `0-9` or `_`, delimited by word boundaries. Candidates listed in
 * NON_TERM_CONSTANTS are ignored. Every remaining name is added with score 0.4
 * (`'constant'`); each of its sub-words longer than two characters that is not
 * a stop word is added with score 0.2 (`'constant-part'`).
 *
 * @param {string} content - Source file content to parse
 * @param {Map<string, {score: number, source: string}>} terms - Accumulator map for discovered terms
 * @returns {void}
 */
export function extractConstants(content, terms) {
  // Four characters minimum: one leading capital plus three more.
  const screamingSnakeRe = /\b([A-Z][A-Z0-9_]{3,})\b/g;

  for (const [, constantName] of content.matchAll(screamingSnakeRe)) {
    // Annotation tokens and generic abbreviations carry no search value.
    if (NON_TERM_CONSTANTS.has(constantName)) continue;

    addTerm(terms, constantName, 0.4, 'constant');

    for (const word of splitIdentifier(constantName)) {
      const isUseful = word.length > 2 && !STOP_WORDS.has(word);
      if (isUseful) addTerm(terms, word, 0.2, 'constant-part');
    }
  }
}
|
|
284
|
+
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
// Manifest Extractors
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
|
|
289
|
+
/**
 * Read a package.json document and list the package names declared under
 * `dependencies` followed by those under `devDependencies`.
 *
 * Names of a single character are dropped. Any failure while parsing or
 * reading the manifest yields an empty array; the error message is written to
 * stderr only when the DEBUG_CATCHES environment variable is set.
 *
 * @param {string} content - Raw JSON string of a package.json file
 * @returns {string[]} Array of dependency package names (length > 1), or empty array on parse failure
 */
export function extractNpmDeps(content) {
  try {
    const { dependencies, devDependencies } = JSON.parse(content);
    const names = [];
    for (const section of [dependencies, devDependencies]) {
      for (const depName of Object.keys(section || {})) {
        if (depName.length > 1) names.push(depName);
      }
    }
    return names;
  } catch (err) {
    if (process.env.DEBUG_CATCHES) process.stderr.write(`[non-fatal] ${err?.message || err}\n`);
    return [];
  }
}
|
|
307
|
+
|
|
308
|
+
/**
 * Parse a Cargo.toml file and return crate dependency names from its
 * `[dependencies]` section and `[dependencies.<crate>]` tables.
 *
 * The file is scanned line by line while tracking the current table:
 * - inside `[dependencies]`, every `name = ...` entry contributes `name`;
 * - a `[dependencies.<crate>]` header contributes `<crate>` itself, and the
 *   keys inside that table (`version`, `features`, ...) are ignored because
 *   they are settings rather than crates;
 * - any other table header (e.g. `[dev-dependencies]`, `[package]`) ends the
 *   dependency section.
 *
 * Scanning per line avoids the multiline-regex pitfall where `$` matches at the
 * end of the first entry and cuts the section short.
 *
 * @param {string} content - Raw content of a Cargo.toml file
 * @returns {string[]} Array of crate dependency names, in file order
 */
export function extractCargoDeps(content) {
  const deps = [];
  let inDependencies = false;

  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.trim();

    if (line.startsWith('[')) {
      const header = line.match(/^\[dependencies(?:\.([^\]]+))?\]/);
      const tableCrate = header?.[1]?.trim().replace(/^["']|["']$/g, '');
      if (tableCrate) deps.push(tableCrate);
      // Only the plain [dependencies] table lists crates as its keys.
      inDependencies = Boolean(header) && !tableCrate;
      continue;
    }

    if (!inDependencies) continue;
    const nameMatch = line.match(/^(\w[\w-]*)\s*=/);
    if (nameMatch) deps.push(nameMatch[1]);
  }

  return deps;
}
|
|
326
|
+
|
|
327
|
+
/**
 * Parse a go.mod file and return a short name for each required module.
 *
 * Both directive forms are read: grouped `require ( ... )` blocks and
 * single-line `require <module> <version>` directives. The short name is the
 * last path segment of the module path; when that segment is a major-version
 * suffix such as `v2`, the segment before it is used instead
 * (`github.com/go-chi/chi/v5` -> `chi`). Entries whose first token contains no
 * `/` are ignored.
 *
 * Names from grouped blocks are returned first, followed by names from
 * single-line directives.
 *
 * @param {string} content - Raw content of a go.mod file
 * @returns {string[]} Array of Go module short names
 */
export function extractGoDeps(content) {
  const specs = [];

  // Grouped form: require ( ... )
  for (const match of content.matchAll(/require\s*\(\s*([\s\S]*?)\)/g)) {
    specs.push(...match[1].split('\n'));
  }
  // Single-line form: require example.com/mod v1.2.3
  for (const match of content.matchAll(/^[ \t]*require[ \t]+([^\s(]\S*)/gm)) {
    specs.push(match[1]);
  }

  const deps = [];
  for (const spec of specs) {
    const modulePath = spec.trim().split(/\s+/)[0];
    if (!modulePath || !modulePath.includes('/')) continue;

    const segments = modulePath.split('/');
    let name = segments.pop();
    // A trailing "/vN" is the module's major-version suffix, not its name.
    if (/^v\d+$/.test(name) && segments.length > 0) name = segments.pop();
    if (name) deps.push(name);
  }
  return deps;
}
|
|
346
|
+
|
|
347
|
+
/**
 * Parse a requirements.txt file and return the bare package names.
 *
 * For every line, `#` comments are removed and option lines (those starting
 * with `-`, such as `-r other.txt` or `-e .`) are skipped. The package name is
 * the leading run of letters, digits, `.`, `_` and `-`, and it must be followed
 * by the end of the line or by one of the requirement delimiters: an extras
 * bracket (`[`), a version operator (`< > = ! ~`), a parenthesised version, an
 * environment marker (`;`) or a direct reference (`@`). Lines that do not start
 * with such a name (bare URLs, local paths) are ignored. Both LF and CRLF line
 * endings are handled.
 *
 * @param {string} content - Raw content of a requirements.txt file
 * @returns {string[]} Array of Python package names (length > 1)
 */
export function extractPipDeps(content) {
  const deps = [];
  for (const rawLine of content.split(/\r?\n/)) {
    const line = rawLine.replace(/#.*$/, '').trim();
    if (!line || line.startsWith('-')) continue;

    const nameMatch = line.match(/^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:$|[\[;@<>=!~(])/);
    if (nameMatch && nameMatch[1].length > 1) deps.push(nameMatch[1]);
  }
  return deps;
}
|
|
359
|
+
|
|
360
|
+
/**
 * Parse a pyproject.toml file and return the package names listed in the
 * `dependencies` array of the `[project]` table.
 *
 * The search for `dependencies = [` stops at the next table header, so an
 * array with the same key in a later table is not picked up. The array body is
 * captured with quoted strings treated as units, which keeps an extras bracket
 * inside a requirement (`"requests[security]>=2"`) from ending the array early.
 * Every quoted requirement contributes its leading name (letters, digits, `.`,
 * `_`, `-`), so extras, version specifiers, environment markers and direct
 * references are stripped. Single-line and multi-line arrays are both handled.
 *
 * @param {string} content - Raw content of a pyproject.toml file
 * @returns {string[]} Array of Python package names (length > 1)
 */
export function extractPyprojectDeps(content) {
  const arrayMatch = content.match(
    /\[project\](?:(?!^[ \t]*\[)[\s\S])*?^[ \t]*dependencies\s*=\s*\[((?:"[^"]*"|'[^']*'|[^\]"'])*)\]/m,
  );
  if (!arrayMatch) return [];

  const deps = [];
  for (const item of arrayMatch[1].matchAll(/"([^"]*)"|'([^']*)'/g)) {
    const requirement = (item[1] ?? item[2]).trim();
    const name = requirement.match(/^[A-Za-z0-9][A-Za-z0-9._-]*/)?.[0];
    if (name && name.length > 1) deps.push(name);
  }
  return deps;
}
|