@kolmo/scout 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/api/kolmoApiClient.d.ts +2 -0
  2. package/dist/api/kolmoApiClient.js +25 -0
  3. package/dist/commands/auth.d.ts +6 -0
  4. package/dist/commands/auth.js +41 -0
  5. package/dist/commands/explain.d.ts +2 -0
  6. package/dist/commands/explain.js +95 -0
  7. package/dist/commands/scan.d.ts +2 -0
  8. package/dist/commands/scan.js +151 -0
  9. package/dist/commands/sync.d.ts +2 -0
  10. package/dist/commands/sync.js +50 -0
  11. package/dist/config/authConfig.d.ts +8 -0
  12. package/dist/config/authConfig.js +26 -0
  13. package/dist/config/writeKolmoConfig.d.ts +2 -0
  14. package/dist/config/writeKolmoConfig.js +6 -0
  15. package/dist/detectors/assetDetector.d.ts +8 -0
  16. package/dist/detectors/assetDetector.js +7 -0
  17. package/dist/detectors/fileTypeDetector.d.ts +1 -0
  18. package/dist/detectors/fileTypeDetector.js +38 -0
  19. package/dist/detectors/projectTypeDetector.d.ts +4 -0
  20. package/dist/detectors/projectTypeDetector.js +36 -0
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.js +65 -0
  23. package/dist/manifest/buildScoutManifest.d.ts +15 -0
  24. package/dist/manifest/buildScoutManifest.js +219 -0
  25. package/dist/profilers/tabularProfiler.d.ts +15 -0
  26. package/dist/profilers/tabularProfiler.js +131 -0
  27. package/dist/scanners/contentScanner.d.ts +6 -0
  28. package/dist/scanners/contentScanner.js +22 -0
  29. package/dist/scanners/csvScanner.d.ts +5 -0
  30. package/dist/scanners/csvScanner.js +26 -0
  31. package/dist/scanners/excelScanner.d.ts +5 -0
  32. package/dist/scanners/excelScanner.js +25 -0
  33. package/dist/scanners/fileScanner.d.ts +1 -0
  34. package/dist/scanners/fileScanner.js +24 -0
  35. package/dist/scanners/notebookScanner.d.ts +5 -0
  36. package/dist/scanners/notebookScanner.js +32 -0
  37. package/dist/scanners/packageScanner.d.ts +4 -0
  38. package/dist/scanners/packageScanner.js +46 -0
  39. package/dist/scanners/pythonScanner.d.ts +4 -0
  40. package/dist/scanners/pythonScanner.js +30 -0
  41. package/dist/summarizers/repoSummary.d.ts +16 -0
  42. package/dist/summarizers/repoSummary.js +101 -0
  43. package/dist/taxonomy/energyTaxonomy.d.ts +27 -0
  44. package/dist/taxonomy/energyTaxonomy.js +250 -0
  45. package/dist/types.d.ts +78 -0
  46. package/dist/types.js +1 -0
  47. package/dist/workflows/workflowSuggestions.d.ts +2 -0
  48. package/dist/workflows/workflowSuggestions.js +61 -0
  49. package/package.json +38 -0
@@ -0,0 +1,219 @@
1
+ import path from 'path';
2
+ import { existsSync } from 'fs';
3
+ import { scanFiles } from '../scanners/fileScanner.js';
4
+ import { scanPackages } from '../scanners/packageScanner.js';
5
+ import { scanCsv } from '../scanners/csvScanner.js';
6
+ import { scanExcel } from '../scanners/excelScanner.js';
7
+ import { scanPython } from '../scanners/pythonScanner.js';
8
+ import { scanNotebook } from '../scanners/notebookScanner.js';
9
+ import { readFileContent } from '../scanners/contentScanner.js';
10
+ import { detectFileType } from '../detectors/fileTypeDetector.js';
11
+ import { detectAssetDetails } from '../detectors/assetDetector.js';
12
+ import { detectProjectType } from '../detectors/projectTypeDetector.js';
13
+ import { profileTabularData } from '../profilers/tabularProfiler.js';
14
+ import { buildRepoSummary } from '../summarizers/repoSummary.js';
15
+ import { buildSuggestedWorkflows, explainWorkflows } from '../workflows/workflowSuggestions.js';
16
/**
 * Scans a project directory and assembles the Scout manifest for it.
 *
 * Every file returned by `scanFiles` is inspected by the scanner matching its
 * extension, optionally excerpted for content, classified, and checked for
 * asset evidence. The per-file results are then rolled up into project-level
 * type, assets, suggested workflows, explanations and a summary.
 *
 * @param options Scan options. Reads `dir` (directory to scan, defaults to the
 *   current working directory), `noContent` (skip content excerpts),
 *   `summary` (caller-provided summary text) and `portfolioName`.
 * @returns `{ cwd, allFiles, hasEnv, manifest }` where `hasEnv` reports whether
 *   a `.env` file exists directly under `cwd`.
 */
export async function buildScoutManifest(options) {
  const cwd = options.dir ? path.resolve(options.dir) : process.cwd();
  const projectName = path.basename(cwd);
  const allFiles = await scanFiles(cwd);
  const packages = await scanPackages(cwd);
  const hasEnv = existsSync(path.join(cwd, '.env'));

  const detectedFiles = [];
  const fileContents = [];
  const fileTypeReasons = [];
  const assetDetailsBySymbol = new Map();

  for (const relPath of allFiles) {
    const absPath = path.join(cwd, relPath);
    const ext = path.extname(relPath).toLowerCase();
    const basename = path.basename(relPath);
    const metadata = {};
    let columns = [];
    let signals = [];

    try {
      switch (ext) {
        case '.csv': {
          const csv = await scanCsv(absPath);
          columns = csv.columns;
          metadata.rowCount = csv.rowCount;
          metadata.sampleRowCount = csv.sampleRows.length;
          metadata.profile = profileTabularData(columns, csv.sampleRows);
          break;
        }
        case '.xlsx':
        case '.xls': {
          const sheet = await scanExcel(absPath);
          columns = sheet.columns;
          metadata.sheetNames = sheet.sheetNames;
          metadata.sampleRowCount = sheet.sampleRows.length;
          metadata.profile = profileTabularData(columns, sheet.sampleRows);
          break;
        }
        case '.py': {
          const python = await scanPython(absPath);
          signals = python.signals;
          metadata.importCount = python.imports.length;
          break;
        }
        case '.ipynb': {
          const notebook = await scanNotebook(absPath);
          signals = notebook.signals;
          metadata.cellCount = notebook.cellCount;
          metadata.hasMarkdown = notebook.hasMarkdown;
          break;
        }
        default:
          break;
      }
    }
    catch {
      // Keep Scout resilient on unreadable or malformed user files: whatever
      // was gathered before the failure is kept and the file is still listed.
    }

    // Content excerpts are collected for LLM analysis unless --no-content is set.
    if (!options.noContent) {
      const excerpt = await readFileContent(absPath, ext);
      if (excerpt) {
        fileContents.push({ path: relPath, content: excerpt.content, truncated: excerpt.truncated });
      }
    }

    const detected_type = detectFileType(basename, columns);
    const fileAssetDetails = detectAssetDetails({
      file: relPath,
      filenames: [basename, relPath],
      columnNames: columns,
      textValues: signals,
    });
    const detected_assets = fileAssetDetails.map((asset) => asset.symbol);
    fileAssetDetails.forEach((asset) => mergeAsset(assetDetailsBySymbol, asset));

    const fileInfo = {
      path: relPath,
      extension: ext,
      detected_type,
      detected_assets,
      columns,
      signals,
      metadata,
    };
    detectedFiles.push(fileInfo);
    fileTypeReasons.push(explainFileType(fileInfo));
  }

  const projectType = detectProjectType(allFiles, packages);
  const detailedAssets = [...assetDetailsBySymbol.values()];
  detailedAssets.sort((left, right) => left.symbol.localeCompare(right.symbol));
  const detectedAssets = detailedAssets.map((asset) => asset.symbol);
  const suggestedWorkflows = buildSuggestedWorkflows(projectType, detectedAssets);
  const evidenceCitations = buildEvidenceCitations(detectedFiles);

  const explanations = {
    project_type_reasons: explainProjectType(projectType, allFiles, packages),
    asset_evidence: detailedAssets,
    file_type_reasons: fileTypeReasons,
    workflow_reasons: explainWorkflows(suggestedWorkflows, detectedAssets, projectType),
    evidence_citations: evidenceCitations,
  };

  const generatedSummary = buildRepoSummary({
    projectName,
    projectType,
    detectedAssets,
    detailedAssets,
    detectedFiles,
    detectedPackages: packages,
    suggestedWorkflows,
    evidenceCitations,
  });

  // A caller-provided summary wins only when it is non-blank after trimming.
  const hasProvidedSummary = Boolean(options.summary?.trim());

  const manifest = {
    schema_version: '0.1.0',
    project_name: projectName,
    created_by: 'kolmo-scout',
    privacy_mode: options.noContent ? 'metadata_only' : 'content_excerpts',
    project_type: projectType,
    repo_summary: hasProvidedSummary ? normalizeSummary(options.summary) : generatedSummary,
    summary_source: hasProvidedSummary ? 'provided' : 'generated',
    detected_assets: detectedAssets,
    detailed_assets: detailedAssets,
    detected_files: detectedFiles,
    detected_packages: packages,
    suggested_workflows: suggestedWorkflows,
    explanations,
    file_contents: fileContents,
    portfolio_name: options.portfolioName,
  };

  return { cwd, allFiles, hasEnv, manifest };
}
137
/**
 * Collapses every run of whitespace in a summary to a single space and
 * removes leading/trailing whitespace.
 *
 * @param summary Free-form summary text.
 * @returns The summary as a single line with single spaces between words.
 */
export function normalizeSummary(summary) {
  const words = summary.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}
140
/**
 * Folds one asset detection into the per-symbol registry.
 *
 * The first detection of a symbol is stored as-is. Later detections only
 * contribute evidence entries whose `evidenceKey` is not already present on
 * the stored asset; those entries are appended to the stored asset's
 * `evidence` array in place.
 *
 * @param assetDetailsBySymbol Map from asset symbol to the stored asset.
 * @param asset Detection to merge; reads `symbol` and `evidence`.
 */
function mergeAsset(assetDetailsBySymbol, asset) {
  const known = assetDetailsBySymbol.get(asset.symbol);
  if (!known) {
    assetDetailsBySymbol.set(asset.symbol, asset);
    return;
  }

  const seenKeys = new Set();
  for (const item of known.evidence) {
    seenKeys.add(evidenceKey(item));
  }

  for (const candidate of asset.evidence) {
    const key = evidenceKey(candidate);
    if (seenKeys.has(key)) {
      continue;
    }
    known.evidence.push(candidate);
    seenKeys.add(key);
  }
}
155
/**
 * Builds the de-duplication key for an evidence entry by joining its `file`
 * (empty string when null/undefined), `source`, `value` and `pattern` with
 * colons.
 *
 * @param evidence Evidence entry to fingerprint.
 * @returns The colon-separated key.
 */
function evidenceKey(evidence) {
  const parts = [evidence.file ?? '', evidence.source, evidence.value, evidence.pattern];
  // Each part goes through template-literal stringification so that missing
  // fields still render as "undefined"/"null" rather than an empty string.
  return parts.map((part) => `${part}`).join(':');
}
158
/**
 * Produces the human-readable reason(s) for a detected project type.
 *
 * Known project types map to one fixed sentence. Any other type yields a
 * generic sentence built from the number of scanned files and whether any
 * Python or Node dependencies were found.
 *
 * @param projectType Detected project type identifier.
 * @param files Scanned file paths; only the count is used.
 * @param packages Detected packages as `{ python, node }` arrays; only read
 *   for project types without a fixed sentence.
 * @returns A one-element array holding the reason sentence.
 */
function explainProjectType(projectType, files, packages) {
  const fixedReasons = new Map([
    ['python_analytics_project', 'requirements.txt and Python files were detected.'],
    ['notebook_research_project', 'One or more Jupyter notebooks were detected.'],
    ['frontend_project', 'package.json plus React or Vite dependencies were detected.'],
    ['fastapi_backend_project', 'main.py plus FastAPI dependency were detected.'],
    ['supabase_project', 'A supabase/ project directory was detected.'],
    ['data_folder_project', 'More than half of scanned files are CSV or Excel files.'],
  ]);

  const fixed = fixedReasons.get(projectType);
  if (fixed !== undefined) {
    return [fixed];
  }

  let fileText;
  if (files.length === 1) {
    fileText = '1 file';
  }
  else {
    fileText = `${files.length} files`;
  }

  const dependencyCount = [...packages.python, ...packages.node].length;
  let packageText;
  if (dependencyCount) {
    packageText = 'with some dependencies';
  }
  else {
    packageText = 'with no strong dependency signal';
  }

  return [`Scanned ${fileText} ${packageText}, but no specialized project pattern dominated.`];
}
183
/**
 * Explains why a file received its detected type.
 *
 * @param file Detected file info; reads `path`, `detected_type`, `columns`
 *   and `signals`.
 * @returns `{ path, detected_type, reason, evidence }` where `evidence` lists
 *   up to five `column:<name>` entries followed by up to five
 *   `signal:<name>` entries, and `reason` is the sentence for the detected
 *   type (or the `unknown` sentence when the type has no entry).
 */
function explainFileType(file) {
  // A Map is used instead of a plain object so that only the entries listed
  // here can match; an object lookup would also resolve inherited keys such
  // as "constructor" or "toString" and return a non-string reason.
  const reasons = new Map([
    ['historical_prices', 'Date/timestamp and price-like columns were found.'],
    ['forward_curve', 'Maturity/expiry/contract and price/value columns were found.'],
    ['positions', 'Position, volume, book, or notional columns were found.'],
    ['risk_results', 'Risk, VaR, PnL, loss, or return fields were found.'],
    ['forecast_output', 'Forecast, prediction, yhat, or model output fields were found.'],
    ['news_events', 'Headline/title/source/url and published/date fields were found.'],
    ['notebook', 'The file is a Jupyter notebook.'],
    ['risk_model', 'The Python filename suggests risk, VaR, or PnL modelling.'],
    ['forecast_model', 'The Python filename suggests forecast or prediction modelling.'],
    ['unknown', 'No strong Scout file-type rule matched this file.'],
  ]);
  const evidence = [
    ...file.columns.slice(0, 5).map((column) => `column:${column}`),
    ...file.signals.slice(0, 5).map((signal) => `signal:${signal}`),
  ];
  return {
    path: file.path,
    detected_type: file.detected_type,
    reason: reasons.get(file.detected_type) ?? reasons.get('unknown'),
    evidence,
  };
}
207
/**
 * Builds short "path: evidence" citation strings for the first eight files
 * that carry any evidence (columns, signals or detected assets).
 *
 * Each citation lists up to four column names followed by up to three
 * `signal:<name>` entries; when a file has neither, its detected assets are
 * listed instead.
 *
 * @param files Detected file infos in scan order.
 * @returns At most eight citation strings.
 */
function buildEvidenceCitations(files) {
  const describe = (file) => {
    const columnParts = file.columns.slice(0, 4);
    const signalParts = file.signals.slice(0, 3).map((signal) => `signal:${signal}`);
    const parts = columnParts.concat(signalParts);
    let evidence;
    if (parts.length) {
      evidence = parts.join(', ');
    }
    else {
      evidence = file.detected_assets.join(', ');
    }
    return `${file.path}: ${evidence}`;
  };

  const withEvidence = [];
  for (const file of files) {
    const hasNothing = file.columns.length <= 0
      && file.signals.length <= 0
      && file.detected_assets.length <= 0;
    if (!hasNothing) {
      withEvidence.push(file);
    }
  }
  return withEvidence.slice(0, 8).map((file) => describe(file));
}
@@ -0,0 +1,15 @@
1
/**
 * Lightweight profile of a tabular sample (column names plus sample rows).
 * Only the two counts are always present; every other field is filled in
 * when the corresponding detection succeeds.
 */
export interface TabularProfile {
    /** Number of sample rows that were profiled. */
    sample_row_count: number;
    /** Number of columns that were profiled. */
    column_count: number;
    /** Name of the column selected as the date column, if any. */
    date_column?: string;
    /** Guessed sampling frequency of the date column (e.g. "daily", "monthly", "unknown"). */
    frequency_guess?: string;
    /** Most recent parsed date in the date column, formatted as YYYY-MM-DD. */
    latest_date?: string;
    /** Whole days between `latest_date` and the time of profiling. */
    stale_days?: number;
    /** Whether `stale_days` exceeds the profiler's staleness threshold. */
    is_stale?: boolean;
    /** Fraction of sampled cells that are empty or missing. */
    missingness_estimate?: number;
    /** Name of the column selected as the tenor column, if any. */
    tenor_column?: string;
    /** Number of distinct tenor labels found in the tenor column. */
    tenor_count?: number;
    /** Distinct tenor labels found (the profiler may cap the list length). */
    detected_tenors?: string[];
    /** Labels of tenors missing between detected ones (the profiler may cap the list length). */
    tenor_gaps?: string[];
}
15
/**
 * Profiles a tabular sample.
 *
 * @param columns Column names of the table.
 * @param rows Sample rows keyed by column name.
 * @returns The profile computed from the sample.
 */
export declare function profileTabularData(columns: string[], rows: Array<Record<string, unknown>>): TabularProfile;
@@ -0,0 +1,131 @@
1
+ import { parseTenor } from '../taxonomy/energyTaxonomy.js';
2
/** A dataset whose latest date is more than this many days old is flagged as stale. */
const STALE_DAYS = 45;

/**
 * Builds a lightweight profile of a tabular sample.
 *
 * Always reports the row and column counts. When both are non-zero it also
 * estimates missingness, and, if a date column or a tenor column can be
 * guessed, adds date freshness/frequency and tenor coverage details.
 *
 * @param columns Column names of the table.
 * @param rows Sample rows keyed by column name.
 * @returns The profile object; optional fields are only set when detected.
 */
export function profileTabularData(columns, rows) {
  const profile = { sample_row_count: rows.length, column_count: columns.length };
  const nothingToProfile = columns.length === 0 || rows.length === 0;
  if (nothingToProfile) {
    return profile;
  }

  profile.missingness_estimate = estimateMissingness(columns, rows);

  const dateColumn = guessDateColumn(columns, rows);
  if (dateColumn) {
    profile.date_column = dateColumn;
    const dates = [];
    rows.forEach((row) => {
      const parsed = parseDate(row[dateColumn]);
      if (parsed != null) {
        dates.push(parsed);
      }
    });
    dates.sort((earlier, later) => earlier.getTime() - later.getTime());
    if (dates.length > 0) {
      const latestDate = dates[dates.length - 1];
      profile.latest_date = latestDate.toISOString().slice(0, 10);
      profile.frequency_guess = guessFrequency(dates);
      // Future-dated data clamps to zero rather than reporting negative age.
      const ageInDays = Math.floor((Date.now() - latestDate.getTime()) / 86_400_000);
      profile.stale_days = Math.max(0, ageInDays);
      profile.is_stale = profile.stale_days > STALE_DAYS;
    }
  }

  const tenorColumn = guessTenorColumn(columns, rows);
  if (tenorColumn) {
    const parsedTenors = [];
    rows.forEach((row) => {
      const tenor = parseTenor(String(row[tenorColumn] ?? ''));
      if (tenor != null) {
        parsedTenors.push(tenor);
      }
    });
    if (parsedTenors.length > 0) {
      const uniqueLabels = new Set();
      parsedTenors.forEach((tenor) => uniqueLabels.add(tenor.label));
      const labels = [...uniqueLabels].sort();
      profile.tenor_column = tenorColumn;
      profile.tenor_count = labels.length;
      profile.detected_tenors = labels.slice(0, 24);
      profile.tenor_gaps = detectTenorGaps(parsedTenors);
    }
  }

  return profile;
}
42
/**
 * Estimates the share of empty cells in the sample.
 *
 * A cell counts as missing when its value is null/undefined or its string
 * form is blank after trimming.
 *
 * @param columns Column names to inspect in every row.
 * @param rows Sample rows keyed by column name.
 * @returns Missing cells divided by total cells, rounded to three decimals;
 *   0 when there are no cells.
 */
function estimateMissingness(columns, rows) {
  const isMissing = (value) => value == null || String(value).trim() === '';

  let total = 0;
  let missing = 0;
  for (const row of rows) {
    for (const column of columns) {
      total += 1;
      missing += isMissing(row[column]) ? 1 : 0;
    }
  }

  if (total === 0) {
    return 0;
  }
  return Number((missing / total).toFixed(3));
}
56
/**
 * Picks the column most likely to hold dates.
 *
 * A column whose name contains `date`, `timestamp`, `ts`, `asof` or `as_of`
 * as a separate token (delimited by start/end, underscore or whitespace)
 * wins immediately. Otherwise the first column whose values parse as dates
 * in at least 60% of the first ten rows (and at least two rows) is chosen.
 *
 * @param columns Column names, in table order.
 * @param rows Sample rows keyed by column name.
 * @returns The chosen column name, or `undefined` when none qualifies.
 */
function guessDateColumn(columns, rows) {
  const dateLikeName = /(^|_|\s)(date|timestamp|ts|asof|as_of)(_|$|\s)/i;
  for (const column of columns) {
    if (dateLikeName.test(column)) {
      return column;
    }
  }

  const sample = rows.slice(0, 10);
  const required = Math.max(2, Math.ceil(Math.min(rows.length, 10) * 0.6));
  for (const column of columns) {
    let hits = 0;
    for (const row of sample) {
      if (parseDate(row[column]) != null) {
        hits += 1;
      }
    }
    if (hits >= required) {
      return column;
    }
  }
  return undefined;
}
65
/**
 * Picks the column most likely to hold tenor / delivery-period labels.
 *
 * A column whose name contains `tenor`, `maturity`, `contract`, `expiry`,
 * `delivery` or `period` wins immediately. Otherwise the first column whose
 * values parse as tenors in at least 50% of the first ten rows (and at least
 * two rows) is chosen.
 *
 * @param columns Column names, in table order.
 * @param rows Sample rows keyed by column name.
 * @returns The chosen column name, or `undefined` when none qualifies.
 */
function guessTenorColumn(columns, rows) {
  const tenorLikeName = /(tenor|maturity|contract|expiry|delivery|period)/i;
  for (const column of columns) {
    if (tenorLikeName.test(column)) {
      return column;
    }
  }

  const sample = rows.slice(0, 10);
  const required = Math.max(2, Math.ceil(Math.min(rows.length, 10) * 0.5));
  for (const column of columns) {
    let hits = 0;
    for (const row of sample) {
      if (parseTenor(String(row[column] ?? '')) != null) {
        hits += 1;
      }
    }
    if (hits >= required) {
      return column;
    }
  }
  return undefined;
}
74
/**
 * Converts a cell value to a `Date`, or returns `null` when the value does
 * not look like a date.
 *
 * Accepted inputs:
 * - a valid `Date` instance (returned as-is);
 * - a finite number of at least 1e11, interpreted as epoch milliseconds;
 * - a string that is not purely numeric and that `Date` can parse.
 *
 * Plain numbers and digit-only strings are rejected on purpose. `new Date(n)`
 * accepts every finite number as milliseconds since 1970, and the engine's
 * fallback string parser accepts short digit strings such as "12", so
 * ordinary numeric columns (prices, volumes, spreadsheet serials) would
 * otherwise be mistaken for dates from 1970 or arbitrary years. As a
 * consequence, year-only strings such as "2024" are not treated as dates.
 *
 * @param value Raw cell value.
 * @returns The parsed date, or `null`.
 */
function parseDate(value) {
  // Smallest number treated as an epoch-millisecond timestamp (early 1973).
  const minEpochMs = 1e11;

  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : value;
  }

  if (typeof value === 'number') {
    if (!Number.isFinite(value) || value < minEpochMs) {
      return null;
    }
    const fromNumber = new Date(value);
    return Number.isNaN(fromNumber.getTime()) ? null : fromNumber;
  }

  if (typeof value !== 'string') {
    return null;
  }

  const text = value.trim();
  if (text === '' || /^[+-]?\d+(\.\d+)?$/.test(text)) {
    return null;
  }
  const fromText = new Date(text);
  return Number.isNaN(fromText.getTime()) ? null : fromText;
}
82
/**
 * Guesses the sampling frequency of a chronologically sorted list of dates.
 *
 * The gaps between consecutive dates are rounded to whole days, non-positive
 * gaps are dropped, and the median gap (upper middle for even counts) is
 * matched against fixed day ranges.
 *
 * @param dates Dates sorted ascending.
 * @returns `'daily'`, `'weekly'`, `'monthly'`, `'quarterly'`, `'annual'`,
 *   `'irregular'` when the median fits no range, or `'unknown'` when there
 *   are fewer than three dates or no positive gaps.
 */
function guessFrequency(dates) {
  if (dates.length < 3) {
    return 'unknown';
  }

  const gaps = [];
  for (let position = 1; position < dates.length; position += 1) {
    const elapsedMs = dates[position].getTime() - dates[position - 1].getTime();
    const days = Math.round(elapsedMs / 86_400_000);
    if (days > 0) {
      gaps.push(days);
    }
  }
  if (gaps.length === 0) {
    return 'unknown';
  }

  gaps.sort((a, b) => a - b);
  const median = gaps[Math.floor(gaps.length / 2)];

  const bands = [
    { min: Number.NEGATIVE_INFINITY, max: 2, label: 'daily' },
    { min: 5, max: 9, label: 'weekly' },
    { min: 27, max: 35, label: 'monthly' },
    { min: 80, max: 100, label: 'quarterly' },
    { min: 350, max: 380, label: 'annual' },
  ];
  const match = bands.find((band) => median >= band.min && median <= band.max);
  return match ? match.label : 'irregular';
}
104
/**
 * Lists tenors that are missing between the ones that were detected.
 *
 * Two series are checked independently: relative months (entries with
 * `kind === 'relative_month'` and a `sequence`), reported as `M<n>`, and
 * quarters (entries with `kind === 'quarter'`, a `year` and a `quarter`),
 * reported as `Q<q>-<two-digit year>`. Month gaps come first in the result.
 *
 * @param tenors Parsed tenor objects.
 * @returns Up to 24 distinct gap labels.
 */
function detectTenorGaps(tenors) {
  const ascending = (a, b) => a - b;

  // Labels every integer strictly between each pair of neighbouring values.
  const labelsBetween = (sortedValues, toLabel) => {
    const labels = [];
    sortedValues.forEach((current, position) => {
      if (position === 0) {
        return;
      }
      const previous = sortedValues[position - 1];
      for (let missing = previous + 1; missing < current; missing += 1) {
        labels.push(toLabel(missing));
      }
    });
    return labels;
  };

  const monthSequences = tenors
    .filter((tenor) => tenor.kind === 'relative_month' && tenor.sequence != null)
    .map((tenor) => Number(tenor.sequence));
  monthSequences.sort(ascending);

  // Quarters are flattened to a single running index: year * 4 + quarter.
  const quarterIndexes = tenors
    .filter((tenor) => tenor.kind === 'quarter' && tenor.year != null && tenor.quarter != null)
    .map((tenor) => Number(tenor.year) * 4 + Number(tenor.quarter));
  quarterIndexes.sort(ascending);

  const monthGaps = labelsBetween(monthSequences, (sequence) => `M${sequence}`);
  const quarterGaps = labelsBetween(quarterIndexes, (index) => {
    const year = Math.floor((index - 1) / 4);
    const quarter = ((index - 1) % 4) + 1;
    return `Q${quarter}-${String(year).slice(-2)}`;
  });

  return [...new Set([...monthGaps, ...quarterGaps])].slice(0, 24);
}
@@ -0,0 +1,6 @@
1
/** Text excerpt read from a single file. */
export interface FileContent {
    /** Path of the file the excerpt was read from. */
    path: string;
    /** The excerpt text. */
    content: string;
    /** Whether the file was longer than the excerpt. */
    truncated: boolean;
}
6
/**
 * Reads a text excerpt from a file.
 *
 * @param absPath Path of the file to read.
 * @param ext File extension, used to decide whether the file is read at all.
 * @returns The excerpt, or `null` when no excerpt is produced for the file.
 */
export declare function readFileContent(absPath: string, ext: string): Promise<FileContent | null>;
@@ -0,0 +1,22 @@
1
+ import fs from 'fs/promises';
2
/** Maximum number of bytes of a file that are returned as its excerpt. */
const MAX_BYTES = 8_192;
/** Lower-case extensions of the file types whose content is excerpted. */
const READABLE_EXTENSIONS = new Set(['.py', '.ipynb', '.md', '.txt', '.rst', '.r', '.jl', '.sql']);

/**
 * Reads up to `MAX_BYTES` of a text file as a UTF-8 excerpt.
 *
 * Only the start of the file is read, so large files are not loaded whole.
 * When the excerpt is cut short, a multi-byte character that straddles the
 * cut is dropped instead of being decoded as a replacement character.
 *
 * @param absPath Path of the file to read; echoed back in the result's `path`.
 * @param ext File extension (any case); files with other extensions are skipped.
 * @returns `{ path, content, truncated }`, or `null` when the extension is
 *   not readable, a NUL byte appears in the first 512 bytes (treated as
 *   binary), or the file cannot be read. Never rejects.
 */
export async function readFileContent(absPath, ext) {
  if (!READABLE_EXTENSIONS.has(ext.toLowerCase()))
    return null;
  let handle;
  try {
    handle = await fs.open(absPath, 'r');
    // One byte more than the excerpt size is requested so that truncation can
    // be detected without reading (or stat-ing) the rest of the file.
    const buffer = Buffer.alloc(MAX_BYTES + 1);
    let bytesRead = 0;
    while (bytesRead < buffer.length) {
      const chunk = await handle.read(buffer, bytesRead, buffer.length - bytesRead, bytesRead);
      if (chunk.bytesRead === 0)
        break;
      bytesRead += chunk.bytesRead;
    }
    // Skip binary files — check for null bytes in first 512 bytes
    const probe = buffer.subarray(0, Math.min(bytesRead, 512));
    if (probe.includes(0))
      return null;
    const truncated = bytesRead > MAX_BYTES;
    const head = buffer.subarray(0, Math.min(bytesRead, MAX_BYTES));
    // In streaming mode the decoder holds back an incomplete trailing
    // sequence; ignoreBOM keeps a leading BOM in the text, matching
    // Buffer#toString.
    const content = truncated
      ? new TextDecoder('utf-8', { ignoreBOM: true }).decode(head, { stream: true })
      : head.toString('utf8');
    return { path: absPath, content, truncated };
  }
  catch {
    return null;
  }
  finally {
    // A failing close must not turn a best-effort read into a rejection.
    await handle?.close().catch(() => undefined);
  }
}
@@ -0,0 +1,5 @@
1
/**
 * Samples the beginning of a CSV file.
 *
 * @param filepath Path of the CSV file.
 * @returns The header column names, the number of rows that were parsed from
 *   the sample, and those rows keyed by column name.
 */
export declare function scanCsv(filepath: string): Promise<{
    columns: string[];
    rowCount: number;
    sampleRows: Array<Record<string, unknown>>;
}>;
@@ -0,0 +1,26 @@
1
+ import { open } from 'fs/promises';
2
+ import Papa from 'papaparse';
3
/** Number of bytes read from the start of a CSV file for sampling. */
const CSV_SNIFF_BYTES = 20_480;

/**
 * Samples the beginning of a CSV file and parses up to 25 data rows from it.
 *
 * Only the first `CSV_SNIFF_BYTES` bytes are considered. When the file is
 * longer than that, the sample is cut back to the last complete line so that
 * a row sliced in half by the byte limit is not reported as a sample row
 * with missing fields.
 *
 * @param filepath Path of the CSV file.
 * @returns `{ columns, rowCount, sampleRows }`. `rowCount` is the number of
 *   sampled rows (at most 25), not the number of rows in the file. All three
 *   are empty/zero when the file cannot be opened, read or parsed.
 */
export async function scanCsv(filepath) {
  let fileHandle;
  try {
    fileHandle = await open(filepath, 'r');
    // One extra byte tells us whether the file continues past the sniff window.
    const buffer = Buffer.alloc(CSV_SNIFF_BYTES + 1);
    const { bytesRead } = await fileHandle.read(buffer, 0, buffer.length, 0);
    let end = Math.min(bytesRead, CSV_SNIFF_BYTES);
    if (bytesRead > CSV_SNIFF_BYTES) {
      // Drop the trailing partial line; keep the window as-is when it holds
      // no newline at all (a single very long line).
      const lastNewline = buffer.lastIndexOf(0x0a, end - 1);
      if (lastNewline >= 0) {
        end = lastNewline + 1;
      }
    }
    const snippet = buffer.subarray(0, end).toString('utf-8');
    const result = Papa.parse(snippet, {
      header: true,
      skipEmptyLines: true,
      preview: 25,
    });
    const columns = result.meta.fields ?? [];
    const rowCount = result.data.length;
    return { columns, rowCount, sampleRows: result.data };
  }
  catch {
    return { columns: [], rowCount: 0, sampleRows: [] };
  }
  finally {
    await fileHandle?.close();
  }
}
@@ -0,0 +1,5 @@
1
/**
 * Samples an Excel workbook.
 *
 * @param filepath Path of the workbook file.
 * @returns The header column names, the workbook's sheet names, and sample
 *   rows keyed by column name.
 */
export declare function scanExcel(filepath: string): Promise<{
    columns: string[];
    sheetNames: string[];
    sampleRows: Array<Record<string, unknown>>;
}>;
@@ -0,0 +1,25 @@
1
+ import { readFile } from 'fs/promises';
2
+ import * as XLSX from 'xlsx';
3
/**
 * Samples the first sheet of an Excel workbook.
 *
 * The workbook is parsed with a 26-row limit per sheet. Column names come
 * from the first row of the first sheet (trimmed, blanks dropped); sample
 * rows are that sheet converted to objects with empty cells filled as ''.
 *
 * @param filepath Path of the workbook file.
 * @returns `{ columns, sheetNames, sampleRows }`; all empty when the workbook
 *   has no sheets or cannot be read or parsed.
 */
export async function scanExcel(filepath) {
  try {
    const fileBytes = await readFile(filepath);
    const workbook = XLSX.read(fileBytes, { type: 'buffer', sheetRows: 26 });
    const { SheetNames: sheetNames } = workbook;
    if (sheetNames.length === 0) {
      return { columns: [], sheetNames: [], sampleRows: [] };
    }

    const sheet = workbook.Sheets[sheetNames[0]];
    // header: 1 yields raw row arrays, which exposes the header row itself.
    const rawRows = XLSX.utils.sheet_to_json(sheet, { header: 1 });
    const sampleRows = XLSX.utils.sheet_to_json(sheet, {
      defval: '',
    });

    const columns = [];
    const headerRow = rawRows[0];
    if (Array.isArray(headerRow)) {
      for (const cell of headerRow) {
        const name = String(cell ?? '').trim();
        if (name) {
          columns.push(name);
        }
      }
    }
    return { columns, sheetNames, sampleRows };
  }
  catch {
    return { columns: [], sheetNames: [], sampleRows: [] };
  }
}
@@ -0,0 +1 @@
1
/**
 * Lists the files found under a directory.
 *
 * @param rootDir Directory to scan.
 * @returns The file paths that were found.
 */
export declare function scanFiles(rootDir: string): Promise<string[]>;
@@ -0,0 +1,24 @@
1
+ import fg from 'fast-glob';
2
/** Glob patterns for dependency, build, cache and tool-generated paths that are never scanned. */
const IGNORE_PATTERNS = [
  '**/node_modules/**',
  '**/.git/**',
  '**/dist/**',
  '**/build/**',
  '**/.venv/**',
  '**/venv/**',
  '**/__pycache__/**',
  '**/.next/**',
  '**/coverage/**',
  '**/kolmo.config.json',
  '**/*.pyc',
  '**/.DS_Store',
];

/**
 * Lists every file under `rootDir`, dotfiles included, except those matching
 * `IGNORE_PATTERNS`.
 *
 * @param rootDir Directory to scan; matched paths are relative to it.
 * @returns The matched file paths in default string sort order.
 */
export async function scanFiles(rootDir) {
  const globOptions = {
    cwd: rootDir,
    ignore: IGNORE_PATTERNS,
    dot: true,
    onlyFiles: true,
  };
  const matches = await fg('**/*', globOptions);
  matches.sort();
  return matches;
}
@@ -0,0 +1,5 @@
1
/**
 * Inspects a Jupyter notebook file.
 *
 * @param filepath Path of the notebook file.
 * @returns The number of cells, whether any markdown cell exists, and the
 *   signal keywords found in the notebook.
 */
export declare function scanNotebook(filepath: string): Promise<{
    cellCount: number;
    hasMarkdown: boolean;
    signals: string[];
}>;
@@ -0,0 +1,32 @@
1
+ import { readFile } from 'fs/promises';
2
/** Lower-case substrings whose presence in notebook code is reported as a signal. */
const SIGNAL_KEYWORDS = [
  'var',
  'pnl',
  'risk',
  'forecast',
  'predict',
  'backtest',
  'portfolio',
  'hedge',
];

/**
 * Inspects a Jupyter notebook: counts its cells, notes whether any markdown
 * cell exists, and reports which signal keywords occur (case-insensitively,
 * as substrings) in the source of its code cells.
 *
 * @param filepath Path of the `.ipynb` file.
 * @returns `{ cellCount, hasMarkdown, signals }`, with `signals` in
 *   `SIGNAL_KEYWORDS` order. Zero/false/empty when the file cannot be read,
 *   is not valid JSON, or has no `cells` array.
 */
export async function scanNotebook(filepath) {
  try {
    const notebook = JSON.parse(await readFile(filepath, 'utf-8'));
    const cells = notebook.cells;
    if (!Array.isArray(cells)) {
      return { cellCount: 0, hasMarkdown: false, signals: [] };
    }

    let hasMarkdown = false;
    const codeSources = [];
    for (const cell of cells) {
      if (cell.cell_type === 'markdown') {
        hasMarkdown = true;
      }
      else if (cell.cell_type === 'code') {
        // Cell source is either a list of line fragments or a single string.
        codeSources.push(Array.isArray(cell.source) ? cell.source.join('') : cell.source ?? '');
      }
    }

    const codeText = codeSources.join('\n').toLowerCase();
    const signals = [];
    for (const keyword of SIGNAL_KEYWORDS) {
      if (codeText.includes(keyword)) {
        signals.push(keyword);
      }
    }
    return { cellCount: cells.length, hasMarkdown, signals };
  }
  catch {
    return { cellCount: 0, hasMarkdown: false, signals: [] };
  }
}
@@ -0,0 +1,4 @@
1
/**
 * Collects the dependency names declared by a project.
 *
 * @param rootDir Project directory to inspect.
 * @returns The Python and Node package names that were found.
 */
export declare function scanPackages(rootDir: string): Promise<{
    python: string[];
    node: string[];
}>;
@@ -0,0 +1,46 @@
1
+ import { readFile } from 'fs/promises';
2
+ import { existsSync } from 'fs';
3
+ import path from 'path';
4
/**
 * Collects dependency names declared directly in `rootDir`.
 *
 * Python names come from `requirements.txt`: blank lines, comments and pip
 * option lines (those starting with `-`, such as `-r other.txt` or
 * `--index-url ...`) are skipped, and everything from the first version
 * specifier, extras bracket, environment marker, direct-reference `@` or
 * whitespace onwards is stripped. Names are lower-cased.
 *
 * Node names are the keys of `dependencies` and `devDependencies` in
 * `package.json`.
 *
 * @param rootDir Project directory to inspect.
 * @returns `{ python, node }`, each de-duplicated in first-seen order. A
 *   missing, unreadable or malformed file contributes no names.
 */
export async function scanPackages(rootDir) {
  const python = [];
  const node = [];
  // Scan requirements.txt
  const reqPath = path.join(rootDir, 'requirements.txt');
  if (existsSync(reqPath)) {
    try {
      const content = await readFile(reqPath, 'utf-8');
      const lines = content.split('\n');
      for (const line of lines) {
        const trimmed = line.trim();
        if (!trimmed || trimmed.startsWith('#'))
          continue;
        // pip options (-r, -e, -c, --index-url, ...) are not package names.
        if (trimmed.startsWith('-'))
          continue;
        // Strip version specifiers: e.g. "fastapi>=0.100.0" → "fastapi".
        // "~" covers the compatible-release operator ("pkg~=1.0") and "@"
        // covers direct references ("pkg@https://...").
        const name = trimmed.split(/[>=<!~;@[\s]/)[0].trim();
        if (name)
          python.push(name.toLowerCase());
      }
    }
    catch {
      // Best effort: an unreadable requirements.txt contributes nothing.
    }
  }
  // Scan package.json
  const pkgPath = path.join(rootDir, 'package.json');
  if (existsSync(pkgPath)) {
    try {
      const content = await readFile(pkgPath, 'utf-8');
      const pkg = JSON.parse(content);
      const deps = {
        ...pkg.dependencies,
        ...pkg.devDependencies,
      };
      for (const name of Object.keys(deps)) {
        node.push(name);
      }
    }
    catch {
      // Best effort: an unreadable or malformed package.json contributes nothing.
    }
  }
  return { python: [...new Set(python)], node: [...new Set(node)] };
}
@@ -0,0 +1,4 @@
1
/**
 * Inspects a Python source file.
 *
 * @param filepath Path of the Python file.
 * @returns The import statements and the signal keywords found in the file.
 */
export declare function scanPython(filepath: string): Promise<{
    imports: string[];
    signals: string[];
}>;
@@ -0,0 +1,30 @@
1
+ import { readFile } from 'fs/promises';
2
/** Lower-case substrings whose presence in a Python file is reported as a signal. */
const SIGNAL_KEYWORDS = [
  'var',
  'pnl',
  'risk',
  'forecast',
  'predict',
  'backtest',
  'portfolio',
  'hedge',
];

/**
 * Inspects a Python source file: collects its distinct import lines and
 * reports which signal keywords occur (case-insensitively, as substrings)
 * anywhere in the file.
 *
 * A line counts as an import when, after trimming, it starts with
 * `import ` or `from `.
 *
 * @param filepath Path of the `.py` file.
 * @returns `{ imports, signals }` — trimmed import lines in first-seen order
 *   and matched keywords in `SIGNAL_KEYWORDS` order. Both are empty when the
 *   file cannot be read.
 */
export async function scanPython(filepath) {
  try {
    const content = await readFile(filepath, 'utf-8');

    const distinctImports = new Set();
    for (const rawLine of content.split('\n')) {
      const line = rawLine.trim();
      if (line.startsWith('import ') || line.startsWith('from ')) {
        distinctImports.add(line);
      }
    }

    const haystack = content.toLowerCase();
    const signals = [];
    for (const keyword of SIGNAL_KEYWORDS) {
      if (haystack.includes(keyword)) {
        signals.push(keyword);
      }
    }
    return { imports: [...distinctImports], signals };
  }
  catch {
    return { imports: [], signals: [] };
  }
}
@@ -0,0 +1,16 @@
1
+ import type { ScoutAssetInfo, ScoutFileInfo } from '../types.js';
2
+ interface RepoSummaryInput {
3
+ projectName: string;
4
+ projectType: string;
5
+ detectedAssets: string[];
6
+ detailedAssets?: ScoutAssetInfo[];
7
+ detectedFiles: ScoutFileInfo[];
8
+ detectedPackages: {
9
+ python: string[];
10
+ node: string[];
11
+ };
12
+ suggestedWorkflows: string[];
13
+ evidenceCitations?: string[];
14
+ }
15
+ export declare function buildRepoSummary(input: RepoSummaryInput): string;
16
+ export {};