autoscholar-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +212 -0
- package/dist/agents/euler.js +261 -0
- package/dist/agents/fisher.js +348 -0
- package/dist/agents/gauss.js +177 -0
- package/dist/agents/governor.js +201 -0
- package/dist/agents/newton.js +207 -0
- package/dist/agents/turing.js +307 -0
- package/dist/cli/banner.js +136 -0
- package/dist/cli/configCommand.js +125 -0
- package/dist/cli/interactive.js +115 -0
- package/dist/cli/outputsCommand.js +191 -0
- package/dist/cli/resumeCommand.js +78 -0
- package/dist/cli/runCommand.js +91 -0
- package/dist/config/loader.js +154 -0
- package/dist/config/setup.js +179 -0
- package/dist/connectors/academic.js +307 -0
- package/dist/connectors/eodhd.js +90 -0
- package/dist/connectors/firecrawl.js +94 -0
- package/dist/connectors/fmp.js +115 -0
- package/dist/connectors/fred.js +82 -0
- package/dist/connectors/index.js +24 -0
- package/dist/connectors/websearch.js +117 -0
- package/dist/index.js +72 -0
- package/dist/latex/generator.js +413 -0
- package/dist/python/runner.js +141 -0
- package/dist/utils/llm.js +73 -0
- package/dist/utils/logger.js +83 -0
- package/dist/utils/project.js +100 -0
- package/package.json +63 -0
- package/python/analysis/garch_template.py +131 -0
- package/python/clients/eodhd_client.py +78 -0
- package/python/clients/fmp_client.py +64 -0
- package/python/clients/fred_client.py +57 -0
- package/python/clients/macro_clients.py +81 -0
- package/python/requirements.txt +23 -0
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
36
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
|
+
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.runFisher = runFisher;
|
|
40
|
+
const ora_1 = __importDefault(require("ora"));
|
|
41
|
+
const fs = __importStar(require("fs"));
|
|
42
|
+
const path = __importStar(require("path"));
|
|
43
|
+
const llm_1 = require("../utils/llm");
|
|
44
|
+
const banner_1 = require("../cli/banner");
|
|
45
|
+
const project_1 = require("../utils/project");
|
|
46
|
+
const runner_1 = require("../python/runner");
|
|
47
|
+
/**
 * Fisher agent: designs and executes the empirical (econometric) analysis.
 *
 * Flow: ask the LLM to write a complete Python analysis script, save and run
 * it via the project Python runner; if that run fails, retry once with a
 * locally generated simplified template script. Analysis failures never
 * throw — a result object with `success: false` is returned instead.
 *
 * @param {string} topic        Research topic driving the analysis.
 * @param {string} method       Preferred method; falsy means auto-detect.
 * @param {string} projectId    Project identifier used to resolve directories.
 * @param {object} config       Config forwarded to callLLM (API keys, etc.).
 * @param {object} logger       Logger exposing info/warn/error.
 * @param {object} datasetInfo  Dataset metadata: finalDatasetPath, columns,
 *                              rows, dataDictionary.
 * @param {object} [gapAnalysis] Optional Gauss result; recommendedGap (title,
 *                              suggestedApproach) is appended to the prompt.
 * @returns {Promise<object>}   { success, models, figures, tables, summary, diagnostics }.
 */
async function runFisher(topic, method, projectId, config, logger, datasetInfo, gapAnalysis) {
    const spinner = (0, ora_1.default)({ text: 'Fisher: Designing empirical analysis...', indent: 2 }).start();
    logger.info(`[FISHER] Starting analysis — method: ${method || 'auto'}, dataset: ${datasetInfo.rows} rows`);
    const projectDir = (0, project_1.getProjectDir)(projectId);
    const figuresDir = path.join(projectDir, 'output', 'figures');
    // Make sure the figures output directory exists before the script writes to it.
    if (!fs.existsSync(figuresDir))
        fs.mkdirSync(figuresDir, { recursive: true });
    spinner.text = 'Fisher: Designing econometric models...';
    // First LLM call: generate the full analysis script. The system prompt
    // pins the output contract (figures dir, LaTeX tables, trailing JSON blob).
    const analysisCode = await (0, llm_1.callLLM)(config, `You are Fisher, the empirical analysis agent of AutoScholar.

Write a comprehensive Python analysis script that:

1. LOADS the dataset from the given path
2. RUNS the appropriate econometric/statistical models based on the topic and method
3. GENERATES publication-quality figures (at least 8-10 figures)
4. CREATES LaTeX-formatted tables
5. RUNS diagnostic tests
6. PRINTS structured JSON results at the end

Available libraries (always installed):
- pandas, numpy, scipy, scikit-learn, statsmodels
- matplotlib (use Agg backend), seaborn
- arch (for GARCH models)
- linearmodels (for panel data, IV)

Figure requirements:
- Use matplotlib with Agg backend: import matplotlib; matplotlib.use('Agg')
- Use seaborn style: sns.set_theme(style='whitegrid', font_scale=1.2)
- Save all figures to: ${figuresDir}/
- Use tight_layout() and savefig with dpi=300, bbox_inches='tight'
- Include proper titles, axis labels, legends

Table requirements:
- Format as LaTeX booktabs tables
- Include standard errors in parentheses
- Mark significance: *** p<0.01, ** p<0.05, * p<0.10

Model selection guide:
- Time series: GARCH, DCC-GARCH, VAR, VECM
- Cross-section: OLS, robust SE, quantile regression
- Panel: Fixed effects, random effects, Hausman test
- ML: Random Forest, XGBoost with SHAP

At the END, print a JSON object with this structure:
{
  "models": [{"name": "OLS", "type": "regression", "coefficients": {}, "standardErrors": {}, "rSquared": 0.85, "pValues": {}, "diagnostics": [], "significant": true}],
  "figures": [{"filename": "fig1.png", "caption": "Time series plot"}],
  "tables": [{"name": "Table 1", "latex": "\\\\begin{table}...", "caption": "Regression results"}],
  "summary": "Brief summary of key findings",
  "diagnostics": ["VIF < 5 for all variables", "No autocorrelation (DW = 2.01)"]
}

Write ONLY Python code. Start with imports.`, `Research topic: ${topic}
Method: ${method || 'auto-detect based on data'}
Dataset path: ${datasetInfo.finalDatasetPath}
Columns: ${datasetInfo.columns.join(', ')}
Rows: ${datasetInfo.rows}
Data dictionary: ${JSON.stringify(datasetInfo.dataDictionary)}
${gapAnalysis?.recommendedGap ? `Research gap: ${gapAnalysis.recommendedGap.title}\nSuggested approach: ${gapAnalysis.recommendedGap.suggestedApproach}` : ''}`, { maxTokens: 16384, model: 'claude-sonnet-4-20250514' });
    // Strip an optional ```python fence from the LLM response; if no fence is
    // present the raw response is used as-is.
    let pythonCode = analysisCode;
    const codeMatch = analysisCode.match(/```python\s*\n?([\s\S]*?)```/);
    if (codeMatch) {
        pythonCode = codeMatch[1];
    }
    // Persist the generated script for reproducibility/debugging.
    (0, project_1.saveProjectFile)(projectId, 'code/fisher_analysis.py', pythonCode);
    spinner.text = 'Fisher: Running econometric analysis...';
    logger.info(`[FISHER] Executing analysis script`);
    // 300000 ms = 5-minute timeout for the Python run.
    const pyResult = await (0, runner_1.runPython)(pythonCode, projectDir, 300000);
    if (!pyResult.success) {
        logger.warn(`[FISHER] First attempt failed: ${pyResult.error}`);
        spinner.text = 'Fisher: Retrying with simplified analysis...';
        // Fallback: a locally generated template script (no second LLM call
        // for the code body — see generateSimplifiedAnalysis).
        const simpleCode = await generateSimplifiedAnalysis(topic, method, datasetInfo, figuresDir, config);
        (0, project_1.saveProjectFile)(projectId, 'code/fisher_analysis_v2.py', simpleCode);
        const retryResult = await (0, runner_1.runPython)(simpleCode, projectDir, 300000);
        if (!retryResult.success) {
            spinner.stop();
            logger.error(`[FISHER] Retry failed: ${retryResult.error}`);
            // Both attempts failed: surface an empty, unsuccessful result.
            return {
                success: false, models: [], figures: [], tables: [],
                summary: 'Analysis failed', diagnostics: [],
            };
        }
        return parseFisherOutput(retryResult.output, figuresDir, logger, spinner);
    }
    return parseFisherOutput(pyResult.output, figuresDir, logger, spinner);
}
|
|
133
|
+
/**
 * Convert raw stdout from the Fisher analysis script into a structured result.
 *
 * Scans the output for a JSON object containing a "models" key. When found
 * and parseable, its figure list is merged with the image files actually
 * present on disk; otherwise a "partial" result is assembled purely from the
 * files found in the figures directory.
 *
 * @param {string} output     Captured stdout of the Python script.
 * @param {string} figuresDir Directory the script saved figures into.
 * @param {object} logger     Logger exposing info/warn.
 * @param {object} spinner    Active ora spinner; stopped on entry.
 * @returns {object} { success, models, figures, tables, summary, diagnostics }.
 */
function parseFisherOutput(output, figuresDir, logger, spinner) {
    spinner.stop();
    // Image files (png/pdf/svg) the script actually wrote, if any.
    const listFigureFiles = () => fs.existsSync(figuresDir)
        ? fs.readdirSync(figuresDir).filter((f) => /\.(png|pdf|svg)$/i.test(f))
        : [];
    const jsonMatch = output.match(/\{[\s\S]*"models"[\s\S]*\}/);
    if (jsonMatch) {
        try {
            const result = JSON.parse(jsonMatch[0]);
            const onDisk = listFigureFiles();
            // Figures the script reported, with absolute paths attached.
            const figures = (result.figures || []).map((f) => ({
                ...f,
                path: path.join(figuresDir, f.filename),
            }));
            // Append any on-disk images the script did not report, deriving a
            // caption from the filename (extension stripped, separators spaced).
            for (const file of onDisk) {
                const alreadyListed = figures.find((f) => f.filename === file);
                if (alreadyListed)
                    continue;
                figures.push({
                    filename: file,
                    caption: file.replace(/\.[^.]+$/, '').replace(/[_-]/g, ' '),
                    path: path.join(figuresDir, file),
                });
            }
            (0, banner_1.printSuccess)(`Analysis complete: ${(result.models || []).length} models estimated`);
            (0, banner_1.printSuccess)(`${figures.length} figures generated`);
            if (result.summary)
                (0, banner_1.printInfo)(result.summary);
            // Echo at most five diagnostics to the console.
            for (const diag of (result.diagnostics || []).slice(0, 5)) {
                (0, banner_1.printInfo)(`  Diagnostic: ${diag}`);
            }
            logger.info(`[FISHER] Complete — ${(result.models || []).length} models, ${figures.length} figures`);
            return {
                success: true,
                models: result.models || [],
                figures,
                tables: result.tables || [],
                summary: result.summary || '',
                diagnostics: result.diagnostics || [],
            };
        }
        catch (err) {
            // Fall through to the partial-result path below.
            logger.warn(`[FISHER] JSON parse error: ${err}`);
        }
    }
    // No parseable JSON: salvage whatever figures exist on disk.
    const figureFiles = listFigureFiles();
    (0, banner_1.printSuccess)(`Analysis complete (partial): ${figureFiles.length} figures found`);
    logger.info(`[FISHER] Partial completion — ${figureFiles.length} figures`);
    return {
        success: figureFiles.length > 0,
        models: [],
        figures: figureFiles.map((f) => ({
            filename: f,
            caption: f.replace(/\.[^.]+$/, '').replace(/[_-]/g, ' '),
            path: path.join(figuresDir, f),
        })),
        tables: [],
        summary: output.substring(0, 500),
        diagnostics: [],
    };
}
|
|
194
|
+
/**
 * Build the deterministic fallback Python analysis script used when the
 * LLM-generated script fails: descriptive stats, correlation heatmap,
 * distribution/Q-Q plots, optional time-series plot, robust-SE OLS with
 * diagnostics, scatter matrix, box plots, and a trailing JSON summary.
 *
 * Fix: `figuresDir` and `datasetInfo.finalDatasetPath` are now embedded via
 * JSON.stringify, which yields a valid double-quoted Python string literal
 * even when the path contains backslashes (Windows) or quotes; previously a
 * raw interpolation inside "..." produced broken Python for such paths.
 * Output is byte-identical for plain POSIX paths.
 *
 * @param {string} topic       Research topic (unused by the template; kept for interface parity).
 * @param {string} method      Requested method (unused by the template; kept for interface parity).
 * @param {object} datasetInfo Must provide finalDatasetPath (CSV to load).
 * @param {string} figuresDir  Directory the script saves figures into.
 * @param {object} config      Unused here; kept for interface parity with runFisher's call.
 * @returns {Promise<string>}  Complete Python source code.
 */
async function generateSimplifiedAnalysis(topic, method, datasetInfo, figuresDir, config) {
    // JSON string literals are valid Python string literals for these paths
    // (escaped backslashes/quotes), so embed them pre-quoted.
    const figuresDirLiteral = JSON.stringify(figuresDir);
    const datasetPathLiteral = JSON.stringify(datasetInfo.finalDatasetPath);
    return `import matplotlib
matplotlib.use('Agg')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from scipy import stats

sns.set_theme(style='whitegrid', font_scale=1.2)
figures_dir = ${figuresDirLiteral}
os.makedirs(figures_dir, exist_ok=True)

# Load data
df = pd.read_csv(${datasetPathLiteral})
print(f"Dataset: {df.shape[0]} rows, {df.shape[1]} columns")

numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
date_cols = [c for c in df.columns if 'date' in c.lower() or 'time' in c.lower()]

figures = []
models = []
tables = []
diagnostics = []

# 1. Descriptive statistics
desc = df[numeric_cols].describe()
print("\\nDescriptive Statistics:")
print(desc)

# 2. Correlation matrix
if len(numeric_cols) >= 2:
    fig, ax = plt.subplots(figsize=(12, 10))
    corr = df[numeric_cols[:15]].corr()
    sns.heatmap(corr, annot=True, fmt='.2f', cmap='RdBu_r', center=0, ax=ax)
    ax.set_title('Correlation Matrix')
    plt.tight_layout()
    plt.savefig(os.path.join(figures_dir, 'correlation_matrix.png'), dpi=300, bbox_inches='tight')
    plt.close()
    figures.append({"filename": "correlation_matrix.png", "caption": "Correlation matrix of key variables"})

# 3. Distribution plots
for i, col in enumerate(numeric_cols[:6]):
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    axes[0].hist(df[col].dropna(), bins=50, edgecolor='black', alpha=0.7)
    axes[0].set_title(f'Distribution of {col}')
    axes[0].set_xlabel(col)
    axes[0].set_ylabel('Frequency')
    stats.probplot(df[col].dropna(), plot=axes[1])
    axes[1].set_title(f'Q-Q Plot: {col}')
    plt.tight_layout()
    fname = f'dist_{col.replace(" ", "_")[:20]}.png'
    plt.savefig(os.path.join(figures_dir, fname), dpi=300, bbox_inches='tight')
    plt.close()
    figures.append({"filename": fname, "caption": f"Distribution and Q-Q plot of {col}"})

# 4. Time series plot if date column exists
if date_cols and len(numeric_cols) >= 1:
    df_sorted = df.sort_values(date_cols[0])
    fig, ax = plt.subplots(figsize=(14, 6))
    for col in numeric_cols[:5]:
        normalized = (df_sorted[col] - df_sorted[col].mean()) / df_sorted[col].std()
        ax.plot(range(len(df_sorted)), normalized, label=col, alpha=0.8)
    ax.set_title('Normalized Time Series')
    ax.set_xlabel('Observation')
    ax.set_ylabel('Standardized Value')
    ax.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(figures_dir, 'time_series.png'), dpi=300, bbox_inches='tight')
    plt.close()
    figures.append({"filename": "time_series.png", "caption": "Normalized time series of key variables"})

# 5. OLS Regression (if enough numeric columns)
if len(numeric_cols) >= 2:
    try:
        import statsmodels.api as sm
        y = df[numeric_cols[0]].dropna()
        X = df[numeric_cols[1:min(6, len(numeric_cols))]].loc[y.index].dropna()
        y = y.loc[X.index]
        X = sm.add_constant(X)

        model = sm.OLS(y, X).fit(cov_type='HC3')
        print("\\nOLS Results:")
        print(model.summary())

        coefficients = {k: float(v) for k, v in model.params.items()}
        std_errors = {k: float(v) for k, v in model.bse.items()}
        p_values = {k: float(v) for k, v in model.pvalues.items()}

        models.append({
            "name": "OLS with robust SE",
            "type": "regression",
            "coefficients": coefficients,
            "standardErrors": std_errors,
            "rSquared": float(model.rsquared),
            "pValues": p_values,
            "diagnostics": [f"R-squared: {model.rsquared:.4f}", f"F-statistic: {model.fvalue:.2f}", f"Observations: {model.nobs}"],
            "significant": any(v < 0.05 for v in p_values.values() if v != p_values.get('const', 1))
        })

        diagnostics.append(f"R-squared: {model.rsquared:.4f}")
        diagnostics.append(f"F-statistic: {model.fvalue:.2f} (p={model.f_pvalue:.4f})")
        diagnostics.append(f"Durbin-Watson: {float(sm.stats.durbin_watson(model.resid)):.2f}")

        # Residual plot
        fig, axes = plt.subplots(1, 2, figsize=(14, 6))
        axes[0].scatter(model.fittedvalues, model.resid, alpha=0.5)
        axes[0].axhline(y=0, color='r', linestyle='--')
        axes[0].set_xlabel('Fitted Values')
        axes[0].set_ylabel('Residuals')
        axes[0].set_title('Residuals vs Fitted')
        axes[1].hist(model.resid, bins=40, edgecolor='black', alpha=0.7)
        axes[1].set_title('Residual Distribution')
        plt.tight_layout()
        plt.savefig(os.path.join(figures_dir, 'ols_diagnostics.png'), dpi=300, bbox_inches='tight')
        plt.close()
        figures.append({"filename": "ols_diagnostics.png", "caption": "OLS diagnostic plots: residuals vs fitted and residual distribution"})
    except Exception as e:
        print(f"OLS failed: {e}")

# 6. Scatter matrix
if len(numeric_cols) >= 3:
    cols_for_scatter = numeric_cols[:5]
    fig = plt.figure(figsize=(14, 14))
    pd.plotting.scatter_matrix(df[cols_for_scatter].dropna(), figsize=(14, 14), alpha=0.5)
    plt.suptitle('Scatter Matrix', y=1.02)
    plt.tight_layout()
    plt.savefig(os.path.join(figures_dir, 'scatter_matrix.png'), dpi=300, bbox_inches='tight')
    plt.close()
    figures.append({"filename": "scatter_matrix.png", "caption": "Scatter matrix of key variables"})

# 7. Box plots
if len(numeric_cols) >= 2:
    fig, ax = plt.subplots(figsize=(14, 6))
    df[numeric_cols[:8]].boxplot(ax=ax)
    ax.set_title('Box Plots of Key Variables')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(os.path.join(figures_dir, 'boxplots.png'), dpi=300, bbox_inches='tight')
    plt.close()
    figures.append({"filename": "boxplots.png", "caption": "Box plots of key variables"})

# Output JSON summary
result = {
    "models": models,
    "figures": figures,
    "tables": tables,
    "summary": f"Analysis of {len(df)} observations across {len(numeric_cols)} numeric variables. {len(models)} models estimated, {len(figures)} figures generated.",
    "diagnostics": diagnostics
}
print("\\n" + json.dumps(result))
`;
}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.runGauss = runGauss;
|
|
7
|
+
const ora_1 = __importDefault(require("ora"));
|
|
8
|
+
const llm_1 = require("../utils/llm");
|
|
9
|
+
const academic_1 = require("../connectors/academic");
|
|
10
|
+
const banner_1 = require("../cli/banner");
|
|
11
|
+
/**
 * Gauss agent: searches academic literature and identifies research gaps.
 *
 * Flow: generate search queries via the LLM, search OpenAlex/arXiv/CrossRef,
 * dedupe and rank papers by citations, expand the citation network from the
 * top DOIs, then ask the LLM for a structured gap analysis. Returns a result
 * object with `success: false` (but still carrying papers/bibtex) when the
 * gap-analysis response cannot be parsed.
 *
 * @param {string} topic  Research topic to analyze.
 * @param {object} config Config forwarded to callLLM and search connectors.
 * @param {object} logger Logger exposing info/warn/error.
 * @returns {Promise<object>} Gap-analysis result (gaps, recommendedGap,
 *                            suggestedTitle/Abstract/Contributions, synthesis,
 *                            clusters, landscape, bibtexBlock, papers, ...).
 */
async function runGauss(topic, config, logger) {
    const spinner = (0, ora_1.default)({ text: 'Gauss: Searching academic literature...', indent: 2 }).start();
    logger.info(`[GAUSS] Starting gap analysis for: ${topic}`);
    spinner.text = 'Gauss: Planning search strategy...';
    const queries = await generateSearchQueries(topic, config);
    logger.info(`[GAUSS] Generated ${queries.length} search queries`);
    spinner.text = 'Gauss: Searching OpenAlex, arXiv, CrossRef...';
    let allPapers = [];
    let allBibtex = '';
    const allSourcesUsed = [];
    const allSourcesFailed = [];
    // Run at most 5 queries; individual search failures are logged and skipped.
    for (const query of queries.slice(0, 5)) {
        try {
            const result = await (0, academic_1.multiSourceSearch)(query, {
                limit: 20,
                sources: ['openalex', 'arxiv', 'crossref'],
                serpApiKey: config.SERPAPI_API_KEY,
            });
            allPapers.push(...result.papers);
            allSourcesUsed.push(...result.sourcesUsed);
            allSourcesFailed.push(...result.sourcesFailed);
        }
        catch (err) {
            logger.warn(`[GAUSS] Search failed for query: ${query}`);
        }
    }
    // Dedupe by DOI when present, else by the first 60 chars of the title.
    const seen = new Set();
    allPapers = allPapers.filter((p) => {
        const key = p.doi || p.title.toLowerCase().substring(0, 60);
        if (seen.has(key))
            return false;
        seen.add(key);
        return true;
    });
    // Keep the 80 most-cited papers.
    allPapers.sort((a, b) => b.citationCount - a.citationCount);
    allPapers = allPapers.slice(0, 80);
    allBibtex = allPapers.map((p) => p.bibtex).join('\n\n');
    spinner.text = `Gauss: Found ${allPapers.length} papers. Expanding citation network...`;
    logger.info(`[GAUSS] Found ${allPapers.length} unique papers`);
    // Expand the citation network: pull up to 10 references from each of the
    // top 5 papers that have a DOI. Failures are deliberately ignored.
    const topPapers = allPapers.filter((p) => p.doi).slice(0, 5);
    for (const paper of topPapers) {
        try {
            const refs = await (0, academic_1.getOpenAlexReferences)(paper.doi, 10);
            for (const ref of refs) {
                const key = ref.doi || ref.title.toLowerCase().substring(0, 60);
                if (!seen.has(key)) {
                    seen.add(key);
                    allPapers.push(ref);
                }
            }
        }
        catch { }
    }
    spinner.text = `Gauss: Analyzing ${allPapers.length} papers for gaps...`;
    // Context for the LLM: the top 50 papers, one line each.
    // NOTE(review): assumes every paper object has a string `abstract` —
    // p.abstract.substring would throw if a connector returns it undefined; verify.
    const papersContext = allPapers.slice(0, 50).map((p) => `- ${p.citeKey}: "${p.title}" (${p.year || 'n.d.'}) [${p.citationCount} citations] — ${p.abstract.substring(0, 200)}`).join('\n');
    const gapAnalysis = await (0, llm_1.callLLM)(config, `You are Gauss, the literature gap finder agent of AutoScholar. You are an expert in identifying unexplored research opportunities in academic literature.

Analyze the provided papers and identify research gaps. Focus on:
1. Methods not yet applied to this domain
2. Datasets not yet explored
3. Temporal gaps (recent phenomena not yet studied)
4. Contradictions between papers
5. Shared limitations that could be addressed
6. Cross-disciplinary opportunities

Respond in JSON format with this exact structure:
{
  "gaps": [
    {
      "title": "Gap title",
      "description": "Detailed description",
      "evidence": ["paper1 says X", "paper2 contradicts"],
      "novelty": 0.8,
      "feasibility": 0.7,
      "citationPotential": 0.9,
      "overallScore": 0.8,
      "gapType": "method|dataset|temporal|contradiction|limitation|cross_disciplinary",
      "suggestedApproach": "How to address this gap",
      "relevantPapers": ["citekey1", "citekey2"],
      "validationStatus": "validated|partially_explored|uncertain"
    }
  ],
  "suggestedTitle": "Suggested paper title",
  "suggestedAbstract": "150-word abstract",
  "suggestedContributions": ["Contribution 1", "Contribution 2", "Contribution 3"],
  "literatureSynthesis": "3000+ char synthesis of the field",
  "thematicClusters": [
    { "theme": "Theme name", "papers": ["key1"], "keyFindings": "Summary" }
  ],
  "scientificLandscape": {
    "dominantMethods": ["OLS", "GARCH"],
    "commonDatasets": ["CRSP", "Compustat"],
    "sharedLimitations": ["Short sample period"],
    "futureDirections": ["High-frequency analysis"]
  }
}`, `Research topic: ${topic}\n\nPapers found (${allPapers.length} total, showing top 50):\n${papersContext}`, { maxTokens: 8192 });
    spinner.stop();
    const parsed = (0, llm_1.parseJsonResponse)(gapAnalysis);
    if (!parsed) {
        // Unparseable LLM response: return a failed result that still carries
        // the collected papers and bibtex so downstream agents can use them.
        logger.error('[GAUSS] Failed to parse gap analysis response');
        return {
            success: false,
            topic,
            totalPapersAnalyzed: allPapers.length,
            sourcesUsed: [...new Set(allSourcesUsed)],
            sourcesFailed: [...new Set(allSourcesFailed)],
            gaps: [],
            recommendedGap: null,
            suggestedTitle: topic,
            suggestedAbstract: '',
            suggestedContributions: [],
            literatureSynthesis: '',
            thematicClusters: [],
            scientificLandscape: { dominantMethods: [], commonDatasets: [], sharedLimitations: [], futureDirections: [] },
            bibtexBlock: allBibtex,
            papers: allPapers,
        };
    }
    // Rank gaps by the LLM's overallScore; the best one becomes the recommendation.
    const gaps = (parsed.gaps || []).sort((a, b) => b.overallScore - a.overallScore);
    const result = {
        success: true,
        topic,
        totalPapersAnalyzed: allPapers.length,
        sourcesUsed: [...new Set(allSourcesUsed)],
        sourcesFailed: [...new Set(allSourcesFailed)],
        gaps,
        recommendedGap: gaps[0] || null,
        suggestedTitle: parsed.suggestedTitle || topic,
        suggestedAbstract: parsed.suggestedAbstract || '',
        suggestedContributions: parsed.suggestedContributions || [],
        literatureSynthesis: parsed.literatureSynthesis || '',
        thematicClusters: parsed.thematicClusters || [],
        scientificLandscape: parsed.scientificLandscape || { dominantMethods: [], commonDatasets: [], sharedLimitations: [], futureDirections: [] },
        bibtexBlock: allBibtex,
        papers: allPapers,
    };
    (0, banner_1.printSuccess)(`Analyzed ${allPapers.length} papers from ${[...new Set(allSourcesUsed)].join(', ')}`);
    (0, banner_1.printSuccess)(`Found ${gaps.length} research gaps`);
    if (result.recommendedGap) {
        (0, banner_1.printInfo)(`Recommended: ${result.recommendedGap.title} (score: ${result.recommendedGap.overallScore.toFixed(2)})`);
    }
    (0, banner_1.printSuccess)(`Suggested title: "${result.suggestedTitle}"`);
    logger.info(`[GAUSS] Complete — ${gaps.length} gaps identified, ${allPapers.length} papers analyzed`);
    return result;
}
|
|
156
|
+
/**
 * Ask the LLM for seven diverse literature-search queries covering the topic
 * from different angles. When the response is not a JSON array, a fixed set
 * of five template queries is returned instead.
 *
 * @param {string} topic  Research topic to build queries for.
 * @param {object} config Config forwarded to callLLM.
 * @returns {Promise<string[]>} Search query strings.
 */
async function generateSearchQueries(topic, config) {
    const prompt = `Generate 7 diverse academic search queries for the given research topic. Cover:
1. Core theoretical angle
2. Methodological angle
3. Recent developments (2023-2025)
4. Contradictions/debates
5. Adjacent field connection
6. Applied/empirical angle
7. Measurement/data angle

Respond as a JSON array of strings, nothing else.`;
    const raw = await (0, llm_1.callLLM)(config, prompt, topic, { maxTokens: 1024 });
    const parsedQueries = (0, llm_1.parseJsonResponse)(raw);
    if (!Array.isArray(parsedQueries)) {
        // Deterministic fallback when the LLM response is unusable.
        return [
            topic,
            `${topic} methodology`,
            `${topic} recent 2024`,
            `${topic} empirical evidence`,
            `${topic} literature review`,
        ];
    }
    return parsedQueries;
}
|