edgar-cli 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/dist/cli.js +18 -0
- package/dist/commands/research.d.ts +5 -0
- package/dist/commands/research.js +118 -43
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -73,6 +73,9 @@ npx edgar-cli --user-agent "Your Name your.email@example.com" research sync --id
|
|
|
73
73
|
|
|
74
74
|
# Query by ticker against cached corpus (auto-syncs on cache miss)
|
|
75
75
|
npx edgar-cli --user-agent "Your Name your.email@example.com" research ask "what changed on the board?" --id NVDA --profile core
|
|
76
|
+
|
|
77
|
+
# Query latest filing(s) in one shot: discover -> fetch/cache -> search
|
|
78
|
+
npx edgar-cli --user-agent "Your Name your.email@example.com" research ask "gross margin drivers" --id AAPL --form 10-Q --latest 1
|
|
76
79
|
```
|
|
77
80
|
|
|
78
81
|
## Research Profiles and Cache
|
|
@@ -91,6 +94,11 @@ By default, cached corpora are stored in:
|
|
|
91
94
|
|
|
92
95
|
Override per command with `--cache-dir`.
|
|
93
96
|
|
|
97
|
+
When using `research ask --id`, you can also scope discovery directly:
|
|
98
|
+
|
|
99
|
+
- `--form <form>` to filter filings by form type (e.g. `10-Q`)
|
|
100
|
+
- `--latest <n>` to limit selection to the latest N filings after filters
|
|
101
|
+
|
|
94
102
|
## Output Contract (default)
|
|
95
103
|
|
|
96
104
|
All JSON-mode commands emit:
|
package/dist/cli.js
CHANGED
|
@@ -239,6 +239,8 @@ export function buildProgram(io) {
|
|
|
239
239
|
.argument('<query>', 'Natural language query')
|
|
240
240
|
.option('--id <id>', 'Ticker or CIK for cached/profile-based research')
|
|
241
241
|
.option('--profile <profile>', 'core|events|financials (used with --id)', 'core')
|
|
242
|
+
.option('--form <form>', 'SEC form filter for scoped filing selection with --id, e.g. 10-Q')
|
|
243
|
+
.option('--latest <n>', 'With --id, limit to latest N filings after filters')
|
|
242
244
|
.option('--cache-dir <path>', 'Override cache directory')
|
|
243
245
|
.option('--refresh', 'With --id, force refetch of filings before querying')
|
|
244
246
|
.option('--doc <path>', 'Path to a local document (repeatable)', collectValues, [])
|
|
@@ -250,6 +252,18 @@ export function buildProgram(io) {
|
|
|
250
252
|
const topK = parsePositiveInt(options.topK, '--top-k');
|
|
251
253
|
const chunkLines = parsePositiveInt(options.chunkLines, '--chunk-lines');
|
|
252
254
|
const chunkOverlap = parseNonNegativeInt(options.chunkOverlap, '--chunk-overlap');
|
|
255
|
+
const latest = options.latest === undefined
|
|
256
|
+
? undefined
|
|
257
|
+
: parsePositiveInt(options.latest, '--latest');
|
|
258
|
+
if (!options.id && (options.form || latest !== undefined)) {
|
|
259
|
+
throw new CLIAbortError(emitError({
|
|
260
|
+
command: 'research ask',
|
|
261
|
+
err: new CLIError(ErrorCode.VALIDATION_ERROR, '--form and --latest require --id'),
|
|
262
|
+
runtimeView: 'summary',
|
|
263
|
+
humanMode: false,
|
|
264
|
+
io
|
|
265
|
+
}));
|
|
266
|
+
}
|
|
253
267
|
const requiresSecIdentity = Boolean(options.id);
|
|
254
268
|
const profile = parseResearchProfile(options.profile);
|
|
255
269
|
await executeCommand('research ask', this, io, async (context) => options.id
|
|
@@ -257,6 +271,10 @@ export function buildProgram(io) {
|
|
|
257
271
|
id: options.id,
|
|
258
272
|
query,
|
|
259
273
|
profile,
|
|
274
|
+
scope: {
|
|
275
|
+
form: options.form,
|
|
276
|
+
latest
|
|
277
|
+
},
|
|
260
278
|
cacheDir: options.cacheDir,
|
|
261
279
|
refresh: Boolean(options.refresh),
|
|
262
280
|
topK,
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
import { CommandContext, CommandResult } from '../core/runtime.js';
|
|
2
2
|
type ResearchProfile = 'core' | 'events' | 'financials';
|
|
3
|
+
/**
 * Optional narrowing applied to filing selection when `research ask` is run
 * with `--id`.
 */
interface AskScope {
    /** SEC form filter for scoped filing selection, e.g. `10-Q`. */
    form?: string;
    /** Limit selection to the latest N filings after filters (the CLI parses it as a positive integer). */
    latest?: number;
}
|
|
3
7
|
export declare function parseResearchProfile(value: string): ResearchProfile;
|
|
4
8
|
export declare function runResearchSync(params: {
|
|
5
9
|
id: string;
|
|
@@ -19,6 +23,7 @@ export declare function runResearchAskById(params: {
|
|
|
19
23
|
id: string;
|
|
20
24
|
query: string;
|
|
21
25
|
profile: ResearchProfile;
|
|
26
|
+
scope?: AskScope;
|
|
22
27
|
cacheDir?: string;
|
|
23
28
|
refresh?: boolean;
|
|
24
29
|
topK: number;
|
|
@@ -64,6 +64,10 @@ function parseCachedManifest(value) {
|
|
|
64
64
|
}
|
|
65
65
|
return manifest;
|
|
66
66
|
}
|
|
67
|
+
/**
 * Canonicalizes a form filter string.
 *
 * Surrounding whitespace is trimmed and the result is upper-cased. A missing
 * (null/undefined) value, or one that is empty after trimming, yields
 * `undefined`.
 */
function normalizeForm(value) {
    // Nullish input short-circuits, mirroring optional-chaining semantics.
    if (value == null) {
        return undefined;
    }
    const cleaned = value.trim().toUpperCase();
    if (cleaned.length === 0) {
        return undefined;
    }
    return cleaned;
}
|
|
67
71
|
async function readCachedManifest(cacheRoot, cik, profile) {
|
|
68
72
|
const manifestPath = profileManifestPath(cacheRoot, cik, profile);
|
|
69
73
|
let raw;
|
|
@@ -105,6 +109,57 @@ async function fileExists(filePath) {
|
|
|
105
109
|
throw new CLIError(ErrorCode.VALIDATION_ERROR, `Unable to stat ${filePath}: ${err.message}`);
|
|
106
110
|
}
|
|
107
111
|
}
|
|
112
|
+
/**
 * Ensures each selected filing row has a markdown document on disk and
 * reports what happened.
 *
 * For every row in `params.rows`, the document path is derived via
 * `filingDocPath(cacheRoot, cik, accession)`. When `params.refresh` is falsy
 * and the file already exists it is reused; otherwise the filing is fetched
 * through `runFilingsGet` (format `'markdown'`), newline-terminated, and
 * written to that path. Rows are processed one at a time, in order.
 *
 * A fetch that fails with a `CLIError` whose code is `ErrorCode.NOT_FOUND` is
 * recorded in `skipped` and the row is left out of `docs`; any other error
 * (including the PARSE_ERROR raised for non-string content) propagates.
 *
 * @param params Object with `cacheRoot`, `cik`, `rows`, `refresh`, `context`.
 * @returns `{ docs, fetchedCount, reusedCount, skipped }`.
 */
async function materializeCachedDocs(params) {
    const { cacheRoot, cik, rows, refresh, context } = params;
    const outcome = {
        docs: [],
        fetchedCount: 0,
        reusedCount: 0,
        skipped: []
    };
    for (const filing of rows) {
        const { accession } = filing;
        const target = filingDocPath(cacheRoot, cik, accession);
        // Only consult the filesystem when a refresh was not requested.
        const cacheHit = refresh ? false : await fileExists(target);
        if (cacheHit) {
            outcome.reusedCount += 1;
        }
        else {
            try {
                const { data } = await runFilingsGet({ id: cik, accession, format: 'markdown' }, context);
                const markdown = data.content;
                if (typeof markdown !== 'string') {
                    throw new CLIError(ErrorCode.PARSE_ERROR, `Unable to parse markdown content for accession ${accession}`);
                }
                await mkdir(path.dirname(target), { recursive: true });
                // Stored documents always end with a trailing newline.
                const body = markdown.endsWith('\n') ? markdown : markdown + '\n';
                await writeFile(target, body, 'utf8');
                outcome.fetchedCount += 1;
            }
            catch (error) {
                const isMissing = error instanceof CLIError && error.code === ErrorCode.NOT_FOUND;
                if (!isMissing) {
                    throw error;
                }
                // Missing filings are reported, not fatal; the row is omitted from docs.
                outcome.skipped.push({ accession, reason: error.message });
                continue;
            }
        }
        outcome.docs.push({
            accession,
            form: filing.form,
            filing_date: filing.filingDate,
            report_date: filing.reportDate,
            filing_url: filing.filingUrl,
            path: target
        });
    }
    return outcome;
}
|
|
108
163
|
export function parseResearchProfile(value) {
|
|
109
164
|
const normalized = value.trim().toLowerCase();
|
|
110
165
|
if (normalized === 'core' || normalized === 'events' || normalized === 'financials') {
|
|
@@ -480,49 +535,13 @@ export async function runResearchSync(params, context) {
|
|
|
480
535
|
}
|
|
481
536
|
}
|
|
482
537
|
const selectedRows = [...selectedByAccession.values()].sort((a, b) => (b.filingDate ?? '').localeCompare(a.filingDate ?? ''));
|
|
483
|
-
const docs =
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
if (!shouldUseCache) {
|
|
491
|
-
try {
|
|
492
|
-
const filingResult = await runFilingsGet({
|
|
493
|
-
id: entity.cik,
|
|
494
|
-
accession: row.accession,
|
|
495
|
-
format: 'markdown'
|
|
496
|
-
}, context);
|
|
497
|
-
const filingData = filingResult.data;
|
|
498
|
-
if (typeof filingData.content !== 'string') {
|
|
499
|
-
throw new CLIError(ErrorCode.PARSE_ERROR, `Unable to parse markdown content for accession ${row.accession}`);
|
|
500
|
-
}
|
|
501
|
-
await mkdir(path.dirname(docPath), { recursive: true });
|
|
502
|
-
const content = filingData.content.endsWith('\n') ? filingData.content : `${filingData.content}\n`;
|
|
503
|
-
await writeFile(docPath, content, 'utf8');
|
|
504
|
-
fetchedCount += 1;
|
|
505
|
-
}
|
|
506
|
-
catch (error) {
|
|
507
|
-
if (error instanceof CLIError && error.code === ErrorCode.NOT_FOUND) {
|
|
508
|
-
skipped.push({ accession: row.accession, reason: error.message });
|
|
509
|
-
continue;
|
|
510
|
-
}
|
|
511
|
-
throw error;
|
|
512
|
-
}
|
|
513
|
-
}
|
|
514
|
-
else {
|
|
515
|
-
reusedCount += 1;
|
|
516
|
-
}
|
|
517
|
-
docs.push({
|
|
518
|
-
accession: row.accession,
|
|
519
|
-
form: row.form,
|
|
520
|
-
filing_date: row.filingDate,
|
|
521
|
-
report_date: row.reportDate,
|
|
522
|
-
filing_url: row.filingUrl,
|
|
523
|
-
path: docPath
|
|
524
|
-
});
|
|
525
|
-
}
|
|
538
|
+
const { docs, fetchedCount, reusedCount, skipped } = await materializeCachedDocs({
|
|
539
|
+
cacheRoot,
|
|
540
|
+
cik: entity.cik,
|
|
541
|
+
rows: selectedRows,
|
|
542
|
+
refresh: params.refresh,
|
|
543
|
+
context
|
|
544
|
+
});
|
|
526
545
|
const manifest = {
|
|
527
546
|
version: 1,
|
|
528
547
|
id_input: params.id,
|
|
@@ -569,6 +588,62 @@ export async function runResearchAsk(params, context) {
|
|
|
569
588
|
export async function runResearchAskById(params, context) {
|
|
570
589
|
const cacheRoot = resolveCacheRoot(params.cacheDir);
|
|
571
590
|
const entity = await resolveEntity(params.id, context.secClient, { strictMapMatch: false });
|
|
591
|
+
const form = normalizeForm(params.scope?.form);
|
|
592
|
+
if (form || params.scope?.latest !== undefined) {
|
|
593
|
+
const latest = params.scope?.latest;
|
|
594
|
+
const listResult = await runFilingsList({
|
|
595
|
+
id: entity.cik,
|
|
596
|
+
form,
|
|
597
|
+
queryLimit: latest
|
|
598
|
+
}, context);
|
|
599
|
+
const selectedRows = listResult.data;
|
|
600
|
+
if (selectedRows.length === 0) {
|
|
601
|
+
const formLabel = form ?? 'any form';
|
|
602
|
+
throw new CLIError(ErrorCode.NOT_FOUND, `No filings found for ${params.id} using ${formLabel}.`);
|
|
603
|
+
}
|
|
604
|
+
const { docs, fetchedCount, reusedCount, skipped } = await materializeCachedDocs({
|
|
605
|
+
cacheRoot,
|
|
606
|
+
cik: entity.cik,
|
|
607
|
+
rows: selectedRows,
|
|
608
|
+
refresh: params.refresh,
|
|
609
|
+
context
|
|
610
|
+
});
|
|
611
|
+
if (docs.length === 0) {
|
|
612
|
+
throw new CLIError(ErrorCode.DOCS_REQUIRED, `No queryable filings were fetched for ${params.id}.`);
|
|
613
|
+
}
|
|
614
|
+
const docPaths = docs.map((doc) => doc.path);
|
|
615
|
+
const searchResult = await runLexicalSearch({
|
|
616
|
+
query: params.query,
|
|
617
|
+
docPaths,
|
|
618
|
+
topK: params.topK,
|
|
619
|
+
chunkLines: params.chunkLines,
|
|
620
|
+
chunkOverlap: params.chunkOverlap
|
|
621
|
+
});
|
|
622
|
+
const searchData = searchResult.data;
|
|
623
|
+
return {
|
|
624
|
+
data: {
|
|
625
|
+
...searchData,
|
|
626
|
+
id: params.id,
|
|
627
|
+
cik: entity.cik,
|
|
628
|
+
ticker: entity.ticker,
|
|
629
|
+
title: entity.title,
|
|
630
|
+
cache_root: cacheRoot,
|
|
631
|
+
scope: {
|
|
632
|
+
form: form ?? null,
|
|
633
|
+
latest: latest ?? null
|
|
634
|
+
},
|
|
635
|
+
corpus_docs_count: docs.length,
|
|
636
|
+
selected_filings: docs,
|
|
637
|
+
sync: {
|
|
638
|
+
fetched_count: fetchedCount,
|
|
639
|
+
reused_count: reusedCount,
|
|
640
|
+
docs_count: docs.length,
|
|
641
|
+
skipped_count: skipped.length,
|
|
642
|
+
skipped
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
};
|
|
646
|
+
}
|
|
572
647
|
let manifest = !params.refresh
|
|
573
648
|
? await readCachedManifest(cacheRoot, entity.cik, params.profile)
|
|
574
649
|
: null;
|