secaudit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +150 -0
- package/dist/chunk-AXVYBLOA.js +594 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +431 -0
- package/dist/llm-router-JZRXUHBF.js +103 -0
- package/package.json +67 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Simon Ouyang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# secaudit — Deterministic 10-K Filing Analyzer
|
|
2
|
+
|
|
3
|
+
A TypeScript CLI that demonstrates the difference between **command-driven (deterministic)** and **intent-based (probabilistic)** invocation using public SEC 10-K filings.
|
|
4
|
+
|
|
5
|
+
**Core thesis:** Intent-based invocation is probabilistic; command-driven invocation is deterministic and auditable.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install
|
|
11
|
+
npm run dev -- analyze-10k --ticker AAPL --year 2023 --mode command
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Output lands in `./out/`:
|
|
15
|
+
- `{invocationId}-analysis.json` — structured section analysis
|
|
16
|
+
- `{invocationId}-ledger.json` — audit ledger proving which steps ran
|
|
17
|
+
|
|
18
|
+
## Two Invocation Modes
|
|
19
|
+
|
|
20
|
+
### Command Mode (Deterministic)
|
|
21
|
+
|
|
22
|
+
Every required step must pass. Missing sections = hard failure (exit code 2). The audit ledger proves 100% step coverage.
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Deterministic analysis — fails loudly if anything is missing
|
|
26
|
+
npm run dev -- analyze-10k --ticker AAPL --year 2023 --mode command
|
|
27
|
+
|
|
28
|
+
# Markdown output
|
|
29
|
+
npm run dev -- analyze-10k --ticker TSLA --year 2023 --mode command --format md
|
|
30
|
+
|
|
31
|
+
# Direct URL override
|
|
32
|
+
npm run dev -- analyze-10k --ticker MSFT --year 2023 --source url \
|
|
33
|
+
--url "https://www.sec.gov/Archives/edgar/data/..." --mode command
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Intent Mode (Probabilistic)
|
|
37
|
+
|
|
38
|
+
The system interprets natural language. Required steps may be skipped — the ledger exposes this gap.
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Intent-based — may skip validation
|
|
42
|
+
npm run dev -- intent "analyze apple 10-k for 2023 and summarize risks"
|
|
43
|
+
|
|
44
|
+
# Override ticker/year if intent parsing misses
|
|
45
|
+
npm run dev -- intent "summarize tesla financial risks" --ticker TSLA --year 2023
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## CLI Reference
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
secaudit analyze-10k [options] Deterministic workflow
|
|
52
|
+
secaudit intent <text> [options] Probabilistic intent routing
|
|
53
|
+
|
|
54
|
+
Options (analyze-10k):
|
|
55
|
+
--ticker <string> Company ticker (e.g., AAPL, TSLA, MSFT)
|
|
56
|
+
--year <number> Filing year
|
|
57
|
+
--mode <command|intent> Invocation mode (default: command)
|
|
58
|
+
--format <json|md> Output format (default: json)
|
|
59
|
+
--out <path> Output directory (default: ./out)
|
|
60
|
+
--source <sec|url> Fetch source (default: sec)
|
|
61
|
+
--url <string> Direct filing URL
|
|
62
|
+
--require <list> Required sections (default: risk-factors,mdna,financials)
|
|
63
|
+
--no-strict Lower confidence thresholds
|
|
64
|
+
--no-cache Skip document cache
|
|
65
|
+
--invocation-id <string> Explicit invocation ID
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Workflow: `analyze_10k_v1`
|
|
69
|
+
|
|
70
|
+
Six steps, executed in order:
|
|
71
|
+
|
|
72
|
+
| Step | Description | Command Mode | Intent Mode |
|
|
73
|
+
|------|-------------|-------------|-------------|
|
|
74
|
+
| fetch | Download filing from SEC EDGAR | Required | Required |
|
|
75
|
+
| extract | Parse HTML/PDF to text | Required | Required |
|
|
76
|
+
| locate_sections | Find Item 1A, 7, 8 headings | Required | May skip |
|
|
77
|
+
| validate | Check section presence & confidence | Required (hard fail) | May skip |
|
|
78
|
+
| generate | Extractive summarization | Required | Best-effort |
|
|
79
|
+
| emit_ledger | Write audit record | Always | Always |
|
|
80
|
+
|
|
81
|
+
## Audit Ledger
|
|
82
|
+
|
|
83
|
+
Every run produces a ledger showing exactly what happened:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"mode": "command",
|
|
88
|
+
"deterministic": true,
|
|
89
|
+
"requiredSteps": ["fetch", "extract", "locate_sections", "validate", "generate", "emit_ledger"],
|
|
90
|
+
"executedSteps": ["fetch", "extract", "locate_sections", "validate", "generate", "emit_ledger"],
|
|
91
|
+
"skippedSteps": [],
|
|
92
|
+
"sectionValidation": {
|
|
93
|
+
"risk_factors": { "found": true, "confidence": 0.95, "lengthChars": 68735 },
|
|
94
|
+
"mdna": { "found": true, "confidence": 0.90, "lengthChars": 15092 },
|
|
95
|
+
"financials": { "found": true, "confidence": 0.95, "lengthChars": 19148 }
|
|
96
|
+
},
|
|
97
|
+
"passed": true
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
In intent mode, you'll see `"deterministic": false` and potentially `"skippedSteps": ["validate"]` — making the reliability gap visible.
|
|
102
|
+
|
|
103
|
+
## Exit Codes
|
|
104
|
+
|
|
105
|
+
- `0` — success
|
|
106
|
+
- `1` — general error (network, parse failure)
|
|
107
|
+
- `2` — validation failure (missing required sections, command mode only)
|
|
108
|
+
|
|
109
|
+
## Architecture
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
src/
|
|
113
|
+
cli/commands.ts Commander setup + flag parsing
|
|
114
|
+
control-plane/
|
|
115
|
+
orchestrator.ts Workflow engine: step sequencing + enforcement
|
|
116
|
+
workflow.ts Step definitions for analyze_10k_v1
|
|
117
|
+
types.ts Core type definitions
|
|
118
|
+
intent-router/
|
|
119
|
+
router.ts Keyword-based intent classifier
|
|
120
|
+
patterns.ts Ticker/year extraction patterns
|
|
121
|
+
tools/
|
|
122
|
+
fetcher.ts SEC EDGAR fetch + caching
|
|
123
|
+
extractor.ts HTML (cheerio) + PDF (pdfjs-dist) extraction
|
|
124
|
+
cache.ts File-based cache
|
|
125
|
+
analysis/
|
|
126
|
+
locator.ts Section heading detection via DOM traversal
|
|
127
|
+
validator.ts Section presence + confidence validation
|
|
128
|
+
summarizer.ts Extractive keyword-scored summarization
|
|
129
|
+
ledger/
|
|
130
|
+
ledger.ts Audit ledger builder
|
|
131
|
+
types.ts Ledger schema
|
|
132
|
+
utils/
|
|
133
|
+
id.ts Invocation ID generation
|
|
134
|
+
timer.ts Step timing
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Development
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
npm install
|
|
141
|
+
npm run typecheck # Type-check without emitting
|
|
142
|
+
npm run build # Build with tsup
|
|
143
|
+
npm test # Run tests
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Data Source
|
|
147
|
+
|
|
148
|
+
Uses the [SEC EDGAR API](https://www.sec.gov/developer) to fetch public 10-K filings. All requests comply with SEC rate limits (<10 req/sec) and include a User-Agent header.
|
|
149
|
+
|
|
150
|
+
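Fetched documents are cached in `.cache/` so repeat runs skip the document download. With `--source sec`, the ticker-to-CIK and filing lookups still require network access on every run; only `--source url` runs can be served entirely from the cache.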
|
package/dist/chunk-AXVYBLOA.js
ADDED
|
@@ -0,0 +1,594 @@
|
|
|
1
|
+
// src/tools/cache.ts
|
|
2
|
+
import { createHash } from "crypto";
|
|
3
|
+
import { mkdir, readFile, writeFile, stat } from "fs/promises";
|
|
4
|
+
import { join } from "path";
|
|
5
|
+
var CACHE_DIR = ".cache";
|
|
6
|
+
/**
 * Convert an arbitrary cache key into a string that is safe to use as a
 * file name inside the cache directory.
 *
 * Every character other than an ASCII letter, digit, "_" or "-" is replaced
 * with "_". Distinct keys can therefore map to the same file name.
 *
 * @param {string} key - Raw cache key.
 * @returns {string} The key with all unsafe characters replaced by "_".
 */
function sanitize(key) {
  // Without the "u"/"i" flags, \w is exactly [A-Za-z0-9_].
  const unsafeChar = /[^\w-]/g;
  return key.replace(unsafeChar, "_");
}
|
|
9
|
+
/**
 * Build the cache key for a filing fetched from SEC EDGAR.
 *
 * @param {string} ticker - Company ticker as supplied by the caller.
 * @param {number} year - Requested filing year.
 * @param {string} [accession] - Accession number; appended when truthy.
 * @returns {string} `sec_<ticker>_<year>` or `sec_<ticker>_<year>_<accession>`.
 */
function cacheKey(ticker, year, accession) {
  const base = `sec_${ticker}_${year}`;
  return accession ? `${base}_${accession}` : base;
}
|
|
15
|
+
/**
 * Build the cache key for a document fetched from a direct URL.
 *
 * The key is "url_" followed by the first 16 hex characters of the SHA-256
 * digest of the URL string.
 *
 * @param {string} url - URL the document is fetched from.
 * @returns {string} Cache key of the form `url_<16 hex chars>`.
 */
function urlCacheKey(url) {
  const digest = createHash("sha256");
  digest.update(url);
  const shortHex = digest.digest("hex").substring(0, 16);
  return "url_" + shortHex;
}
|
|
19
|
+
/**
 * Read a cached document.
 *
 * The previous implementation called `stat()` before `readFile()`. That check
 * was redundant: `readFile()` already rejects when the path is missing or
 * unreadable, and the extra call only added a syscall and a window in which
 * the file could disappear between the check and the read.
 *
 * @param {string} key - Cache key; sanitized into a file name under CACHE_DIR.
 * @returns {Promise<string|null>} The cached UTF-8 content, or `null` when the
 *   entry cannot be read for any reason (deliberate best-effort: any read
 *   error is treated as a cache miss).
 */
async function readCache(key) {
  const path = join(CACHE_DIR, sanitize(key));
  try {
    return await readFile(path, "utf-8");
  } catch {
    // Missing file, directory in the way, permission error: all cache misses.
    return null;
  }
}
|
|
28
|
+
async function writeCache(key, content) {
|
|
29
|
+
await mkdir(CACHE_DIR, { recursive: true });
|
|
30
|
+
const path = join(CACHE_DIR, sanitize(key));
|
|
31
|
+
await writeFile(path, content, "utf-8");
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// src/tools/fetcher.ts
|
|
35
|
+
var SEC_USER_AGENT = "secaudit-cli simonouyang@yahoo.com";
|
|
36
|
+
var SEC_RATE_LIMIT_MS = 120;
|
|
37
|
+
var lastRequestTime = 0;
|
|
38
|
+
async function throttle() {
|
|
39
|
+
const now = Date.now();
|
|
40
|
+
const elapsed = now - lastRequestTime;
|
|
41
|
+
if (elapsed < SEC_RATE_LIMIT_MS) {
|
|
42
|
+
await new Promise((r) => setTimeout(r, SEC_RATE_LIMIT_MS - elapsed));
|
|
43
|
+
}
|
|
44
|
+
lastRequestTime = Date.now();
|
|
45
|
+
}
|
|
46
|
+
async function secFetch(url) {
|
|
47
|
+
await throttle();
|
|
48
|
+
const res = await fetch(url, {
|
|
49
|
+
headers: { "User-Agent": SEC_USER_AGENT, Accept: "text/html,application/json" }
|
|
50
|
+
});
|
|
51
|
+
if (!res.ok) {
|
|
52
|
+
throw new Error(`SEC request failed: ${res.status} ${res.statusText} for ${url}`);
|
|
53
|
+
}
|
|
54
|
+
return res;
|
|
55
|
+
}
|
|
56
|
+
async function resolveCik(ticker) {
|
|
57
|
+
const res = await secFetch("https://www.sec.gov/files/company_tickers.json");
|
|
58
|
+
const data = await res.json();
|
|
59
|
+
for (const entry of Object.values(data)) {
|
|
60
|
+
if (entry.ticker.toUpperCase() === ticker.toUpperCase()) {
|
|
61
|
+
return String(entry.cik_str).padStart(10, "0");
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
throw new Error(`Ticker "${ticker}" not found in SEC company tickers`);
|
|
65
|
+
}
|
|
66
|
+
/**
 * Find a 10-K (or 10-K/A) filing for a company around a given year.
 *
 * Downloads the submissions index for the CIK and scans the `filings.recent`
 * arrays in the order the API returns them. The first 10-K or 10-K/A whose
 * filing date falls in `year` or `year + 1` wins.
 *
 * @param {string} cik - CIK as used in the submissions URL.
 * @param {number|string} year - Target year. Coerced to a number so that a
 *   value arriving as a string (e.g. straight from a CLI flag) still matches;
 *   without coercion `"2023" + 1` would be the string "20231".
 * @returns {Promise<{accessionNumber: string, primaryDocument: string, filingDate: string}>}
 * @throws {Error} When the response lacks `filings.recent`, or when no
 *   matching filing is present in it.
 */
async function findFiling(cik, year) {
  const targetYear = Number(year);
  const url = `https://data.sec.gov/submissions/CIK${cik}.json`;
  const res = await secFetch(url);
  const data = await res.json();
  const recent = data?.filings?.recent;
  if (!recent || !Array.isArray(recent.form)) {
    // Fail with a descriptive message instead of a TypeError on property access.
    throw new Error(`Unexpected SEC submissions response for CIK ${cik}: missing filings.recent`);
  }
  for (let i = 0; i < recent.form.length; i++) {
    const form = recent.form[i];
    if (form !== "10-K" && form !== "10-K/A") continue;
    const filingDate = recent.filingDate[i];
    const filingYear = parseInt(filingDate.slice(0, 4), 10);
    if (filingYear === targetYear || filingYear === targetYear + 1) {
      return {
        accessionNumber: recent.accessionNumber[i],
        primaryDocument: recent.primaryDocument[i],
        filingDate
      };
    }
  }
  throw new Error(
    `No 10-K filing found for CIK ${cik} around year ${year}. Try a different --year or use --source url with a direct URL.`
  );
}
|
|
88
|
+
/**
 * Build the EDGAR Archives URL of a filing's primary document.
 *
 * @param {string} cik - CIK path segment, used as given.
 * @param {string} accession - Accession number; all "-" characters are
 *   removed to form the folder name.
 * @param {string} primaryDoc - File name of the primary document.
 * @returns {string} Absolute URL under https://www.sec.gov/Archives/edgar/data/.
 */
function buildDocUrl(cik, accession, primaryDoc) {
  const folder = accession.split("-").join("");
  const segments = [cik, folder, primaryDoc];
  return `https://www.sec.gov/Archives/edgar/data/${segments[0]}/${segments[1]}/${segments[2]}`;
}
|
|
92
|
+
/**
 * Classify fetched content as "pdf", "html" or "text".
 *
 * Checks, in order:
 *   1. "pdf"  - the URL path ends in ".pdf" or the content starts with "%PDF".
 *   2. "html" - the content contains an "<html" tag or a "<DOCUMENT>" marker.
 *   3. "text" - anything else.
 *
 * Improvements over the previous version (everything it detected is still
 * detected the same way):
 *   - the extension and tag checks are case-insensitive (".PDF", "<Html");
 *   - a query string or fragment after the file name no longer hides the
 *     ".pdf" extension;
 *   - a missing URL is treated like the empty string.
 *
 * @param {string} content - Document body.
 * @param {string} url - Source URL; may be "" when unknown.
 * @returns {"pdf"|"html"|"text"}
 */
function detectContentType(content, url) {
  // Drop "?query" / "#fragment" so only the path decides the extension.
  const path = String(url ?? "").split(/[?#]/, 1)[0].toLowerCase();
  if (path.endsWith(".pdf") || content.startsWith("%PDF")) return "pdf";
  if (/<html|<document>/i.test(content)) return "html";
  return "text";
}
|
|
99
|
+
async function fetchFiling(options) {
|
|
100
|
+
if (options.url) {
|
|
101
|
+
return fetchByUrl(options.url, options.cache);
|
|
102
|
+
}
|
|
103
|
+
return fetchFromEdgar(options.ticker, options.year, options.cache);
|
|
104
|
+
}
|
|
105
|
+
async function fetchByUrl(url, useCache) {
|
|
106
|
+
const key = urlCacheKey(url);
|
|
107
|
+
if (useCache) {
|
|
108
|
+
const cached = await readCache(key);
|
|
109
|
+
if (cached) {
|
|
110
|
+
console.log(" (cache hit)");
|
|
111
|
+
return { content: cached, contentType: detectContentType(cached, url) };
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
const res = await secFetch(url);
|
|
115
|
+
const content = await res.text();
|
|
116
|
+
if (useCache) {
|
|
117
|
+
await writeCache(key, content);
|
|
118
|
+
}
|
|
119
|
+
return { content, contentType: detectContentType(content, url) };
|
|
120
|
+
}
|
|
121
|
+
/**
 * Fetch a company's 10-K from SEC EDGAR, going through the document cache
 * when enabled.
 *
 * The ticker-to-CIK lookup and the filing lookup always hit the network;
 * only the document download itself is cached.
 *
 * Fix: the document URL is now built before the cache lookup and passed to
 * `detectContentType` on a cache hit as well. Previously a hit passed "" as
 * the URL, so the same filing could be classified differently depending on
 * whether it came from the cache or the network.
 *
 * @param {string} ticker - Company ticker.
 * @param {number} year - Target filing year.
 * @param {boolean} useCache - Read from / write to the document cache.
 * @returns {Promise<{content: string, contentType: string}>}
 * @throws {Error} Propagates lookup and HTTP errors from the helpers it calls.
 */
async function fetchFromEdgar(ticker, year, useCache) {
  const cik = await resolveCik(ticker);
  console.log(` CIK: ${cik}`);
  const filing = await findFiling(cik, year);
  console.log(` Filing: ${filing.accessionNumber} (${filing.filingDate})`);
  const docUrl = buildDocUrl(cik, filing.accessionNumber, filing.primaryDocument);
  const key = cacheKey(ticker, year, filing.accessionNumber);
  if (useCache) {
    const cached = await readCache(key);
    // An empty cached file is treated as a miss and re-downloaded.
    if (cached) {
      console.log(" (cache hit)");
      return { content: cached, contentType: detectContentType(cached, docUrl) };
    }
  }
  console.log(` URL: ${docUrl}`);
  const res = await secFetch(docUrl);
  const content = await res.text();
  if (useCache) {
    await writeCache(key, content);
  }
  return { content, contentType: detectContentType(content, docUrl) };
}
|
|
143
|
+
|
|
144
|
+
// src/tools/extractor.ts
|
|
145
|
+
import * as cheerio from "cheerio";
|
|
146
|
+
/**
 * Convert raw fetched content to plain text according to its content type.
 *
 * @param {string} rawContent - Document body as fetched.
 * @param {"html"|"pdf"|"text"} contentType - Type reported by the fetch step.
 * @returns {Promise<string>} Extracted text; "text" content is returned as is.
 * @throws {Error} For an unrecognized content type. Previously the function
 *   resolved to `undefined` in that case, which only surfaced later as an
 *   unrelated TypeError when the caller read `.length`.
 */
async function extractText(rawContent, contentType) {
  switch (contentType) {
    case "html":
      return extractFromHtml(rawContent);
    case "pdf":
      return extractFromPdf(rawContent);
    case "text":
      return rawContent;
    default:
      throw new Error(`Unsupported content type: ${String(contentType)}`);
  }
}
|
|
156
|
+
function extractFromHtml(html) {
|
|
157
|
+
const $ = cheerio.load(html);
|
|
158
|
+
$("script, style, noscript, meta, link").remove();
|
|
159
|
+
const blocks = [];
|
|
160
|
+
const seen = /* @__PURE__ */ new Set();
|
|
161
|
+
$("p, td, th, li, h1, h2, h3, h4, h5, h6, dt, dd, blockquote").each((_, el) => {
|
|
162
|
+
const text = $(el).contents().toArray().map((n) => $(n).text()).join(" ").replace(/\s+/g, " ").trim();
|
|
163
|
+
if (text.length > 2 && !seen.has(text)) {
|
|
164
|
+
seen.add(text);
|
|
165
|
+
blocks.push(text);
|
|
166
|
+
}
|
|
167
|
+
});
|
|
168
|
+
if (blocks.length === 0) {
|
|
169
|
+
const body = $("body").text().replace(/\s+/g, " ").trim();
|
|
170
|
+
if (body.length > 0) return body;
|
|
171
|
+
return $.text().replace(/\s+/g, " ").trim();
|
|
172
|
+
}
|
|
173
|
+
return blocks.join("\n");
|
|
174
|
+
}
|
|
175
|
+
async function extractFromPdf(content) {
|
|
176
|
+
try {
|
|
177
|
+
const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
|
178
|
+
const data = new Uint8Array(Buffer.from(content, "binary"));
|
|
179
|
+
const doc = await pdfjs.getDocument({ data }).promise;
|
|
180
|
+
const pages = [];
|
|
181
|
+
for (let i = 1; i <= doc.numPages; i++) {
|
|
182
|
+
const page = await doc.getPage(i);
|
|
183
|
+
const textContent = await page.getTextContent();
|
|
184
|
+
const pageText = textContent.items.filter((item) => "str" in item).map((item) => item.str).join(" ");
|
|
185
|
+
pages.push(pageText);
|
|
186
|
+
}
|
|
187
|
+
return pages.join("\n\n");
|
|
188
|
+
} catch (err) {
|
|
189
|
+
throw new Error(
|
|
190
|
+
`PDF extraction failed: ${err instanceof Error ? err.message : String(err)}. Try providing an HTML filing URL instead.`
|
|
191
|
+
);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// src/analysis/locator.ts
|
|
196
|
+
import * as cheerio2 from "cheerio";
|
|
197
|
+
var SECTION_PATTERNS = [
|
|
198
|
+
{
|
|
199
|
+
key: "risk_factors",
|
|
200
|
+
requireKeys: ["risk-factors", "risk_factors"],
|
|
201
|
+
headingPatterns: [
|
|
202
|
+
/^item\s+1a[\.\s\-—–]+risk\s+factors/i,
|
|
203
|
+
/^item\s+1a\b/i
|
|
204
|
+
]
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
key: "mdna",
|
|
208
|
+
requireKeys: ["mdna", "md&a", "mda"],
|
|
209
|
+
headingPatterns: [
|
|
210
|
+
/^item\s+7[\.\s\-—–]+management'?s?\s+discussion/i,
|
|
211
|
+
/^item\s+7\b(?!\s*a)/i
|
|
212
|
+
]
|
|
213
|
+
},
|
|
214
|
+
{
|
|
215
|
+
key: "financials",
|
|
216
|
+
requireKeys: ["financials", "financial-statements", "financial_statements"],
|
|
217
|
+
headingPatterns: [
|
|
218
|
+
/^item\s+8[\.\s\-—–]+financial\s+statements/i,
|
|
219
|
+
/^item\s+8\b/i
|
|
220
|
+
]
|
|
221
|
+
}
|
|
222
|
+
];
|
|
223
|
+
var NEXT_ITEM_HEADING = /^item\s+\d+[a-z]?[\.\s\-—–]/i;
|
|
224
|
+
function locateSections(rawHtml, extractedText, contentType) {
|
|
225
|
+
if (contentType === "html") {
|
|
226
|
+
return locateInHtml(rawHtml);
|
|
227
|
+
}
|
|
228
|
+
return locateInText(extractedText);
|
|
229
|
+
}
|
|
230
|
+
function locateInHtml(html) {
|
|
231
|
+
const $ = cheerio2.load(html);
|
|
232
|
+
const results = [];
|
|
233
|
+
const bodyDivs = $("body > div").toArray();
|
|
234
|
+
for (const pattern of SECTION_PATTERNS) {
|
|
235
|
+
const match = findSectionByDomSiblings($, bodyDivs, pattern);
|
|
236
|
+
results.push(match);
|
|
237
|
+
}
|
|
238
|
+
return results;
|
|
239
|
+
}
|
|
240
|
+
function findSectionByDomSiblings($, bodyDivs, pattern) {
|
|
241
|
+
let headingIdx = -1;
|
|
242
|
+
let confidence = 0;
|
|
243
|
+
for (let i = 0; i < bodyDivs.length; i++) {
|
|
244
|
+
const text = $(bodyDivs[i]).text().replace(/\s+/g, " ").trim();
|
|
245
|
+
if (text.length > 150 || text.length < 3) continue;
|
|
246
|
+
for (let pi = 0; pi < pattern.headingPatterns.length; pi++) {
|
|
247
|
+
if (pattern.headingPatterns[pi].test(text)) {
|
|
248
|
+
headingIdx = i;
|
|
249
|
+
confidence = pi === 0 ? 0.95 : 0.9;
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
if (headingIdx >= 0) break;
|
|
254
|
+
}
|
|
255
|
+
if (headingIdx < 0) {
|
|
256
|
+
return makeNotFound(pattern.key);
|
|
257
|
+
}
|
|
258
|
+
const contentBlocks = [];
|
|
259
|
+
let endIdx = bodyDivs.length;
|
|
260
|
+
for (let i = headingIdx + 1; i < bodyDivs.length; i++) {
|
|
261
|
+
const text = $(bodyDivs[i]).text().replace(/\s+/g, " ").trim();
|
|
262
|
+
if (text.length < 100 && NEXT_ITEM_HEADING.test(text)) {
|
|
263
|
+
endIdx = i;
|
|
264
|
+
break;
|
|
265
|
+
}
|
|
266
|
+
if (text.length > 2) {
|
|
267
|
+
contentBlocks.push(text);
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
const content = contentBlocks.join("\n");
|
|
271
|
+
return {
|
|
272
|
+
name: pattern.key,
|
|
273
|
+
found: true,
|
|
274
|
+
confidence,
|
|
275
|
+
startOffset: headingIdx,
|
|
276
|
+
endOffset: endIdx,
|
|
277
|
+
lengthChars: content.length,
|
|
278
|
+
content
|
|
279
|
+
};
|
|
280
|
+
}
|
|
281
|
+
function locateInText(text) {
|
|
282
|
+
const allPatterns = [
|
|
283
|
+
...SECTION_PATTERNS.map((p) => ({
|
|
284
|
+
...p,
|
|
285
|
+
headingPatterns: p.headingPatterns.map(
|
|
286
|
+
(r) => new RegExp(r.source.replace(/^\^/, ""), r.flags)
|
|
287
|
+
)
|
|
288
|
+
}))
|
|
289
|
+
];
|
|
290
|
+
return allPatterns.map((pattern) => {
|
|
291
|
+
for (let pi = 0; pi < pattern.headingPatterns.length; pi++) {
|
|
292
|
+
const match = pattern.headingPatterns[pi].exec(text);
|
|
293
|
+
if (match) {
|
|
294
|
+
const confidence = pi === 0 ? 0.9 : 0.85;
|
|
295
|
+
const startOffset = match.index;
|
|
296
|
+
const sectionContent = extractTextSection(text, startOffset);
|
|
297
|
+
return {
|
|
298
|
+
name: pattern.key,
|
|
299
|
+
found: true,
|
|
300
|
+
confidence,
|
|
301
|
+
startOffset,
|
|
302
|
+
endOffset: startOffset + sectionContent.length,
|
|
303
|
+
lengthChars: sectionContent.length,
|
|
304
|
+
content: sectionContent
|
|
305
|
+
};
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
return makeNotFound(pattern.key);
|
|
309
|
+
});
|
|
310
|
+
}
|
|
311
|
+
function extractTextSection(text, startOffset) {
|
|
312
|
+
const afterHeading = text.slice(startOffset);
|
|
313
|
+
const lines = afterHeading.split("\n");
|
|
314
|
+
let endIdx = -1;
|
|
315
|
+
for (let i = 3; i < lines.length; i++) {
|
|
316
|
+
const trimmed = lines[i].trim();
|
|
317
|
+
if (trimmed.length < 3) continue;
|
|
318
|
+
if (NEXT_ITEM_HEADING.test(trimmed) && trimmed.length < 150) {
|
|
319
|
+
endIdx = i;
|
|
320
|
+
break;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
const sectionLines = endIdx > 0 ? lines.slice(0, endIdx) : lines.slice(0, 800);
|
|
324
|
+
const content = sectionLines.join("\n").trim();
|
|
325
|
+
if (content.length > 1e5) {
|
|
326
|
+
return content.slice(0, 1e5);
|
|
327
|
+
}
|
|
328
|
+
return content;
|
|
329
|
+
}
|
|
330
|
+
function makeNotFound(name) {
|
|
331
|
+
return {
|
|
332
|
+
name,
|
|
333
|
+
found: false,
|
|
334
|
+
confidence: 0,
|
|
335
|
+
startOffset: -1,
|
|
336
|
+
endOffset: -1,
|
|
337
|
+
lengthChars: 0,
|
|
338
|
+
content: ""
|
|
339
|
+
};
|
|
340
|
+
}
|
|
341
|
+
function matchesSectionKey(sectionName, requireKey) {
|
|
342
|
+
const pattern = SECTION_PATTERNS.find((p) => p.key === sectionName);
|
|
343
|
+
if (!pattern) return false;
|
|
344
|
+
return pattern.requireKeys.includes(requireKey.toLowerCase());
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// src/analysis/validator.ts
|
|
348
|
+
var MIN_SECTION_LENGTH = 500;
|
|
349
|
+
function validateSections(sections, requiredKeys, confidenceThreshold, hardFail) {
|
|
350
|
+
const failures = [];
|
|
351
|
+
for (const reqKey of requiredKeys) {
|
|
352
|
+
const section = sections.find((s) => matchesSectionKey(s.name, reqKey));
|
|
353
|
+
if (!section || !section.found) {
|
|
354
|
+
failures.push(`Section "${reqKey}" not found in filing`);
|
|
355
|
+
continue;
|
|
356
|
+
}
|
|
357
|
+
if (section.confidence < confidenceThreshold) {
|
|
358
|
+
failures.push(
|
|
359
|
+
`Section "${reqKey}" confidence ${section.confidence.toFixed(2)} below threshold ${confidenceThreshold.toFixed(2)}`
|
|
360
|
+
);
|
|
361
|
+
}
|
|
362
|
+
if (section.lengthChars < MIN_SECTION_LENGTH) {
|
|
363
|
+
failures.push(
|
|
364
|
+
`Section "${reqKey}" too short (${section.lengthChars} chars, minimum ${MIN_SECTION_LENGTH})`
|
|
365
|
+
);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
if (failures.length > 0) {
|
|
369
|
+
const message = `Validation failed:
|
|
370
|
+
- ${failures.join("\n - ")}`;
|
|
371
|
+
if (hardFail) {
|
|
372
|
+
throw new Error(message);
|
|
373
|
+
}
|
|
374
|
+
console.warn(` [warn] ${message}`);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// src/analysis/summarizer.ts
|
|
379
|
+
var MAX_SUMMARY_POINTS = 5;
|
|
380
|
+
var MAX_EVIDENCE_SNIPPETS = 3;
|
|
381
|
+
var SENTENCE_END = /(?<=[.!?])\s+/;
|
|
382
|
+
var RISK_KEYWORDS = [
|
|
383
|
+
"risk",
|
|
384
|
+
"adverse",
|
|
385
|
+
"uncertainty",
|
|
386
|
+
"litigation",
|
|
387
|
+
"regulatory",
|
|
388
|
+
"competition",
|
|
389
|
+
"cybersecurity",
|
|
390
|
+
"supply chain",
|
|
391
|
+
"economic",
|
|
392
|
+
"volatility",
|
|
393
|
+
"liability",
|
|
394
|
+
"compliance",
|
|
395
|
+
"disruption"
|
|
396
|
+
];
|
|
397
|
+
var FINANCIAL_KEYWORDS = [
|
|
398
|
+
"revenue",
|
|
399
|
+
"income",
|
|
400
|
+
"loss",
|
|
401
|
+
"margin",
|
|
402
|
+
"earnings",
|
|
403
|
+
"cash flow",
|
|
404
|
+
"assets",
|
|
405
|
+
"liabilities",
|
|
406
|
+
"debt",
|
|
407
|
+
"capital",
|
|
408
|
+
"dividend",
|
|
409
|
+
"operating",
|
|
410
|
+
"growth",
|
|
411
|
+
"decline",
|
|
412
|
+
"increase",
|
|
413
|
+
"decrease"
|
|
414
|
+
];
|
|
415
|
+
var MDNA_KEYWORDS = [
|
|
416
|
+
"revenue",
|
|
417
|
+
"growth",
|
|
418
|
+
"margin",
|
|
419
|
+
"decline",
|
|
420
|
+
"increase",
|
|
421
|
+
"segment",
|
|
422
|
+
"operating",
|
|
423
|
+
"strategy",
|
|
424
|
+
"outlook",
|
|
425
|
+
"trend",
|
|
426
|
+
"driver",
|
|
427
|
+
"year-over-year",
|
|
428
|
+
"compared to",
|
|
429
|
+
"primarily due",
|
|
430
|
+
"result of"
|
|
431
|
+
];
|
|
432
|
+
function generateAnalysis(sections, requiredKeys) {
|
|
433
|
+
const analyses = [];
|
|
434
|
+
for (const reqKey of requiredKeys) {
|
|
435
|
+
const section = sections.find((s) => matchesSectionKey(s.name, reqKey));
|
|
436
|
+
if (!section || !section.found) {
|
|
437
|
+
analyses.push({
|
|
438
|
+
name: sectionKeyToName(reqKey),
|
|
439
|
+
found: false,
|
|
440
|
+
confidence: 0,
|
|
441
|
+
summary: [],
|
|
442
|
+
evidence: []
|
|
443
|
+
});
|
|
444
|
+
continue;
|
|
445
|
+
}
|
|
446
|
+
const keywords = getKeywordsForSection(section.name);
|
|
447
|
+
const sentences = splitSentences(section.content);
|
|
448
|
+
const scored = scoreSentences(sentences, keywords);
|
|
449
|
+
analyses.push({
|
|
450
|
+
name: section.name,
|
|
451
|
+
found: true,
|
|
452
|
+
confidence: section.confidence,
|
|
453
|
+
summary: scored.slice(0, MAX_SUMMARY_POINTS).map((s) => s.text),
|
|
454
|
+
evidence: scored.slice(0, MAX_EVIDENCE_SNIPPETS).map((s) => truncate(s.text, 200))
|
|
455
|
+
});
|
|
456
|
+
}
|
|
457
|
+
const overallSummary = buildOverallSummary(analyses);
|
|
458
|
+
return { sections: analyses, overallSummary };
|
|
459
|
+
}
|
|
460
|
+
/**
 * Score sentences by keyword hits and return them, highest score first.
 *
 * Only sentences longer than 30 and shorter than 500 characters are kept.
 * Each keyword found in the sentence (case-insensitive substring match) adds
 * 1 point; a dollar amount or a percentage anywhere in the sentence adds a
 * single 0.5 bonus.
 *
 * @param {string[]} sentences - Candidate sentences.
 * @param {string[]} keywords - Keywords or phrases to look for.
 * @returns {{text: string, score: number}[]} Scored sentences sorted by
 *   descending score.
 */
function scoreSentences(sentences, keywords) {
  const hasFigure = (s) => /\$[\d,.]+/.test(s) || /\d+(\.\d+)?%/.test(s);
  const candidates = sentences.filter((s) => s.length > 30 && s.length < 500);
  return candidates
    .map((text) => {
      const haystack = text.toLowerCase();
      const hits = keywords.filter((kw) => haystack.includes(kw.toLowerCase())).length;
      const score = hasFigure(text) ? hits + 0.5 : hits;
      return { text, score };
    })
    .sort((left, right) => right.score - left.score);
}
|
|
477
|
+
function splitSentences(text) {
|
|
478
|
+
return text.split(SENTENCE_END).map((s) => s.trim()).filter((s) => s.length > 0);
|
|
479
|
+
}
|
|
480
|
+
function getKeywordsForSection(name) {
|
|
481
|
+
switch (name) {
|
|
482
|
+
case "risk_factors":
|
|
483
|
+
return RISK_KEYWORDS;
|
|
484
|
+
case "mdna":
|
|
485
|
+
return MDNA_KEYWORDS;
|
|
486
|
+
case "financials":
|
|
487
|
+
return FINANCIAL_KEYWORDS;
|
|
488
|
+
default:
|
|
489
|
+
return [...RISK_KEYWORDS, ...FINANCIAL_KEYWORDS];
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
/**
 * Map a user-facing section key (e.g. a value given to `--require`) to the
 * canonical section name.
 *
 * Fix: the aliases live in a `Map` rather than a plain object. With a plain
 * object, keys such as "constructor" or "__proto__" resolved to inherited
 * `Object.prototype` members, so the function returned a function or object
 * instead of a string.
 *
 * @param {string} key - Section key; matched case-insensitively.
 * @returns {string} The canonical name ("risk_factors", "mdna", "financials"),
 *   or `key` unchanged when it is not a known alias.
 */
function sectionKeyToName(key) {
  const aliases = new Map([
    ["risk-factors", "risk_factors"],
    ["risk_factors", "risk_factors"],
    ["mdna", "mdna"],
    ["md&a", "mdna"],
    ["mda", "mdna"],
    ["financials", "financials"],
    ["financial-statements", "financials"],
    ["financial_statements", "financials"]
  ]);
  return aliases.get(key.toLowerCase()) ?? key;
}
|
|
505
|
+
/**
 * Shorten text to at most `maxLen` characters, ending in "..." when it had
 * to be cut.
 *
 * Fix: for `maxLen` below 3 there is no room for the ellipsis. The previous
 * code passed a negative end index to `slice`, which counts from the end of
 * the string and returned a result longer than `maxLen`. Such limits now get
 * a plain hard cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLen - Maximum length of the result.
 * @returns {string} `text` itself when it already fits, otherwise the
 *   shortened form.
 */
function truncate(text, maxLen) {
  if (text.length <= maxLen) return text;
  if (maxLen < 3) return text.slice(0, Math.max(0, maxLen));
  return text.slice(0, maxLen - 3) + "...";
}
|
|
509
|
+
/**
 * Build the overall summary lines for a set of section analyses.
 *
 * Output order:
 *   1. one "Analyzed N section(s): ..." line when any section was found;
 *   2. one "Missing section(s): ..." line when any section was not found;
 *   3. for each found section with a non-empty summary, "<name>: <first point>".
 *
 * @param {{name: string, found: boolean, summary: string[]}[]} analyses
 * @returns {string[]} Summary lines; empty for an empty input.
 */
function buildOverallSummary(analyses) {
  const present = [];
  const absent = [];
  for (const entry of analyses) {
    (entry.found ? present : absent).push(entry);
  }

  const listNames = (entries) => entries.map((e) => e.name).join(", ");
  const lines = [];
  if (present.length > 0) {
    lines.push(`Analyzed ${present.length} section(s): ${listNames(present)}.`);
  }
  if (absent.length > 0) {
    lines.push(`Missing section(s): ${listNames(absent)}.`);
  }
  for (const entry of present) {
    if (entry.summary.length > 0) {
      lines.push(`${entry.name}: ${entry.summary[0]}`);
    }
  }
  return lines;
}
|
|
530
|
+
|
|
531
|
+
// src/control-plane/workflow.ts
|
|
532
|
+
var COMMAND_CONFIDENCE_THRESHOLD = 0.75;
|
|
533
|
+
var INTENT_CONFIDENCE_THRESHOLD = 0.5;
|
|
534
|
+
function buildWorkflow(options) {
|
|
535
|
+
const isCommand = options.mode === "command";
|
|
536
|
+
const threshold = isCommand ? COMMAND_CONFIDENCE_THRESHOLD : INTENT_CONFIDENCE_THRESHOLD;
|
|
537
|
+
const steps = [
|
|
538
|
+
{
|
|
539
|
+
name: "fetch",
|
|
540
|
+
required: true,
|
|
541
|
+
execute: async (ctx) => {
|
|
542
|
+
const result = await fetchFiling(ctx.options);
|
|
543
|
+
ctx.rawContent = result.content;
|
|
544
|
+
ctx.contentType = result.contentType;
|
|
545
|
+
}
|
|
546
|
+
},
|
|
547
|
+
{
|
|
548
|
+
name: "extract",
|
|
549
|
+
required: true,
|
|
550
|
+
execute: async (ctx) => {
|
|
551
|
+
ctx.extractedText = await extractText(ctx.rawContent, ctx.contentType);
|
|
552
|
+
if (ctx.extractedText.length < 1e3) {
|
|
553
|
+
throw new Error(
|
|
554
|
+
`Extracted text too short (${ctx.extractedText.length} chars). Filing may be malformed.`
|
|
555
|
+
);
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
},
|
|
559
|
+
{
|
|
560
|
+
name: "locate_sections",
|
|
561
|
+
required: true,
|
|
562
|
+
execute: async (ctx) => {
|
|
563
|
+
ctx.sections = locateSections(ctx.rawContent, ctx.extractedText, ctx.contentType);
|
|
564
|
+
}
|
|
565
|
+
},
|
|
566
|
+
{
|
|
567
|
+
name: "validate",
|
|
568
|
+
required: isCommand,
|
|
569
|
+
execute: async (ctx) => {
|
|
570
|
+
validateSections(ctx.sections, ctx.options.require, threshold, isCommand);
|
|
571
|
+
}
|
|
572
|
+
},
|
|
573
|
+
{
|
|
574
|
+
name: "generate",
|
|
575
|
+
required: isCommand,
|
|
576
|
+
execute: async (ctx) => {
|
|
577
|
+
const result = generateAnalysis(ctx.sections, ctx.options.require);
|
|
578
|
+
ctx.analyses = result.sections;
|
|
579
|
+
ctx.overallSummary = result.overallSummary;
|
|
580
|
+
}
|
|
581
|
+
},
|
|
582
|
+
{
|
|
583
|
+
name: "emit_ledger",
|
|
584
|
+
required: true,
|
|
585
|
+
execute: async () => {
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
];
|
|
589
|
+
return { steps, skippedSteps: [], confidenceThreshold: threshold };
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
export {
|
|
593
|
+
buildWorkflow
|
|
594
|
+
};
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
buildWorkflow
|
|
4
|
+
} from "./chunk-AXVYBLOA.js";
|
|
5
|
+
|
|
6
|
+
// src/index.ts
|
|
7
|
+
import "dotenv/config";
|
|
8
|
+
|
|
9
|
+
// src/cli/commands.ts
|
|
10
|
+
import { Command } from "commander";
|
|
11
|
+
|
|
12
|
+
// src/utils/id.ts
|
|
13
|
+
import { randomBytes } from "crypto";
|
|
14
|
+
/**
 * Generate an invocation ID of the form `inv_<YYYYMMDD>_<6 hex chars>`.
 *
 * The date part is the current UTC date taken from `toISOString()`; the
 * suffix is three random bytes rendered as hex.
 *
 * @returns {string} A new invocation ID.
 */
function generateInvocationId() {
  const [isoDay] = new Date().toISOString().split("T");
  const stamp = isoDay.split("-").join("");
  const suffix = randomBytes(3).toString("hex");
  return ["inv", stamp, suffix].join("_");
}
|
|
20
|
+
|
|
21
|
+
// src/control-plane/orchestrator.ts
|
|
22
|
+
import { mkdir, writeFile } from "fs/promises";
|
|
23
|
+
import { join } from "path";
|
|
24
|
+
|
|
25
|
+
// src/utils/timer.ts
|
|
26
|
+
/**
 * Stopwatch for timing workflow steps, based on the high-resolution
 * `process.hrtime.bigint()` clock.
 *
 * Call `begin()` to mark the start, then `elapsed()` to read the whole
 * milliseconds since that mark. `start` is 0n until `begin()` is called.
 */
var StepTimer = class {
  start = 0n;

  /** Record the current high-resolution time as the start mark. */
  begin() {
    this.start = process.hrtime.bigint();
  }

  /**
   * @returns {number} Whole milliseconds since the start mark (BigInt
   *   division discards the sub-millisecond remainder).
   */
  elapsed() {
    const NS_PER_MS = 1000000n;
    const deltaNs = process.hrtime.bigint() - this.start;
    return Number(deltaNs / NS_PER_MS);
  }
};
|
|
36
|
+
|
|
37
|
+
// src/ledger/ledger.ts
|
|
38
|
+
/** Step names reported as `requiredSteps` in every ledger. */
var ALL_REQUIRED_STEPS = [
  "fetch",
  "extract",
  "locate_sections",
  "validate",
  "generate",
  "emit_ledger"
];

/**
 * Assemble the audit ledger record for one workflow run.
 *
 * @param {object} ctx Run context; reads `options.invocationId`,
 *   `options.mode`, `stepResults` and `sections`.
 * @param {object} plan Workflow plan; its `skippedSteps` seed the ledger's
 *   skipped list.
 * @param {boolean} passed Overall outcome to record.
 * @param {string} [failureReason] Message recorded alongside a failed run.
 * @returns {object} Plain, JSON-serialisable ledger object.
 */
function buildLedger(ctx, plan, passed, failureReason) {
  // Start from the plan's skips; runtime "skipped" results are merged in below
  // without creating duplicates.
  const skippedSteps = [...plan.skippedSteps];
  const executedSteps = [];
  const durationsMs = {};

  for (const { name, status, durationMs } of ctx.stepResults) {
    durationsMs[name] = durationMs;
    switch (status) {
      case "passed":
      case "failed":
        // A failed step still ran, so it counts as executed.
        executedSteps.push(name);
        break;
      case "skipped":
        if (!skippedSteps.includes(name)) {
          skippedSteps.push(name);
        }
        break;
    }
  }

  const sectionValidation = {};
  for (const { name, found, confidence, lengthChars } of ctx.sections) {
    sectionValidation[name] = { found, confidence, lengthChars };
  }

  const { invocationId, mode } = ctx.options;
  return {
    invocationId,
    timestamp: new Date().toISOString(),
    mode,
    // Only command-mode runs are flagged as deterministic.
    deterministic: mode === "command",
    workflow: "analyze_10k_v1",
    requiredSteps: ALL_REQUIRED_STEPS,
    executedSteps,
    skippedSteps,
    durationsMs,
    sectionValidation,
    passed,
    failureReason
  };
}
|
|
81
|
+
|
|
82
|
+
// src/control-plane/orchestrator.ts
|
|
83
|
+
/**
 * Run a workflow plan step by step, recording every outcome in the run
 * context, then write the outputs through `emitOutputs`.
 *
 * - Steps listed in `plan.skippedSteps` are recorded as "skipped" and not run.
 * - A step that throws is recorded as "failed" with its error message.
 * - In command mode, a failed step flagged `required` is fatal: a failing
 *   ledger is written, remediation hints are printed and the process exits
 *   with code 2. Any other failure is logged and the loop continues.
 *
 * @param {object} options Resolved invocation options (mode, ticker, year,
 *   strict, out, invocationId, ...).
 * @param {object} [intentPlan] Pre-built plan; when nullish a plan is built
 *   from `options` via `buildWorkflow`.
 * @returns {Promise<void>}
 */
async function runWorkflow(options, intentPlan) {
  const ctx = {
    options,
    rawContent: "",
    contentType: "html",
    extractedText: "",
    sections: [],
    analyses: [],
    stepResults: [],
    overallSummary: []
  };
  const plan = intentPlan ?? buildWorkflow(options);
  const toSkip = new Set(plan.skippedSteps);
  const stopwatch = new StepTimer();

  console.log(`\n[secaudit] mode=${options.mode} workflow=analyze_10k_v1 id=${options.invocationId}`);
  console.log(`[secaudit] ticker=${options.ticker} year=${options.year} strict=${options.strict}\n`);

  for (const step of plan.steps) {
    const { name } = step;

    if (toSkip.has(name)) {
      ctx.stepResults.push({ name, status: "skipped", durationMs: 0 });
      console.log(` [skip] ${name}`);
      continue;
    }

    stopwatch.begin();
    console.log(` [run] ${name}...`);
    try {
      await step.execute(ctx);
      const durationMs = stopwatch.elapsed();
      ctx.stepResults.push({ name, status: "passed", durationMs });
      console.log(` [pass] ${name} (${durationMs}ms)`);
    } catch (err) {
      const durationMs = stopwatch.elapsed();
      const message = err instanceof Error ? err.message : String(err);
      ctx.stepResults.push({ name, status: "failed", durationMs, error: message });
      console.error(` [FAIL] ${name}: ${message}`);

      const isFatal = options.mode === "command" && step.required;
      if (!isFatal) {
        continue;
      }
      // Write the failing ledger before exiting so the aborted run is still recorded.
      await emitOutputs(ctx, plan, false, message);
      console.error(`\n[secaudit] FATAL: required step "${name}" failed in command mode`);
      printRemediation(name, message);
      process.exit(2);
    }
  }

  await emitOutputs(ctx, plan, true);
  console.log(`\n[secaudit] done. Output written to ${options.out}/`);
}
|
|
151
|
+
/**
 * Write the run's output files into `ctx.options.out` (created if missing).
 *
 * The ledger `<invocationId>-ledger.json` is always written. The analysis
 * file `<invocationId>-analysis.<md|json>` is written only when `passed` is
 * truthy: Markdown when `options.format` is "md", pretty-printed JSON for any
 * other format value.
 *
 * @param {object} ctx Run context.
 * @param {object} plan Workflow plan (forwarded to `buildLedger`).
 * @param {boolean} passed Overall run outcome.
 * @param {string} [failureReason] Failure message recorded in the ledger.
 * @returns {Promise<void>}
 */
async function emitOutputs(ctx, plan, passed, failureReason) {
  const { out, invocationId, format } = ctx.options;
  await mkdir(out, { recursive: true });

  const ledgerJson = JSON.stringify(buildLedger(ctx, plan, passed, failureReason), null, 2);
  await writeFile(join(out, `${invocationId}-ledger.json`), ledgerJson);

  if (!passed) {
    return;
  }

  const analysis = buildAnalysisOutput(ctx);
  const asMarkdown = format === "md";
  const analysisPath = join(out, `${invocationId}-analysis.${asMarkdown ? "md" : "json"}`);
  const payload = asMarkdown ? renderMarkdown(analysis) : JSON.stringify(analysis, null, 2);
  await writeFile(analysisPath, payload);
}
|
|
168
|
+
/**
 * Project the run context onto the analysis document that gets written out.
 *
 * @param {object} ctx Run context; reads `options`, `analyses` and
 *   `overallSummary`.
 * @returns {object} Analysis record with identification fields, the input
 *   ticker/year, the per-section analyses and the overall summary.
 */
function buildAnalysisOutput(ctx) {
  const { invocationId, mode, ticker, year } = ctx.options;
  return {
    invocationId,
    mode,
    workflow: "analyze_10k_v1",
    input: { ticker, year },
    sections: ctx.analyses,
    overallSummary: ctx.overallSummary
  };
}
|
|
178
|
+
/**
 * Render an analysis record as a Markdown document.
 *
 * Layout: a title and metadata header, one `##` block per section (with
 * optional "Summary" bullets and "Evidence" block quotes), and a final
 * "Overall Summary" bullet list when that list is non-empty.
 *
 * @param {object} analysis Record shaped like the output of
 *   `buildAnalysisOutput`.
 * @returns {string} Markdown text; lines are joined with "\n".
 */
function renderMarkdown(analysis) {
  const out = [
    `# 10-K Analysis: ${analysis.input.ticker} (${analysis.input.year})`,
    "",
    `**Mode:** ${analysis.mode}`,
    `**Workflow:** ${analysis.workflow}`,
    `**Invocation ID:** ${analysis.invocationId}`,
    ""
  ];
  // Append each item as its own line, prefixed with a Markdown marker.
  const pushPrefixed = (prefix, items) => {
    for (const item of items) {
      out.push(`${prefix}${item}`);
    }
  };

  for (const section of analysis.sections) {
    out.push(`## ${formatSectionName(section.name)}`, "");
    out.push(`**Found:** ${section.found} | **Confidence:** ${section.confidence}`, "");

    if (section.summary.length > 0) {
      out.push("### Summary");
      pushPrefixed("- ", section.summary);
      out.push("");
    }
    if (section.evidence.length > 0) {
      out.push("### Evidence");
      pushPrefixed("> ", section.evidence);
      out.push("");
    }
  }

  if (analysis.overallSummary.length > 0) {
    out.push("## Overall Summary", "");
    pushPrefixed("- ", analysis.overallSummary);
  }
  return out.join("\n");
}
|
|
216
|
+
/**
 * Map an internal section key to its human-readable heading.
 *
 * Only the keys listed below are translated; any other value is returned
 * unchanged. The lookup is restricted to own properties so that names which
 * happen to match inherited `Object.prototype` members (for example
 * "constructor" or "toString") are returned as-is instead of resolving to a
 * built-in function.
 *
 * @param {string} name Section key, e.g. "risk_factors".
 * @returns {string} Display name, or `name` itself when no mapping exists.
 */
function formatSectionName(name) {
  const displayNames = {
    risk_factors: "Risk Factors (Item 1A)",
    mdna: "Management's Discussion & Analysis (Item 7)",
    financials: "Financial Statements (Item 8)"
  };
  return Object.hasOwn(displayNames, name) ? displayNames[name] : name;
}
|
|
224
|
+
/**
 * Print remediation hints for a failed step to stderr.
 *
 * A header line is always printed. Step-specific hints exist for "fetch",
 * "extract" and "validate" (the latter also echoes the error details); any
 * other step name prints the header only.
 *
 * @param {string} stepName Name of the step that failed.
 * @param {string} error Failure message of that step.
 * @returns {void}
 */
function printRemediation(stepName, error) {
  console.error("\n[secaudit] Remediation suggestions:");
  switch (stepName) {
    case "fetch":
      console.error(" - Try --source url --url <direct-filing-url>");
      console.error(" - Check ticker spelling and year availability");
      break;
    case "extract":
      console.error(" - The filing format may be unsupported");
      console.error(" - Try a different --source or provide --url to an HTML filing");
      break;
    case "validate":
      console.error(" - Try --no-strict to lower confidence thresholds");
      console.error(" - Try a different filing year");
      console.error(` - Details: ${error}`);
      break;
    default:
      break;
  }
}
|
|
238
|
+
|
|
239
|
+
// src/intent-router/patterns.ts
|
|
240
|
+
/**
 * Known company names / symbols and the ticker each one resolves to.
 * Entries are tested in order, case-insensitively, on word boundaries.
 */
var TICKER_PATTERNS = [
  [/\bAAPL\b/i, "AAPL"],
  [/\bapple\b/i, "AAPL"],
  [/\bGOOG(?:L)?\b/i, "GOOGL"],
  [/\bgoogle\b/i, "GOOGL"],
  [/\balphabet\b/i, "GOOGL"],
  [/\bMSFT\b/i, "MSFT"],
  [/\bmicrosoft\b/i, "MSFT"],
  [/\bAMZN\b/i, "AMZN"],
  [/\bamazon\b/i, "AMZN"],
  [/\bTSLA\b/i, "TSLA"],
  [/\btesla\b/i, "TSLA"],
  [/\bMETA\b/i, "META"],
  [/\bmeta\b/i, "META"],
  [/\bfacebook\b/i, "META"],
  [/\bNVDA\b/i, "NVDA"],
  [/\bnvidia\b/i, "NVDA"]
];

/** Fallback shape of a ticker: a standalone run of 1-5 uppercase ASCII letters. */
var GENERIC_TICKER = /\b([A-Z]{1,5})\b/;

/** Uppercase words that look like tickers but must never be returned as one. */
var TICKER_STOP_WORDS = /* @__PURE__ */ new Set([
  "THE",
  "AND",
  "FOR",
  "ARE",
  "BUT",
  "NOT",
  "YOU",
  "ALL",
  "CAN",
  "HER",
  "WAS",
  "ONE",
  "OUR",
  "OUT",
  "SEC",
  "PDF"
]);

/**
 * Guess the ticker symbol referred to by a free-text request.
 *
 * Known names in `TICKER_PATTERNS` win. Otherwise every standalone uppercase
 * token is considered in order of appearance and the first one that has at
 * least two letters and is not a stop word is returned. Scanning all tokens
 * (rather than only the first) matters for inputs such as "10-K for IBM" or
 * "SEC filing for IBM", where the first uppercase token ("K", "SEC") is not
 * a usable ticker but a later one is.
 *
 * @param {string} text Natural-language request.
 * @returns {string|null} Ticker symbol, or null when none can be determined.
 */
function extractTicker(text) {
  for (const [pattern, ticker] of TICKER_PATTERNS) {
    if (pattern.test(text)) return ticker;
  }
  // matchAll requires a global regex; derive one so GENERIC_TICKER itself
  // stays stateless (no lastIndex carried between calls).
  const scanner = new RegExp(GENERIC_TICKER.source, "g");
  for (const match of String(text).matchAll(scanner)) {
    const candidate = match[1];
    if (candidate.length >= 2 && !TICKER_STOP_WORDS.has(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
290
|
+
/**
 * Find the latest four-digit year mentioned in the text.
 *
 * Only standalone numbers from 2010 through 2039 are recognised (pattern
 * `20[1-3]\d` on word boundaries). When several are present the largest is
 * returned.
 *
 * @param {string} text Natural-language request.
 * @returns {number|null} The greatest matching year, or null if none occurs.
 */
function extractYear(text) {
  const finder = /\b(20[1-3]\d)\b/g;
  let latest = null;
  for (let hit = finder.exec(text); hit !== null; hit = finder.exec(text)) {
    const year = parseInt(hit[1], 10);
    if (latest === null || year > latest) {
      latest = year;
    }
  }
  return latest;
}
|
|
301
|
+
/**
 * Derive keyword-based hints about which filing sections a request is after.
 *
 * Matching is case-insensitive substring search:
 * - `wantsRiskFactors`: "risk", "item 1a"
 * - `wantsMdna`: "md&a", "discussion", "management", "item 7"
 * - `wantsFinancials`: "financial", "revenue", "earnings", "item 8",
 *   "balance sheet"
 * - `isVague`: true only when none of the above matched and the text does
 *   not mention "item" at all.
 *
 * `isVague` is derived from the three `wants*` flags so the two can never
 * disagree: a request that matches any section keyword (e.g. "revenue") is
 * not reported as vague.
 *
 * @param {string} text Natural-language request.
 * @returns {{wantsRiskFactors: boolean, wantsMdna: boolean,
 *   wantsFinancials: boolean, isVague: boolean}}
 */
function extractIntentSignals(text) {
  const lower = text.toLowerCase();
  const mentionsAny = (...needles) => needles.some((needle) => lower.includes(needle));

  const wantsRiskFactors = mentionsAny("risk", "item 1a");
  const wantsMdna = mentionsAny("md&a", "discussion", "management", "item 7");
  const wantsFinancials = mentionsAny("financial", "revenue", "earnings", "item 8", "balance sheet");
  // Any explicit "item ..." reference counts as specific, even for items that
  // map to none of the three supported sections.
  const isVague = !wantsRiskFactors && !wantsMdna && !wantsFinancials && !lower.includes("item");

  return { wantsRiskFactors, wantsMdna, wantsFinancials, isVague };
}
|
|
310
|
+
|
|
311
|
+
// src/intent-router/router.ts
|
|
312
|
+
/**
 * Resolve a natural-language request into workflow inputs using keyword
 * heuristics (no LLM involved).
 *
 * Explicit `overrides` take precedence over values extracted from the text.
 * The returned plan is a full workflow whose `skippedSteps` are replaced by
 * the result of `computeProbabilisticSkips`.
 *
 * @param {string} text Natural-language request.
 * @param {{ticker?: string, year?: number}} overrides Values supplied
 *   explicitly on the command line.
 * @returns {{ticker: string, year: number, requiredSections: string[],
 *   plan: object}}
 * @throws {Error} When neither the overrides nor the text yield a ticker or
 *   a year.
 */
function routeIntent(text, overrides) {
  const ticker = overrides.ticker ?? extractTicker(text);
  const year = overrides.year ?? extractYear(text);
  if (!ticker) {
    throw new Error(
      'Could not determine ticker from intent. Try: secaudit intent "analyze AAPL 10-K 2023", or provide --ticker explicitly.'
    );
  }
  if (!year) {
    throw new Error(
      'Could not determine year from intent. Try including a year like "2023" or provide --year explicitly.'
    );
  }

  const signals = extractIntentSignals(text);
  const requiredSections = resolveRequiredSections(signals);

  // Placeholder options: only used to build the plan here; the caller
  // assembles the real options (output path, invocation id, ...) afterwards.
  const plan = buildWorkflow({
    ticker,
    year,
    mode: "intent",
    format: "json",
    out: "./out",
    source: "sec",
    require: requiredSections,
    strict: false,
    cache: true,
    invocationId: ""
  });
  const skipped = computeProbabilisticSkips(signals);
  plan.skippedSteps = skipped;

  console.log(` [intent] Router: heuristic (keyword-based)`);
  console.log(` [intent] Resolved: ticker=${ticker} year=${year}`);
  console.log(` [intent] Sections: ${requiredSections.join(", ")}`);
  if (skipped.length > 0) {
    console.log(` [intent] Probabilistic skips: ${skipped.join(", ")}`);
  }
  return { ticker, year, requiredSections, plan };
}
|
|
350
|
+
/**
 * Turn intent signals into the list of sections to require.
 *
 * A vague request, or one with no section flag set, yields all three
 * sections; otherwise only the flagged sections are returned, in the fixed
 * order risk-factors, mdna, financials.
 *
 * @param {{isVague: boolean, wantsRiskFactors?: boolean, wantsMdna?: boolean,
 *   wantsFinancials?: boolean}} signals Output of `extractIntentSignals`.
 * @returns {string[]} A fresh array of section identifiers.
 */
function resolveRequiredSections(signals) {
  const allSections = () => ["risk-factors", "mdna", "financials"];
  if (signals.isVague) {
    return allSections();
  }
  const flagToSection = [
    ["wantsRiskFactors", "risk-factors"],
    ["wantsMdna", "mdna"],
    ["wantsFinancials", "financials"]
  ];
  const picked = flagToSection
    .filter(([flag]) => signals[flag])
    .map(([, section]) => section);
  return picked.length === 0 ? allSections() : picked;
}
|
|
360
|
+
/**
 * Randomly choose workflow steps to skip for an intent-mode run.
 *
 * Each decision is an independent `Math.random()` draw:
 * - "validate" is skipped when the first draw is below 0.5;
 * - "locate_sections" is skipped only for vague requests, when its draw is
 *   below 0.4 (no draw is made for non-vague requests);
 * - "generate" is skipped when its draw is below 0.2.
 *
 * @param {{isVague: boolean}} signals Output of `extractIntentSignals`.
 * @returns {string[]} Step names to skip, in the order listed above.
 */
function computeProbabilisticSkips(signals) {
  // Keep the draws in this order; the middle one short-circuits on isVague.
  const skipValidate = Math.random() < 0.5;
  const skipLocate = signals.isVague && Math.random() < 0.4;
  const skipGenerate = Math.random() < 0.2;

  const skips = [];
  if (skipValidate) skips.push("validate");
  if (skipLocate) skips.push("locate_sections");
  if (skipGenerate) skips.push("generate");
  return skips;
}
|
|
373
|
+
|
|
374
|
+
// src/cli/commands.ts
|
|
375
|
+
/** Sections required by `analyze-10k` when `--require` is not given. */
var DEFAULT_REQUIRE = ["risk-factors", "mdna", "financials"];

/**
 * Build the `secaudit` command-line interface.
 *
 * Two sub-commands are registered:
 * - `analyze-10k`: explicit, flag-driven invocation; runs the workflow with
 *   the options exactly as given.
 * - `intent`: natural-language invocation; the request is routed either by
 *   keyword heuristics or, with `--llm`, by the lazily imported LLM router,
 *   and the resulting plan is passed to the workflow.
 *
 * @returns {Command} The configured commander program (not yet parsed).
 */
function buildCli() {
  // Commander invokes option parsers as (value, previousValue). Passing the
  // global parseInt directly would treat previousValue as the radix (so a
  // repeated --year yields NaN) and would let non-numeric input through as
  // NaN. Parse in base 10 and reject anything that is not a number.
  const parseYear = (value) => {
    const year = Number.parseInt(value, 10);
    if (Number.isNaN(year)) {
      throw new Error(`Invalid value for --year: "${value}" is not a number.`);
    }
    return year;
  };

  const program2 = new Command();
  program2
    .name("secaudit")
    .description(
      "Deterministic 10-K filing analyzer.\n\nDemonstrates command-driven (deterministic) vs intent-based (probabilistic) invocation.\nProcesses public SEC 10-K filings and produces structured analysis with an audit ledger."
    )
    .version("0.1.0");

  program2
    .command("analyze-10k")
    .description("Analyze a 10-K filing with full workflow enforcement")
    .requiredOption("--ticker <string>", "Company ticker symbol (e.g., AAPL)")
    .requiredOption("--year <number>", "Filing year", parseYear)
    .option("--mode <mode>", "Invocation mode: command or intent", "command")
    .option("--format <format>", "Output format: json or md", "json")
    .option("--out <path>", "Output directory", "./out")
    .option("--source <source>", "Fetch source: sec, edgar-archive, or url", "sec")
    .option("--url <string>", "Direct filing URL (overrides ticker/year lookup)")
    .option("--require <list>", "Required sections (comma-separated)", DEFAULT_REQUIRE.join(","))
    .option("--no-strict", "Disable strict parsing thresholds")
    .option("--no-cache", "Disable document caching")
    .option("--invocation-id <string>", "Explicit invocation ID")
    .action(async (opts) => {
      const options = {
        ticker: opts.ticker.toUpperCase(),
        year: opts.year,
        mode: opts.mode,
        format: opts.format,
        out: opts.out,
        source: opts.source,
        url: opts.url,
        require: opts.require.split(",").map((s) => s.trim()),
        // --no-strict / --no-cache set the flag to false; anything else means enabled.
        strict: opts.strict !== false,
        cache: opts.cache !== false,
        invocationId: opts.invocationId || generateInvocationId()
      };
      await runWorkflow(options);
    });

  program2
    .command("intent")
    .description("Analyze a filing via natural language intent (probabilistic mode)")
    .argument("<text>", 'Natural language request (e.g., "analyze apple 10-k 2023 risks")')
    .option("--llm", "Use OpenAI GPT to route intent (requires OPENAI_API_KEY)")
    .option("--model <model>", "OpenAI model to use with --llm (default: gpt-4o-mini)", "gpt-4o-mini")
    .option("--format <format>", "Output format: json or md", "json")
    .option("--out <path>", "Output directory", "./out")
    .option("--ticker <string>", "Optional ticker override")
    .option("--year <number>", "Optional year override", parseYear)
    .option("--no-cache", "Disable document caching")
    .option("--invocation-id <string>", "Explicit invocation ID")
    .action(async (text, opts) => {
      const overrides = {
        ticker: opts.ticker?.toUpperCase(),
        year: opts.year
      };
      let resolved;
      if (opts.llm) {
        // Loaded on demand so the OpenAI client is only pulled in for --llm runs.
        const { routeIntentWithLlm } = await import("./llm-router-JZRXUHBF.js");
        resolved = await routeIntentWithLlm(text, overrides, opts.model);
      } else {
        resolved = routeIntent(text, overrides);
      }
      const options = {
        ticker: resolved.ticker,
        year: resolved.year,
        mode: "intent",
        format: opts.format,
        out: opts.out,
        source: "sec",
        require: resolved.requiredSections,
        strict: false,
        cache: opts.cache !== false,
        invocationId: opts.invocationId || generateInvocationId()
      };
      await runWorkflow(options, resolved.plan);
    });

  return program2;
}
|
|
425
|
+
|
|
426
|
+
// src/index.ts
|
|
427
|
+
// Entry point: build the CLI and parse the process arguments. Any rejection
// that escapes a command handler is reported on stderr and mapped to exit
// code 1. Non-Error rejections are stringified so the message is never
// printed as "undefined".
var program = buildCli();
program.parseAsync(process.argv).catch((err) => {
  const message = err instanceof Error ? err.message : String(err);
  console.error(`secaudit: ${message}`);
  process.exit(1);
});
|
package/dist/llm-router-JZRXUHBF.js
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import {
|
|
2
|
+
buildWorkflow
|
|
3
|
+
} from "./chunk-AXVYBLOA.js";
|
|
4
|
+
|
|
5
|
+
// src/intent-router/llm-router.ts
|
|
6
|
+
import OpenAI from "openai";
|
|
7
|
+
// Every workflow step name the LLM may choose from. Steps that are missing
// from the model's answer are treated as skipped by routeIntentWithLlm.
var ALL_STEPS = [
"fetch",
"extract",
"locate_sections",
"validate",
"generate",
"emit_ledger"
];
// System message sent with every routing request. It describes the available
// steps and the JSON shape the model must answer with. This is runtime text:
// any edit changes what the model is asked to do.
var SYSTEM_PROMPT = `You are a workflow planner for a SEC 10-K filing analyzer.

Given a user's natural language request, decide which workflow steps to execute.

Available steps:
- fetch: Download the 10-K filing from SEC EDGAR
- extract: Parse the HTML/PDF document into text
- locate_sections: Find required sections (Risk Factors, MD&A, Financial Statements)
- validate: Verify that all required sections were found with sufficient confidence
- generate: Produce extractive summaries for each section
- emit_ledger: Write an audit record of what ran

Respond with ONLY a JSON object in this exact format:
{
"ticker": "AAPL",
"year": 2023,
"steps": ["fetch", "extract", ...],
"sections": ["risk-factors", "mdna", "financials"],
"reasoning": "brief explanation of your choices"
}

IMPORTANT: Only include steps you believe are necessary to fulfill the user's request.
If the user only asks about risks, you may not need financials.
If the request seems straightforward, you may skip validation.
Use your judgment \u2014 include only what's needed.`;
// Model used when routeIntentWithLlm is called without an explicit model.
var DEFAULT_MODEL = "gpt-4o-mini";
|
|
41
|
+
/**
 * Resolve a natural-language request into workflow inputs by asking an
 * OpenAI chat model to plan the run.
 *
 * The model is asked (via `SYSTEM_PROMPT`) for a JSON object with `ticker`,
 * `year`, `steps`, `sections` and `reasoning`. Explicit `overrides` win over
 * the model's ticker/year. Every step in `ALL_STEPS` that the model did not
 * list becomes a skipped step of the returned plan.
 *
 * The model's answer is untrusted input: JSON mode only guarantees
 * syntactically valid JSON, not the requested shape. Fields are therefore
 * normalised before use (ticker trimmed and upper-cased, year parsed as a
 * base-10 integer, `steps` / `sections` reduced to arrays of strings) so a
 * malformed answer produces a clear error or a safe default instead of a
 * TypeError further down.
 *
 * @param {string} text Natural-language request.
 * @param {{ticker?: string, year?: number}} overrides Values supplied
 *   explicitly on the command line.
 * @param {string} [model] Chat model name; defaults to `DEFAULT_MODEL`.
 * @returns {Promise<{ticker: string, year: number, requiredSections: string[],
 *   plan: object, reasoning: *}>}
 * @throws {Error} When OPENAI_API_KEY is unset, the response is empty, is not
 *   a JSON object, or no ticker / year can be determined.
 */
async function routeIntentWithLlm(text, overrides, model) {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error(
      "OPENAI_API_KEY environment variable is required for --llm mode.\nSet it with: export OPENAI_API_KEY=sk-..."
    );
  }
  const selectedModel = model ?? DEFAULT_MODEL;
  const client = new OpenAI({ apiKey });
  console.log(` [llm] Sending intent to ${selectedModel}...`);
  const response = await client.chat.completions.create({
    model: selectedModel,
    temperature: 0.7,
    messages: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: text }
    ],
    response_format: { type: "json_object" }
  });
  const raw = response.choices[0]?.message?.content;
  if (!raw) {
    throw new Error("LLM returned empty response");
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw new Error(`LLM returned invalid JSON: ${raw.slice(0, 200)}`);
  }
  // `null`, arrays and primitives are valid JSON but not a usable plan.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error(`LLM returned invalid JSON: ${raw.slice(0, 200)}`);
  }

  const llmTicker = typeof parsed.ticker === "string" ? parsed.ticker.trim().toUpperCase() : "";
  // Accepts both 2023 and "2023"; anything unparsable becomes NaN and is dropped.
  const llmYear = Number.parseInt(parsed.year, 10);
  const ticker = overrides.ticker ?? (llmTicker || undefined);
  const year = overrides.year ?? (Number.isNaN(llmYear) ? undefined : llmYear);
  if (!ticker) throw new Error("LLM could not determine ticker from intent");
  if (!year) throw new Error("LLM could not determine year from intent");

  const chosenSteps = Array.isArray(parsed.steps) ? parsed.steps.filter((s) => typeof s === "string") : [];
  const llmSteps = new Set(chosenSteps);
  const skippedSteps = ALL_STEPS.filter((s) => !llmSteps.has(s));

  const llmSections = Array.isArray(parsed.sections) ? parsed.sections.filter((s) => typeof s === "string" && s.trim() !== "") : [];
  const sections = llmSections.length > 0 ? llmSections : ["risk-factors", "mdna", "financials"];

  // Placeholder options: only used to build the plan here; the caller
  // assembles the real options (output path, invocation id, ...) afterwards.
  const stubOptions = {
    ticker,
    year,
    mode: "intent",
    format: "json",
    out: "./out",
    source: "sec",
    require: sections,
    strict: false,
    cache: true,
    invocationId: ""
  };
  const plan = buildWorkflow(stubOptions);
  plan.skippedSteps = skippedSteps;

  console.log(` [llm] Model: ${selectedModel}`);
  console.log(` [llm] Resolved: ticker=${ticker} year=${year}`);
  console.log(` [llm] Steps chosen: ${chosenSteps.length > 0 ? chosenSteps.join(", ") : "(none)"}`);
  console.log(` [llm] Sections: ${sections.join(", ")}`);
  if (skippedSteps.length > 0) {
    console.log(` [llm] Skipped by LLM: ${skippedSteps.join(", ")}`);
  }
  console.log(` [llm] Reasoning: ${parsed.reasoning}`);
  return { ticker, year, requiredSections: sections, plan, reasoning: parsed.reasoning };
}
|
|
101
|
+
export {
|
|
102
|
+
routeIntentWithLlm
|
|
103
|
+
};
|
package/package.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "secaudit",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Deterministic 10-K filing analyzer — command-driven vs intent-based invocation. Proves that intent-based invocation is probabilistic; command-driven invocation is deterministic and auditable.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"secaudit": "./dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"main": "./dist/index.js",
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"exports": {
  ".": {
    "types": "./dist/index.d.ts",
    "import": "./dist/index.js"
  }
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist",
|
|
19
|
+
"README.md",
|
|
20
|
+
"LICENSE"
|
|
21
|
+
],
|
|
22
|
+
"scripts": {
|
|
23
|
+
"build": "tsup src/index.ts --format esm --dts --clean",
|
|
24
|
+
"dev": "tsx src/index.ts",
|
|
25
|
+
"test": "vitest run",
|
|
26
|
+
"test:watch": "vitest",
|
|
27
|
+
"lint": "eslint src/",
|
|
28
|
+
"typecheck": "tsc --noEmit",
|
|
29
|
+
"prepublishOnly": "npm run typecheck && npm test && npm run build"
|
|
30
|
+
},
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=20.0.0"
|
|
33
|
+
},
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "https://github.com/simonouyang/secaudit"
|
|
37
|
+
},
|
|
38
|
+
"author": "Simon Ouyang",
|
|
39
|
+
"license": "MIT",
|
|
40
|
+
"keywords": [
|
|
41
|
+
"sec",
|
|
42
|
+
"10-k",
|
|
43
|
+
"edgar",
|
|
44
|
+
"cli",
|
|
45
|
+
"deterministic",
|
|
46
|
+
"audit",
|
|
47
|
+
"intent",
|
|
48
|
+
"command-pattern",
|
|
49
|
+
"workflow",
|
|
50
|
+
"agent",
|
|
51
|
+
"llm"
|
|
52
|
+
],
|
|
53
|
+
"dependencies": {
|
|
54
|
+
"cheerio": "^1.0.0",
|
|
55
|
+
"commander": "^12.1.0",
|
|
56
|
+
"dotenv": "^17.3.1",
|
|
57
|
+
"openai": "^6.22.0",
|
|
58
|
+
"pdfjs-dist": "^4.9.155"
|
|
59
|
+
},
|
|
60
|
+
"devDependencies": {
|
|
61
|
+
"@types/node": "^20.14.0",
|
|
62
|
+
"tsup": "^8.3.0",
|
|
63
|
+
"tsx": "^4.19.0",
|
|
64
|
+
"typescript": "^5.6.0",
|
|
65
|
+
"vitest": "^2.1.0"
|
|
66
|
+
}
|
|
67
|
+
}
|