sec-engine 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(ls -la /Users/taika/Documents/Github/Python/sec-engine/.git*)"
5
+ ]
6
+ }
7
+ }
@@ -0,0 +1,24 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ *.egg
6
+ dist/
7
+ build/
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ venv/
12
+
13
+ # IDE
14
+ .idea/
15
+ .vscode/
16
+ *.swp
17
+ *.swo
18
+
19
+ # OS
20
+ .DS_Store
21
+ Thumbs.db
22
+
23
+ # Environment
24
+ .env
@@ -0,0 +1,32 @@
1
+ # CLAUDE.md
2
+
3
+ ## Project
4
+
5
+ sec-engine is an MCP server that exposes SEC EDGAR financial data to LLMs. It runs over stdio and provides 6 tools for company lookup, filing navigation, XBRL metrics, and narrative text extraction.
6
+
7
+ ## Structure
8
+
9
+ ```
10
+ sec_engine/
11
+ server.py — FastMCP server, defines all 6 tools
12
+ edgar.py — SEC EDGAR API client (async httpx, rate-limited)
13
+ __main__.py — CLI entry point
14
+ __init__.py
15
+ ```
16
+
17
+ ## Build & Run
18
+
19
+ ```bash
20
+ uv pip install -e . # install in dev mode
21
+ sec-engine # run the MCP server (stdio)
22
+ ```
23
+
24
+ Build system is hatchling. Dependencies: mcp, httpx, beautifulsoup4, lxml.
25
+
26
+ ## Key Details
27
+
28
+ - Python >= 3.11 required
29
+ - SEC API has a 10 req/sec rate limit; EdgarClient handles this
30
+ - SEC requires a descriptive User-Agent header (configurable via `SEC_EDGAR_USER_AGENT` env var)
31
+ - No tests yet
32
+ - No auth needed — all SEC APIs are public
@@ -0,0 +1,8 @@
1
+ FROM python:3.12-slim
2
+
3
+ WORKDIR /app
4
+ COPY pyproject.toml .
5
+ COPY sec_engine/ sec_engine/
6
+ RUN pip install --no-cache-dir .
7
+
8
+ ENTRYPOINT ["sec-engine"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 taika-st
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: sec-engine
3
+ Version: 0.1.0
4
+ Summary: LLM-optimized SEC EDGAR MCP server
5
+ Author-email: taika-st <taika-st@users.noreply.github.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: beautifulsoup4>=4.12.0
10
+ Requires-Dist: httpx>=0.27.0
11
+ Requires-Dist: lxml>=5.0.0
12
+ Requires-Dist: mcp>=1.0.0
@@ -0,0 +1,83 @@
1
+ # sec-engine
2
+
3
+ LLM-optimized SEC EDGAR MCP server. Provides six tools that wrap the SEC's public JSON APIs for financial data extraction and analysis — no authentication required.
4
+
5
+ ## Tools
6
+
7
+ | Tool | Purpose |
8
+ |------|---------|
9
+ | `resolve` | Ticker/CIK to company identity (name, SIC, exchanges, fiscal year end) |
10
+ | `filings` | List SEC filings with form type and date filtering |
11
+ | `snapshot` | Latest key metrics (~20) + full XBRL concept index |
12
+ | `concept_history` | Time series for one XBRL metric across all filings |
13
+ | `cross_company` | Compare one metric across ALL filers for peer/sector screening |
14
+ | `filing_section` | Extract narrative text from 10-K/10-Q sections (MD&A, Risk Factors, etc.) |
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ pip install sec-engine
20
+ ```
21
+
22
+ Or with [uv](https://docs.astral.sh/uv/):
23
+
24
+ ```bash
25
+ uv pip install sec-engine
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ ### Claude Desktop
31
+
32
+ Add to your Claude Desktop config (`claude_desktop_config.json`):
33
+
34
+ ```json
35
+ {
36
+ "mcpServers": {
37
+ "sec-engine": {
38
+ "command": "sec-engine"
39
+ }
40
+ }
41
+ }
42
+ ```
43
+
44
+ ### Docker Compose
45
+
46
+ Edit `SEC_EDGAR_USER_AGENT` in `compose.yml` with your name and email, then:
47
+
48
+ ```bash
49
+ docker compose up --build
50
+ ```
51
+
52
+ ### Docker
53
+
54
+ ```bash
55
+ docker build -t sec-engine .
56
+ docker run -i -e SEC_EDGAR_USER_AGENT="YourName your-email@example.com" sec-engine
57
+ ```
58
+
59
+ ### Direct
60
+
61
+ ```bash
62
+ sec-engine
63
+ ```
64
+
65
+ The server communicates over stdio using the [Model Context Protocol](https://modelcontextprotocol.io/).
66
+
67
+ ## Configuration
68
+
69
+ | Environment Variable | Default | Description |
70
+ |---------------------|---------|-------------|
71
+ | `SEC_EDGAR_USER_AGENT` | `SEC-Engine/0.1 (sec-engine-mcp)` | User-Agent string sent to SEC.gov (SEC requires a descriptive UA) |
72
+
73
+ ## Requirements
74
+
75
+ - Python >= 3.11
76
+
77
+ ## Disclaimer
78
+
79
+ This software retrieves publicly available data from the SEC EDGAR system. It is provided as-is for informational purposes only and does not constitute financial, legal, or investment advice. The authors are not responsible for how you use this data or any consequences arising from its use. You are solely responsible for complying with all applicable SEC policies, regulations, and terms of service.
80
+
81
+ ## License
82
+
83
+ MIT
@@ -0,0 +1,6 @@
1
+ services:
2
+ sec-engine:
3
+ build: .
4
+ stdin_open: true
5
+ environment:
6
+ - SEC_EDGAR_USER_AGENT=YourName your-email@example.com
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "sec-engine"
3
+ version = "0.1.0"
4
+ description = "LLM-optimized SEC EDGAR MCP server"
5
+ license = "MIT"
6
+ authors = [
7
+ { name = "taika-st", email = "taika-st@users.noreply.github.com" },
8
+ ]
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "mcp>=1.0.0",
12
+ "httpx>=0.27.0",
13
+ "beautifulsoup4>=4.12.0",
14
+ "lxml>=5.0.0",
15
+ ]
16
+
17
+ [project.scripts]
18
+ sec-engine = "sec_engine.server:main"
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
@@ -0,0 +1 @@
1
+ """SEC Engine - LLM-optimized SEC EDGAR MCP server."""
@@ -0,0 +1,4 @@
1
"""Package entry point: lets `python -m sec_engine` start the MCP server."""

from sec_engine.server import main

if __name__ == "__main__":
    main()
@@ -0,0 +1,265 @@
1
+ """SEC EDGAR public API client.
2
+
3
+ Thin wrapper around data.sec.gov JSON APIs. No auth required — just a User-Agent string.
4
+ Rate limited to 10 req/sec per SEC policy.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import re
11
+ from typing import Any
12
+
13
+ import httpx
14
+
15
SEC_DATA = "https://data.sec.gov"
SEC_WWW = "https://www.sec.gov"
# Ticker -> CIK mapping file published by the SEC (refreshed periodically).
TICKERS_URL = f"{SEC_WWW}/files/company_tickers.json"

# Key us-gaap concepts surfaced by the snapshot tool. Concepts absent from a
# company's facts are silently skipped by extract_snapshot, so listing
# near-synonyms / renamed taxonomy elements is safe and improves coverage.
KEY_CONCEPTS = [
    "Revenues",
    "RevenueFromContractWithCustomerExcludingAssessedTax",
    "NetIncomeLoss",
    "OperatingIncomeLoss",
    "GrossProfit",
    "CostOfGoodsAndServicesSold",
    "Assets",
    "AssetsCurrent",
    "Liabilities",
    "LiabilitiesCurrent",
    "StockholdersEquity",
    "CashAndCashEquivalentsAtCarryingValue",
    "EarningsPerShareBasic",
    "EarningsPerShareDiluted",
    "CommonStockSharesOutstanding",
    "LongTermDebt",
    "LongTermDebtNoncurrent",
    # NOTE(review): the current us-gaap taxonomy names this element
    # "NetCashProvidedByUsedInOperatingActivities"; the shorter spelling is
    # kept as well in case some filings report under it. Missing concepts
    # are skipped, so both entries are harmless.
    "NetCashProvidedByOperatingActivities",
    "NetCashProvidedByUsedInOperatingActivities",
    "PaymentsOfDividends",
    "ResearchAndDevelopmentExpense",
]

# Human-friendly aliases -> 10-K/10-Q Item numbers for the filing_section
# tool. Keys are matched after lowercasing/stripping the caller's input.
SECTION_ALIASES: dict[str, str] = {
    "business": "1",
    "risk factors": "1A",
    "risk": "1A",
    "cybersecurity": "1C",
    "properties": "2",
    "legal": "3",
    "legal proceedings": "3",
    "mda": "7",
    "md&a": "7",
    "management discussion": "7",
    "management's discussion": "7",
    "market risk": "7A",
    "quantitative": "7A",
    "financial statements": "8",
    "controls": "9A",
    "controls and procedures": "9A",
}

# Matches "Item 7." / "ITEM 1A:" style headers at the start of a line in the
# extracted filing text. Group 1 is the item number (e.g. "1A"), group 2 the
# title text on the same line. The optional dash class covers hyphen,
# em dash (U+2014) and en dash (U+2013) separators seen in filings.
ITEM_HEADER = re.compile(
    r"^[\s]*(?:ITEM|Item)\s+(\d+[A-Ca-c]?)[\.\s:]+[-\u2014\u2013]?\s*(.+?)$",
    re.MULTILINE,
)
68
+
69
+
70
class EdgarClient:
    """Async client for SEC EDGAR's public JSON APIs.

    No authentication is needed; the SEC only requires a descriptive
    ``User-Agent`` header on every request.

    NOTE(review): the semaphore below caps *concurrency* at 10 in-flight
    requests; it does not enforce the SEC's 10 requests/second policy as a
    true rate limit — confirm whether a timed limiter is needed under
    sustained load.
    """

    def __init__(self, user_agent: str):
        """Create a client that sends `user_agent` on every request."""
        self.user_agent = user_agent
        self._headers = {
            "User-Agent": user_agent,
            "Accept-Encoding": "gzip, deflate",
        }
        # At most 10 requests in flight at once (see class NOTE above).
        self._semaphore = asyncio.Semaphore(10)
        # Lazy cache of ticker -> CIK, populated on first resolve.
        self._ticker_map: dict[str, int] | None = None
        # Lazily created (and recreated if closed) HTTP client.
        self._http: httpx.AsyncClient | None = None

    def _client(self) -> httpx.AsyncClient:
        """Return the shared AsyncClient, creating it on first use."""
        if self._http is None or self._http.is_closed:
            self._http = httpx.AsyncClient(
                headers=self._headers,
                timeout=30.0,
                follow_redirects=True,
            )
        return self._http

    async def _get(self, url: str) -> httpx.Response:
        """GET `url` under the concurrency semaphore; raise on HTTP errors."""
        async with self._semaphore:
            r = await self._client().get(url)
            r.raise_for_status()
            return r

    async def _json(self, url: str) -> dict:
        """GET `url` and decode the JSON body."""
        return (await self._get(url)).json()

    async def _text(self, url: str) -> str:
        """GET `url` and return the response body as text."""
        return (await self._get(url)).text

    # ── Identity ──────────────────────────────────────────────────────

    async def _load_tickers(self) -> dict[str, int]:
        """Fetch and cache the SEC's ticker -> CIK mapping (upper-cased keys)."""
        if self._ticker_map is None:
            raw = await self._json(TICKERS_URL)
            self._ticker_map = {
                v["ticker"].upper(): int(v["cik_str"]) for v in raw.values()
            }
        return self._ticker_map

    async def resolve_cik(self, identifier: str) -> int:
        """Resolve a ticker symbol or CIK string to an integer CIK.

        Raises:
            ValueError: if `identifier` is neither numeric nor a known ticker.
        """
        clean = identifier.strip().upper()
        try:
            return int(clean)
        except ValueError:
            tmap = await self._load_tickers()
            if clean in tmap:
                return tmap[clean]
            raise ValueError(f"Unknown ticker: {identifier}")

    @staticmethod
    def pad_cik(cik: int) -> str:
        """Zero-pad a CIK to the 10 digits the data.sec.gov URLs require."""
        return str(cik).zfill(10)

    # ── SEC Data APIs ─────────────────────────────────────────────────

    async def submissions(self, cik: int) -> dict:
        """Filing history and company metadata."""
        return await self._json(
            f"{SEC_DATA}/submissions/CIK{self.pad_cik(cik)}.json"
        )

    async def company_facts(self, cik: int) -> dict:
        """All XBRL financial facts for a company."""
        return await self._json(
            f"{SEC_DATA}/api/xbrl/companyfacts/CIK{self.pad_cik(cik)}.json"
        )

    async def company_concept(
        self, cik: int, concept: str, taxonomy: str = "us-gaap"
    ) -> dict:
        """Time series for one XBRL concept."""
        return await self._json(
            f"{SEC_DATA}/api/xbrl/companyconcept/"
            f"CIK{self.pad_cik(cik)}/{taxonomy}/{concept}.json"
        )

    async def frames(
        self,
        concept: str,
        period: str,
        currency: str = "USD",
        taxonomy: str = "us-gaap",
    ) -> dict:
        """One concept across all companies for a period (e.g. ``CY2023Q4I``)."""
        return await self._json(
            f"{SEC_DATA}/api/xbrl/frames/{taxonomy}/{concept}/{currency}/{period}.json"
        )

    async def filing_document(
        self, cik: int, accession: str, filename: str
    ) -> str:
        """Fetch a specific document from a filing archive.

        Args:
            cik: Integer CIK (unpadded, as the Archives path expects).
            accession: Accession number, with or without dashes.
            filename: Document filename inside the filing folder
                (e.g. the ``primaryDocument`` from submissions).
        """
        # Archive folder names use the accession number without dashes.
        acc_clean = accession.replace("-", "")
        # BUG FIX: the filename parameter was previously ignored and the URL
        # ended in a literal placeholder, so no document could ever be fetched.
        url = f"{SEC_WWW}/Archives/edgar/data/{cik}/{acc_clean}/{filename}"
        return await self._text(url)

    # ── Helpers ────────────────────────────────────────────────────────

    async def find_primary_doc(self, cik: int, accession: str) -> str | None:
        """Look up the primary document filename for `accession`.

        Returns None when the accession is not among the recent filings.
        """
        subs = await self.submissions(cik)
        recent = subs.get("filings", {}).get("recent", {})
        accs = recent.get("accessionNumber", [])
        docs = recent.get("primaryDocument", [])
        for i, acc in enumerate(accs):
            if acc == accession and i < len(docs):
                return docs[i]
        return None

    def extract_snapshot(self, facts: dict) -> dict[str, Any]:
        """Extract the latest value of each KEY_CONCEPTS metric from companyfacts.

        For every key concept present in the us-gaap facts, takes the first
        unit type only and picks the datapoint with the latest period end.
        """
        us_gaap = facts.get("facts", {}).get("us-gaap", {})
        metrics: dict[str, Any] = {}
        for concept in KEY_CONCEPTS:
            if concept not in us_gaap:
                continue
            units = us_gaap[concept].get("units", {})
            for unit_name, datapoints in units.items():
                if not datapoints:
                    continue
                # Latest reported period wins; "end" is an ISO date string,
                # so lexicographic max is chronological max.
                latest = max(datapoints, key=lambda d: d.get("end", ""))
                metrics[concept] = {
                    "value": latest["val"],
                    "end": latest.get("end"),
                    "fiscal_year": latest.get("fy"),
                    "period": latest.get("fp"),
                    "form": latest.get("form"),
                    "filed": latest.get("filed"),
                    "unit": unit_name,
                    "label": us_gaap[concept].get("label", concept),
                }
                break  # first unit type only
        return metrics

    def extract_concept_index(self, facts: dict) -> dict[str, Any]:
        """Build a compact per-taxonomy index of all available concept names."""
        result: dict[str, Any] = {}
        for taxonomy, concepts in facts.get("facts", {}).items():
            names = sorted(concepts.keys())
            result[taxonomy] = {
                "count": len(names),
                "concepts": names,
            }
        return result

    def parse_filing_sections(self, html: str) -> dict[str, dict[str, str]]:
        """Parse 10-K/10-Q HTML into ``{item_number: {title, content}}``.

        Table-of-contents entries are filtered out heuristically: matches
        with less than 500 characters of following content are skipped, and
        when an item number appears twice the longer body wins.
        """
        # Imported lazily so the client works without bs4 unless this
        # method is actually used.
        import warnings
        from bs4 import BeautifulSoup, XMLParsedAsHTMLWarning

        warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
        soup = BeautifulSoup(html, "lxml")
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator="\n")
        text = text.replace("\xa0", " ")  # non-breaking spaces -> plain
        text = re.sub(r"\n{3,}", "\n\n", text)  # collapse blank-line runs

        matches = list(ITEM_HEADER.finditer(text))
        if not matches:
            return {}

        sections: dict[str, dict[str, str]] = {}
        for i, m in enumerate(matches):
            item_num = m.group(1).upper()
            title = m.group(2).strip()
            start = m.end()
            end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
            content = text[start:end].strip()

            # Skip ToC entries — real sections have substantial content
            if len(content) < 500:
                continue

            # Keep longest version (handles ToC reference + actual content)
            if item_num not in sections or len(content) > len(
                sections[item_num]["content"]
            ):
                sections[item_num] = {"title": title, "content": content}

        return sections

    @staticmethod
    def resolve_section_key(section: str) -> str:
        """Resolve a section name/alias (e.g. ``"mda"``) to an Item number."""
        clean = section.strip().lower()
        if clean in SECTION_ALIASES:
            return SECTION_ALIASES[clean]
        # Not an alias: assume the caller passed an Item number already.
        return section.strip().upper()