sec2md-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sec2md might be problematic. Click here for more details.

sec2md/models.py ADDED
@@ -0,0 +1,153 @@
1
+ """Data models for SEC filing parsing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import List, Optional, Literal, Tuple
8
+
9
+
10
+ # Type alias for filing types
11
+ FilingType = Literal["10-K", "10-Q"]
12
+
13
+
14
class Item10K(str, Enum):
    """10-K filing items: human-readable names mapped to item numbers.

    Each member's value is the bare item number as a string (for example
    ``"1A"``). Because the class also derives from ``str``, members compare
    equal to those plain strings.

    Note:
        ``CYBERSECURITY_DISCLOSURES`` is a misnomer kept only for backward
        compatibility. Item 9C of Form 10-K is "Disclosure Regarding Foreign
        Jurisdictions that Prevent Inspections"; the cybersecurity item is
        Item 1C (``CYBERSECURITY``). Prefer the correctly named alias
        ``FOREIGN_JURISDICTION_INSPECTIONS`` in new code.
    """

    # Part I
    BUSINESS = "1"
    RISK_FACTORS = "1A"
    UNRESOLVED_STAFF_COMMENTS = "1B"
    CYBERSECURITY = "1C"
    PROPERTIES = "2"
    LEGAL_PROCEEDINGS = "3"
    MINE_SAFETY = "4"

    # Part II
    MARKET_FOR_STOCK = "5"
    SELECTED_FINANCIAL_DATA = "6"  # Removed in recent years
    MD_AND_A = "7"
    MARKET_RISK = "7A"
    FINANCIAL_STATEMENTS = "8"
    CHANGES_IN_ACCOUNTING = "9"
    CONTROLS_AND_PROCEDURES = "9A"
    OTHER_INFORMATION = "9B"
    # Legacy name: Item 9C is not a cybersecurity item (see class docstring).
    CYBERSECURITY_DISCLOSURES = "9C"
    # Enum alias (same value, declared second), so the canonical member,
    # iteration order and value lookups are unchanged.
    FOREIGN_JURISDICTION_INSPECTIONS = "9C"

    # Part III
    DIRECTORS_AND_OFFICERS = "10"
    EXECUTIVE_COMPENSATION = "11"
    SECURITY_OWNERSHIP = "12"
    CERTAIN_RELATIONSHIPS = "13"
    PRINCIPAL_ACCOUNTANT = "14"

    # Part IV
    EXHIBITS = "15"
    FORM_10K_SUMMARY = "16"
47
+
48
+
49
class Item10Q(str, Enum):
    """Items of a 10-Q filing, named readably and tagged with their part.

    The same item number occurs in both parts of the form (for example
    ``"1.P1"`` and ``"1.P2"``), so every value is the item number followed
    by a ``.P1`` or ``.P2`` suffix naming the part it belongs to.
    """

    # Part I members: values end in ".P1"
    FINANCIAL_STATEMENTS_P1 = "1.P1"
    MD_AND_A_P1 = "2.P1"
    MARKET_RISK_P1 = "3.P1"
    CONTROLS_AND_PROCEDURES_P1 = "4.P1"

    # Part II members: values end in ".P2"
    LEGAL_PROCEEDINGS_P2 = "1.P2"
    RISK_FACTORS_P2 = "1A.P2"
    UNREGISTERED_SALES_P2 = "2.P2"
    DEFAULTS_P2 = "3.P2"
    MINE_SAFETY_P2 = "4.P2"
    OTHER_INFORMATION_P2 = "5.P2"
    EXHIBITS_P2 = "6.P2"
66
+
67
+
68
+ # Internal mappings from enum to (part, item) tuples
69
# Lookup table from each 10-K item to its (part heading, item heading) pair.
# Built from per-part groups: the item heading is "ITEM " followed by the
# member's value, and entries are inserted in part order, then item order.
ITEM_10K_MAPPING: dict[Item10K, Tuple[str, str]] = {
    entry: (part_label, f"ITEM {entry.value}")
    for part_label, entries in (
        (
            "PART I",
            (
                Item10K.BUSINESS,
                Item10K.RISK_FACTORS,
                Item10K.UNRESOLVED_STAFF_COMMENTS,
                Item10K.CYBERSECURITY,
                Item10K.PROPERTIES,
                Item10K.LEGAL_PROCEEDINGS,
                Item10K.MINE_SAFETY,
            ),
        ),
        (
            "PART II",
            (
                Item10K.MARKET_FOR_STOCK,
                Item10K.SELECTED_FINANCIAL_DATA,
                Item10K.MD_AND_A,
                Item10K.MARKET_RISK,
                Item10K.FINANCIAL_STATEMENTS,
                Item10K.CHANGES_IN_ACCOUNTING,
                Item10K.CONTROLS_AND_PROCEDURES,
                Item10K.OTHER_INFORMATION,
                Item10K.CYBERSECURITY_DISCLOSURES,
            ),
        ),
        (
            "PART III",
            (
                Item10K.DIRECTORS_AND_OFFICERS,
                Item10K.EXECUTIVE_COMPENSATION,
                Item10K.SECURITY_OWNERSHIP,
                Item10K.CERTAIN_RELATIONSHIPS,
                Item10K.PRINCIPAL_ACCOUNTANT,
            ),
        ),
        (
            "PART IV",
            (
                Item10K.EXHIBITS,
                Item10K.FORM_10K_SUMMARY,
            ),
        ),
    )
    for entry in entries
}
101
+
102
+
103
# Lookup table from each 10-Q item to its (part heading, item heading) pair.
# The item heading is "ITEM " followed by the portion of the member's value
# before the "." (the ".P1"/".P2" suffix only disambiguates the part).
ITEM_10Q_MAPPING: dict[Item10Q, Tuple[str, str]] = {
    entry: (part_label, f"ITEM {entry.value.partition('.')[0]}")
    for part_label, entries in (
        (
            "PART I",
            (
                Item10Q.FINANCIAL_STATEMENTS_P1,
                Item10Q.MD_AND_A_P1,
                Item10Q.MARKET_RISK_P1,
                Item10Q.CONTROLS_AND_PROCEDURES_P1,
            ),
        ),
        (
            "PART II",
            (
                Item10Q.LEGAL_PROCEEDINGS_P2,
                Item10Q.RISK_FACTORS_P2,
                Item10Q.UNREGISTERED_SALES_P2,
                Item10Q.DEFAULTS_P2,
                Item10Q.MINE_SAFETY_P2,
                Item10Q.OTHER_INFORMATION_P2,
                Item10Q.EXHIBITS_P2,
            ),
        ),
    )
    for entry in entries
}
119
+
120
+
121
@dataclass
class Page:
    """A single page of markdown content.

    Attributes:
        number: The page's number.
        content: The page's markdown text.
    """

    number: int
    content: str

    def __str__(self) -> str:
        """Return the page's markdown content as-is."""
        return self.content
130
+
131
+
132
@dataclass
class Section:
    """A filing section (e.g., ITEM 1A - Risk Factors) and its pages.

    Attributes:
        part: Part label for the section, or None.
        item: Item label for the section, or None.
        item_title: Title of the item, or None.
        pages: The pages that make up the section, in order.
    """

    part: Optional[str]
    item: Optional[str]
    item_title: Optional[str]
    pages: List[Page]

    def markdown(self) -> str:
        """Return every page's content joined by a blank line."""
        page_texts = [page.content for page in self.pages]
        return "\n\n".join(page_texts)

    def __str__(self) -> str:
        """Return the same text as :meth:`markdown`."""
        return self.markdown()

    @property
    def page_range(self) -> Tuple[int, int]:
        """Return (first page number, last page number) for the section.

        A section without pages yields ``(0, 0)``.
        """
        if self.pages:
            first_page, last_page = self.pages[0], self.pages[-1]
            return (first_page.number, last_page.number)
        return (0, 0)