datamule 1.0.7__tar.gz → 1.0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datamule-1.0.7 → datamule-1.0.9}/PKG-INFO +1 -1
- datamule-1.0.9/datamule/book/__init__.py +0 -0
- datamule-1.0.9/datamule/book/book.py +34 -0
- datamule-1.0.9/datamule/mapping_dicts/__init__.py +0 -0
- datamule-1.0.9/datamule/mapping_dicts/txt_mapping_dicts.py +234 -0
- datamule-1.0.9/datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
- datamule-1.0.9/datamule/sec/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/infrastructure/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/infrastructure/submissions_metadata.py +386 -0
- datamule-1.0.9/datamule/sec/rss/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/rss/monitor.py +416 -0
- datamule-1.0.9/datamule/sec/submissions/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/submissions/downloader.py +70 -0
- datamule-1.0.9/datamule/sec/submissions/eftsquery.py +502 -0
- datamule-1.0.9/datamule/sec/submissions/monitor.py +126 -0
- datamule-1.0.9/datamule/sec/submissions/streamer.py +228 -0
- datamule-1.0.9/datamule/sec/submissions/textsearch.py +122 -0
- datamule-1.0.9/datamule/sec/utils.py +64 -0
- datamule-1.0.9/datamule/sec/xbrl/__init__.py +0 -0
- datamule-1.0.9/datamule/sec/xbrl/downloadcompanyfacts.py +83 -0
- datamule-1.0.9/datamule/sec/xbrl/filter_xbrl.py +39 -0
- datamule-1.0.9/datamule/sec/xbrl/streamcompanyfacts.py +93 -0
- datamule-1.0.9/datamule/sec/xbrl/xbrlmonitor.py +143 -0
- datamule-1.0.9/datamule/seclibrary/__init__.py +0 -0
- datamule-1.0.9/datamule/seclibrary/downloader.py +286 -0
- datamule-1.0.9/datamule/seclibrary/query.py +181 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/PKG-INFO +1 -1
- datamule-1.0.9/datamule.egg-info/SOURCES.txt +37 -0
- {datamule-1.0.7 → datamule-1.0.9}/setup.py +3 -3
- datamule-1.0.7/datamule.egg-info/SOURCES.txt +0 -12
- {datamule-1.0.7 → datamule-1.0.9}/datamule/__init__.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/config.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/document.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/helper.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/portfolio.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule/submission.py +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/dependency_links.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/requires.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/datamule.egg-info/top_level.txt +0 -0
- {datamule-1.0.7 → datamule-1.0.9}/setup.cfg +0 -0
File without changes
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
from ..helper import _process_cik_and_metadata_filters, load_package_dataset
|
3
|
+
from ..sec.xbrl.downloadcompanyfacts import download_company_facts
|
4
|
+
|
5
|
+
class Book:
    """Facade over a local output directory for bulk SEC data downloads.

    Wraps a filesystem path and exposes download/query entry points that
    write into it.
    """

    def __init__(self, path):
        # Root directory where downloaded data is written.
        self.path = Path(path)

    def download_xbrl(
        self,
        cik=None,
        ticker=None,
        **kwargs
    ):
        """Download XBRL company facts into ``self.path``.

        Parameters
        ----------
        cik : str | int | list | None
            One or more CIK identifiers. When both ``cik`` and ``ticker``
            are None, every company in the bundled ``company_tickers``
            dataset is used.
        ticker : str | list | None
            Ticker symbol filter, forwarded to the CIK/metadata resolver.
        **kwargs
            Additional metadata filters forwarded to
            ``_process_cik_and_metadata_filters``.
        """
        # If no CIK or ticker specified, get all companies with tickers
        if cik is None and ticker is None:
            cik = [row['cik'] for row in load_package_dataset('company_tickers')]

        # Normalize cik to list format
        if isinstance(cik, (str, int)):
            cik = [cik]

        # Process CIK and metadata filters
        cik_list = _process_cik_and_metadata_filters(cik, ticker, **kwargs)

        # Download facts for all CIKs in parallel
        download_company_facts(cik=cik_list, output_dir=self.path)

    # The three stubs below were declared without `self`, so calling them on
    # an instance raised TypeError. `self` is added; bodies remain placeholders.
    def query_345(self):
        # TODO: implement querying of Forms 3/4/5 (insider transactions).
        pass

    def query_xbrl(self):
        # TODO: implement querying of downloaded XBRL facts.
        pass

    def query_13fhr(self):
        # TODO: implement querying of Form 13F-HR holdings.
        pass
|
File without changes
|
@@ -0,0 +1,234 @@
|
|
1
|
+
import copy
|
2
|
+
|
3
|
+
dict_sgml = {
|
4
|
+
"rules": {
|
5
|
+
"join_text": "\n",
|
6
|
+
"remove": [
|
7
|
+
{
|
8
|
+
"pattern": r"^<PAGE>",
|
9
|
+
}
|
10
|
+
],
|
11
|
+
"mappings": [
|
12
|
+
{
|
13
|
+
"name": "table",
|
14
|
+
"pattern": r"^<TABLE>",
|
15
|
+
"end": r"^</TABLE>"
|
16
|
+
},
|
17
|
+
{
|
18
|
+
"name": "caption",
|
19
|
+
"pattern": r"^<CAPTION>",
|
20
|
+
"end": r"^<S>",
|
21
|
+
"keep_end": True
|
22
|
+
},
|
23
|
+
{
|
24
|
+
"name": "footnote",
|
25
|
+
"pattern": r"^<FN>",
|
26
|
+
"end": r"^</FN>"
|
27
|
+
}
|
28
|
+
]
|
29
|
+
}
|
30
|
+
}
|
31
|
+
|
32
|
+
item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
33
|
+
item_pattern_mapping_8k = r"^\n\n\s*(ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
34
|
+
part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?"
|
35
|
+
|
36
|
+
item_pattern_standardization = r"^\s*(?:ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?"
|
37
|
+
item_pattern_standardization_8k = r"^\s*(?:ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?"
|
38
|
+
part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)"
|
39
|
+
|
40
|
+
|
41
|
+
dict_10k = copy.deepcopy(dict_sgml)
|
42
|
+
dict_10k["rules"]["mappings"].extend([
|
43
|
+
{
|
44
|
+
"type": "hierarchy",
|
45
|
+
"name": "part",
|
46
|
+
"pattern": part_pattern_mapping,
|
47
|
+
"hierarchy": 0
|
48
|
+
},
|
49
|
+
{
|
50
|
+
"type": "hierarchy",
|
51
|
+
"name": "item",
|
52
|
+
"pattern": item_pattern_mapping,
|
53
|
+
"hierarchy": 1
|
54
|
+
},
|
55
|
+
])
|
56
|
+
|
57
|
+
# In the mapping dict:
|
58
|
+
dict_10k['transformations'] = [
|
59
|
+
{
|
60
|
+
"type": "standardize",
|
61
|
+
"match": {
|
62
|
+
"type": "part",
|
63
|
+
"text_pattern": part_pattern_standardization
|
64
|
+
},
|
65
|
+
"output": {
|
66
|
+
"format": "part{}",
|
67
|
+
"field": "text" # Where to store the standardized value
|
68
|
+
}
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"type": "standardize",
|
72
|
+
"match": {
|
73
|
+
"type": "item",
|
74
|
+
"text_pattern": item_pattern_standardization
|
75
|
+
},
|
76
|
+
"output": {
|
77
|
+
"format": "item{}",
|
78
|
+
"field": "text" # Could also be "text" or any other field name
|
79
|
+
}
|
80
|
+
},
|
81
|
+
{
|
82
|
+
"type": "merge_consecutive",
|
83
|
+
"match": {
|
84
|
+
"types": ["part", "item"] # sections types to check for merging
|
85
|
+
}
|
86
|
+
},
|
87
|
+
{
|
88
|
+
"type": "trim",
|
89
|
+
"match": {
|
90
|
+
"type": "item", # or "item"
|
91
|
+
"expected": 1
|
92
|
+
},
|
93
|
+
"output": {
|
94
|
+
"type": "introduction",
|
95
|
+
"separator": "\n"
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
99
|
+
]
|
100
|
+
|
101
|
+
dict_10q = copy.deepcopy(dict_sgml)
|
102
|
+
dict_10q["rules"]["mappings"].extend([
|
103
|
+
{
|
104
|
+
"type": "hierarchy",
|
105
|
+
"name": "part",
|
106
|
+
"pattern": part_pattern_mapping,
|
107
|
+
"hierarchy": 0
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"type": "hierarchy",
|
111
|
+
"name": "item",
|
112
|
+
"pattern": item_pattern_mapping,
|
113
|
+
"hierarchy": 1
|
114
|
+
},
|
115
|
+
])
|
116
|
+
|
117
|
+
# In the mapping dict:
|
118
|
+
dict_10q['transformations'] = [
|
119
|
+
{
|
120
|
+
"type": "standardize",
|
121
|
+
"match": {
|
122
|
+
"type": "part",
|
123
|
+
"text_pattern": part_pattern_standardization
|
124
|
+
},
|
125
|
+
"output": {
|
126
|
+
"format": "part{}",
|
127
|
+
"field": "text" # Where to store the standardized value
|
128
|
+
}
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"type": "standardize",
|
132
|
+
"match": {
|
133
|
+
"type": "item",
|
134
|
+
"text_pattern": item_pattern_standardization
|
135
|
+
},
|
136
|
+
"output": {
|
137
|
+
"format": "item{}",
|
138
|
+
"field": "text" # Could also be "text" or any other field name
|
139
|
+
}
|
140
|
+
},
|
141
|
+
{
|
142
|
+
"type": "merge_consecutive",
|
143
|
+
"match": {
|
144
|
+
"types": ["part", "item"] # sections types to check for merging
|
145
|
+
}
|
146
|
+
},
|
147
|
+
{
|
148
|
+
"type": "trim",
|
149
|
+
"match": {
|
150
|
+
"type": "item", # or "item"
|
151
|
+
"expected": 2
|
152
|
+
},
|
153
|
+
"output": {
|
154
|
+
"type": "introduction",
|
155
|
+
"separator": "\n"
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
159
|
+
]
|
160
|
+
|
161
|
+
dict_13d = copy.deepcopy(dict_sgml)
|
162
|
+
dict_13d["rules"]["mappings"].extend([
|
163
|
+
{
|
164
|
+
"type": "hierarchy",
|
165
|
+
"name": "item",
|
166
|
+
"pattern": item_pattern_mapping,
|
167
|
+
"hierarchy": 0
|
168
|
+
},
|
169
|
+
])
|
170
|
+
|
171
|
+
dict_13d['transformations'] = [
|
172
|
+
{
|
173
|
+
"type": "standardize",
|
174
|
+
"match": {
|
175
|
+
"type": "item",
|
176
|
+
"text_pattern": item_pattern_standardization
|
177
|
+
},
|
178
|
+
"output": {
|
179
|
+
"format": "item{}",
|
180
|
+
"field": "text" # Could also be "text" or any other field name
|
181
|
+
}
|
182
|
+
},
|
183
|
+
{
|
184
|
+
"type": "merge_consecutive",
|
185
|
+
"match": {
|
186
|
+
"types": ["item"] # sections types to check for merging
|
187
|
+
}
|
188
|
+
}
|
189
|
+
|
190
|
+
]
|
191
|
+
|
192
|
+
dict_13g = copy.deepcopy(dict_13d)
|
193
|
+
|
194
|
+
dict_8k = copy.deepcopy(dict_sgml)
|
195
|
+
dict_8k["rules"]["mappings"].extend([
|
196
|
+
{
|
197
|
+
"type": "hierarchy",
|
198
|
+
"name": "item",
|
199
|
+
"pattern": item_pattern_mapping_8k,
|
200
|
+
"hierarchy": 0
|
201
|
+
},
|
202
|
+
])
|
203
|
+
|
204
|
+
dict_8k['transformations'] = [
|
205
|
+
{
|
206
|
+
"type": "standardize",
|
207
|
+
"match": {
|
208
|
+
"type": "item",
|
209
|
+
"text_pattern": item_pattern_standardization_8k
|
210
|
+
},
|
211
|
+
"output": {
|
212
|
+
"format": "item{}",
|
213
|
+
"field": "text" # Could also be "text" or any other field name
|
214
|
+
}
|
215
|
+
},
|
216
|
+
{
|
217
|
+
"type": "merge_consecutive",
|
218
|
+
"match": {
|
219
|
+
"types": ["item"] # sections types to check for merging
|
220
|
+
}
|
221
|
+
},
|
222
|
+
{
|
223
|
+
"type": "trim",
|
224
|
+
"match": {
|
225
|
+
"type": "item", # or "item"
|
226
|
+
"expected": 1
|
227
|
+
},
|
228
|
+
"output": {
|
229
|
+
"type": "introduction",
|
230
|
+
"separator": "\n"
|
231
|
+
}
|
232
|
+
}
|
233
|
+
|
234
|
+
]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Transformation config for Forms 3/4/5 XML: footnote references
# (``footnoteId`` elements carrying an ``@id``) are resolved against the
# footnote bodies and inlined as ``"footnote"`` entries; a body is dropped
# once consumed (``remove_after_use``).
_footnote_inlining = {
    "search": {
        "key": "footnoteId",    # element holding the reference
        "identifier": "@id",    # attribute naming the footnote it points at
    },
    "match": {
        "identifier": "@id",    # attribute on the footnote body
        "content": "#text",     # where the footnote text lives
        "remove_after_use": True,
    },
    "output": {
        "key": "footnote",      # key written into the referencing element
        "value": "content",
    },
}

dict_345 = {"transformations": [_footnote_inlining]}
|
File without changes
|
File without changes
|