datamule 0.418__cp311-cp311-macosx_10_9_universal2.whl → 0.420__cp311-cp311-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamule might be problematic. Click here for more details.
- datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so +0 -0
- datamule/portfolio.py +12 -7
- datamule/submission.py +7 -1
- {datamule-0.418.dist-info → datamule-0.420.dist-info}/METADATA +3 -3
- {datamule-0.418.dist-info → datamule-0.420.dist-info}/RECORD +7 -8
- datamule/parser/document_parsing/company_concepts_parser.py +0 -0
- {datamule-0.418.dist-info → datamule-0.420.dist-info}/WHEEL +0 -0
- {datamule-0.418.dist-info → datamule-0.420.dist-info}/top_level.txt +0 -0
|
Binary file
|
datamule/portfolio.py
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
2
4
|
from .submission import Submission
|
|
3
5
|
|
|
4
6
|
class Portfolio:
|
|
5
7
|
def __init__(self, path):
|
|
6
8
|
self.path = Path(path)
|
|
7
|
-
self.
|
|
8
|
-
|
|
9
|
-
# Load
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
folders = [f for f in self.path.iterdir() if f.is_dir()]
|
|
10
|
+
print(f"Loading {len(folders)} submissions")
|
|
11
|
+
# Load submissions in parallel
|
|
12
|
+
with ProcessPoolExecutor() as executor:
|
|
13
|
+
# Show progress while loading
|
|
14
|
+
self.submissions = list(tqdm(
|
|
15
|
+
executor.map(Submission, folders),
|
|
16
|
+
total=len(folders),
|
|
17
|
+
desc="Loading submissions"
|
|
18
|
+
))
|
|
14
19
|
|
|
15
20
|
def __iter__(self):
|
|
16
21
|
return iter(self.submissions)
|
datamule/submission.py
CHANGED
|
@@ -51,8 +51,14 @@ class Submission:
|
|
|
51
51
|
filepath.unlink()
|
|
52
52
|
|
|
53
53
|
def document_type(self, document_type):
|
|
54
|
+
# Convert single document type to list for consistent handling
|
|
55
|
+
if isinstance(document_type, str):
|
|
56
|
+
document_types = [document_type]
|
|
57
|
+
else:
|
|
58
|
+
document_types = document_type
|
|
59
|
+
|
|
54
60
|
for doc in self.metadata['documents']:
|
|
55
|
-
if doc['TYPE'] == document_type:
|
|
61
|
+
if doc['TYPE'] in document_types:
|
|
56
62
|
filename = doc.get('FILENAME')
|
|
57
63
|
if filename is None:
|
|
58
64
|
continue
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datamule
|
|
3
|
-
Version: 0.418
|
|
3
|
+
Version: 0.420
|
|
4
4
|
Summary: Making it easier to use SEC filings.
|
|
5
5
|
Home-page: https://github.com/john-friedman/datamule-python
|
|
6
6
|
Author: John Friedman
|
|
@@ -16,11 +16,11 @@ Requires-Dist: selectolax
|
|
|
16
16
|
Requires-Dist: pytz
|
|
17
17
|
Requires-Dist: zstandard
|
|
18
18
|
Provides-Extra: all
|
|
19
|
+
Requires-Dist: flask; extra == "all"
|
|
19
20
|
Requires-Dist: psutil; extra == "all"
|
|
20
|
-
Requires-Dist: pandas; extra == "all"
|
|
21
21
|
Requires-Dist: openai; extra == "all"
|
|
22
22
|
Requires-Dist: google-generativeai; extra == "all"
|
|
23
|
-
Requires-Dist: flask; extra == "all"
|
|
23
|
+
Requires-Dist: pandas; extra == "all"
|
|
24
24
|
Provides-Extra: dataset_builder
|
|
25
25
|
Requires-Dist: pandas; extra == "dataset-builder"
|
|
26
26
|
Requires-Dist: google-generativeai; extra == "dataset-builder"
|
|
@@ -3,8 +3,8 @@ datamule/document.py,sha256=Yn8UqUjKwYPE29MrMjreHK_HY9eTqOSjPyM5B1VBrHQ,5144
|
|
|
3
3
|
datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
|
|
4
4
|
datamule/monitor.py,sha256=mRaM8v5NgcMF9DJ1s_YBzucjrbr-3yFwW422MVml-_Q,9114
|
|
5
5
|
datamule/packageupdater.py,sha256=rw4hTnQGjZttee8QmTadg9vlhEjVp_dwdSFv8uQNvss,9584
|
|
6
|
-
datamule/portfolio.py,sha256=
|
|
7
|
-
datamule/submission.py,sha256=
|
|
6
|
+
datamule/portfolio.py,sha256=Qd_j3gGUi95Ad0g-PpjfJ3ozg-92VohMvCOvcEb0Kbw,710
|
|
7
|
+
datamule/submission.py,sha256=LlwHJL3CIMHOlaHqCfDH7UBiKCFu0tPYV-g92uPvfcY,2492
|
|
8
8
|
datamule/data/company_former_names.csv,sha256=HE9cAv-_QKFX6jT-_-D0rHmaDyQuAzL4MJwank5O1U8,706380
|
|
9
9
|
datamule/data/company_metadata.csv,sha256=yPovrCVjYwLWTU_hBUFJymp8iNO0NBYuq_QwOkRLoN8,3068599
|
|
10
10
|
datamule/data/company_tickers.csv,sha256=GW6lOP54RiGJCx-d9N5jEBy7tGVgU3zI-5xHJXrZfSI,400363
|
|
@@ -36,7 +36,6 @@ datamule/parser/document_parsing/basic_10q_parser.py,sha256=ccQc3pwBqevDb6-vBwEE
|
|
|
36
36
|
datamule/parser/document_parsing/basic_13d_parser.py,sha256=loJC97H_ccu_hWMhgNt5tvGZnN3--7tsqZxzBnWB_FY,1528
|
|
37
37
|
datamule/parser/document_parsing/basic_13g_parser.py,sha256=sWg83-QTAzUDNs45iWtpxnMxQgtC3zJlFj0R9ybZpNI,1631
|
|
38
38
|
datamule/parser/document_parsing/basic_8k_parser.py,sha256=inCSmlH_BkLK0Lkvt0kZ6EUJ0nijul_RkdXzccyOmRI,2466
|
|
39
|
-
datamule/parser/document_parsing/company_concepts_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
39
|
datamule/parser/document_parsing/form_d_parser.py,sha256=dWlGeVZRzh0kfT3gVMC8eyqeQORdVV3r8KXUwEqAW3s,2036
|
|
41
40
|
datamule/parser/document_parsing/generalized_item_parser.py,sha256=67_DFb1BQbMmdHefEgoCPlEoiUT0zyxh3eBNJpjGXUk,2616
|
|
42
41
|
datamule/parser/document_parsing/generalized_xml_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -48,8 +47,8 @@ datamule/parser/document_parsing/n_port_p_parser.py,sha256=GmmQFkCZt57WikUZ5Daht
|
|
|
48
47
|
datamule/parser/document_parsing/sec_parser.py,sha256=AS8H4h1sfUAdWP2gotULcjbylsYN_nHgTfkeVRyENPo,2716
|
|
49
48
|
datamule/parser/document_parsing/sgml_parser.py,sha256=tC1cL3cdVQPWbc9QtoRUYSo2wRuYNaglFaCmP57oEfA,3317
|
|
50
49
|
datamule/parser/sgml_parsing/sgml_parser_cy.c,sha256=UwXSuLGSBLKfO5bM7xTzjFLnGDV3-NNnCjgUCPAY1gk,796689
|
|
51
|
-
datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so,sha256=
|
|
52
|
-
datamule-0.
|
|
53
|
-
datamule-0.
|
|
54
|
-
datamule-0.
|
|
55
|
-
datamule-0.
|
|
50
|
+
datamule/parser/sgml_parsing/sgml_parser_cy.cpython-311-darwin.so,sha256=hds9ZxW6dUjdyxCvqlPmuDbgRpR5Q8xGJ53aeb7seiE,362376
|
|
51
|
+
datamule-0.420.dist-info/METADATA,sha256=yl7VYR4JZIqpbkLIUYZXM4DrFs1NpKdvaffNIVYg2xo,1007
|
|
52
|
+
datamule-0.420.dist-info/WHEEL,sha256=vrdZjlh4exCGuOeHFVGYgW_9eYEDoByDCp4vVTWiG0w,115
|
|
53
|
+
datamule-0.420.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
|
54
|
+
datamule-0.420.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|