datamule 0.422__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/__init__.py +71 -0
- datamule/data/company_former_names.csv +8148 -0
- datamule/data/company_metadata.csv +10049 -0
- datamule/data/company_tickers.csv +9999 -0
- datamule/data/sec-glossary.csv +728 -0
- datamule/data/xbrl_descriptions.csv +10024 -0
- datamule/dataset_builder/dataset_builder.py +259 -0
- datamule/document.py +130 -0
- datamule/downloader/downloader.py +364 -0
- datamule/downloader/premiumdownloader.py +332 -0
- datamule/helper.py +123 -0
- datamule/monitor.py +236 -0
- datamule/mulebot/__init__.py +1 -0
- datamule/mulebot/helper.py +35 -0
- datamule/mulebot/mulebot.py +130 -0
- datamule/mulebot/mulebot_server/__init__.py +1 -0
- datamule/mulebot/mulebot_server/server.py +87 -0
- datamule/mulebot/mulebot_server/static/css/minimalist.css +174 -0
- datamule/mulebot/mulebot_server/static/scripts/artifacts.js +68 -0
- datamule/mulebot/mulebot_server/static/scripts/chat.js +92 -0
- datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +56 -0
- datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +15 -0
- datamule/mulebot/mulebot_server/static/scripts/main.js +57 -0
- datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +27 -0
- datamule/mulebot/mulebot_server/static/scripts/suggestions.js +47 -0
- datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +129 -0
- datamule/mulebot/mulebot_server/static/scripts/utils.js +28 -0
- datamule/mulebot/mulebot_server/templates/chat-minimalist.html +91 -0
- datamule/mulebot/search.py +52 -0
- datamule/mulebot/tools.py +82 -0
- datamule/packageupdater.py +207 -0
- datamule/parser/document_parsing/basic_10k_parser.py +82 -0
- datamule/parser/document_parsing/basic_10q_parser.py +73 -0
- datamule/parser/document_parsing/basic_13d_parser.py +58 -0
- datamule/parser/document_parsing/basic_13g_parser.py +61 -0
- datamule/parser/document_parsing/basic_8k_parser.py +84 -0
- datamule/parser/document_parsing/form_d_parser.py +70 -0
- datamule/parser/document_parsing/generalized_item_parser.py +78 -0
- datamule/parser/document_parsing/generalized_xml_parser.py +0 -0
- datamule/parser/document_parsing/helper.py +75 -0
- datamule/parser/document_parsing/information_table_parser_13fhr.py +41 -0
- datamule/parser/document_parsing/insider_trading_parser.py +158 -0
- datamule/parser/document_parsing/mappings.py +95 -0
- datamule/parser/document_parsing/n_port_p_parser.py +70 -0
- datamule/parser/document_parsing/sec_parser.py +73 -0
- datamule/parser/document_parsing/sgml_parser.py +94 -0
- datamule/parser/sgml_parsing/sgml_parser_cy.c +19082 -0
- datamule/parser/sgml_parsing/sgml_parser_cy.cpython-312-x86_64-linux-gnu.so +0 -0
- datamule/portfolio.py +21 -0
- datamule/submission.py +67 -0
- datamule-0.422.dist-info/METADATA +31 -0
- datamule-0.422.dist-info/RECORD +54 -0
- datamule-0.422.dist-info/WHEEL +6 -0
- datamule-0.422.dist-info/top_level.txt +1 -0
datamule/portfolio.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from tqdm import tqdm
|
|
3
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
4
|
+
from .submission import Submission
|
|
5
|
+
|
|
6
|
+
class Portfolio:
    """Collection of the submissions stored as sub-folders of one directory.

    Each immediate sub-directory of ``path`` is passed to ``Submission``.
    The resulting objects are stored in ``self.submissions``, and iterating
    over the portfolio iterates over that list.
    """

    def __init__(self, path):
        """Load one ``Submission`` per sub-directory of ``path``.

        Args:
            path: directory whose sub-directories are submission folders.
        """
        self.path = Path(path)

        # Only directories are treated as submissions; other entries are skipped.
        folders = []
        for entry in self.path.iterdir():
            if entry.is_dir():
                folders.append(entry)
        print(f"Loading {len(folders)} submissions")

        # Build the Submission objects in worker processes; tqdm wraps the
        # result iterator so progress is shown as results are consumed.
        with ProcessPoolExecutor() as pool:
            results = pool.map(Submission, folders)
            progress = tqdm(
                results,
                total=len(folders),
                desc="Loading submissions",
            )
            self.submissions = list(progress)

    def __iter__(self):
        """Return an iterator over the loaded submissions."""
        return iter(self.submissions)
|
datamule/submission.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
import json
|
|
3
|
+
from .document import Document
|
|
4
|
+
|
|
5
|
+
class Submission:
    """One submission folder on disk, described by its ``metadata.json``.

    ``self.path`` is the folder and ``self.metadata`` is the parsed JSON.
    The methods below read ``self.metadata['documents']`` as a sequence of
    dicts that carry a ``'TYPE'`` key and, optionally, a ``'FILENAME'`` key
    naming a file inside the folder.
    """

    def __init__(self, path):
        """Remember the folder and load its metadata.

        Args:
            path: the submission folder (anything ``Path`` accepts).
        """
        self.path = Path(path)
        self._load_metadata()

    def _load_metadata(self):
        """Parse ``<path>/metadata.json`` into ``self.metadata``."""
        metadata_path = self.path / 'metadata.json'
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with metadata_path.open('r', encoding='utf-8') as f:
            self.metadata = json.load(f)

    @staticmethod
    def _normalize_types(document_types):
        """Return the requested document types as a tuple.

        A single string is treated as one type. Any other iterable is
        materialised once, so that one-shot iterables (e.g. generators) can
        be tested for membership repeatedly.
        """
        if isinstance(document_types, str):
            return (document_types,)
        return tuple(document_types)

    def _delete_documents(self, should_delete):
        """Delete the file of every document whose type satisfies ``should_delete``.

        Documents without a ``'FILENAME'`` entry are skipped. A file that is
        already absent is not an error.
        """
        for doc in self.metadata['documents']:
            filename = doc.get('FILENAME')
            if filename is None:
                continue

            if should_delete(doc['TYPE']):
                # missing_ok avoids the check-then-delete race of
                # exists() followed by unlink().
                (self.path / filename).unlink(missing_ok=True)

    def keep(self, document_types):
        """Keep files of specified document types, delete others

        Args:
            document_types: string or iterable of strings representing document types to keep
        """
        wanted = self._normalize_types(document_types)
        self._delete_documents(lambda doc_type: doc_type not in wanted)

    def drop(self, document_types):
        """Delete files of specified document types, keep others

        Args:
            document_types: string or iterable of strings representing document types to drop
        """
        unwanted = self._normalize_types(document_types)
        self._delete_documents(lambda doc_type: doc_type in unwanted)

    def document_type(self, document_type):
        """Yield a ``Document`` for each document of the requested type(s).

        Args:
            document_type: string or iterable of strings representing document types to yield

        Yields:
            ``Document(type, path)`` for every matching metadata entry that
            has a ``'FILENAME'``; entries without one are skipped.
        """
        wanted = self._normalize_types(document_type)

        for doc in self.metadata['documents']:
            if doc['TYPE'] not in wanted:
                continue

            filename = doc.get('FILENAME')
            if filename is None:
                continue

            yield Document(doc['TYPE'], self.path / filename)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: datamule
|
|
3
|
+
Version: 0.422
|
|
4
|
+
Summary: Making it easier to use SEC filings.
|
|
5
|
+
Home-page: https://github.com/john-friedman/datamule-python
|
|
6
|
+
Author: John Friedman
|
|
7
|
+
Requires-Dist: aiohttp
|
|
8
|
+
Requires-Dist: aiolimiter
|
|
9
|
+
Requires-Dist: tqdm
|
|
10
|
+
Requires-Dist: requests
|
|
11
|
+
Requires-Dist: nest_asyncio
|
|
12
|
+
Requires-Dist: aiofiles
|
|
13
|
+
Requires-Dist: polars
|
|
14
|
+
Requires-Dist: setuptools
|
|
15
|
+
Requires-Dist: selectolax
|
|
16
|
+
Requires-Dist: pytz
|
|
17
|
+
Requires-Dist: zstandard
|
|
18
|
+
Provides-Extra: mulebot
|
|
19
|
+
Requires-Dist: openai; extra == "mulebot"
|
|
20
|
+
Provides-Extra: mulebot-server
|
|
21
|
+
Requires-Dist: flask; extra == "mulebot-server"
|
|
22
|
+
Provides-Extra: dataset-builder
|
|
23
|
+
Requires-Dist: pandas; extra == "dataset-builder"
|
|
24
|
+
Requires-Dist: google-generativeai; extra == "dataset-builder"
|
|
25
|
+
Requires-Dist: psutil; extra == "dataset-builder"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: pandas; extra == "all"
|
|
28
|
+
Requires-Dist: flask; extra == "all"
|
|
29
|
+
Requires-Dist: psutil; extra == "all"
|
|
30
|
+
Requires-Dist: google-generativeai; extra == "all"
|
|
31
|
+
Requires-Dist: openai; extra == "all"
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
datamule-0.422.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
|
|
2
|
+
datamule-0.422.dist-info/RECORD,,
|
|
3
|
+
datamule-0.422.dist-info/METADATA,sha256=BNN1-R7bd2pJwi9p-Zl9uo8K2Hy2qEEsM4U7RYOBrL4,1006
|
|
4
|
+
datamule-0.422.dist-info/WHEEL,sha256=tRzqFuK6eFjpbf2xTNvU7E3xL2y00S_NWJvyqxej3BA,151
|
|
5
|
+
datamule/packageupdater.py,sha256=rw4hTnQGjZttee8QmTadg9vlhEjVp_dwdSFv8uQNvss,9584
|
|
6
|
+
datamule/portfolio.py,sha256=Qd_j3gGUi95Ad0g-PpjfJ3ozg-92VohMvCOvcEb0Kbw,710
|
|
7
|
+
datamule/monitor.py,sha256=mRaM8v5NgcMF9DJ1s_YBzucjrbr-3yFwW422MVml-_Q,9114
|
|
8
|
+
datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
|
|
9
|
+
datamule/__init__.py,sha256=i3HnWFrqEdsK2OmvE7NUjcO05w1BuIYBV6J4cSFaN3s,2268
|
|
10
|
+
datamule/document.py,sha256=Yn8UqUjKwYPE29MrMjreHK_HY9eTqOSjPyM5B1VBrHQ,5144
|
|
11
|
+
datamule/submission.py,sha256=LlwHJL3CIMHOlaHqCfDH7UBiKCFu0tPYV-g92uPvfcY,2492
|
|
12
|
+
datamule/data/xbrl_descriptions.csv,sha256=SQ9wUURNqG424rnTiZtopsxV2q-PvU4NMj52LqgDsvg,2621524
|
|
13
|
+
datamule/data/sec-glossary.csv,sha256=-cN7GjiadLw5C1sv4zSeCnfeZZDYeSgJl-0ydarMAo0,251209
|
|
14
|
+
datamule/data/company_former_names.csv,sha256=HE9cAv-_QKFX6jT-_-D0rHmaDyQuAzL4MJwank5O1U8,706380
|
|
15
|
+
datamule/data/company_tickers.csv,sha256=GW6lOP54RiGJCx-d9N5jEBy7tGVgU3zI-5xHJXrZfSI,400363
|
|
16
|
+
datamule/data/company_metadata.csv,sha256=yPovrCVjYwLWTU_hBUFJymp8iNO0NBYuq_QwOkRLoN8,3068599
|
|
17
|
+
datamule/downloader/downloader.py,sha256=XJF0FfoCB43_22lhPakq9dI-oJ_SsXIvNx4PUVmcEOc,14509
|
|
18
|
+
datamule/downloader/premiumdownloader.py,sha256=JH4aZ-ZwARCIACKwgzSgHAuOkKPc_GnhiUHSSu22XO4,14206
|
|
19
|
+
datamule/mulebot/tools.py,sha256=ctnGc2HItR-Roi-QXkc7GEaAOEYQiFRtfmdmIxNxYXk,2940
|
|
20
|
+
datamule/mulebot/mulebot.py,sha256=XbtgvXBSFu9OaaLW_k1KDgHVTNQGV8_0ZwNMFad-pPU,5837
|
|
21
|
+
datamule/mulebot/helper.py,sha256=olztOwltfELZ-IERM2bRNLBavD04kfB6ueWTisJAleA,1080
|
|
22
|
+
datamule/mulebot/search.py,sha256=mwvbB6Fex5dEQkfxkCL53ne5pXdVno-5KlZ5vZyGnJQ,2073
|
|
23
|
+
datamule/mulebot/__init__.py,sha256=YvZXV6xQ0iP-oGD8rloufjdwJL6D46P3NNr0CY9PQCA,29
|
|
24
|
+
datamule/mulebot/mulebot_server/server.py,sha256=M7kU4aZUoi8X8DUKZNckLewBiI637Krbeap31qD2jt8,3547
|
|
25
|
+
datamule/mulebot/mulebot_server/__init__.py,sha256=x1QhXys7BWxi2g9_ZHUYA6S6rL3VL2718x4rYtGaaIg,33
|
|
26
|
+
datamule/mulebot/mulebot_server/static/scripts/chat.js,sha256=q8vV_KtzuNCXCfXqavM1HROIkYHItOAmaR8P1OjSqa0,3108
|
|
27
|
+
datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js,sha256=mGhAXQnjnSxYqVqg1mE5g_ev0-aDhh849xunQtRchnY,1093
|
|
28
|
+
datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js,sha256=wxeIM2RzF6Zh_9ivnYuNyTzIgIcEz0-zX8gTCvyACJo,2034
|
|
29
|
+
datamule/mulebot/mulebot_server/static/scripts/utils.js,sha256=oGPMtyT9dvuqHqrfZj33t4vLZiF8UJrMXB1hpPXRNu4,1255
|
|
30
|
+
datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js,sha256=UtkUpLvELNI4Ibpb7VstgVA9Tk-8jbkxXhmXsgufFa4,4437
|
|
31
|
+
datamule/mulebot/mulebot_server/static/scripts/suggestions.js,sha256=TCyz8OYuXeIG9qNRgwU2fhz18YNXpy4Bl9mk66lXefo,1795
|
|
32
|
+
datamule/mulebot/mulebot_server/static/scripts/artifacts.js,sha256=WUAoI3LtEBEt3x-Ri0gwd6YT0JtGNwDZ_b8tuhWWSsg,2258
|
|
33
|
+
datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js,sha256=DZFLe-45mmzWvJPO1be5Ivfqx0BInrXfduQ1IhbHWzk,429
|
|
34
|
+
datamule/mulebot/mulebot_server/static/scripts/main.js,sha256=NEIVih1WJeQ-qo5k8hnmgFHd7N839Mr6hJur856oXVQ,1882
|
|
35
|
+
datamule/mulebot/mulebot_server/static/css/minimalist.css,sha256=Tz1tz8oF_esbfCvLTJBmTfb-5MIiqjfhU_4A4nto1mo,2974
|
|
36
|
+
datamule/mulebot/mulebot_server/templates/chat-minimalist.html,sha256=MsTbgpnLD0JCQiKKP3XeeNJRNsRqKsRa1j_XXW7nBKw,6975
|
|
37
|
+
datamule/dataset_builder/dataset_builder.py,sha256=NCvNbDwlEkA_eAbqbsG--YlqPBDREFTVSM1GJquR0RE,9747
|
|
38
|
+
datamule/parser/sgml_parsing/sgml_parser_cy.cpython-312-x86_64-linux-gnu.so,sha256=E4E2GqXhKwQOZ_8jdMgDynvdNtqRqU9rNN2EZzV2djw,968320
|
|
39
|
+
datamule/parser/sgml_parsing/sgml_parser_cy.c,sha256=UwXSuLGSBLKfO5bM7xTzjFLnGDV3-NNnCjgUCPAY1gk,796689
|
|
40
|
+
datamule/parser/document_parsing/generalized_item_parser.py,sha256=67_DFb1BQbMmdHefEgoCPlEoiUT0zyxh3eBNJpjGXUk,2616
|
|
41
|
+
datamule/parser/document_parsing/information_table_parser_13fhr.py,sha256=R4Up1oDx3xAlzHwXzVzUkdOSsk8YPuJBPS_3I_bNQSE,1767
|
|
42
|
+
datamule/parser/document_parsing/sgml_parser.py,sha256=tC1cL3cdVQPWbc9QtoRUYSo2wRuYNaglFaCmP57oEfA,3317
|
|
43
|
+
datamule/parser/document_parsing/mappings.py,sha256=VKdnT3C5yPTbB4ZBa4El4jnB-6_osomm2rbJx6Ac6HE,5286
|
|
44
|
+
datamule/parser/document_parsing/n_port_p_parser.py,sha256=GmmQFkCZt57WikUZ5DahtTYMhhk0VcfkhOJusM4Tkow,2224
|
|
45
|
+
datamule/parser/document_parsing/helper.py,sha256=QPhVxLxMSx6Qdi7sR4D4iPObGoTnVD3tXTCNWzNxStg,2533
|
|
46
|
+
datamule/parser/document_parsing/basic_13d_parser.py,sha256=loJC97H_ccu_hWMhgNt5tvGZnN3--7tsqZxzBnWB_FY,1528
|
|
47
|
+
datamule/parser/document_parsing/basic_13g_parser.py,sha256=sWg83-QTAzUDNs45iWtpxnMxQgtC3zJlFj0R9ybZpNI,1631
|
|
48
|
+
datamule/parser/document_parsing/basic_10k_parser.py,sha256=-_q0X9K4KyLccF6j_zNp7FknGPBW1r4U3AT9bPjQUgA,3056
|
|
49
|
+
datamule/parser/document_parsing/basic_8k_parser.py,sha256=inCSmlH_BkLK0Lkvt0kZ6EUJ0nijul_RkdXzccyOmRI,2466
|
|
50
|
+
datamule/parser/document_parsing/sec_parser.py,sha256=AS8H4h1sfUAdWP2gotULcjbylsYN_nHgTfkeVRyENPo,2716
|
|
51
|
+
datamule/parser/document_parsing/generalized_xml_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
+
datamule/parser/document_parsing/insider_trading_parser.py,sha256=OVQDeLcfaZtgmOWvWPDotftO6jxx-doFAqBYVqNgypo,7106
|
|
53
|
+
datamule/parser/document_parsing/form_d_parser.py,sha256=dWlGeVZRzh0kfT3gVMC8eyqeQORdVV3r8KXUwEqAW3s,2036
|
|
54
|
+
datamule/parser/document_parsing/basic_10q_parser.py,sha256=ccQc3pwBqevDb6-vBwEE5RTZwRcnrSxjRxZEk_zPO-s,2623
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
datamule
|