datamule 0.422__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. datamule/__init__.py +71 -0
  2. datamule/data/company_former_names.csv +8148 -0
  3. datamule/data/company_metadata.csv +10049 -0
  4. datamule/data/company_tickers.csv +9999 -0
  5. datamule/data/sec-glossary.csv +728 -0
  6. datamule/data/xbrl_descriptions.csv +10024 -0
  7. datamule/dataset_builder/dataset_builder.py +259 -0
  8. datamule/document.py +130 -0
  9. datamule/downloader/downloader.py +364 -0
  10. datamule/downloader/premiumdownloader.py +332 -0
  11. datamule/helper.py +123 -0
  12. datamule/monitor.py +236 -0
  13. datamule/mulebot/__init__.py +1 -0
  14. datamule/mulebot/helper.py +35 -0
  15. datamule/mulebot/mulebot.py +130 -0
  16. datamule/mulebot/mulebot_server/__init__.py +1 -0
  17. datamule/mulebot/mulebot_server/server.py +87 -0
  18. datamule/mulebot/mulebot_server/static/css/minimalist.css +174 -0
  19. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +68 -0
  20. datamule/mulebot/mulebot_server/static/scripts/chat.js +92 -0
  21. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +56 -0
  22. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +15 -0
  23. datamule/mulebot/mulebot_server/static/scripts/main.js +57 -0
  24. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +27 -0
  25. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +47 -0
  26. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +129 -0
  27. datamule/mulebot/mulebot_server/static/scripts/utils.js +28 -0
  28. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +91 -0
  29. datamule/mulebot/search.py +52 -0
  30. datamule/mulebot/tools.py +82 -0
  31. datamule/packageupdater.py +207 -0
  32. datamule/parser/document_parsing/basic_10k_parser.py +82 -0
  33. datamule/parser/document_parsing/basic_10q_parser.py +73 -0
  34. datamule/parser/document_parsing/basic_13d_parser.py +58 -0
  35. datamule/parser/document_parsing/basic_13g_parser.py +61 -0
  36. datamule/parser/document_parsing/basic_8k_parser.py +84 -0
  37. datamule/parser/document_parsing/form_d_parser.py +70 -0
  38. datamule/parser/document_parsing/generalized_item_parser.py +78 -0
  39. datamule/parser/document_parsing/generalized_xml_parser.py +0 -0
  40. datamule/parser/document_parsing/helper.py +75 -0
  41. datamule/parser/document_parsing/information_table_parser_13fhr.py +41 -0
  42. datamule/parser/document_parsing/insider_trading_parser.py +158 -0
  43. datamule/parser/document_parsing/mappings.py +95 -0
  44. datamule/parser/document_parsing/n_port_p_parser.py +70 -0
  45. datamule/parser/document_parsing/sec_parser.py +73 -0
  46. datamule/parser/document_parsing/sgml_parser.py +94 -0
  47. datamule/parser/sgml_parsing/sgml_parser_cy.c +19082 -0
  48. datamule/parser/sgml_parsing/sgml_parser_cy.cpython-312-x86_64-linux-gnu.so +0 -0
  49. datamule/portfolio.py +21 -0
  50. datamule/submission.py +67 -0
  51. datamule-0.422.dist-info/METADATA +31 -0
  52. datamule-0.422.dist-info/RECORD +54 -0
  53. datamule-0.422.dist-info/WHEEL +6 -0
  54. datamule-0.422.dist-info/top_level.txt +1 -0
datamule/portfolio.py ADDED
@@ -0,0 +1,21 @@
1
+ from pathlib import Path
2
+ from tqdm import tqdm
3
+ from concurrent.futures import ProcessPoolExecutor
4
+ from .submission import Submission
5
+
6
class Portfolio:
    """Collection of submissions loaded from the sub-folders of one directory.

    Each immediate sub-directory of ``path`` is passed to ``Submission`` and
    the resulting objects are stored, in folder-name order, in
    ``self.submissions``. Iterating over a ``Portfolio`` iterates over that
    list.
    """

    def __init__(self, path):
        """Load one ``Submission`` per sub-directory of ``path``.

        Args:
            path: Directory whose immediate sub-directories are each handed
                to ``Submission``. Plain files inside ``path`` are ignored.
        """
        self.path = Path(path)
        # iterdir() yields entries in arbitrary order; sorting makes the order
        # of self.submissions reproducible across runs and platforms.
        folders = sorted(f for f in self.path.iterdir() if f.is_dir())
        print(f"Loading {len(folders)} submissions")
        # Load submissions in parallel worker processes.
        with ProcessPoolExecutor() as executor:
            # executor.map preserves input order; tqdm only reports progress
            # as the results are consumed by list().
            self.submissions = list(tqdm(
                executor.map(Submission, folders),
                total=len(folders),
                desc="Loading submissions"
            ))

    def __iter__(self):
        """Return an iterator over the loaded submissions."""
        return iter(self.submissions)
datamule/submission.py ADDED
@@ -0,0 +1,67 @@
1
+ from pathlib import Path
2
+ import json
3
+ from .document import Document
4
+
5
class Submission:
    """One submission folder: a ``metadata.json`` file plus document files.

    ``self.metadata`` holds the parsed ``metadata.json``. The methods below
    read its ``'documents'`` list, whose entries are dicts with a ``'TYPE'``
    key and an optional ``'FILENAME'`` key (a path relative to the folder).
    """

    def __init__(self, path):
        """Remember ``path`` and load ``metadata.json`` from it.

        Args:
            path: Directory containing ``metadata.json``.
        """
        self.path = Path(path)
        self._load_metadata()

    def _load_metadata(self):
        """Parse ``<path>/metadata.json`` into ``self.metadata``."""
        metadata_path = self.path / 'metadata.json'
        # Read explicitly as UTF-8 (the encoding JSON mandates) instead of
        # relying on the platform's locale default.
        with metadata_path.open('r', encoding='utf-8') as f:
            self.metadata = json.load(f)

    @staticmethod
    def _as_type_list(document_types):
        """Wrap a single type string in a list; return anything else as is."""
        if isinstance(document_types, str):
            return [document_types]
        return document_types

    def _delete_documents(self, should_delete):
        """Delete the file of every document whose type satisfies ``should_delete``.

        Documents without a ``'FILENAME'`` entry are skipped.
        """
        for doc in self.metadata['documents']:
            filename = doc.get('FILENAME')
            if filename is None:
                continue

            if should_delete(doc['TYPE']):
                # missing_ok avoids the exists()-then-unlink() race: a file
                # that is already gone is simply ignored.
                (self.path / filename).unlink(missing_ok=True)

    def keep(self, document_types):
        """Keep files of specified document types, delete others

        Args:
            document_types: string or list of strings representing document types to keep
        """
        wanted = self._as_type_list(document_types)
        self._delete_documents(lambda doc_type: doc_type not in wanted)

    def drop(self, document_types):
        """Delete files of specified document types, keep others

        Args:
            document_types: string or list of strings representing document types to drop
        """
        unwanted = self._as_type_list(document_types)
        self._delete_documents(lambda doc_type: doc_type in unwanted)

    def document_type(self, document_type):
        """Yield a ``Document`` for every listed document of the given type(s).

        Args:
            document_type: string or list of strings representing document types to select

        Yields:
            ``Document(doc['TYPE'], path)`` for each matching metadata entry
            that has a ``'FILENAME'``. The file's existence is not checked here.
        """
        document_types = self._as_type_list(document_type)

        for doc in self.metadata['documents']:
            if doc['TYPE'] not in document_types:
                continue

            filename = doc.get('FILENAME')
            if filename is None:
                continue

            yield Document(doc['TYPE'], self.path / filename)
datamule-0.422.dist-info/METADATA ADDED
@@ -0,0 +1,31 @@
1
+ Metadata-Version: 2.1
2
+ Name: datamule
3
+ Version: 0.422
4
+ Summary: Making it easier to use SEC filings.
5
+ Home-page: https://github.com/john-friedman/datamule-python
6
+ Author: John Friedman
7
+ Requires-Dist: aiohttp
8
+ Requires-Dist: aiolimiter
9
+ Requires-Dist: tqdm
10
+ Requires-Dist: requests
11
+ Requires-Dist: nest_asyncio
12
+ Requires-Dist: aiofiles
13
+ Requires-Dist: polars
14
+ Requires-Dist: setuptools
15
+ Requires-Dist: selectolax
16
+ Requires-Dist: pytz
17
+ Requires-Dist: zstandard
18
+ Provides-Extra: mulebot
19
+ Requires-Dist: openai; extra == "mulebot"
20
+ Provides-Extra: mulebot-server
21
+ Requires-Dist: flask; extra == "mulebot-server"
22
+ Provides-Extra: dataset-builder
23
+ Requires-Dist: pandas; extra == "dataset-builder"
24
+ Requires-Dist: google-generativeai; extra == "dataset-builder"
25
+ Requires-Dist: psutil; extra == "dataset-builder"
26
+ Provides-Extra: all
27
+ Requires-Dist: pandas; extra == "all"
28
+ Requires-Dist: flask; extra == "all"
29
+ Requires-Dist: psutil; extra == "all"
30
+ Requires-Dist: google-generativeai; extra == "all"
31
+ Requires-Dist: openai; extra == "all"
datamule-0.422.dist-info/RECORD ADDED
@@ -0,0 +1,54 @@
1
+ datamule-0.422.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
2
+ datamule-0.422.dist-info/RECORD,,
3
+ datamule-0.422.dist-info/METADATA,sha256=BNN1-R7bd2pJwi9p-Zl9uo8K2Hy2qEEsM4U7RYOBrL4,1006
4
+ datamule-0.422.dist-info/WHEEL,sha256=tRzqFuK6eFjpbf2xTNvU7E3xL2y00S_NWJvyqxej3BA,151
5
+ datamule/packageupdater.py,sha256=rw4hTnQGjZttee8QmTadg9vlhEjVp_dwdSFv8uQNvss,9584
6
+ datamule/portfolio.py,sha256=Qd_j3gGUi95Ad0g-PpjfJ3ozg-92VohMvCOvcEb0Kbw,710
7
+ datamule/monitor.py,sha256=mRaM8v5NgcMF9DJ1s_YBzucjrbr-3yFwW422MVml-_Q,9114
8
+ datamule/helper.py,sha256=tr3AQWus9dHNZFKpLSglWjcb8zmm5qDXjOWACMhvMxQ,4594
9
+ datamule/__init__.py,sha256=i3HnWFrqEdsK2OmvE7NUjcO05w1BuIYBV6J4cSFaN3s,2268
10
+ datamule/document.py,sha256=Yn8UqUjKwYPE29MrMjreHK_HY9eTqOSjPyM5B1VBrHQ,5144
11
+ datamule/submission.py,sha256=LlwHJL3CIMHOlaHqCfDH7UBiKCFu0tPYV-g92uPvfcY,2492
12
+ datamule/data/xbrl_descriptions.csv,sha256=SQ9wUURNqG424rnTiZtopsxV2q-PvU4NMj52LqgDsvg,2621524
13
+ datamule/data/sec-glossary.csv,sha256=-cN7GjiadLw5C1sv4zSeCnfeZZDYeSgJl-0ydarMAo0,251209
14
+ datamule/data/company_former_names.csv,sha256=HE9cAv-_QKFX6jT-_-D0rHmaDyQuAzL4MJwank5O1U8,706380
15
+ datamule/data/company_tickers.csv,sha256=GW6lOP54RiGJCx-d9N5jEBy7tGVgU3zI-5xHJXrZfSI,400363
16
+ datamule/data/company_metadata.csv,sha256=yPovrCVjYwLWTU_hBUFJymp8iNO0NBYuq_QwOkRLoN8,3068599
17
+ datamule/downloader/downloader.py,sha256=XJF0FfoCB43_22lhPakq9dI-oJ_SsXIvNx4PUVmcEOc,14509
18
+ datamule/downloader/premiumdownloader.py,sha256=JH4aZ-ZwARCIACKwgzSgHAuOkKPc_GnhiUHSSu22XO4,14206
19
+ datamule/mulebot/tools.py,sha256=ctnGc2HItR-Roi-QXkc7GEaAOEYQiFRtfmdmIxNxYXk,2940
20
+ datamule/mulebot/mulebot.py,sha256=XbtgvXBSFu9OaaLW_k1KDgHVTNQGV8_0ZwNMFad-pPU,5837
21
+ datamule/mulebot/helper.py,sha256=olztOwltfELZ-IERM2bRNLBavD04kfB6ueWTisJAleA,1080
22
+ datamule/mulebot/search.py,sha256=mwvbB6Fex5dEQkfxkCL53ne5pXdVno-5KlZ5vZyGnJQ,2073
23
+ datamule/mulebot/__init__.py,sha256=YvZXV6xQ0iP-oGD8rloufjdwJL6D46P3NNr0CY9PQCA,29
24
+ datamule/mulebot/mulebot_server/server.py,sha256=M7kU4aZUoi8X8DUKZNckLewBiI637Krbeap31qD2jt8,3547
25
+ datamule/mulebot/mulebot_server/__init__.py,sha256=x1QhXys7BWxi2g9_ZHUYA6S6rL3VL2718x4rYtGaaIg,33
26
+ datamule/mulebot/mulebot_server/static/scripts/chat.js,sha256=q8vV_KtzuNCXCfXqavM1HROIkYHItOAmaR8P1OjSqa0,3108
27
+ datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js,sha256=mGhAXQnjnSxYqVqg1mE5g_ev0-aDhh849xunQtRchnY,1093
28
+ datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js,sha256=wxeIM2RzF6Zh_9ivnYuNyTzIgIcEz0-zX8gTCvyACJo,2034
29
+ datamule/mulebot/mulebot_server/static/scripts/utils.js,sha256=oGPMtyT9dvuqHqrfZj33t4vLZiF8UJrMXB1hpPXRNu4,1255
30
+ datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js,sha256=UtkUpLvELNI4Ibpb7VstgVA9Tk-8jbkxXhmXsgufFa4,4437
31
+ datamule/mulebot/mulebot_server/static/scripts/suggestions.js,sha256=TCyz8OYuXeIG9qNRgwU2fhz18YNXpy4Bl9mk66lXefo,1795
32
+ datamule/mulebot/mulebot_server/static/scripts/artifacts.js,sha256=WUAoI3LtEBEt3x-Ri0gwd6YT0JtGNwDZ_b8tuhWWSsg,2258
33
+ datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js,sha256=DZFLe-45mmzWvJPO1be5Ivfqx0BInrXfduQ1IhbHWzk,429
34
+ datamule/mulebot/mulebot_server/static/scripts/main.js,sha256=NEIVih1WJeQ-qo5k8hnmgFHd7N839Mr6hJur856oXVQ,1882
35
+ datamule/mulebot/mulebot_server/static/css/minimalist.css,sha256=Tz1tz8oF_esbfCvLTJBmTfb-5MIiqjfhU_4A4nto1mo,2974
36
+ datamule/mulebot/mulebot_server/templates/chat-minimalist.html,sha256=MsTbgpnLD0JCQiKKP3XeeNJRNsRqKsRa1j_XXW7nBKw,6975
37
+ datamule/dataset_builder/dataset_builder.py,sha256=NCvNbDwlEkA_eAbqbsG--YlqPBDREFTVSM1GJquR0RE,9747
38
+ datamule/parser/sgml_parsing/sgml_parser_cy.cpython-312-x86_64-linux-gnu.so,sha256=E4E2GqXhKwQOZ_8jdMgDynvdNtqRqU9rNN2EZzV2djw,968320
39
+ datamule/parser/sgml_parsing/sgml_parser_cy.c,sha256=UwXSuLGSBLKfO5bM7xTzjFLnGDV3-NNnCjgUCPAY1gk,796689
40
+ datamule/parser/document_parsing/generalized_item_parser.py,sha256=67_DFb1BQbMmdHefEgoCPlEoiUT0zyxh3eBNJpjGXUk,2616
41
+ datamule/parser/document_parsing/information_table_parser_13fhr.py,sha256=R4Up1oDx3xAlzHwXzVzUkdOSsk8YPuJBPS_3I_bNQSE,1767
42
+ datamule/parser/document_parsing/sgml_parser.py,sha256=tC1cL3cdVQPWbc9QtoRUYSo2wRuYNaglFaCmP57oEfA,3317
43
+ datamule/parser/document_parsing/mappings.py,sha256=VKdnT3C5yPTbB4ZBa4El4jnB-6_osomm2rbJx6Ac6HE,5286
44
+ datamule/parser/document_parsing/n_port_p_parser.py,sha256=GmmQFkCZt57WikUZ5DahtTYMhhk0VcfkhOJusM4Tkow,2224
45
+ datamule/parser/document_parsing/helper.py,sha256=QPhVxLxMSx6Qdi7sR4D4iPObGoTnVD3tXTCNWzNxStg,2533
46
+ datamule/parser/document_parsing/basic_13d_parser.py,sha256=loJC97H_ccu_hWMhgNt5tvGZnN3--7tsqZxzBnWB_FY,1528
47
+ datamule/parser/document_parsing/basic_13g_parser.py,sha256=sWg83-QTAzUDNs45iWtpxnMxQgtC3zJlFj0R9ybZpNI,1631
48
+ datamule/parser/document_parsing/basic_10k_parser.py,sha256=-_q0X9K4KyLccF6j_zNp7FknGPBW1r4U3AT9bPjQUgA,3056
49
+ datamule/parser/document_parsing/basic_8k_parser.py,sha256=inCSmlH_BkLK0Lkvt0kZ6EUJ0nijul_RkdXzccyOmRI,2466
50
+ datamule/parser/document_parsing/sec_parser.py,sha256=AS8H4h1sfUAdWP2gotULcjbylsYN_nHgTfkeVRyENPo,2716
51
+ datamule/parser/document_parsing/generalized_xml_parser.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
+ datamule/parser/document_parsing/insider_trading_parser.py,sha256=OVQDeLcfaZtgmOWvWPDotftO6jxx-doFAqBYVqNgypo,7106
53
+ datamule/parser/document_parsing/form_d_parser.py,sha256=dWlGeVZRzh0kfT3gVMC8eyqeQORdVV3r8KXUwEqAW3s,2036
54
+ datamule/parser/document_parsing/basic_10q_parser.py,sha256=ccQc3pwBqevDb6-vBwEE5RTZwRcnrSxjRxZEk_zPO-s,2623
datamule-0.422.dist-info/WHEEL ADDED
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.6.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp312-cp312-manylinux_2_17_x86_64
5
+ Tag: cp312-cp312-manylinux2014_x86_64
6
+
datamule-0.422.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ datamule