datamule 0.381__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. datamule/__init__.py +46 -86
  2. datamule/book.py +16 -0
  3. datamule/config.py +29 -0
  4. datamule/data/company_former_names.csv +8148 -8148
  5. datamule/data/company_metadata.csv +10049 -10049
  6. datamule/data/company_tickers.csv +9999 -10168
  7. datamule/data/sec-glossary.csv +728 -728
  8. datamule/data/xbrl_descriptions.csv +10024 -10024
  9. datamule/document.py +278 -0
  10. datamule/downloader/downloader.py +374 -0
  11. datamule/downloader/premiumdownloader.py +335 -0
  12. datamule/helper.py +123 -136
  13. datamule/mapping_dicts/txt_mapping_dicts.py +232 -0
  14. datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
  15. datamule/monitor.py +238 -0
  16. datamule/mulebot/__init__.py +1 -1
  17. datamule/mulebot/helper.py +34 -34
  18. datamule/mulebot/mulebot.py +129 -129
  19. datamule/mulebot/mulebot_server/server.py +86 -86
  20. datamule/mulebot/mulebot_server/static/css/minimalist.css +173 -173
  21. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +67 -67
  22. datamule/mulebot/mulebot_server/static/scripts/chat.js +91 -91
  23. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +55 -55
  24. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +14 -14
  25. datamule/mulebot/mulebot_server/static/scripts/main.js +56 -56
  26. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +26 -26
  27. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +46 -46
  28. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +128 -128
  29. datamule/mulebot/mulebot_server/static/scripts/utils.js +27 -27
  30. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +90 -90
  31. datamule/mulebot/search.py +51 -51
  32. datamule/mulebot/tools.py +82 -82
  33. datamule/packageupdater.py +207 -0
  34. datamule/portfolio.py +106 -0
  35. datamule/submission.py +76 -0
  36. datamule-1.0.0.dist-info/METADATA +27 -0
  37. datamule-1.0.0.dist-info/RECORD +40 -0
  38. {datamule-0.381.dist-info → datamule-1.0.0.dist-info}/WHEEL +1 -1
  39. datamule/data/filing_types.csv +0 -485
  40. datamule/data/ftd_locations.csv +0 -388
  41. datamule/datamule_api.py +0 -21
  42. datamule/dataset_builder/_init.py +0 -1
  43. datamule/dataset_builder/dataset_builder.py +0 -260
  44. datamule/downloader/__init__.py +0 -0
  45. datamule/downloader/dropbox_downloader.py +0 -225
  46. datamule/downloader/ftd.py +0 -216
  47. datamule/downloader/information_table_13f.py +0 -231
  48. datamule/downloader/sec_downloader.py +0 -635
  49. datamule/filing_viewer/__init__.py +0 -1
  50. datamule/filing_viewer/filing_viewer.py +0 -256
  51. datamule/global_vars.py +0 -202
  52. datamule/parser/__init__.py +0 -1
  53. datamule/parser/basic_10k_parser.py +0 -82
  54. datamule/parser/basic_10q_parser.py +0 -73
  55. datamule/parser/basic_13d_parser.py +0 -58
  56. datamule/parser/basic_13g_parser.py +0 -61
  57. datamule/parser/basic_8k_parser.py +0 -84
  58. datamule/parser/company_concepts_parser.py +0 -0
  59. datamule/parser/form_d_parser.py +0 -70
  60. datamule/parser/generalized_item_parser.py +0 -78
  61. datamule/parser/generalized_xml_parser.py +0 -0
  62. datamule/parser/helper.py +0 -75
  63. datamule/parser/information_table_parser_13fhr.py +0 -41
  64. datamule/parser/insider_trading_parser.py +0 -158
  65. datamule/parser/mappings.py +0 -95
  66. datamule/parser/n_port_p_parser.py +0 -70
  67. datamule/parser/sec_parser.py +0 -79
  68. datamule/parser/sgml_parser.py +0 -180
  69. datamule/sec_filing.py +0 -126
  70. datamule/sec_search.py +0 -20
  71. datamule-0.381.dist-info/METADATA +0 -132
  72. datamule-0.381.dist-info/RECORD +0 -61
  73. {datamule-0.381.dist-info → datamule-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,130 +1,130 @@
1
- import openai
2
- import json
3
-
4
- from datamule.helper import identifier_to_cik
5
- from datamule import Downloader, Parser
6
- from .search import search_filing
7
- from .tools import tools, return_title_tool
8
- from .helper import get_company_concept, select_dict_by_title
9
-
10
- downloader = Downloader()
11
- parser = Parser()
12
-
13
-
14
- class MuleBot:
15
- def __init__(self, api_key):
16
- self.client = openai.OpenAI(api_key=api_key)
17
- self.messages = [
18
- {"role": "system", "content": "You are a helpful, but concise, assistant to assist with questions related to the Securities and Exchanges Commission. You are allowed to guess tickers."}
19
- ]
20
- self.total_tokens = 0
21
-
22
- def process_message(self, user_input):
23
-
24
- new_message_chain = self.messages
25
- new_message_chain.append({"role": "user", "content": user_input})
26
-
27
- try:
28
- response = self.client.chat.completions.create(
29
- model="gpt-4o-mini",
30
- messages=new_message_chain,
31
- tools=tools,
32
- tool_choice="auto"
33
- )
34
-
35
- self.total_tokens += response.usage.total_tokens
36
- assistant_message = response.choices[0].message
37
-
38
- if assistant_message.content is None:
39
- assistant_message.content = "I'm processing your request."
40
-
41
- new_message_chain.append({"role": "assistant", "content": assistant_message.content})
42
-
43
- tool_calls = assistant_message.tool_calls
44
- if tool_calls is None:
45
- return {'key':'text','value':assistant_message.content}
46
- else:
47
- for tool_call in tool_calls:
48
- print(f"Tool call: {tool_call.function.name}")
49
- if tool_call.function.name == "identifier_to_cik":
50
- function_args = json.loads(tool_call.function.arguments)
51
- print(f"Function args: {function_args}")
52
-
53
- cik = identifier_to_cik(function_args["ticker"])
54
- return {'key':'text','value':cik}
55
- elif tool_call.function.name == "get_company_concept":
56
- function_args = json.loads(tool_call.function.arguments)
57
- print(f"Function args: {function_args}")
58
- table_dict_list = get_company_concept(function_args["ticker"])
59
- return {'key':'table','value':table_dict_list}
60
- elif tool_call.function.name == "get_filing_urls":
61
- function_args = json.loads(tool_call.function.arguments)
62
- print(f"Function args: {function_args}")
63
- result = downloader.download(**function_args,return_urls=True)
64
- return {'key':'list','value':result}
65
- elif tool_call.function.name == "find_filing_section_by_title":
66
- function_args = json.loads(tool_call.function.arguments)
67
- print(f"Function args: {function_args}")
68
- # Parse the filing
69
- data = parser.parse_filing(function_args["url"])
70
-
71
- # find possible matches
72
- section_dicts = search_filing(query = function_args["title"], nested_dict =data, score_cutoff=0.3)
73
-
74
- # feed titles back to assistant
75
- titles = [section['title'] for section in section_dicts]
76
- new_message_chain.append({"role": "assistant", "content": f"Which of these titles is closest: {','.join(titles)}"})
77
-
78
- title_response = self.client.chat.completions.create(
79
- model="gpt-4o-mini",
80
- messages=new_message_chain,
81
- tools=[return_title_tool],
82
- tool_choice="required"
83
- )
84
-
85
- title_tool_call = title_response.choices[0].message.tool_calls[0]
86
- title = json.loads(title_tool_call.function.arguments)['title']
87
- print(f"Selected title: {title}")
88
- #print(f"Possible titles: {titles}")
89
-
90
- # select the section
91
- #section_dict = select_dict_by_title(data, title)
92
-
93
- # probably want to return full dict, and section label
94
- return {'key':'filing','value':{'data':data,'title':title}}
95
-
96
- return {'key':'text','value':'No tool call was made.'}
97
-
98
- except Exception as e:
99
- return f"An error occurred: {str(e)}"
100
-
101
- def get_total_tokens(self):
102
- return self.total_tokens
103
-
104
- def run(self):
105
- """Basic chatbot loop"""
106
- print("MuleBot: Hello! I'm here to assist you with questions related to the Securities and Exchange Commission. Type 'quit', 'exit', or 'bye' to end the conversation.")
107
- while True:
108
- user_input = input("You: ")
109
- if user_input.lower() in ['quit', 'exit', 'bye']:
110
- print("MuleBot: Goodbye!")
111
- break
112
-
113
- response = self.process_message(user_input)
114
- response_type = response['key']
115
-
116
- if response_type == 'text':
117
- value = response['value']
118
- print(value)
119
- elif response_type == 'table':
120
- value = response['value']
121
- print(value)
122
- elif response_type == 'list':
123
- value = response['value']
124
- print(value)
125
- elif response_type == 'filing':
126
- value = response['value']
127
- print(value)
128
- else:
129
- value = response['value']
1
+ import openai
2
+ import json
3
+
4
+ from datamule.helper import identifier_to_cik
5
+ from datamule import Downloader, Parser
6
+ from .search import search_filing
7
+ from .tools import tools, return_title_tool
8
+ from .helper import get_company_concept, select_dict_by_title
9
+
10
+ downloader = Downloader()
11
+ parser = Parser()
12
+
13
+
14
class MuleBot:
    """Chat assistant for SEC-related questions built on OpenAI chat completions.

    The bot keeps a single running message history (``self.messages``) and a
    running token count (``self.total_tokens``). Every user message is sent to
    the model together with the tool definitions in ``tools``; when the model
    requests a tool, the matching helper is executed and its result is handed
    back to the caller.

    Every result of :meth:`process_message` is a dictionary of the form
    ``{'key': <kind>, 'value': <payload>}`` where ``<kind>`` is one of
    ``'text'``, ``'table'``, ``'list'`` or ``'filing'``.
    """

    # Tool names this class knows how to execute.
    _KNOWN_TOOLS = (
        "identifier_to_cik",
        "get_company_concept",
        "get_filing_urls",
        "find_filing_section_by_title",
    )

    def __init__(self, api_key):
        """Create the OpenAI client and seed the conversation with the system prompt.

        Args:
            api_key: OpenAI API key passed straight to ``openai.OpenAI``.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.messages = [
            {"role": "system", "content": "You are a helpful, but concise, assistant to assist with questions related to the Securities and Exchanges Commission. You are allowed to guess tickers."}
        ]
        self.total_tokens = 0

    def process_message(self, user_input):
        """Send one user message to the model and return a tagged response.

        Args:
            user_input: Text typed by the user.

        Returns:
            A dict ``{'key': ..., 'value': ...}``. Failures are reported as a
            ``'text'`` response whose value starts with ``"An error occurred:"``
            so that callers can always index the result with ``['key']`` and
            ``['value']``.
        """
        # The history list is shared on purpose: appending here is what makes
        # the conversation persist across calls.
        history = self.messages
        history.append({"role": "user", "content": user_input})

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=history,
                tools=tools,
                tool_choice="auto"
            )

            self.total_tokens += response.usage.total_tokens
            assistant_message = response.choices[0].message

            # When the model only requests a tool it sends no text; store a
            # placeholder so the history entry always has string content.
            if assistant_message.content is None:
                assistant_message.content = "I'm processing your request."

            history.append({"role": "assistant", "content": assistant_message.content})

            tool_calls = assistant_message.tool_calls
            if tool_calls is None:
                return {'key': 'text', 'value': assistant_message.content}

            # The first tool call that this class recognises decides the reply.
            for tool_call in tool_calls:
                result = self._handle_tool_call(tool_call)
                if result is not None:
                    return result

            return {'key': 'text', 'value': 'No tool call was made.'}

        except Exception as e:
            # Previously a bare string was returned here, which made callers
            # that index the result with ['key'] fail with a TypeError.
            return {'key': 'text', 'value': f"An error occurred: {str(e)}"}

    def _handle_tool_call(self, tool_call):
        """Execute one model-requested tool; return its tagged response or None if the tool is unknown."""
        name = tool_call.function.name
        print(f"Tool call: {name}")

        if name not in self._KNOWN_TOOLS:
            return None

        function_args = json.loads(tool_call.function.arguments)
        print(f"Function args: {function_args}")

        if name == "identifier_to_cik":
            cik = identifier_to_cik(function_args["ticker"])
            return {'key': 'text', 'value': cik}

        if name == "get_company_concept":
            table_dict_list = get_company_concept(function_args["ticker"])
            return {'key': 'table', 'value': table_dict_list}

        if name == "get_filing_urls":
            # The model-supplied arguments are forwarded as keyword arguments.
            result = downloader.download(**function_args, return_urls=True)
            return {'key': 'list', 'value': result}

        return self._find_filing_section(function_args)

    def _find_filing_section(self, function_args):
        """Parse a filing, let the model pick the closest section title, and return a 'filing' response."""
        # Parse the filing
        # NOTE(review): `parser` is the module-level Parser() instance; confirm
        # that class is still exported by the installed datamule version.
        data = parser.parse_filing(function_args["url"])

        # Find candidate sections whose titles resemble the requested one.
        section_dicts = search_filing(query=function_args["title"], nested_dict=data, score_cutoff=0.3)

        # Feed the candidate titles back to the assistant.
        titles = [section['title'] for section in section_dicts]
        self.messages.append({"role": "assistant", "content": f"Which of these titles is closest: {','.join(titles)}"})

        title_response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=self.messages,
            tools=[return_title_tool],
            tool_choice="required"
        )
        # This second completion consumes tokens too; count them so that
        # get_total_tokens() reflects the real usage.
        self.total_tokens += title_response.usage.total_tokens

        title_tool_call = title_response.choices[0].message.tool_calls[0]
        title = json.loads(title_tool_call.function.arguments)['title']
        print(f"Selected title: {title}")

        # The full parsed filing is returned together with the chosen title so
        # the caller can locate the section itself.
        return {'key': 'filing', 'value': {'data': data, 'title': title}}

    def get_total_tokens(self):
        """Return the number of tokens consumed so far by this bot instance."""
        return self.total_tokens

    def run(self):
        """Basic chatbot loop"""
        print("MuleBot: Hello! I'm here to assist you with questions related to the Securities and Exchange Commission. Type 'quit', 'exit', or 'bye' to end the conversation.")
        while True:
            user_input = input("You: ")
            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("MuleBot: Goodbye!")
                break

            response = self.process_message(user_input)
            # Every response kind is shown the same way on the console.
            print(response['value'])
@@ -1,87 +1,87 @@
1
- import os
2
- from flask import Flask, request, jsonify, render_template
3
- from datamule.mulebot import MuleBot
4
- from datamule.filing_viewer import create_interactive_filing, create_valid_id
5
-
6
- class MuleBotServer:
7
- def __init__(self, template='chat-minimalist.html'):
8
- template_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'templates'))
9
- static_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'static'))
10
- self.app = Flask(__name__, template_folder=template_dir, static_folder=static_dir)
11
- self.mulebot = None
12
- self.template = template
13
- self.setup_routes()
14
-
15
- def setup_routes(self):
16
- @self.app.route('/')
17
- def home():
18
- return render_template(self.template)
19
-
20
- @self.app.route('/chat-with-prompt')
21
- def chat_with_prompt():
22
- prefilled_prompt = request.args.get('prompt', '')
23
- return render_template(self.template, prefilled_prompt=prefilled_prompt)
24
-
25
- @self.app.route('/chat', methods=['POST'])
26
- def chat():
27
- user_input = request.json['message']
28
-
29
- # Process the message using MuleBot's process_message method
30
- response = self.mulebot.process_message(user_input)
31
- response_type = response['key']
32
-
33
- # Prepare the response based on the type
34
- if response_type == 'text':
35
- # If response type is text, add it to the chat
36
- chat_response = {
37
- 'type': 'text',
38
- 'content': response['value']
39
- }
40
- elif response_type == 'table':
41
- # If response type is table, prepare it for the artifact window
42
- chat_response = {
43
- 'type': 'artifact',
44
- 'content': response['value'],
45
- 'artifact_type': 'artifact-table'
46
- }
47
- elif response_type == 'list':
48
- chat_response = {
49
- 'type': 'artifact',
50
- 'content': response['value'],
51
- 'artifact_type': 'artifact-list'
52
- }
53
- elif response_type == 'filing':
54
- data = response['value']['data']
55
- title = response['value']['title']
56
- section_id = create_valid_id(title)
57
-
58
- # create a filing viewer display
59
- html = create_interactive_filing(data)
60
-
61
- # we'll need to display the filing viewer in the artifact window, with a json export option
62
- chat_response = {
63
- 'type': 'artifact',
64
- 'content': html,
65
- 'data': data,
66
- 'section_id': section_id,
67
- 'artifact_type': 'artifact-filing'
68
- }
69
- else:
70
- # Handle other types of responses if needed
71
- chat_response = {
72
- 'type': 'unknown',
73
- 'content': 'Unsupported response type'
74
- }
75
-
76
- return jsonify({
77
- 'response': chat_response,
78
- 'total_tokens': self.mulebot.get_total_tokens()
79
- })
80
-
81
- def set_api_key(self, api_key):
82
- self.mulebot = MuleBot(api_key)
83
-
84
- def run(self, debug=False, host='0.0.0.0', port=5000):
85
- if not self.mulebot:
86
- raise ValueError("API key not set. Please call set_api_key() before running the server.")
1
+ import os
2
+ from flask import Flask, request, jsonify, render_template
3
+ from datamule.mulebot import MuleBot
4
+ from datamule.filing_viewer import create_interactive_filing, create_valid_id
5
+
6
class MuleBotServer:
    """Small Flask application that exposes a MuleBot over HTTP.

    Routes registered by :meth:`setup_routes`:

    * ``GET /`` renders the chat template.
    * ``GET /chat-with-prompt`` renders the chat template with the ``prompt``
      query parameter passed in as ``prefilled_prompt``.
    * ``POST /chat`` expects JSON ``{"message": ...}`` and answers with
      ``{"response": ..., "total_tokens": ...}``.
    """

    def __init__(self, template='chat-minimalist.html'):
        """Create the Flask app and register the routes.

        Args:
            template: Name of the template (inside the ``templates`` directory
                next to this file) rendered for the chat pages.
        """
        here = os.path.dirname(__file__)
        template_dir = os.path.abspath(os.path.join(here, 'templates'))
        static_dir = os.path.abspath(os.path.join(here, 'static'))
        self.app = Flask(__name__, template_folder=template_dir, static_folder=static_dir)
        self.mulebot = None  # created later by set_api_key()
        self.template = template
        self.setup_routes()

    def setup_routes(self):
        """Attach the page and chat endpoints to ``self.app``."""

        @self.app.route('/')
        def home():
            return render_template(self.template)

        @self.app.route('/chat-with-prompt')
        def chat_with_prompt():
            prefilled_prompt = request.args.get('prompt', '')
            return render_template(self.template, prefilled_prompt=prefilled_prompt)

        @self.app.route('/chat', methods=['POST'])
        def chat():
            # silent=True yields None instead of raising when the body is not
            # valid JSON, so a malformed request gets a clear 400 below.
            payload = request.get_json(silent=True)
            if not isinstance(payload, dict) or 'message' not in payload:
                return jsonify({'error': "Request body must be JSON with a 'message' field."}), 400

            # The app object can be served without going through run(), so the
            # bot may still be missing at request time.
            if self.mulebot is None:
                return jsonify({'error': 'API key not set. Please call set_api_key() first.'}), 503

            # Process the message using MuleBot's process_message method
            response = self.mulebot.process_message(payload['message'])

            return jsonify({
                'response': self._build_chat_response(response),
                'total_tokens': self.mulebot.get_total_tokens()
            })

    def _build_chat_response(self, response):
        """Translate a MuleBot result into the payload sent to the chat front end.

        Args:
            response: Normally a dict ``{'key': ..., 'value': ...}``. Any other
                object (for example a plain error string) is shown as text.

        Returns:
            A dict with at least ``type`` and ``content``.
        """
        if not isinstance(response, dict):
            return {'type': 'text', 'content': str(response)}

        response_type = response.get('key')

        if response_type == 'text':
            # Plain text goes straight into the chat.
            return {'type': 'text', 'content': response['value']}

        if response_type == 'table':
            # Tables are shown in the artifact window.
            return {
                'type': 'artifact',
                'content': response['value'],
                'artifact_type': 'artifact-table'
            }

        if response_type == 'list':
            return {
                'type': 'artifact',
                'content': response['value'],
                'artifact_type': 'artifact-list'
            }

        if response_type == 'filing':
            data = response['value']['data']
            title = response['value']['title']
            # NOTE(review): both helpers come from datamule.filing_viewer (see
            # the import at the top of the file); confirm that module is still
            # shipped with the installed package version.
            section_id = create_valid_id(title)

            # Create a filing viewer display.
            html = create_interactive_filing(data)

            # The raw data is included next to the rendered HTML so the front
            # end can offer a JSON export.
            return {
                'type': 'artifact',
                'content': html,
                'data': data,
                'section_id': section_id,
                'artifact_type': 'artifact-filing'
            }

        # Handle other types of responses if needed
        return {'type': 'unknown', 'content': 'Unsupported response type'}

    def set_api_key(self, api_key):
        """Create the MuleBot that will answer chat requests."""
        self.mulebot = MuleBot(api_key)

    def run(self, debug=False, host='0.0.0.0', port=5000):
        """Start the Flask development server.

        Args:
            debug: Passed through to ``Flask.run``.
            host: Interface to bind. NOTE(review): the default binds every
                interface; pass ``'127.0.0.1'`` for local-only use.
            port: TCP port to listen on.

        Raises:
            ValueError: If :meth:`set_api_key` has not been called yet.
        """
        if not self.mulebot:
            raise ValueError("API key not set. Please call set_api_key() before running the server.")
        self.app.run(debug=debug, host=host, port=port)