datamule 0.422__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. datamule/__init__.py +71 -0
  2. datamule/data/company_former_names.csv +8148 -0
  3. datamule/data/company_metadata.csv +10049 -0
  4. datamule/data/company_tickers.csv +9999 -0
  5. datamule/data/sec-glossary.csv +728 -0
  6. datamule/data/xbrl_descriptions.csv +10024 -0
  7. datamule/dataset_builder/dataset_builder.py +259 -0
  8. datamule/document.py +130 -0
  9. datamule/downloader/downloader.py +364 -0
  10. datamule/downloader/premiumdownloader.py +332 -0
  11. datamule/helper.py +123 -0
  12. datamule/monitor.py +236 -0
  13. datamule/mulebot/__init__.py +1 -0
  14. datamule/mulebot/helper.py +35 -0
  15. datamule/mulebot/mulebot.py +130 -0
  16. datamule/mulebot/mulebot_server/__init__.py +1 -0
  17. datamule/mulebot/mulebot_server/server.py +87 -0
  18. datamule/mulebot/mulebot_server/static/css/minimalist.css +174 -0
  19. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +68 -0
  20. datamule/mulebot/mulebot_server/static/scripts/chat.js +92 -0
  21. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +56 -0
  22. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +15 -0
  23. datamule/mulebot/mulebot_server/static/scripts/main.js +57 -0
  24. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +27 -0
  25. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +47 -0
  26. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +129 -0
  27. datamule/mulebot/mulebot_server/static/scripts/utils.js +28 -0
  28. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +91 -0
  29. datamule/mulebot/search.py +52 -0
  30. datamule/mulebot/tools.py +82 -0
  31. datamule/packageupdater.py +207 -0
  32. datamule/parser/document_parsing/basic_10k_parser.py +82 -0
  33. datamule/parser/document_parsing/basic_10q_parser.py +73 -0
  34. datamule/parser/document_parsing/basic_13d_parser.py +58 -0
  35. datamule/parser/document_parsing/basic_13g_parser.py +61 -0
  36. datamule/parser/document_parsing/basic_8k_parser.py +84 -0
  37. datamule/parser/document_parsing/form_d_parser.py +70 -0
  38. datamule/parser/document_parsing/generalized_item_parser.py +78 -0
  39. datamule/parser/document_parsing/generalized_xml_parser.py +0 -0
  40. datamule/parser/document_parsing/helper.py +75 -0
  41. datamule/parser/document_parsing/information_table_parser_13fhr.py +41 -0
  42. datamule/parser/document_parsing/insider_trading_parser.py +158 -0
  43. datamule/parser/document_parsing/mappings.py +95 -0
  44. datamule/parser/document_parsing/n_port_p_parser.py +70 -0
  45. datamule/parser/document_parsing/sec_parser.py +73 -0
  46. datamule/parser/document_parsing/sgml_parser.py +94 -0
  47. datamule/parser/sgml_parsing/sgml_parser_cy.c +19082 -0
  48. datamule/parser/sgml_parsing/sgml_parser_cy.cpython-312-x86_64-linux-gnu.so +0 -0
  49. datamule/portfolio.py +21 -0
  50. datamule/submission.py +67 -0
  51. datamule-0.422.dist-info/METADATA +31 -0
  52. datamule-0.422.dist-info/RECORD +54 -0
  53. datamule-0.422.dist-info/WHEEL +6 -0
  54. datamule-0.422.dist-info/top_level.txt +1 -0
datamule/helper.py ADDED
@@ -0,0 +1,123 @@
1
+ import requests
2
+ import os
3
+ from tqdm import tqdm
4
+ import zipfile
5
+ from pkg_resources import resource_filename
6
+ import csv
7
+ import re
8
+
9
# Unused in current implementation.
def construct_primary_doc_url(cik, accession_number, primary_doc_url):
    """Build the EDGAR archive URL for a filing's primary document."""
    accession = accession_number.replace("-", "")
    return (
        "https://www.sec.gov/Archives/edgar/data/"
        f"{cik}/{accession}/{primary_doc_url}"
    )
13
+
14
# DONE
def _download_from_dropbox(url, output_path):
    """Download *url* to *output_path* with a progress bar.

    If the downloaded file is a zip archive, its contents are extracted
    into the directory containing *output_path* and the archive itself is
    removed afterwards.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        ValueError: if the archive contains an unsafe (path-traversal) entry.
    """
    headers = {'user-agent': 'Wget/1.16 (linux-gnu)'}
    # Stream the body so large files are never held fully in memory, and
    # fail fast on HTTP errors instead of silently saving an error page.
    with requests.get(url, stream=True, headers=headers, timeout=30) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))

        with open(output_path, 'wb') as f, tqdm(
            desc="Downloading " + os.path.basename(output_path),
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as progress_bar:
            for chunk in r.iter_content(chunk_size=1024):
                progress_bar.update(f.write(chunk))

    # Check if the downloaded file is a zip file
    if not zipfile.is_zipfile(output_path):
        print(f"Downloaded file is not a zip. Saved to {output_path}")
        return

    extract_path = os.path.dirname(output_path)
    with zipfile.ZipFile(output_path, 'r') as zip_ref:
        for file_info in zip_ref.infolist():
            extract_file_path = os.path.join(extract_path, file_info.filename)
            # Guard against zip-slip: refuse any entry whose resolved path
            # escapes the extraction directory.
            if not os.path.realpath(extract_file_path).startswith(
                    os.path.realpath(extract_path) + os.sep):
                raise ValueError(f"Unsafe path in archive: {file_info.filename}")
            if file_info.is_dir():
                os.makedirs(extract_file_path, exist_ok=True)
                continue
            # Nested archive entries need their parent directories created
            # before open(..., 'wb') can succeed.
            os.makedirs(os.path.dirname(extract_file_path) or '.', exist_ok=True)
            with zip_ref.open(file_info) as file_in_zip, \
                 open(extract_file_path, 'wb') as output_file, \
                 tqdm(total=file_info.file_size, unit='B', unit_scale=True,
                      desc=f"Extracting {file_info.filename}") as pbar:
                while chunk := file_in_zip.read(8192):
                    output_file.write(chunk)
                    pbar.update(len(chunk))

    # Remove the zip file after extraction
    os.remove(output_path)
    print(f"Extracted contents to {extract_path}")
53
+
54
# May generalize to load any package resource
def load_package_csv(name):
    """Load a bundled ``datamule/data/<name>.csv`` file.

    Args:
        name: Base name of the CSV file (without the ``.csv`` extension).

    Returns:
        list[dict]: One dict per row, keyed by the CSV header row.
    """
    csv_path = resource_filename('datamule', f'data/{name}.csv')

    # Explicit encoding and newline='' (per the csv module docs) so rows
    # parse identically regardless of platform locale and line endings.
    with open(csv_path, 'r', encoding='utf-8', newline='') as csvfile:
        return list(csv.DictReader(csvfile))
66
+
67
def load_package_dataset(dataset):
    """Load a named bundled dataset.

    Args:
        dataset: One of 'company_tickers', 'company_former_names',
            'company_metadata', 'sec_glossary', or 'xbrl_descriptions'.

    Returns:
        list[dict]: Parsed CSV rows for the requested dataset.

    Raises:
        ValueError: if *dataset* is not a known dataset name (previously
            an unknown name silently returned ``None``).
    """
    # Maps the public dataset identifier to the bundled CSV's base filename
    # (note 'sec_glossary' is stored as 'sec-glossary.csv').
    dataset_files = {
        'company_tickers': 'company_tickers',
        'company_former_names': 'company_former_names',
        'company_metadata': 'company_metadata',
        'sec_glossary': 'sec-glossary',
        'xbrl_descriptions': 'xbrl_descriptions',
    }
    try:
        filename = dataset_files[dataset]
    except KeyError:
        raise ValueError(
            f"Unknown dataset {dataset!r}; expected one of {sorted(dataset_files)}"
        ) from None
    return load_package_csv(filename)
78
+
79
# DONE
def identifier_to_cik(ticker):
    """Convert company ticker(s) to CIK codes.

    Args:
        ticker: A single ticker string or a list of ticker strings.

    Returns:
        list: CIK codes for every matching company.

    Raises:
        ValueError: if *ticker* is empty/None, or if no company matches.
    """
    # An empty/None ticker previously fell through to `if not cik` with
    # `cik` never assigned, raising NameError; reject it explicitly.
    if not ticker:
        raise ValueError("No ticker provided")

    company_tickers = load_package_csv('company_tickers')
    tickers = ticker if isinstance(ticker, list) else [ticker]
    cik = [
        company['cik']
        for t in tickers
        for company in company_tickers
        if t == company['ticker']
    ]

    if not cik:
        raise ValueError("No matching companies found")

    return cik
95
+
96
+
97
def fix_filing_url(url):
    """Rewrite shortened EDGAR document URLs to their dashed-accession form.

    A URL ending in ``/NNNN.ext`` inside an 18-digit accession directory is
    rewritten so the final segment becomes
    ``<dashed-accession>-NNNN.ext``; anything else is returned unchanged.
    """
    suffix_match = re.search(r'/(\d{4})\.(.+?)$', url)
    if suffix_match is None:
        return url

    accession_match = re.search(r'/(\d{18})/', url)
    if accession_match is None:
        return url

    suffix_number, file_ext = suffix_match.groups()
    raw = accession_match.group(1)
    dashed = f"{raw[:10]}-{raw[10:12]}-{raw[12:]}"
    base = url.rsplit('/', 1)[0]
    return f"{base}/{dashed}-{suffix_number}.{file_ext}"
109
+
110
def convert_to_dashed_accession(accession):
    """Format an 18-character accession number as ``XXXXXXXXXX-XX-XXXXXX``.

    Raises:
        ValueError: if the accession (after whitespace removal) is not
            exactly 18 characters long.
    """
    # Strip all whitespace, including interior runs, before validating.
    cleaned = ''.join(accession.split())

    if len(cleaned) != 18:
        raise ValueError("Invalid accession number format. Expected 18 characters.")

    # Dashes split the number as 10 / 2 / 6 characters.
    return '-'.join((cleaned[:10], cleaned[10:12], cleaned[12:]))

# Default SEC request headers; EDGAR requires an identifying User-Agent.
headers = {'User-Agent': 'John Smith johnsmith@gmail.com'}
datamule/monitor.py ADDED
@@ -0,0 +1,236 @@
1
import asyncio
import time
from collections import deque
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo

import aiohttp
import pytz

from .helper import headers, identifier_to_cik
8
+
9
+ def _get_current_eastern_date():
10
+ """Get current date in US Eastern timezone (automatically handles DST) """
11
+ eastern = pytz.timezone('America/New_York')
12
+ return datetime.now(eastern)
13
+
14
+ class PreciseRateLimiter:
15
+ def __init__(self, rate, interval=1.0):
16
+ self.rate = rate # requests per interval
17
+ self.interval = interval # in seconds
18
+ self.token_time = self.interval / self.rate # time per token
19
+ self.last_time = time.time()
20
+ self.lock = asyncio.Lock()
21
+
22
+ async def acquire(self):
23
+ async with self.lock:
24
+ now = time.time()
25
+ wait_time = self.last_time + self.token_time - now
26
+ if wait_time > 0:
27
+ await asyncio.sleep(wait_time)
28
+ self.last_time = time.time()
29
+ return True
30
+
31
+ async def __aenter__(self):
32
+ await self.acquire()
33
+ return self
34
+
35
+ async def __aexit__(self, exc_type, exc, tb):
36
+ pass
37
+
38
class RateMonitor:
    """Tracks request count and byte throughput over a sliding time window."""

    def __init__(self, window_size=1.0):
        self.window_size = window_size  # seconds of history to keep
        self.requests = deque()         # (timestamp, size_bytes) pairs, oldest left
        self._lock = asyncio.Lock()

    def _evict_expired(self, now):
        # Drop entries that have aged out of the sliding window.
        cutoff = now - self.window_size
        while self.requests and self.requests[0][0] < cutoff:
            self.requests.popleft()

    async def add_request(self, size_bytes):
        """Record a completed request of *size_bytes* bytes."""
        async with self._lock:
            now = time.time()
            self.requests.append((now, size_bytes))
            self._evict_expired(now)

    def get_current_rates(self):
        """Return ``(requests_per_second, megabytes_per_second)`` for the window."""
        self._evict_expired(time.time())

        if not self.requests:
            return 0, 0

        total_bytes = sum(size for _, size in self.requests)
        rps = len(self.requests) / self.window_size
        mbps = total_bytes / 1024 / 1024 / self.window_size
        return round(rps, 1), round(mbps, 2)
66
+
67
class Monitor:
    """Polls the SEC EDGAR full-text search API for new filings.

    Repeatedly queries the search endpoint for today's submissions (US
    Eastern time), retrieves any new hits in parallel batches, and invokes
    an async callback with each batch of previously-unseen submissions.
    """

    def __init__(self):
        self.last_total = 0  # hit count seen on the previous poll
        # Store the date as a YYYY-MM-DD string so the day-rollover check
        # in _poll (string vs string) can ever match; previously this held
        # a datetime object, so the first poll always spuriously reset.
        self.last_date = _get_current_eastern_date().strftime('%Y-%m-%d')
        self.submissions = []   # all submissions seen for the current date
        self.max_hits = 10000   # EDGAR caps retrievable results per query
        self.limiter = PreciseRateLimiter(5)  # 5 requests per second
        self.rate_monitor = RateMonitor()
        self.headers = headers

    async def _fetch_json(self, session, url):
        """Fetch JSON with rate limiting and throughput monitoring.

        Returns the decoded JSON payload, or None on any error.
        """
        async with self.limiter:
            try:
                async with session.get(url) as response:
                    response.raise_for_status()
                    content = await response.read()
                    await self.rate_monitor.add_request(len(content))
                    return await response.json()
            except Exception as e:
                print(f"Error fetching {url}: {str(e)}")
                return None

    async def _poll(self, base_url, session, poll_interval, quiet):
        """Poll the API until the total hit count grows.

        Returns ``(current_total, data, poll_url)`` once new submissions
        appear for the current Eastern date.
        """
        while True:
            current_date = _get_current_eastern_date()
            date_str = current_date.strftime('%Y-%m-%d')

            # Reset per-day state when the Eastern date rolls over.
            if self.last_date != date_str:
                print(f"New date: {date_str}")
                self.last_total = 0
                self.submissions = []
                self.last_date = date_str

            poll_url = f"{base_url}&startdt={date_str}&enddt={date_str}"
            if not quiet:
                print(f"Polling {poll_url}")

            try:
                data = await self._fetch_json(session, poll_url)
                if data:
                    current_total = data['hits']['total']['value']
                    if current_total > self.last_total:
                        print(f"Found {current_total - self.last_total} new submissions")
                        self.last_total = current_total
                        return current_total, data, poll_url
                    self.last_total = current_total
            except Exception as e:
                print(f"Error in poll: {str(e)}")

            await asyncio.sleep(poll_interval / 1000)

    async def _retrieve_batch(self, session, poll_url, from_positions, quiet):
        """Retrieve a batch of result pages concurrently.

        Args:
            from_positions: ``from`` offsets to request in parallel.

        Returns a flat list of submission hits from the successful pages;
        failed pages are reported and skipped.
        """
        tasks = [
            self._fetch_json(session, f"{poll_url}&from={pos}")
            for pos in from_positions
        ]

        results = await asyncio.gather(*tasks, return_exceptions=True)
        submissions = []

        for result in results:
            if isinstance(result, Exception):
                print(f"Error in batch: {str(result)}")
                continue
            if result and 'hits' in result:
                submissions.extend(result['hits']['hits'])

        return submissions

    async def _retrieve(self, poll_url, initial_data, session, quiet):
        """Retrieve all available submissions using parallel batch paging.

        ``initial_data`` is accepted for interface compatibility but pages
        are re-fetched rather than reusing the poll response.
        """
        batch_size = 10   # number of concurrent requests per batch
        page_size = 100   # results per request
        max_position = min(self.max_hits, self.last_total)
        submissions = []

        # Process in batches of concurrent requests.
        for batch_start in range(0, max_position, batch_size * page_size):
            from_positions = list(range(
                batch_start,
                min(batch_start + batch_size * page_size, max_position),
                page_size,
            ))

            if not quiet:
                print(f"Retrieving batch from positions: {from_positions}")

            batch_submissions = await self._retrieve_batch(
                session, poll_url, from_positions, quiet
            )

            if not batch_submissions:
                break

            submissions.extend(batch_submissions)

            # Fewer results than a full batch means we've reached the end.
            if len(batch_submissions) < len(from_positions) * page_size:
                break

        return submissions

    async def _monitor(self, callback, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
        """Main monitoring loop: poll, retrieve, dedupe, notify.

        Raises:
            ValueError: if *poll_interval* is below the 100 ms floor implied
                by the SEC's 10 requests/second rate limit.
        """
        if poll_interval < 100:
            raise ValueError("SEC rate limit is 10 requests per second, set poll_interval to 100ms or higher")

        # Handle form parameter; '-0' is the catch-all form filter.
        if form is None:
            form = ['-0']
        elif isinstance(form, str):
            form = [form]

        # Resolve ticker(s) to CIK(s); an explicit cik is used only when no
        # ticker was given.
        cik_param = None
        if ticker is not None:
            cik_param = identifier_to_cik(ticker)
        elif cik is not None:
            cik_param = cik if isinstance(cik, list) else [cik]

        # Construct base URL.
        base_url = 'https://efts.sec.gov/LATEST/search-index?forms=' + ','.join(form)

        # Add CIK parameter if specified (EDGAR expects zero-padded 10 digits).
        if cik_param:
            cik_list = ','.join(str(c).zfill(10) for c in cik_param)
            base_url += f"&ciks={cik_list}"

        async with aiohttp.ClientSession(headers=self.headers) as session:
            while True:
                try:
                    # Block until the hit count grows.
                    _, data, poll_url = await self._poll(base_url, session, poll_interval, quiet)

                    # Fetch every page of results in parallel.
                    submissions = await self._retrieve(poll_url, data, session, quiet)

                    # Keep only submissions we have not already reported.
                    existing_ids = {sub['_id'] for sub in self.submissions}
                    new_submissions = [
                        sub for sub in submissions
                        if sub['_id'] not in existing_ids
                    ]

                    if new_submissions:
                        self.submissions.extend(new_submissions)
                        if callback:
                            await callback(new_submissions)

                    reqs_per_sec, mb_per_sec = self.rate_monitor.get_current_rates()
                    if not quiet:
                        print(f"Current rates: {reqs_per_sec} req/s, {mb_per_sec} MB/s")

                except Exception as e:
                    print(f"Error in monitor: {str(e)}")
                    await asyncio.sleep(poll_interval / 1000)

                await asyncio.sleep(poll_interval / 1000)

    def monitor_submissions(self, callback=None, form=None, cik=None, ticker=None, poll_interval=1000, quiet=True):
        """Start the monitoring process (blocking; runs its own event loop)."""
        asyncio.run(self._monitor(callback, form, cik, ticker, poll_interval, quiet))
@@ -0,0 +1 @@
1
+ from .mulebot import MuleBot
@@ -0,0 +1,35 @@
1
+ import requests
2
+ from datamule.global_vars import headers
3
+ from datamule.helper import identifier_to_cik
4
+ from datamule import Parser
5
+
6
+ parser = Parser()
7
+
8
def get_company_concept(ticker):
    """Fetch and parse XBRL company facts for *ticker* from the SEC API.

    Returns the parsed concept tables, excluding any whose label is None.
    """
    cik = identifier_to_cik(ticker)[0]
    url = f'https://data.sec.gov/api/xbrl/companyfacts/CIK{str(cik).zfill(10)}.json'
    data = requests.get(url, headers=headers).json()

    tables = parser.parse_company_concepts(data)

    # drop tables where label is None
    return [table for table in tables if table['label'] is not None]
21
+
22
def select_dict_by_title(data, title):
    """Depth-first search a nested dict/list structure for the first dict
    whose ``'title'`` key equals *title*.

    Returns the matching dict, or None if no match exists.
    """
    if isinstance(data, dict):
        if data.get('title') == title:
            return data
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Leaf values (strings, numbers, ...) cannot contain a match.
        return None

    for child in children:
        found = select_dict_by_title(child, title)
        if found:
            return found
    return None
@@ -0,0 +1,130 @@
1
+ import openai
2
+ import json
3
+
4
+ from datamule.helper import identifier_to_cik
5
+ from datamule import Downloader, Parser
6
+ from .search import search_filing
7
+ from .tools import tools, return_title_tool
8
+ from .helper import get_company_concept, select_dict_by_title
9
+
10
+ downloader = Downloader()
11
+ parser = Parser()
12
+
13
+
14
class MuleBot:
    """Tool-calling OpenAI assistant for SEC/EDGAR questions.

    Every public response is a dict of the form
    ``{'key': <type>, 'value': <payload>}`` where ``key`` is one of
    'text', 'table', 'list', or 'filing'.
    """

    def __init__(self, api_key):
        self.client = openai.OpenAI(api_key=api_key)
        self.messages = [
            {"role": "system", "content": "You are a helpful, but concise, assistant to assist with questions related to the Securities and Exchanges Commission. You are allowed to guess tickers."}
        ]
        self.total_tokens = 0  # cumulative token usage across completion calls

    def process_message(self, user_input):
        """Send *user_input* to the model and dispatch any tool calls.

        Returns a ``{'key': ..., 'value': ...}`` dict on every path,
        including the error path.
        """
        # NOTE: this aliases (and therefore persists to) self.messages, so
        # the conversation history accumulates across calls.
        new_message_chain = self.messages
        new_message_chain.append({"role": "user", "content": user_input})

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=new_message_chain,
                tools=tools,
                tool_choice="auto"
            )

            self.total_tokens += response.usage.total_tokens
            assistant_message = response.choices[0].message

            if assistant_message.content is None:
                assistant_message.content = "I'm processing your request."

            new_message_chain.append({"role": "assistant", "content": assistant_message.content})

            tool_calls = assistant_message.tool_calls
            if tool_calls is None:
                return {'key': 'text', 'value': assistant_message.content}

            for tool_call in tool_calls:
                print(f"Tool call: {tool_call.function.name}")
                if tool_call.function.name == "identifier_to_cik":
                    function_args = json.loads(tool_call.function.arguments)
                    print(f"Function args: {function_args}")

                    cik = identifier_to_cik(function_args["ticker"])
                    return {'key': 'text', 'value': cik}
                elif tool_call.function.name == "get_company_concept":
                    function_args = json.loads(tool_call.function.arguments)
                    print(f"Function args: {function_args}")
                    table_dict_list = get_company_concept(function_args["ticker"])
                    return {'key': 'table', 'value': table_dict_list}
                elif tool_call.function.name == "get_filing_urls":
                    function_args = json.loads(tool_call.function.arguments)
                    print(f"Function args: {function_args}")
                    result = downloader.download(**function_args, return_urls=True)
                    return {'key': 'list', 'value': result}
                elif tool_call.function.name == "find_filing_section_by_title":
                    function_args = json.loads(tool_call.function.arguments)
                    print(f"Function args: {function_args}")
                    # Parse the filing, fuzzy-match candidate section titles,
                    # then let the model pick the closest one.
                    data = parser.parse_filing(function_args["url"])
                    section_dicts = search_filing(query=function_args["title"], nested_dict=data, score_cutoff=0.3)

                    # feed titles back to assistant
                    titles = [section['title'] for section in section_dicts]
                    new_message_chain.append({"role": "assistant", "content": f"Which of these titles is closest: {','.join(titles)}"})

                    title_response = self.client.chat.completions.create(
                        model="gpt-4o-mini",
                        messages=new_message_chain,
                        tools=[return_title_tool],
                        tool_choice="required"
                    )

                    title_tool_call = title_response.choices[0].message.tool_calls[0]
                    title = json.loads(title_tool_call.function.arguments)['title']
                    print(f"Selected title: {title}")

                    # Return the full parsed filing plus the chosen section
                    # title; the caller extracts/renders the section.
                    return {'key': 'filing', 'value': {'data': data, 'title': title}}

            return {'key': 'text', 'value': 'No tool call was made.'}

        except Exception as e:
            # FIX: previously returned a bare string here, which broke every
            # caller that indexes response['key']; keep the uniform dict shape.
            return {'key': 'text', 'value': f"An error occurred: {str(e)}"}

    def get_total_tokens(self):
        """Return cumulative tokens used by all completion calls."""
        return self.total_tokens

    def run(self):
        """Basic chatbot loop"""
        print("MuleBot: Hello! I'm here to assist you with questions related to the Securities and Exchange Commission. Type 'quit', 'exit', or 'bye' to end the conversation.")
        while True:
            user_input = input("You: ")
            if user_input.lower() in ['quit', 'exit', 'bye']:
                print("MuleBot: Goodbye!")
                break

            response = self.process_message(user_input)
            # Every response type was printed identically by the old
            # per-type if/elif chain; a single print is equivalent.
            print(response['value'])
@@ -0,0 +1 @@
1
+ from .server import MuleBotServer
@@ -0,0 +1,87 @@
1
+ import os
2
+ from flask import Flask, request, jsonify, render_template
3
+ from datamule.mulebot import MuleBot
4
+ from datamule.filing_viewer import create_interactive_filing, create_valid_id
5
+
6
class MuleBotServer:
    """Flask wrapper exposing MuleBot through a minimal chat web UI."""

    def __init__(self, template='chat-minimalist.html'):
        base_dir = os.path.dirname(__file__)
        self.app = Flask(
            __name__,
            template_folder=os.path.abspath(os.path.join(base_dir, 'templates')),
            static_folder=os.path.abspath(os.path.join(base_dir, 'static')),
        )
        self.mulebot = None  # created lazily via set_api_key()
        self.template = template
        self.setup_routes()

    def setup_routes(self):
        """Register the chat UI and API routes on the Flask app."""

        @self.app.route('/')
        def home():
            return render_template(self.template)

        @self.app.route('/chat-with-prompt')
        def chat_with_prompt():
            # Pre-seed the chat input from the 'prompt' query parameter.
            return render_template(
                self.template,
                prefilled_prompt=request.args.get('prompt', ''),
            )

        @self.app.route('/chat', methods=['POST'])
        def chat():
            user_input = request.json['message']

            # Delegate to MuleBot; its response dict's 'key' decides how
            # the front end renders the payload.
            response = self.mulebot.process_message(user_input)
            response_type = response['key']

            if response_type == 'text':
                # Plain text goes straight into the chat transcript.
                chat_response = {'type': 'text', 'content': response['value']}
            elif response_type == 'table':
                # Tables render in the artifact window.
                chat_response = {
                    'type': 'artifact',
                    'content': response['value'],
                    'artifact_type': 'artifact-table',
                }
            elif response_type == 'list':
                chat_response = {
                    'type': 'artifact',
                    'content': response['value'],
                    'artifact_type': 'artifact-list',
                }
            elif response_type == 'filing':
                # Render an interactive filing view plus the raw data for
                # JSON export, anchored at the selected section.
                payload = response['value']
                chat_response = {
                    'type': 'artifact',
                    'content': create_interactive_filing(payload['data']),
                    'data': payload['data'],
                    'section_id': create_valid_id(payload['title']),
                    'artifact_type': 'artifact-filing',
                }
            else:
                chat_response = {
                    'type': 'unknown',
                    'content': 'Unsupported response type',
                }

            return jsonify({
                'response': chat_response,
                'total_tokens': self.mulebot.get_total_tokens(),
            })

    def set_api_key(self, api_key):
        """Create the MuleBot instance backing the /chat endpoint."""
        self.mulebot = MuleBot(api_key)

    def run(self, debug=False, host='0.0.0.0', port=5000):
        """Start the Flask server; requires set_api_key() first."""
        if not self.mulebot:
            raise ValueError("API key not set. Please call set_api_key() before running the server.")
        self.app.run(debug=debug, host=host, port=port)