datamule 0.381__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamule/__init__.py +46 -86
- datamule/book/book.py +34 -0
- datamule/book/eftsquery.py +127 -0
- datamule/book/xbrl_retriever.py +88 -0
- datamule/config.py +29 -0
- datamule/data/company_former_names.csv +8148 -8148
- datamule/data/company_metadata.csv +10049 -10049
- datamule/data/company_tickers.csv +9999 -10168
- datamule/data/sec-glossary.csv +728 -728
- datamule/data/xbrl_descriptions.csv +10024 -10024
- datamule/document.py +279 -0
- datamule/downloader/downloader.py +374 -0
- datamule/downloader/premiumdownloader.py +335 -0
- datamule/helper.py +123 -136
- datamule/mapping_dicts/txt_mapping_dicts.py +232 -0
- datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
- datamule/monitor.py +238 -0
- datamule/mulebot/__init__.py +1 -1
- datamule/mulebot/helper.py +34 -34
- datamule/mulebot/mulebot.py +129 -129
- datamule/mulebot/mulebot_server/server.py +86 -86
- datamule/mulebot/mulebot_server/static/css/minimalist.css +173 -173
- datamule/mulebot/mulebot_server/static/scripts/artifacts.js +67 -67
- datamule/mulebot/mulebot_server/static/scripts/chat.js +91 -91
- datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +55 -55
- datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +14 -14
- datamule/mulebot/mulebot_server/static/scripts/main.js +56 -56
- datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +26 -26
- datamule/mulebot/mulebot_server/static/scripts/suggestions.js +46 -46
- datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +128 -128
- datamule/mulebot/mulebot_server/static/scripts/utils.js +27 -27
- datamule/mulebot/mulebot_server/templates/chat-minimalist.html +90 -90
- datamule/mulebot/search.py +51 -51
- datamule/mulebot/tools.py +82 -82
- datamule/packageupdater.py +207 -0
- datamule/portfolio.py +106 -0
- datamule/submission.py +76 -0
- datamule-1.0.2.dist-info/METADATA +27 -0
- datamule-1.0.2.dist-info/RECORD +43 -0
- {datamule-0.381.dist-info → datamule-1.0.2.dist-info}/WHEEL +1 -1
- datamule/data/filing_types.csv +0 -485
- datamule/data/ftd_locations.csv +0 -388
- datamule/datamule_api.py +0 -21
- datamule/dataset_builder/_init.py +0 -1
- datamule/dataset_builder/dataset_builder.py +0 -260
- datamule/downloader/dropbox_downloader.py +0 -225
- datamule/downloader/ftd.py +0 -216
- datamule/downloader/information_table_13f.py +0 -231
- datamule/downloader/sec_downloader.py +0 -635
- datamule/filing_viewer/__init__.py +0 -1
- datamule/filing_viewer/filing_viewer.py +0 -256
- datamule/global_vars.py +0 -202
- datamule/parser/__init__.py +0 -1
- datamule/parser/basic_10k_parser.py +0 -82
- datamule/parser/basic_10q_parser.py +0 -73
- datamule/parser/basic_13d_parser.py +0 -58
- datamule/parser/basic_13g_parser.py +0 -61
- datamule/parser/basic_8k_parser.py +0 -84
- datamule/parser/company_concepts_parser.py +0 -0
- datamule/parser/form_d_parser.py +0 -70
- datamule/parser/generalized_item_parser.py +0 -78
- datamule/parser/generalized_xml_parser.py +0 -0
- datamule/parser/helper.py +0 -75
- datamule/parser/information_table_parser_13fhr.py +0 -41
- datamule/parser/insider_trading_parser.py +0 -158
- datamule/parser/mappings.py +0 -95
- datamule/parser/n_port_p_parser.py +0 -70
- datamule/parser/sec_parser.py +0 -79
- datamule/parser/sgml_parser.py +0 -180
- datamule/sec_filing.py +0 -126
- datamule/sec_search.py +0 -20
- datamule-0.381.dist-info/METADATA +0 -132
- datamule-0.381.dist-info/RECORD +0 -61
- /datamule/{downloader → book}/__init__.py +0 -0
- {datamule-0.381.dist-info → datamule-1.0.2.dist-info}/top_level.txt +0 -0
@@ -1,129 +1,129 @@
|
|
1
|
-
// tableArtifacts.js
|
2
|
-
import { renderMetadata, downloadCSV } from './utils.js';
|
3
|
-
import { allArtifacts } from './artifacts.js';
|
4
|
-
|
5
|
-
export function renderTableArtifact(tableData) {
|
6
|
-
let html = `
|
7
|
-
<div class="mb-3 select-wrapper">
|
8
|
-
<input type="text" id="artifact-select" class="form-control" placeholder="Select a table..." value="${tableData.fact}">
|
9
|
-
<div id="autocomplete-list" class="autocomplete-items"></div>
|
10
|
-
</div>
|
11
|
-
<div class="mb-3">
|
12
|
-
<button id="download-csv" class="btn btn-secondary me-2">Download Selected Table (CSV)</button>
|
13
|
-
<button id="download-all-zip" class="btn btn-secondary">Download All Tables (ZIP)</button>
|
14
|
-
</div>
|
15
|
-
<div id="metadata-content"></div>
|
16
|
-
`;
|
17
|
-
|
18
|
-
if (tableData.table && tableData.table.length > 0) {
|
19
|
-
html += '<table class="table table-striped mt-3"><thead><tr>';
|
20
|
-
Object.keys(tableData.table[0]).forEach(header => {
|
21
|
-
html += `<th>${header}</th>`;
|
22
|
-
});
|
23
|
-
html += '</tr></thead><tbody>';
|
24
|
-
tableData.table.forEach(row => {
|
25
|
-
html += '<tr>';
|
26
|
-
Object.values(row).forEach(cell => {
|
27
|
-
html += `<td>${cell}</td>`;
|
28
|
-
});
|
29
|
-
html += '</tr>';
|
30
|
-
});
|
31
|
-
html += '</tbody></table>';
|
32
|
-
} else {
|
33
|
-
html += '<p>No table data available.</p>';
|
34
|
-
}
|
35
|
-
|
36
|
-
const artifactContent = document.getElementById('artifact-content');
|
37
|
-
if (artifactContent) {
|
38
|
-
artifactContent.innerHTML = html;
|
39
|
-
}
|
40
|
-
renderMetadata(tableData);
|
41
|
-
setupTableEventListeners(tableData);
|
42
|
-
}
|
43
|
-
|
44
|
-
function setupTableEventListeners(tableData) {
|
45
|
-
const artifactSelect = document.getElementById('artifact-select');
|
46
|
-
const downloadCsvBtn = document.getElementById('download-csv');
|
47
|
-
const downloadAllZipBtn = document.getElementById('download-all-zip');
|
48
|
-
|
49
|
-
if (artifactSelect) {
|
50
|
-
artifactSelect.addEventListener('input', handleArtifactSelectInput);
|
51
|
-
artifactSelect.addEventListener('focus', handleArtifactSelectFocus);
|
52
|
-
}
|
53
|
-
|
54
|
-
if (downloadCsvBtn) {
|
55
|
-
downloadCsvBtn.addEventListener('click', () => downloadCSV(tableData));
|
56
|
-
}
|
57
|
-
|
58
|
-
if (downloadAllZipBtn) {
|
59
|
-
downloadAllZipBtn.addEventListener('click', handleDownloadAllZip);
|
60
|
-
}
|
61
|
-
}
|
62
|
-
|
63
|
-
export function handleArtifactSelectInput(e) {
|
64
|
-
const inputValue = e.target.value.toLowerCase().trim();
|
65
|
-
const filteredTables = allArtifacts.filter(table =>
|
66
|
-
table.fact.toLowerCase().includes(inputValue)
|
67
|
-
);
|
68
|
-
createAutocompleteList(filteredTables);
|
69
|
-
}
|
70
|
-
|
71
|
-
export function handleArtifactSelectFocus() {
|
72
|
-
createAutocompleteList(allArtifacts);
|
73
|
-
}
|
74
|
-
|
75
|
-
function createAutocompleteList(tables) {
|
76
|
-
const autocompleteList = document.getElementById('autocomplete-list');
|
77
|
-
if (!autocompleteList) return;
|
78
|
-
|
79
|
-
autocompleteList.innerHTML = '';
|
80
|
-
autocompleteList.style.display = 'block';
|
81
|
-
|
82
|
-
if (tables.length === 0) {
|
83
|
-
autocompleteList.innerHTML = '<div style="color: #999;">No matching tables found</div>';
|
84
|
-
} else {
|
85
|
-
tables.forEach(table => {
|
86
|
-
const div = document.createElement("div");
|
87
|
-
div.textContent = table.fact;
|
88
|
-
div.addEventListener("click", function () {
|
89
|
-
document.getElementById('artifact-select').value = this.textContent;
|
90
|
-
renderTableArtifact(table);
|
91
|
-
closeAutocompleteList();
|
92
|
-
});
|
93
|
-
autocompleteList.appendChild(div);
|
94
|
-
});
|
95
|
-
}
|
96
|
-
}
|
97
|
-
|
98
|
-
export function handleDocumentClick(e) {
|
99
|
-
if (e.target.id !== 'artifact-select') {
|
100
|
-
closeAutocompleteList();
|
101
|
-
}
|
102
|
-
}
|
103
|
-
|
104
|
-
function closeAutocompleteList() {
|
105
|
-
const autocompleteList = document.getElementById('autocomplete-list');
|
106
|
-
if (autocompleteList) {
|
107
|
-
autocompleteList.style.display = 'none';
|
108
|
-
}
|
109
|
-
}
|
110
|
-
|
111
|
-
async function handleDownloadAllZip() {
|
112
|
-
const tables = allArtifacts.filter(artifact => artifact.type === 'artifact-table');
|
113
|
-
if (tables.length === 0) {
|
114
|
-
alert('No tables available to download.');
|
115
|
-
return;
|
116
|
-
}
|
117
|
-
const zip = new JSZip();
|
118
|
-
tables.forEach(table => {
|
119
|
-
let csv = '';
|
120
|
-
const headers = Object.keys(table.table[0]);
|
121
|
-
csv += headers.join(',') + '\n';
|
122
|
-
table.table.forEach(row => {
|
123
|
-
csv += Object.values(row).join(',') + '\n';
|
124
|
-
});
|
125
|
-
zip.file(`${table.fact}.csv`, csv);
|
126
|
-
});
|
127
|
-
const content = await zip.generateAsync({ type: "blob" });
|
128
|
-
saveAs(content, "all_tables.zip");
|
1
|
+
// tableArtifacts.js
|
2
|
+
import { renderMetadata, downloadCSV } from './utils.js';
|
3
|
+
import { allArtifacts } from './artifacts.js';
|
4
|
+
|
5
|
+
export function renderTableArtifact(tableData) {
|
6
|
+
let html = `
|
7
|
+
<div class="mb-3 select-wrapper">
|
8
|
+
<input type="text" id="artifact-select" class="form-control" placeholder="Select a table..." value="${tableData.fact}">
|
9
|
+
<div id="autocomplete-list" class="autocomplete-items"></div>
|
10
|
+
</div>
|
11
|
+
<div class="mb-3">
|
12
|
+
<button id="download-csv" class="btn btn-secondary me-2">Download Selected Table (CSV)</button>
|
13
|
+
<button id="download-all-zip" class="btn btn-secondary">Download All Tables (ZIP)</button>
|
14
|
+
</div>
|
15
|
+
<div id="metadata-content"></div>
|
16
|
+
`;
|
17
|
+
|
18
|
+
if (tableData.table && tableData.table.length > 0) {
|
19
|
+
html += '<table class="table table-striped mt-3"><thead><tr>';
|
20
|
+
Object.keys(tableData.table[0]).forEach(header => {
|
21
|
+
html += `<th>${header}</th>`;
|
22
|
+
});
|
23
|
+
html += '</tr></thead><tbody>';
|
24
|
+
tableData.table.forEach(row => {
|
25
|
+
html += '<tr>';
|
26
|
+
Object.values(row).forEach(cell => {
|
27
|
+
html += `<td>${cell}</td>`;
|
28
|
+
});
|
29
|
+
html += '</tr>';
|
30
|
+
});
|
31
|
+
html += '</tbody></table>';
|
32
|
+
} else {
|
33
|
+
html += '<p>No table data available.</p>';
|
34
|
+
}
|
35
|
+
|
36
|
+
const artifactContent = document.getElementById('artifact-content');
|
37
|
+
if (artifactContent) {
|
38
|
+
artifactContent.innerHTML = html;
|
39
|
+
}
|
40
|
+
renderMetadata(tableData);
|
41
|
+
setupTableEventListeners(tableData);
|
42
|
+
}
|
43
|
+
|
44
|
+
function setupTableEventListeners(tableData) {
|
45
|
+
const artifactSelect = document.getElementById('artifact-select');
|
46
|
+
const downloadCsvBtn = document.getElementById('download-csv');
|
47
|
+
const downloadAllZipBtn = document.getElementById('download-all-zip');
|
48
|
+
|
49
|
+
if (artifactSelect) {
|
50
|
+
artifactSelect.addEventListener('input', handleArtifactSelectInput);
|
51
|
+
artifactSelect.addEventListener('focus', handleArtifactSelectFocus);
|
52
|
+
}
|
53
|
+
|
54
|
+
if (downloadCsvBtn) {
|
55
|
+
downloadCsvBtn.addEventListener('click', () => downloadCSV(tableData));
|
56
|
+
}
|
57
|
+
|
58
|
+
if (downloadAllZipBtn) {
|
59
|
+
downloadAllZipBtn.addEventListener('click', handleDownloadAllZip);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
export function handleArtifactSelectInput(e) {
|
64
|
+
const inputValue = e.target.value.toLowerCase().trim();
|
65
|
+
const filteredTables = allArtifacts.filter(table =>
|
66
|
+
table.fact.toLowerCase().includes(inputValue)
|
67
|
+
);
|
68
|
+
createAutocompleteList(filteredTables);
|
69
|
+
}
|
70
|
+
|
71
|
+
export function handleArtifactSelectFocus() {
|
72
|
+
createAutocompleteList(allArtifacts);
|
73
|
+
}
|
74
|
+
|
75
|
+
function createAutocompleteList(tables) {
|
76
|
+
const autocompleteList = document.getElementById('autocomplete-list');
|
77
|
+
if (!autocompleteList) return;
|
78
|
+
|
79
|
+
autocompleteList.innerHTML = '';
|
80
|
+
autocompleteList.style.display = 'block';
|
81
|
+
|
82
|
+
if (tables.length === 0) {
|
83
|
+
autocompleteList.innerHTML = '<div style="color: #999;">No matching tables found</div>';
|
84
|
+
} else {
|
85
|
+
tables.forEach(table => {
|
86
|
+
const div = document.createElement("div");
|
87
|
+
div.textContent = table.fact;
|
88
|
+
div.addEventListener("click", function () {
|
89
|
+
document.getElementById('artifact-select').value = this.textContent;
|
90
|
+
renderTableArtifact(table);
|
91
|
+
closeAutocompleteList();
|
92
|
+
});
|
93
|
+
autocompleteList.appendChild(div);
|
94
|
+
});
|
95
|
+
}
|
96
|
+
}
|
97
|
+
|
98
|
+
export function handleDocumentClick(e) {
|
99
|
+
if (e.target.id !== 'artifact-select') {
|
100
|
+
closeAutocompleteList();
|
101
|
+
}
|
102
|
+
}
|
103
|
+
|
104
|
+
function closeAutocompleteList() {
|
105
|
+
const autocompleteList = document.getElementById('autocomplete-list');
|
106
|
+
if (autocompleteList) {
|
107
|
+
autocompleteList.style.display = 'none';
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
async function handleDownloadAllZip() {
|
112
|
+
const tables = allArtifacts.filter(artifact => artifact.type === 'artifact-table');
|
113
|
+
if (tables.length === 0) {
|
114
|
+
alert('No tables available to download.');
|
115
|
+
return;
|
116
|
+
}
|
117
|
+
const zip = new JSZip();
|
118
|
+
tables.forEach(table => {
|
119
|
+
let csv = '';
|
120
|
+
const headers = Object.keys(table.table[0]);
|
121
|
+
csv += headers.join(',') + '\n';
|
122
|
+
table.table.forEach(row => {
|
123
|
+
csv += Object.values(row).join(',') + '\n';
|
124
|
+
});
|
125
|
+
zip.file(`${table.fact}.csv`, csv);
|
126
|
+
});
|
127
|
+
const content = await zip.generateAsync({ type: "blob" });
|
128
|
+
saveAs(content, "all_tables.zip");
|
129
129
|
}
|
@@ -1,28 +1,28 @@
|
|
1
|
-
// utils.js
|
2
|
-
export function renderMetadata(artifactData) {
|
3
|
-
const metadataHtml = `
|
4
|
-
<div class="card mb-3">
|
5
|
-
<div class="card-body">
|
6
|
-
<h5 class="card-title">Metadata</h5>
|
7
|
-
<p><strong>Fact:</strong> ${artifactData.fact}</p>
|
8
|
-
${artifactData.cik ? `<p><strong>CIK:</strong> ${artifactData.cik}</p>` : ''}
|
9
|
-
${artifactData.category ? `<p><strong>Category:</strong> ${artifactData.category}</p>` : ''}
|
10
|
-
${artifactData.label ? `<p><strong>Label:</strong> ${artifactData.label}</p>` : ''}
|
11
|
-
${artifactData.description ? `<p><strong>Description:</strong> ${artifactData.description}</p>` : ''}
|
12
|
-
${artifactData.unit ? `<p><strong>Unit:</strong> ${artifactData.unit}</p>` : ''}
|
13
|
-
</div>
|
14
|
-
</div>
|
15
|
-
`;
|
16
|
-
document.getElementById('metadata-content').innerHTML = metadataHtml;
|
17
|
-
}
|
18
|
-
|
19
|
-
export function downloadCSV(table) {
|
20
|
-
let csv = '';
|
21
|
-
const headers = Object.keys(table.table[0]);
|
22
|
-
csv += headers.join(',') + '\n';
|
23
|
-
table.table.forEach(row => {
|
24
|
-
csv += Object.values(row).join(',') + '\n';
|
25
|
-
});
|
26
|
-
const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
|
27
|
-
saveAs(blob, `${table.fact}.csv`);
|
1
|
+
// utils.js
|
2
|
+
export function renderMetadata(artifactData) {
|
3
|
+
const metadataHtml = `
|
4
|
+
<div class="card mb-3">
|
5
|
+
<div class="card-body">
|
6
|
+
<h5 class="card-title">Metadata</h5>
|
7
|
+
<p><strong>Fact:</strong> ${artifactData.fact}</p>
|
8
|
+
${artifactData.cik ? `<p><strong>CIK:</strong> ${artifactData.cik}</p>` : ''}
|
9
|
+
${artifactData.category ? `<p><strong>Category:</strong> ${artifactData.category}</p>` : ''}
|
10
|
+
${artifactData.label ? `<p><strong>Label:</strong> ${artifactData.label}</p>` : ''}
|
11
|
+
${artifactData.description ? `<p><strong>Description:</strong> ${artifactData.description}</p>` : ''}
|
12
|
+
${artifactData.unit ? `<p><strong>Unit:</strong> ${artifactData.unit}</p>` : ''}
|
13
|
+
</div>
|
14
|
+
</div>
|
15
|
+
`;
|
16
|
+
document.getElementById('metadata-content').innerHTML = metadataHtml;
|
17
|
+
}
|
18
|
+
|
19
|
+
export function downloadCSV(table) {
|
20
|
+
let csv = '';
|
21
|
+
const headers = Object.keys(table.table[0]);
|
22
|
+
csv += headers.join(',') + '\n';
|
23
|
+
table.table.forEach(row => {
|
24
|
+
csv += Object.values(row).join(',') + '\n';
|
25
|
+
});
|
26
|
+
const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
|
27
|
+
saveAs(blob, `${table.fact}.csv`);
|
28
28
|
}
|
@@ -1,91 +1,91 @@
|
|
1
|
-
<!DOCTYPE html>
|
2
|
-
<html lang="en">
|
3
|
-
|
4
|
-
<head>
|
5
|
-
<meta charset="UTF-8">
|
6
|
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7
|
-
<title>Mulebot</title>
|
8
|
-
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.1.3/css/bootstrap.min.css" rel="stylesheet">
|
9
|
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js"></script>
|
10
|
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js"></script>
|
11
|
-
<link rel="stylesheet" href="{{ url_for('static', filename='css/minimalist.css') }}">
|
12
|
-
</head>
|
13
|
-
|
14
|
-
<body>
|
15
|
-
<div class="container mt-5">
|
16
|
-
<div class="d-flex justify-content-between align-items-center mb-4">
|
17
|
-
<h1>Mulebot</h1>
|
18
|
-
<div>
|
19
|
-
<a href="https://github.com/john-friedman/datamule-python" target="_blank" class="social-btn">
|
20
|
-
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
21
|
-
class="bi bi-github" viewBox="0 0 16 16">
|
22
|
-
<path
|
23
|
-
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z" />
|
24
|
-
</svg>
|
25
|
-
GitHub
|
26
|
-
</a>
|
27
|
-
<a href="https://datamule.xyz" target="_blank" class="social-btn">
|
28
|
-
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
29
|
-
viewBox="0 0 16 16">
|
30
|
-
<path
|
31
|
-
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855-.143.268-.276.56-.395.872.705.157 1.472.257 2.282.287V1.077zM4.249 3.539c.142-.384.304-.744.481-1.078a6.7 6.7 0 0 1 .597-.933A7.01 7.01 0 0 0 3.051 3.05c.362.184.763.349 1.198.49zM3.509 7.5c.036-1.07.188-2.087.436-3.008a9.124 9.124 0 0 1-1.565-.667A6.964 6.964 0 0 0 1.018 7.5h2.49zm1.4-2.741a12.344 12.344 0 0 0-.4 2.741H7.5V5.091c-.91-.03-1.783-.145-2.591-.332zM8.5 5.09V7.5h2.99a12.342 12.342 0 0 0-.399-2.741c-.808.187-1.681.301-2.591.332zM4.51 8.5c.035.987.176 1.914.399 2.741A13.612 13.612 0 0 1 7.5 10.91V8.5H4.51zm3.99 0v2.409c.91.03 1.783.145 2.591.332.223-.827.364-1.754.4-2.741H8.5zm-3.282 3.696c.12.312.252.604.395.872.552 1.035 1.218 1.65 1.887 1.855V11.91c-.81.03-1.577.13-2.282.287zm.11 2.276a6.696 6.696 0 0 1-.598-.933 8.853 8.853 0 0 1-.481-1.079 8.38 8.38 0 0 0-1.198.49 7.01 7.01 0 0 0 2.276 1.522zm-1.383-2.964A13.36 13.36 0 0 1 3.508 8.5h-2.49a6.963 6.963 0 0 0 1.362 3.675c.47-.258.995-.482 1.565-.667zm6.728 2.964a7.009 7.009 0 0 0 2.275-1.521 8.376 8.376 0 0 0-1.197-.49 8.853 8.853 0 0 1-.481 1.078 6.688 6.688 0 0 1-.597.933zM8.5 11.909v3.014c.67-.204 1.335-.82 1.887-1.855.143-.268.276-.56.395-.872A12.63 12.63 0 0 0 8.5 11.91zm3.555-.401c.57.185 1.095.409 1.565.667A6.963 6.963 0 0 0 14.982 8.5h-2.49a13.36 13.36 0 0 1-.437 3.008zM14.982 7.5a6.963 6.963 0 0 0-1.362-3.675c-.47.258-.995.482-1.565.667.248.92.4 1.938.437 3.008h2.49zM11.27 2.461c.177.334.339.694.482 1.078a8.368 8.368 0 0 0 1.196-.49 7.01 7.01 0 0 0-2.275-1.52c.218.283.418.597.597.932zm-.488 1.343a7.765 7.765 0 0 0-.395-.872C9.835 1.897 9.17 1.282 8.5 1.077V4.09c.81-.03 1.577-.13 2.282-.287z" />
|
32
|
-
</svg>
|
33
|
-
DataMule Website
|
34
|
-
</a>
|
35
|
-
</div>
|
36
|
-
</div>
|
37
|
-
<div class="row">
|
38
|
-
<div class="col-md-6">
|
39
|
-
<div class="card mb-3">
|
40
|
-
<div class="card-body">
|
41
|
-
<div id="chat-outer-container">
|
42
|
-
<div id="chat-container" class="mb-3"></div>
|
43
|
-
<div id="thinking-indicator" class="thinking-indicator">
|
44
|
-
<span>Bot is thinking</span><span class="dot-animation">...</span>
|
45
|
-
</div>
|
46
|
-
</div>
|
47
|
-
<form id="chat-form">
|
48
|
-
<div class="input-group">
|
49
|
-
<input type="text" id="user-input" class="form-control"
|
50
|
-
placeholder="Type your message...">
|
51
|
-
<button type="submit" class="btn btn-primary">Send</button>
|
52
|
-
</div>
|
53
|
-
</form>
|
54
|
-
</div>
|
55
|
-
</div>
|
56
|
-
</div>
|
57
|
-
<div class="col-md-6">
|
58
|
-
<button id="toggle-artifacts" class="btn btn-secondary mb-3">Show Artifacts</button>
|
59
|
-
<div id="artifact-container" class="card mb-3" style="display: none;">
|
60
|
-
<div class="card-body">
|
61
|
-
<div id="artifact-content"></div>
|
62
|
-
</div>
|
63
|
-
</div>
|
64
|
-
<div class="suggestion-box">
|
65
|
-
<h5 class="mb-2">Suggested Commands:</h5>
|
66
|
-
<ul class="list-unstyled">
|
67
|
-
<li class="suggestion-item">Get all company facts for Tesla</li>
|
68
|
-
<li class="suggestion-item">Get Ford's CIK</li>
|
69
|
-
<li class="suggestion-item">Get all 10-K urls for META</li>
|
70
|
-
<li class="suggestion-item">Get the management discussion and analysis section from
|
71
|
-
https://www.sec.gov/Archives/edgar/data/1318605/000095017022000796/tsla-20211231.htm</li>
|
72
|
-
</ul>
|
73
|
-
</div>
|
74
|
-
</div>
|
75
|
-
</div>
|
76
|
-
</div>
|
77
|
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
|
78
|
-
|
79
|
-
<!-- Application scripts -->
|
80
|
-
<script type="module" src="{{ url_for('static', filename='scripts/utils.js') }}"></script>
|
81
|
-
<script type="module" src="{{ url_for('static', filename='scripts/chat.js') }}"></script>
|
82
|
-
<script type="module" src="{{ url_for('static', filename='scripts/listArtifacts.js') }}"></script>
|
83
|
-
<script type="module" src="{{ url_for('static', filename='scripts/filingArtifacts.js') }}"></script>
|
84
|
-
<script type="module" src="{{ url_for('static', filename='scripts/tableArtifacts.js') }}"></script>
|
85
|
-
<script type="module" src="{{ url_for('static', filename='scripts/artifacts.js') }}"></script>
|
86
|
-
<script type="module" src="{{ url_for('static', filename='scripts/suggestions.js') }}"></script>
|
87
|
-
<script type="module" src="{{ url_for('static', filename='scripts/prefilledPrompt.js') }}"></script>
|
88
|
-
<script type="module" src="{{ url_for('static', filename='scripts/main.js') }}"></script>
|
89
|
-
</body>
|
90
|
-
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang="en">
|
3
|
+
|
4
|
+
<head>
|
5
|
+
<meta charset="UTF-8">
|
6
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
7
|
+
<title>Mulebot</title>
|
8
|
+
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.1.3/css/bootstrap.min.css" rel="stylesheet">
|
9
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js"></script>
|
10
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.7.1/jszip.min.js"></script>
|
11
|
+
<link rel="stylesheet" href="{{ url_for('static', filename='css/minimalist.css') }}">
|
12
|
+
</head>
|
13
|
+
|
14
|
+
<body>
|
15
|
+
<div class="container mt-5">
|
16
|
+
<div class="d-flex justify-content-between align-items-center mb-4">
|
17
|
+
<h1>Mulebot</h1>
|
18
|
+
<div>
|
19
|
+
<a href="https://github.com/john-friedman/datamule-python" target="_blank" class="social-btn">
|
20
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
21
|
+
class="bi bi-github" viewBox="0 0 16 16">
|
22
|
+
<path
|
23
|
+
d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z" />
|
24
|
+
</svg>
|
25
|
+
GitHub
|
26
|
+
</a>
|
27
|
+
<a href="https://datamule.xyz" target="_blank" class="social-btn">
|
28
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor"
|
29
|
+
viewBox="0 0 16 16">
|
30
|
+
<path
|
31
|
+
d="M0 8a8 8 0 1 1 16 0A8 8 0 0 1 0 8zm7.5-6.923c-.67.204-1.335.82-1.887 1.855-.143.268-.276.56-.395.872.705.157 1.472.257 2.282.287V1.077zM4.249 3.539c.142-.384.304-.744.481-1.078a6.7 6.7 0 0 1 .597-.933A7.01 7.01 0 0 0 3.051 3.05c.362.184.763.349 1.198.49zM3.509 7.5c.036-1.07.188-2.087.436-3.008a9.124 9.124 0 0 1-1.565-.667A6.964 6.964 0 0 0 1.018 7.5h2.49zm1.4-2.741a12.344 12.344 0 0 0-.4 2.741H7.5V5.091c-.91-.03-1.783-.145-2.591-.332zM8.5 5.09V7.5h2.99a12.342 12.342 0 0 0-.399-2.741c-.808.187-1.681.301-2.591.332zM4.51 8.5c.035.987.176 1.914.399 2.741A13.612 13.612 0 0 1 7.5 10.91V8.5H4.51zm3.99 0v2.409c.91.03 1.783.145 2.591.332.223-.827.364-1.754.4-2.741H8.5zm-3.282 3.696c.12.312.252.604.395.872.552 1.035 1.218 1.65 1.887 1.855V11.91c-.81.03-1.577.13-2.282.287zm.11 2.276a6.696 6.696 0 0 1-.598-.933 8.853 8.853 0 0 1-.481-1.079 8.38 8.38 0 0 0-1.198.49 7.01 7.01 0 0 0 2.276 1.522zm-1.383-2.964A13.36 13.36 0 0 1 3.508 8.5h-2.49a6.963 6.963 0 0 0 1.362 3.675c.47-.258.995-.482 1.565-.667zm6.728 2.964a7.009 7.009 0 0 0 2.275-1.521 8.376 8.376 0 0 0-1.197-.49 8.853 8.853 0 0 1-.481 1.078 6.688 6.688 0 0 1-.597.933zM8.5 11.909v3.014c.67-.204 1.335-.82 1.887-1.855.143-.268.276-.56.395-.872A12.63 12.63 0 0 0 8.5 11.91zm3.555-.401c.57.185 1.095.409 1.565.667A6.963 6.963 0 0 0 14.982 8.5h-2.49a13.36 13.36 0 0 1-.437 3.008zM14.982 7.5a6.963 6.963 0 0 0-1.362-3.675c-.47.258-.995.482-1.565.667.248.92.4 1.938.437 3.008h2.49zM11.27 2.461c.177.334.339.694.482 1.078a8.368 8.368 0 0 0 1.196-.49 7.01 7.01 0 0 0-2.275-1.52c.218.283.418.597.597.932zm-.488 1.343a7.765 7.765 0 0 0-.395-.872C9.835 1.897 9.17 1.282 8.5 1.077V4.09c.81-.03 1.577-.13 2.282-.287z" />
|
32
|
+
</svg>
|
33
|
+
DataMule Website
|
34
|
+
</a>
|
35
|
+
</div>
|
36
|
+
</div>
|
37
|
+
<div class="row">
|
38
|
+
<div class="col-md-6">
|
39
|
+
<div class="card mb-3">
|
40
|
+
<div class="card-body">
|
41
|
+
<div id="chat-outer-container">
|
42
|
+
<div id="chat-container" class="mb-3"></div>
|
43
|
+
<div id="thinking-indicator" class="thinking-indicator">
|
44
|
+
<span>Bot is thinking</span><span class="dot-animation">...</span>
|
45
|
+
</div>
|
46
|
+
</div>
|
47
|
+
<form id="chat-form">
|
48
|
+
<div class="input-group">
|
49
|
+
<input type="text" id="user-input" class="form-control"
|
50
|
+
placeholder="Type your message...">
|
51
|
+
<button type="submit" class="btn btn-primary">Send</button>
|
52
|
+
</div>
|
53
|
+
</form>
|
54
|
+
</div>
|
55
|
+
</div>
|
56
|
+
</div>
|
57
|
+
<div class="col-md-6">
|
58
|
+
<button id="toggle-artifacts" class="btn btn-secondary mb-3">Show Artifacts</button>
|
59
|
+
<div id="artifact-container" class="card mb-3" style="display: none;">
|
60
|
+
<div class="card-body">
|
61
|
+
<div id="artifact-content"></div>
|
62
|
+
</div>
|
63
|
+
</div>
|
64
|
+
<div class="suggestion-box">
|
65
|
+
<h5 class="mb-2">Suggested Commands:</h5>
|
66
|
+
<ul class="list-unstyled">
|
67
|
+
<li class="suggestion-item">Get all company facts for Tesla</li>
|
68
|
+
<li class="suggestion-item">Get Ford's CIK</li>
|
69
|
+
<li class="suggestion-item">Get all 10-K urls for META</li>
|
70
|
+
<li class="suggestion-item">Get the management discussion and analysis section from
|
71
|
+
https://www.sec.gov/Archives/edgar/data/1318605/000095017022000796/tsla-20211231.htm</li>
|
72
|
+
</ul>
|
73
|
+
</div>
|
74
|
+
</div>
|
75
|
+
</div>
|
76
|
+
</div>
|
77
|
+
<script src="https://cdnjs.cloudflare.com/ajax/libs/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
|
78
|
+
|
79
|
+
<!-- Application scripts -->
|
80
|
+
<script type="module" src="{{ url_for('static', filename='scripts/utils.js') }}"></script>
|
81
|
+
<script type="module" src="{{ url_for('static', filename='scripts/chat.js') }}"></script>
|
82
|
+
<script type="module" src="{{ url_for('static', filename='scripts/listArtifacts.js') }}"></script>
|
83
|
+
<script type="module" src="{{ url_for('static', filename='scripts/filingArtifacts.js') }}"></script>
|
84
|
+
<script type="module" src="{{ url_for('static', filename='scripts/tableArtifacts.js') }}"></script>
|
85
|
+
<script type="module" src="{{ url_for('static', filename='scripts/artifacts.js') }}"></script>
|
86
|
+
<script type="module" src="{{ url_for('static', filename='scripts/suggestions.js') }}"></script>
|
87
|
+
<script type="module" src="{{ url_for('static', filename='scripts/prefilledPrompt.js') }}"></script>
|
88
|
+
<script type="module" src="{{ url_for('static', filename='scripts/main.js') }}"></script>
|
89
|
+
</body>
|
90
|
+
|
91
91
|
</html>
|
datamule/mulebot/search.py
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
import difflib
|
2
|
-
from typing import Dict, List, Any
|
3
|
-
|
4
|
-
def search_filing(query: str, nested_dict: Dict[str, Any], max_matches: int = 20, score_cutoff: float = 0.6) -> List[Dict[str, Any]]:
|
5
|
-
max_matches = min(max_matches, 20)
|
6
|
-
query = query.lower() # Convert query to lowercase
|
7
|
-
|
8
|
-
def flatten_dict(d: Dict[str, Any], parent_path: List[str] = None) -> List[Dict[str, Any]]:
|
9
|
-
parent_path = parent_path or []
|
10
|
-
items = []
|
11
|
-
|
12
|
-
if isinstance(d, dict):
|
13
|
-
for k, v in d.items():
|
14
|
-
new_path = parent_path + [k]
|
15
|
-
if k == 'title' and isinstance(v, str):
|
16
|
-
items.append({'path': new_path, 'title': v, 'title_lower': v.lower()})
|
17
|
-
items.extend(flatten_dict(v, new_path))
|
18
|
-
elif isinstance(d, list):
|
19
|
-
for i, item in enumerate(d):
|
20
|
-
new_path = parent_path + [str(i)]
|
21
|
-
items.extend(flatten_dict(item, new_path))
|
22
|
-
|
23
|
-
return items
|
24
|
-
|
25
|
-
flat_list = flatten_dict(nested_dict)
|
26
|
-
all_titles_lower = [item['title_lower'] for item in flat_list]
|
27
|
-
|
28
|
-
matches = difflib.get_close_matches(query, all_titles_lower, n=max_matches, cutoff=score_cutoff)
|
29
|
-
|
30
|
-
results = []
|
31
|
-
for match in matches:
|
32
|
-
similarity = difflib.SequenceMatcher(None, query, match).ratio()
|
33
|
-
for item in flat_list:
|
34
|
-
if item['title_lower'] == match:
|
35
|
-
# Navigate to the correct nested dictionary
|
36
|
-
d = nested_dict
|
37
|
-
for key in item['path'][:-1]: # Exclude the last 'title' key
|
38
|
-
if key.isdigit():
|
39
|
-
d = d[int(key)]
|
40
|
-
else:
|
41
|
-
d = d[key]
|
42
|
-
results.append({
|
43
|
-
'path': '.'.join(item['path'][:-1]), # Exclude the last 'title' key
|
44
|
-
'content': d,
|
45
|
-
'similarity': similarity
|
46
|
-
})
|
47
|
-
break
|
48
|
-
|
49
|
-
results.sort(key=lambda x: x['similarity'], reverse=True)
|
50
|
-
|
51
|
-
|
1
|
+
import difflib
|
2
|
+
from typing import Dict, List, Any
|
3
|
+
|
4
|
+
def search_filing(query: str, nested_dict: Dict[str, Any], max_matches: int = 20, score_cutoff: float = 0.6) -> List[Dict[str, Any]]:
|
5
|
+
max_matches = min(max_matches, 20)
|
6
|
+
query = query.lower() # Convert query to lowercase
|
7
|
+
|
8
|
+
def flatten_dict(d: Dict[str, Any], parent_path: List[str] = None) -> List[Dict[str, Any]]:
|
9
|
+
parent_path = parent_path or []
|
10
|
+
items = []
|
11
|
+
|
12
|
+
if isinstance(d, dict):
|
13
|
+
for k, v in d.items():
|
14
|
+
new_path = parent_path + [k]
|
15
|
+
if k == 'title' and isinstance(v, str):
|
16
|
+
items.append({'path': new_path, 'title': v, 'title_lower': v.lower()})
|
17
|
+
items.extend(flatten_dict(v, new_path))
|
18
|
+
elif isinstance(d, list):
|
19
|
+
for i, item in enumerate(d):
|
20
|
+
new_path = parent_path + [str(i)]
|
21
|
+
items.extend(flatten_dict(item, new_path))
|
22
|
+
|
23
|
+
return items
|
24
|
+
|
25
|
+
flat_list = flatten_dict(nested_dict)
|
26
|
+
all_titles_lower = [item['title_lower'] for item in flat_list]
|
27
|
+
|
28
|
+
matches = difflib.get_close_matches(query, all_titles_lower, n=max_matches, cutoff=score_cutoff)
|
29
|
+
|
30
|
+
results = []
|
31
|
+
for match in matches:
|
32
|
+
similarity = difflib.SequenceMatcher(None, query, match).ratio()
|
33
|
+
for item in flat_list:
|
34
|
+
if item['title_lower'] == match:
|
35
|
+
# Navigate to the correct nested dictionary
|
36
|
+
d = nested_dict
|
37
|
+
for key in item['path'][:-1]: # Exclude the last 'title' key
|
38
|
+
if key.isdigit():
|
39
|
+
d = d[int(key)]
|
40
|
+
else:
|
41
|
+
d = d[key]
|
42
|
+
results.append({
|
43
|
+
'path': '.'.join(item['path'][:-1]), # Exclude the last 'title' key
|
44
|
+
'content': d,
|
45
|
+
'similarity': similarity
|
46
|
+
})
|
47
|
+
break
|
48
|
+
|
49
|
+
results.sort(key=lambda x: x['similarity'], reverse=True)
|
50
|
+
|
51
|
+
|
52
52
|
return [item['content'] for item in results[:max_matches]]
|