datamule 0.381__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (75)
  1. datamule/__init__.py +46 -86
  2. datamule/book/book.py +34 -0
  3. datamule/book/eftsquery.py +127 -0
  4. datamule/book/xbrl_retriever.py +88 -0
  5. datamule/config.py +29 -0
  6. datamule/data/company_former_names.csv +8148 -8148
  7. datamule/data/company_metadata.csv +10049 -10049
  8. datamule/data/company_tickers.csv +9999 -10168
  9. datamule/data/sec-glossary.csv +728 -728
  10. datamule/data/xbrl_descriptions.csv +10024 -10024
  11. datamule/document.py +279 -0
  12. datamule/downloader/downloader.py +374 -0
  13. datamule/downloader/premiumdownloader.py +335 -0
  14. datamule/helper.py +123 -136
  15. datamule/mapping_dicts/txt_mapping_dicts.py +232 -0
  16. datamule/mapping_dicts/xml_mapping_dicts.py +19 -0
  17. datamule/monitor.py +238 -0
  18. datamule/mulebot/__init__.py +1 -1
  19. datamule/mulebot/helper.py +34 -34
  20. datamule/mulebot/mulebot.py +129 -129
  21. datamule/mulebot/mulebot_server/server.py +86 -86
  22. datamule/mulebot/mulebot_server/static/css/minimalist.css +173 -173
  23. datamule/mulebot/mulebot_server/static/scripts/artifacts.js +67 -67
  24. datamule/mulebot/mulebot_server/static/scripts/chat.js +91 -91
  25. datamule/mulebot/mulebot_server/static/scripts/filingArtifacts.js +55 -55
  26. datamule/mulebot/mulebot_server/static/scripts/listArtifacts.js +14 -14
  27. datamule/mulebot/mulebot_server/static/scripts/main.js +56 -56
  28. datamule/mulebot/mulebot_server/static/scripts/prefilledPrompt.js +26 -26
  29. datamule/mulebot/mulebot_server/static/scripts/suggestions.js +46 -46
  30. datamule/mulebot/mulebot_server/static/scripts/tableArtifacts.js +128 -128
  31. datamule/mulebot/mulebot_server/static/scripts/utils.js +27 -27
  32. datamule/mulebot/mulebot_server/templates/chat-minimalist.html +90 -90
  33. datamule/mulebot/search.py +51 -51
  34. datamule/mulebot/tools.py +82 -82
  35. datamule/packageupdater.py +207 -0
  36. datamule/portfolio.py +106 -0
  37. datamule/submission.py +76 -0
  38. datamule-1.0.2.dist-info/METADATA +27 -0
  39. datamule-1.0.2.dist-info/RECORD +43 -0
  40. {datamule-0.381.dist-info → datamule-1.0.2.dist-info}/WHEEL +1 -1
  41. datamule/data/filing_types.csv +0 -485
  42. datamule/data/ftd_locations.csv +0 -388
  43. datamule/datamule_api.py +0 -21
  44. datamule/dataset_builder/_init.py +0 -1
  45. datamule/dataset_builder/dataset_builder.py +0 -260
  46. datamule/downloader/dropbox_downloader.py +0 -225
  47. datamule/downloader/ftd.py +0 -216
  48. datamule/downloader/information_table_13f.py +0 -231
  49. datamule/downloader/sec_downloader.py +0 -635
  50. datamule/filing_viewer/__init__.py +0 -1
  51. datamule/filing_viewer/filing_viewer.py +0 -256
  52. datamule/global_vars.py +0 -202
  53. datamule/parser/__init__.py +0 -1
  54. datamule/parser/basic_10k_parser.py +0 -82
  55. datamule/parser/basic_10q_parser.py +0 -73
  56. datamule/parser/basic_13d_parser.py +0 -58
  57. datamule/parser/basic_13g_parser.py +0 -61
  58. datamule/parser/basic_8k_parser.py +0 -84
  59. datamule/parser/company_concepts_parser.py +0 -0
  60. datamule/parser/form_d_parser.py +0 -70
  61. datamule/parser/generalized_item_parser.py +0 -78
  62. datamule/parser/generalized_xml_parser.py +0 -0
  63. datamule/parser/helper.py +0 -75
  64. datamule/parser/information_table_parser_13fhr.py +0 -41
  65. datamule/parser/insider_trading_parser.py +0 -158
  66. datamule/parser/mappings.py +0 -95
  67. datamule/parser/n_port_p_parser.py +0 -70
  68. datamule/parser/sec_parser.py +0 -79
  69. datamule/parser/sgml_parser.py +0 -180
  70. datamule/sec_filing.py +0 -126
  71. datamule/sec_search.py +0 -20
  72. datamule-0.381.dist-info/METADATA +0 -132
  73. datamule-0.381.dist-info/RECORD +0 -61
  74. /datamule/{downloader → book}/__init__.py +0 -0
  75. {datamule-0.381.dist-info → datamule-1.0.2.dist-info}/top_level.txt +0 -0
datamule/parser/basic_13d_parser.py DELETED
@@ -1,58 +0,0 @@
- import re
- from pathlib import Path
- from .helper import load_file_content, clean_title
-
- ITEM_PATTERN = re.compile(
-     r"(?:^[ \t]*)"
-     r"(?:"
-     r"(?:Item|ITEM)\s*"
-     r"(?:"
-     r"1|"
-     r"2|"
-     r"3|"
-     r"4|"
-     r"5|"
-     r"6|"
-     r"7|"
-     r"8|"
-     r"9"
-     r")"
-     r"|"
-     r"SIGNATURES?"
-     r")",
-     re.IGNORECASE | re.MULTILINE
- )
-
- def parse_13d(filename: Path) -> dict:
-     text = load_file_content(filename)
-     matches = [(clean_title(m.group().strip()), m.start()) for m in ITEM_PATTERN.finditer(text)]
-
-     result = {
-         "metadata": {"document_name": Path(filename).stem},
-         "document": {}
-     }
-
-     if not matches:
-         return result
-
-     for i, (current_match, start_pos) in enumerate(matches[:-1]):
-         section_text = WHITESPACE_PATTERN.sub(' ', text[start_pos:matches[i + 1][1]]).strip()
-         if section_text:
-             if "signature" in current_match.lower():
-                 key = "signatures"
-             else:
-                 key = f"item{current_match.lower().replace('item', '').strip()}"
-             result["document"][key] = section_text
-
-     last_match, last_pos = matches[-1]
-     section_text = WHITESPACE_PATTERN.sub(' ', text[last_pos:len(text)]).strip()
-     if section_text:
-         if "signature" in last_match.lower():
-             key = "signatures"
-         else:
-             key = f"item{last_match.lower().replace('item', '').strip()}"
-         result["document"][key] = section_text
-
-     return result
-
- WHITESPACE_PATTERN = re.compile(r'\s+')
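Note: the anchoring logic above is easier to see in isolation. A minimal standalone sketch (the sample text is invented; the condensed pattern matches the same strings as the deleted ITEM_PATTERN, since re.IGNORECASE already covers the Item/ITEM alternation and 1|2|...|9 collapses to [1-9]):

import re

# Condensed equivalent of the deleted ITEM_PATTERN: an "Item 1"-"Item 9"
# or "SIGNATURE(S)" heading at the start of a line.
pattern = re.compile(r"^[ \t]*(?:item\s*[1-9]|signatures?)", re.I | re.M)

sample = (
    "Item 1. Security and Issuer\n"
    "Common stock of Example Corp.\n"
    "Item 2. Identity and Background\n"
    "Filed by Example Fund LP.\n"
    "SIGNATURES\n"
)
print([m.group().strip() for m in pattern.finditer(sample)])
# ['Item 1', 'Item 2', 'SIGNATURES']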
datamule/parser/basic_13g_parser.py DELETED
@@ -1,61 +0,0 @@
- import re
- from pathlib import Path
- from .helper import load_file_content, clean_title
-
- ITEM_PATTERN_13G = re.compile(
-     r"(?:^[ \t]*)"
-     r"(?:"
-     r"(?:Item|ITEM)\s*"
-     r"(?:"
-     r"10|" # Move 10 to the start so it's matched before 1
-     r"11|" # Similarly with 11 and 12
-     r"12|"
-     r"1|"
-     r"2|"
-     r"3|"
-     r"4|"
-     r"5|"
-     r"6|"
-     r"7|"
-     r"8|"
-     r"9"
-     r")"
-     r"|"
-     r"SIGNATURES?"
-     r")",
-     re.IGNORECASE | re.MULTILINE
- )
-
- def parse_13g(filename: Path) -> dict:
-     text = load_file_content(filename)
-     matches = [(clean_title(m.group().strip()), m.start()) for m in ITEM_PATTERN_13G.finditer(text)]
-
-     result = {
-         "metadata": {"document_name": Path(filename).stem},
-         "document": {}
-     }
-
-     if not matches:
-         return result
-
-     for i, (current_match, start_pos) in enumerate(matches[:-1]):
-         section_text = WHITESPACE_PATTERN.sub(' ', text[start_pos:matches[i + 1][1]]).strip()
-         if section_text:
-             if "signature" in current_match.lower():
-                 key = "signatures"
-             else:
-                 key = f"item{current_match.lower().replace('item', '').strip()}"
-             result["document"][key] = section_text
-
-     last_match, last_pos = matches[-1]
-     section_text = WHITESPACE_PATTERN.sub(' ', text[last_pos:len(text)]).strip()
-     if section_text:
-         if "signature" in last_match.lower():
-             key = "signatures"
-         else:
-             key = f"item{last_match.lower().replace('item', '').strip()}"
-         result["document"][key] = section_text
-
-     return result
-
- WHITESPACE_PATTERN = re.compile(r'\s+')
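Note: the "Move 10 to the start" comment above exists because regex alternation in Python is ordered: alternatives are tried left to right and the first one that matches wins, so with 1 listed before 10 the pattern can never consume the full "10". A standalone illustration:

import re

# With "1" first, only the "1" of "Item 10" is consumed.
print(re.search(r"Item\s*(?:1|10)", "Item 10").group())  # Item 1
# With "10" first, the full item number matches.
print(re.search(r"Item\s*(?:10|1)", "Item 10").group())  # Item 10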
datamule/parser/basic_8k_parser.py DELETED
@@ -1,84 +0,0 @@
- import re
- from pathlib import Path
- from .helper import load_file_content, clean_title
-
- ITEM_PATTERN = re.compile(
-     r"(?:^[ \t]*)"
-     r"(?:"
-     r"(?:Item|ITEM)\s*"
-     r"(?:"
-     r"1\.0[1-4]|"
-     r"2\.0[1-6]|"
-     r"3\.0[1-3]|"
-     r"4\.0[1-2]|"
-     r"5\.0[1-8]|"
-     r"6\.0[1-5]|"
-     r"7\.01|"
-     r"8\.01|"
-     r"9\.01"
-     r")"
-     r"|"
-     r"SIGNATURES?"
-     r")",
-     re.IGNORECASE | re.MULTILINE
- )
-
- WHITESPACE_PATTERN = re.compile(r'\s+')
-
- def parse_section(text: str, start: int, end: int) -> str:
-     return WHITESPACE_PATTERN.sub(' ', text[start:end].strip())
-
- def validate_section_sequence(matches: list) -> None:
-     current_base = None
-
-     for match, _ in matches:
-         base_section = re.match(r'(?:Item|ITEM)\s*(?:\d+\.\d+|\bSIGNATURES?\b)', match)
-         if base_section:
-             base_section = base_section.group().upper()
-
-             if current_base is None:
-                 current_base = base_section
-             elif base_section != current_base:
-                 current_base = base_section
-             else:
-                 raise DuplicateSectionError(f"Section {base_section} appears multiple times before a different section")
-
- def parse_8k(filename: Path) -> dict:
-     text = load_file_content(filename)
-     matches = [(clean_title(m.group().strip()), m.start()) for m in ITEM_PATTERN.finditer(text)]
-
-     result = {
-         "metadata": {"document_name": Path(filename).stem},
-         "document": {}
-     }
-
-     if not matches:
-         return result
-
-     validate_section_sequence(matches)
-
-     # Process all sections except last
-     for i, (current_match, start_pos) in enumerate(matches[:-1]):
-         section_text = parse_section(text, start_pos, matches[i + 1][1])
-         if section_text:
-             if "signature" in current_match.lower():
-                 key = "signatures"
-             else:
-                 key = f"item{current_match.lower().replace('item', '').strip()}"
-             result["document"][key] = section_text
-
-     # Process last section
-     last_match, last_pos = matches[-1]
-     section_text = parse_section(text, last_pos, len(text))
-     if section_text:
-         if "signature" in last_match.lower():
-             key = "signatures"
-         else:
-             key = f"item{last_match.lower().replace('item', '').strip()}"
-         result["document"][key] = section_text
-
-     return result
-
- class DuplicateSectionError(Exception):
-     """Raised when a section appears multiple times before a different section."""
-     pass
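Note: clean_title (from datamule/parser/helper.py, shown further below) strips periods as well as whitespace, so an 8-K heading such as "Item 2.03" is keyed as item203 rather than item2.03. A standalone sketch of the key derivation used above:

def clean_title(title: str) -> str:
    # Same logic as the deleted helper: drop newlines and periods,
    # collapse whitespace, lowercase.
    return ''.join(title.replace('\n', '').replace('.', '').split()).lower()

cleaned = clean_title("Item 2.03")                   # 'item203'
key = f"item{cleaned.replace('item', '').strip()}"   # 'item203'
print(cleaned, key)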
datamule/parser/company_concepts_parser.py DELETED (file without changes)
datamule/parser/form_d_parser.py DELETED
@@ -1,70 +0,0 @@
- from xml.etree import ElementTree as ET
-
- def element_to_dict(elem):
-     """Convert an XML element to dict preserving structure."""
-     result = {}
-
-     # Add attributes directly to result
-     if elem.attrib:
-         result.update(elem.attrib)
-
-     # Add text content if present and no children
-     if elem.text and elem.text.strip():
-         text = elem.text.strip()
-         if not len(elem): # No children
-             return text
-         else:
-             result['text'] = text
-
-     # Process children
-     for child in elem:
-         child_data = element_to_dict(child)
-         child_tag = child.tag.split('}')[-1] # Remove namespace
-
-         if child_tag in result:
-             # Convert to list if multiple elements
-             if not isinstance(result[child_tag], list):
-                 result[child_tag] = [result[child_tag]]
-             result[child_tag].append(child_data)
-         else:
-             result[child_tag] = child_data
-
-     return result
-
- def parse_form_d(filepath):
-     """Parse Form D XML file into metadata and document sections."""
-     # Parse XML
-     tree = ET.parse(filepath)
-     root = tree.getroot()
-
-     # Remove namespaces for cleaner processing
-     for elem in root.iter():
-         if '}' in elem.tag:
-             elem.tag = elem.tag.split('}')[-1]
-
-     # Convert entire document to dict
-     full_dict = element_to_dict(root)
-
-     # Separate metadata and document content
-     result = {
-         'metadata': {},
-         'document': {}
-     }
-
-     # Extract metadata
-     metadata_fields = {
-         'schemaVersion',
-         'submissionType',
-         'testOrLive',
-         'primaryIssuer' # Including all issuer information in metadata
-     }
-
-     for field in metadata_fields:
-         if field in full_dict:
-             result['metadata'][field] = full_dict[field]
-             del full_dict[field] # Remove from full_dict to avoid duplication
-
-     # Everything else goes to document
-     result['document'] = full_dict
-
-     return result
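Note: a usage sketch of element_to_dict on a fabricated fragment (not a real Form D), assuming the function above is in scope. It shows the two conversion rules: a leaf element collapses to its text, and a repeated tag becomes a list:

from xml.etree import ElementTree as ET

xml = (
    "<offeringData>"
    "<industryGroup><industryGroupType>Other</industryGroupType></industryGroup>"
    "<signatureBlock>"
    "<signature><signatureName>Alice</signatureName></signature>"
    "<signature><signatureName>Bob</signatureName></signature>"
    "</signatureBlock>"
    "</offeringData>"
)
print(element_to_dict(ET.fromstring(xml)))
# {'industryGroup': {'industryGroupType': 'Other'},
#  'signatureBlock': {'signature': [{'signatureName': 'Alice'},
#                                   {'signatureName': 'Bob'}]}}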
datamule/parser/generalized_item_parser.py DELETED
@@ -1,78 +0,0 @@
- # Parses e.g. 10-K, 10-Q,..... any form with items and/or parts
- from .helper import load_file_content, clean_title, clean_text
- from pathlib import Path
- import re
-
- # OK figured out general pattern
- # find toc
- # figure out mapping. we do need it
- # just do mapping tonight
-
- pattern = re.compile(r'^\s*(?:item\s+\d+(?:\.\d+)?(?:[a-z])?|signature(?:\.?s)?)\s*', re.I | re.M)
-
- def find_anchors(content):
-     anchors = []
-     prev_title = None
-
-     for part_match in pattern.finditer(content):
-         title = clean_title(part_match.group())
-         # Skip duplicates, e.g. "item 1" and "item1 continued"
-         if prev_title == title:
-             continue
-         prev_title = title
-         anchors.append((title, part_match.start()))
-
-     return anchors
-
- # I think this works, but I haven't tested it extensively.
- def map_sections(content, anchors):
-     positions = anchors + [('end', len(content))]
-
-     result = {}
-     for i, (title, start) in enumerate(positions[:-1]):
-         _, next_start = positions[i + 1]
-         section_text = content[start:next_start].strip()
-         result[title.lower()] = clean_text(section_text)
-
-     def sort_key(x):
-         match = re.search(r'item\s+(\d+)(?:[\.a-z])?', x[0], re.I)
-         if not match:
-             return float('inf')
-         num = match.group(0).lower()
-         # This will sort 1, 1a, 1b, 2, 2a etc
-         return float(re.findall(r'\d+', num)[0]) + (ord(num[-1]) - ord('a') + 1) / 100 if num[-1].isalpha() else float(re.findall(r'\d+', num)[0])
-
-     return dict(sorted(result.items(), key=sort_key))
-
- # def find_content_start(anchors):
- #     def find_first_non_repeating(seq):
- #         for i in range(len(seq)):
- #             remaining = seq[i:]
- #             # Get same length subsequence from the next position
- #             next_seq = seq[i + 1:i + 1 + len(remaining)]
- #             if remaining != next_seq and len(next_seq) > 0:
- #                 return i
- #         return 0 # Default to start if no pattern found
-
- #     return find_first_non_repeating([title for title, _ in anchors])
-
- def generalized_parser(filename):
-     # load content
-     content = load_file_content(filename)
-
-     # find anchors
-     anchors = find_anchors(content)
-
-     # Skip tables of contents. Not implemented yet, since we overwrite the keys anyway.
-     # content_start = find_content_start(anchors)
-     # print(content_start)
-
-     result = {}
-     # assign metadata
-     result["metadata"] = {"document_name": Path(filename).stem}
-
-     # extract sections, assign text based on mapping_dict
-     result['document'] = map_sections(content, anchors)
-
-     return result
-
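Note: the one-line return in sort_key above packs numeric and lettered sub-item ordering into a single conditional expression; its effect is easier to see on sample titles. A standalone sketch with the same logic (sort_key is nested inside map_sections above, so it is restated here):

import re

def sort_key(x):
    # 'item 1' -> 1.0, 'item 1a' -> 1.01, 'item 2' -> 2.0; non-items sort last.
    match = re.search(r'item\s+(\d+)(?:[\.a-z])?', x[0], re.I)
    if not match:
        return float('inf')
    num = match.group(0).lower()
    base = float(re.findall(r'\d+', num)[0])
    return base + (ord(num[-1]) - ord('a') + 1) / 100 if num[-1].isalpha() else base

titles = ['item 2', 'signatures', 'item 1a', 'item 1']
print(sorted(titles, key=lambda t: sort_key((t, ''))))
# ['item 1', 'item 1a', 'item 2', 'signatures']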
datamule/parser/generalized_xml_parser.py DELETED (file without changes)
datamule/parser/helper.py DELETED
@@ -1,75 +0,0 @@
- from selectolax.parser import HTMLParser
- import re
-
- # This will be modified in the future to remove SEC specific code such as <PAGE> tags
- def load_text_content(filename):
-     with open(filename) as f:
-         return f.read().translate(str.maketrans({
-             '\xa0': ' ', '\u2003': ' ',
-             '\u2018': "'", '\u2019': "'",
-             '\u201c': '"', '\u201d': '"'
-         }))
-
- def load_html_content(filename):
-     parser = HTMLParser(open(filename).read())
-
-     # Remove hidden elements first
-     hidden_nodes = parser.css('[style*="display: none"], [style*="display:none"], .hidden, .hide, .d-none')
-     for node in hidden_nodes:
-         node.decompose()
-
-     blocks = {'p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'article', 'section', 'li', 'td'}
-     lines = []
-     current_line = []
-
-     def flush_line():
-         if current_line:
-             lines.append(' '.join(current_line))
-             current_line.clear()
-
-     for node in parser.root.traverse(include_text=True):
-         if node.tag in ('script', 'style', 'css'):
-             continue
-
-         if node.tag in blocks:
-             flush_line()
-             lines.append('')
-
-         if node.text_content:
-             text = node.text_content.strip()
-             if text:
-                 if node.tag in blocks:
-                     flush_line()
-                     lines.append(text)
-                     lines.append('')
-                 else:
-                     current_line.append(text)
-
-     flush_line()
-
-     text = '\n'.join(lines)
-     while '\n\n\n' in text:
-         text = text.replace('\n\n\n', '\n\n')
-
-     return text.translate(str.maketrans({
-         '\xa0': ' ', '\u2003': ' ',
-         '\u2018': "'", '\u2019': "'",
-         '\u201c': '"', '\u201d': '"'
-     }))
- def load_file_content(filename):
-     if filename.endswith('.txt'):
-         return load_text_content(filename)
-     elif filename.endswith('.html') or filename.endswith('.htm'):
-         return load_html_content(filename)
-     else:
-         raise ValueError(f"Unsupported file type: {filename}")
-
- def clean_title(title: str) -> str:
-     """Clean up section title by removing newlines, periods, and all whitespace, converting to lowercase."""
-     return ''.join(title.replace('\n', '').replace('.', '').split()).lower()
-
- # This is a bit hacky, removes PART IV, PART V etc from the end of the text
- # we do this to avoid having to map for general cases
- def clean_text(text):
-     text = text.strip()
-     return re.sub(r'\s*PART\s+[IVX]+\s*$', '', text, flags=re.I)
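Note: load_file_content dispatches on str.endswith, so despite the Path annotations on the parse_* functions above, callers had to pass plain string paths (pathlib.Path has no .endswith method). A short standalone sketch of clean_text, restated from the hunk above, stripping a trailing part heading:

import re

def clean_text(text):
    # Same regex as the deleted helper: drop a trailing "PART <roman numeral>"
    # so a section does not bleed into the next part heading.
    text = text.strip()
    return re.sub(r'\s*PART\s+[IVX]+\s*$', '', text, flags=re.I)

print(clean_text("We operate in a single segment.\n\nPART II"))
# We operate in a single segment.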
datamule/parser/information_table_parser_13fhr.py DELETED
@@ -1,41 +0,0 @@
- from xml.etree import ElementTree as ET
-
- def parse_13f_hr_information_table_xml(xml_file):
-     # Parse the XML file
-     tree = ET.parse(xml_file)
-     root = tree.getroot()
-
-     data = []
-
-     # Iterate through each infoTable
-     for info_table in root.findall('.//{*}infoTable'):
-         row = {
-             'NAMEOFISSUER': info_table.findtext('.//{*}nameOfIssuer') or '',
-             'TITLEOFCLASS': info_table.findtext('.//{*}titleOfClass') or '',
-             'CUSIP': info_table.findtext('.//{*}cusip') or '',
-             'FIGI': info_table.findtext('.//{*}figi') or '',
-             'VALUE': info_table.findtext('.//{*}value') or '',
-             'SSHPRNAMT': '',
-             'SSHPRNAMTTYPE': '',
-             'PUTCALL': info_table.findtext('.//{*}putCall') or '',
-             'INVESTMENTDISCRETION': info_table.findtext('.//{*}investmentDiscretion') or '',
-             'OTHERMANAGER': info_table.findtext('.//{*}otherManager') or '',
-             'VOTING_AUTH_SOLE': '',
-             'VOTING_AUTH_SHARED': '',
-             'VOTING_AUTH_NONE': ''
-         }
-
-         shrs_or_prn_amt = info_table.find('.//{*}shrsOrPrnAmt')
-         if shrs_or_prn_amt is not None:
-             row['SSHPRNAMT'] = shrs_or_prn_amt.findtext('.//{*}sshPrnamt') or ''
-             row['SSHPRNAMTTYPE'] = shrs_or_prn_amt.findtext('.//{*}sshPrnamtType') or ''
-
-         voting_authority = info_table.find('.//{*}votingAuthority')
-         if voting_authority is not None:
-             row['VOTING_AUTH_SOLE'] = voting_authority.findtext('.//{*}Sole') or ''
-             row['VOTING_AUTH_SHARED'] = voting_authority.findtext('.//{*}Shared') or ''
-             row['VOTING_AUTH_NONE'] = voting_authority.findtext('.//{*}None') or ''
-
-         data.append(row)
-
-     return data
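Note: the parser returns a list of flat dicts with fixed uppercase keys, which maps directly onto csv.DictWriter. A usage sketch (file names hypothetical; assumes the function above is in scope):

import csv

rows = parse_13f_hr_information_table_xml("infotable.xml")  # hypothetical input
if rows:
    with open("holdings.csv", "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)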
datamule/parser/insider_trading_parser.py DELETED
@@ -1,158 +0,0 @@
- from xml.etree import ElementTree as ET
- from typing import Dict, Any, Optional
-
- def get_footnotes(doc) -> dict:
-     """Extract footnotes into a lookup dictionary."""
-     return {
-         f.attrib.get('id', ''): f.text.strip()
-         for f in doc.findall('.//footnotes/footnote')
-     }
-
- def get_value_and_footnote(elem, footnotes: dict) -> dict:
-     """Get value and footnote for a field."""
-     result = {'value': ''}
-
-     if elem is None:
-         return result
-
-     # Get value
-     value_elem = elem.find('.//value')
-     if value_elem is not None:
-         result['value'] = value_elem.text or ''
-
-     # Get footnote if exists
-     footnote_elem = elem.find('.//footnoteId')
-     if footnote_elem is not None:
-         footnote_id = footnote_elem.attrib.get('id', '')
-         if footnote_id in footnotes:
-             result['footnote'] = footnotes[footnote_id]
-
-     return result
-
- def parse_form345(filepath) -> Dict[str, Any]:
-     """Parse SEC Form XML with enhanced data extraction."""
-     doc = ET.parse(filepath).getroot()
-     if doc is None:
-         return {"error": "No ownershipDocument found"}
-
-     footnotes = get_footnotes(doc)
-
-     result = {
-         'metadata': {
-             'schemaVersion': doc.findtext('schemaVersion', ''),
-             'documentType': doc.findtext('documentType', ''),
-             'periodOfReport': doc.findtext('periodOfReport', ''),
-             'dateOfOriginalSubmission': doc.findtext('dateOfOriginalSubmission', ''),
-             'form3HoldingsReported': doc.findtext('form3HoldingsReported', ''),
-             'form4TransactionsReported': doc.findtext('form4TransactionsReported', ''),
-             'issuer': {
-                 'cik': doc.findtext('.//issuerCik', ''),
-                 'name': doc.findtext('.//issuerName', ''),
-                 'tradingSymbol': doc.findtext('.//issuerTradingSymbol', '')
-             },
-             'reportingOwner': {
-                 'cik': doc.findtext('.//rptOwnerCik', ''),
-                 'name': doc.findtext('.//rptOwnerName', ''),
-                 'address': {
-                     'street1': doc.findtext('.//rptOwnerStreet1', ''),
-                     'street2': doc.findtext('.//rptOwnerStreet2', ''),
-                     'city': doc.findtext('.//rptOwnerCity', ''),
-                     'state': doc.findtext('.//rptOwnerState', ''),
-                     'zip': doc.findtext('.//rptOwnerZipCode', ''),
-                     'stateDescription': doc.findtext('.//rptOwnerStateDescription', '')
-                 },
-                 'relationship': {
-                     'isDirector': doc.findtext('.//isDirector', ''),
-                     'isOfficer': doc.findtext('.//isOfficer', ''),
-                     'isTenPercentOwner': doc.findtext('.//isTenPercentOwner', ''),
-                     'isOther': doc.findtext('.//isOther', ''),
-                     'officerTitle': doc.findtext('.//officerTitle', '')
-                 }
-             },
-             'signature': {
-                 'name': doc.findtext('.//signatureName', ''),
-                 'date': doc.findtext('.//signatureDate', '')
-             }
-         },
-         'holdings': []
-     }
-
-     # Parse non-derivative holdings/transactions
-     for entry in doc.findall('.//nonDerivativeTable/*'):
-         holding = {
-             'type': 'non-derivative',
-             'securityTitle': get_value_and_footnote(entry.find('.//securityTitle'), footnotes),
-             'postTransactionAmounts': {
-                 'sharesOwned': get_value_and_footnote(entry.find('.//sharesOwnedFollowingTransaction'), footnotes)
-             },
-             'ownershipNature': {
-                 'directOrIndirect': entry.findtext('.//directOrIndirectOwnership/value', '')
-             }
-         }
-
-         # Add transaction fields if present
-         if 'Transaction' in entry.tag:
-             transactionCoding = {
-                 'formType': entry.findtext('.//transactionFormType', ''),
-                 'code': entry.findtext('.//transactionCode', ''),
-                 'equitySwapInvolved': entry.findtext('.//equitySwapInvolved', '')
-             }
-
-             transactionAmounts = {
-                 'shares': get_value_and_footnote(entry.find('.//transactionShares'), footnotes),
-                 'pricePerShare': get_value_and_footnote(entry.find('.//transactionPricePerShare'), footnotes),
-                 'acquiredDisposedCode': get_value_and_footnote(entry.find('.//transactionAcquiredDisposedCode'), footnotes)
-             }
-
-             holding.update({
-                 'transactionDate': get_value_and_footnote(entry.find('.//transactionDate'), footnotes),
-                 'transactionCoding': transactionCoding,
-                 'transactionAmounts': transactionAmounts
-             })
-
-         result['holdings'].append(holding)
-
-     # Parse derivative holdings/transactions
-     for entry in doc.findall('.//derivativeTable/*'):
-         holding = {
-             'type': 'derivative',
-             'securityTitle': get_value_and_footnote(entry.find('.//securityTitle'), footnotes),
-             'conversionOrExercisePrice': get_value_and_footnote(entry.find('.//conversionOrExercisePrice'), footnotes),
-             'exerciseDate': get_value_and_footnote(entry.find('.//exerciseDate'), footnotes),
-             'expirationDate': get_value_and_footnote(entry.find('.//expirationDate'), footnotes),
-             'underlyingSecurity': {
-                 'title': get_value_and_footnote(entry.find('.//underlyingSecurityTitle'), footnotes),
-                 'shares': get_value_and_footnote(entry.find('.//underlyingSecurityShares'), footnotes)
-             },
-             'postTransactionAmounts': {
-                 'sharesOwned': get_value_and_footnote(entry.find('.//sharesOwnedFollowingTransaction'), footnotes)
-             },
-             'ownershipNature': {
-                 'directOrIndirect': entry.findtext('.//directOrIndirectOwnership/value', ''),
-                 'nature': entry.findtext('.//natureOfOwnership/value', '')
-             }
-         }
-
-         # Add transaction-specific fields
-         if 'Transaction' in entry.tag:
-             transactionCoding = {
-                 'formType': entry.findtext('.//transactionFormType', ''),
-                 'code': entry.findtext('.//transactionCode', ''),
-                 'equitySwapInvolved': entry.findtext('.//equitySwapInvolved', '')
-             }
-
-             transactionAmounts = {
-                 'shares': get_value_and_footnote(entry.find('.//transactionShares'), footnotes),
-                 'pricePerShare': get_value_and_footnote(entry.find('.//transactionPricePerShare'), footnotes),
-                 'acquiredDisposedCode': get_value_and_footnote(entry.find('.//transactionAcquiredDisposedCode'), footnotes)
-             }
-
-             holding.update({
-                 'transactionDate': get_value_and_footnote(entry.find('.//transactionDate'), footnotes),
-                 'transactionCoding': transactionCoding,
-                 'transactionAmounts': transactionAmounts
-             })
-
-         result['holdings'].append(holding)
-
-     return result
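Note: a usage sketch of parse_form345 (input file name hypothetical) walking the structure built above; transaction entries carry transactionAmounts, while plain holdings only report post-transaction share counts:

parsed = parse_form345("form4.xml")  # hypothetical input

meta = parsed["metadata"]
print(meta["documentType"], meta["issuer"]["name"], meta["reportingOwner"]["name"])

for holding in parsed["holdings"]:
    title = holding["securityTitle"]["value"]
    if "transactionAmounts" in holding:
        amounts = holding["transactionAmounts"]
        print(holding["type"], title, amounts["shares"]["value"],
              amounts["pricePerShare"]["value"])
    else:
        print(holding["type"], title,
              holding["postTransactionAmounts"]["sharesOwned"]["value"])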