datamule 2.1.1__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,11 @@ from ..utils.format_accession import format_accession
23
23
  # could be cleaned up
24
24
 
25
25
  # Set up logging
26
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format='%(asctime)s - %(levelname)s - %(message)s',
29
+ handlers=logging.getLogger().handlers,
30
+ )
27
31
  logger = logging.getLogger(__name__)
28
32
 
29
33
 
@@ -315,11 +315,11 @@ class Document:
315
315
  else:
316
316
  visualize_dict(self.data)
317
317
 
318
- def get_section(self, title, format='dict'):
318
+ def get_section(self, title=None, title_regex=None,title_class=None, format='dict'):
319
319
  if not self.data:
320
320
  self.parse()
321
321
 
322
- result = get_title(self.data,title)
322
+ result = get_title(self.data,title=title,title_regex=title_regex,title_class=title_class)
323
323
 
324
324
  if format == 'text':
325
325
  result = [item[1] for item in result]
@@ -1,7 +1,7 @@
1
1
  dict_10k_html = {
2
2
  ('part',r'^part\s*([ivx]+)$') : 0,
3
3
  ('signatures',r'^signatures?\.*$') : 0,
4
- ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
4
+ ('item',r'^item\s*(\d+)\.?([a-z])?(?![a-z])') : 1,
5
5
  }
6
6
  dict_10q_html = dict_10k_html
7
7
 
@@ -48,7 +48,7 @@ dict_10d_html = dict_10k_html
48
48
 
49
49
  dict_20f_html = {
50
50
  ('part',r'^part\s*([ivx]+)') : 0,
51
- ('item',r'^item\s*(\d+)\.?([a-z])?') : 1,
51
+ ('item',r'^item\s*(\d+)\.?([a-z])?(?![a-z])') : 1,
52
52
  ('letter',r'\d*\.?([a-z])') : 2,
53
53
  ('signatures',r'^signatures?\.*$') : 0,
54
54
  }
datamule/submission.py CHANGED
@@ -163,8 +163,8 @@ class Submission:
163
163
  content = zstd.ZstdDecompressor().decompress(content)
164
164
 
165
165
  # Decode text files
166
- if extension in ['.htm', '.html', '.txt', '.xml']:
167
- content = content.decode('utf-8', errors='replace')
166
+ # if extension in ['.htm', '.html', '.txt', '.xml']:
167
+ # content = content.decode('utf-8', errors='replace')
168
168
 
169
169
  document_path = f"{self.batch_tar_path}::{self.accession_prefix}/{filename}"
170
170
 
@@ -197,8 +197,8 @@ class Submission:
197
197
  content = zstd.ZstdDecompressor().decompress(content)
198
198
 
199
199
  # Decode text files
200
- if extension in ['.htm', '.html', '.txt', '.xml']:
201
- content = content.decode('utf-8', errors='replace')
200
+ # if extension in ['.htm', '.html', '.txt', '.xml']:
201
+ # content = content.decode('utf-8', errors='replace')
202
202
 
203
203
  document_path = f"{self.path}::{actual_filename}"
204
204
 
@@ -219,8 +219,8 @@ class Submission:
219
219
  content = zstd.ZstdDecompressor().decompress(content)
220
220
 
221
221
  # Decode text files
222
- if extension in ['.htm', '.html', '.txt', '.xml']:
223
- content = content.decode('utf-8', errors='replace')
222
+ # if extension in ['.htm', '.html', '.txt', '.xml']:
223
+ # content = content.decode('utf-8', errors='replace')
224
224
 
225
225
  return Document(
226
226
  type=doc['type'],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: datamule
3
- Version: 2.1.1
3
+ Version: 2.1.2
4
4
  Summary: Work with SEC submissions at scale.
5
5
  Home-page: https://github.com/john-friedman/datamule-python
6
6
  Author: John Friedman
@@ -6,15 +6,15 @@ datamule/package_updater.py,sha256=Z9zaa_y0Z5cknpRn8oPea3gg4kquFHfpfhduKKCZ6NU,9
6
6
  datamule/portfolio.py,sha256=YViG1JgJ9SFhg8N3tOOhBI8oc6Pmi2vwnHeHmlkC_5U,12119
7
7
  datamule/portfolio_compression_utils.py,sha256=8OPYEN5zAdV1FiTxgVN3S7cTKs99Elv74bwgoIJP4QY,12654
8
8
  datamule/sheet.py,sha256=Ws_YRtpvewLVioarngVMe8cgG_sp11MP9_goGbRaiWE,23952
9
- datamule/submission.py,sha256=7rtN3EEB50iU7E-B_i-e0JHY382EIgmDcrS1KRZ1mUc,15886
9
+ datamule/submission.py,sha256=piMtTyoMZrKmLBpjyi0BBFhlkugi_CEuyox7J-jnusQ,15898
10
10
  datamule/data/listed_filer_metadata.csv,sha256=dT9fQ8AC5P1-Udf_UF0ZkdXJ88jNxJb_tuhi5YYL1rc,2426827
11
11
  datamule/datamule/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  datamule/datamule/datamule_lookup.py,sha256=e8djAg-ctSyHiKk7BjbtgugZ3p8roUjzsym5z3AihUg,9468
13
13
  datamule/datamule/datamule_mysql_rds.py,sha256=Q6_h24-SNECWK60RnM6UQjUIp5dhJmfn3SSKzTITB3o,12317
14
- datamule/datamule/downloader.py,sha256=aTyVUuIwynPtHB0Z9BvCasy9Ao5wfHptNAsjN-7yDTk,18525
14
+ datamule/datamule/downloader.py,sha256=mVg1SApfij_9-dTpcm_YB26Bxc_Yq1FR8xv2k50MHqU,18579
15
15
  datamule/datamule/sec_connector.py,sha256=VwOaODpHoAWy8JIky6kLR1-orW_PB61RHw7pIGRpkow,3288
16
16
  datamule/document/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
- datamule/document/document.py,sha256=OtfIvHRxdIXmNq5lOpBiLQw9P4o9OYScdpJxNEX51Yc,14477
17
+ datamule/document/document.py,sha256=Q137zAfOm18ZDBRxT_u2s4adMdOXfW1ktEw3hsB-AMI,14571
18
18
  datamule/document/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  datamule/document/tables/tables.py,sha256=8riSAof6o-Gxoo0SkiQAE61fw8NmzDnEhJe6dATzmvA,4487
20
20
  datamule/document/tables/tables_13fhr.py,sha256=-6tWcaTyNsb0XuW0WMBrYir9Zn1wLZL0laKxRYfPNyg,4265
@@ -27,7 +27,7 @@ datamule/document/tables/tables_sbsef.py,sha256=X6VKVnAdWxn2TgRmaAd1WWlxPhcLPQ-5
27
27
  datamule/document/tables/tables_sdr.py,sha256=BwHRJvtijiYvNJ2lIc_30kct6VEmLimIzX28JjZBBqo,4924
28
28
  datamule/document/tables/utils.py,sha256=2-X_1NsiWj_XsD9djxCXwTeIVlg-ip78gG11xACJiDs,738
29
29
  datamule/mapping_dicts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- datamule/mapping_dicts/html_mapping_dicts.py,sha256=G2PWB__FNg4VH9iFJFkflM0u-qOEtk67IWtGoqesb0k,5388
30
+ datamule/mapping_dicts/html_mapping_dicts.py,sha256=OmelylkccxcPcz6Qv28r6rjbwOhUGnz5Vmzy_BYC0Pg,5406
31
31
  datamule/mapping_dicts/txt_mapping_dicts.py,sha256=DQPrGYbAPQxomRUtt4iiMGrwuF7BHc_LeFBQuYBzU9o,6311
32
32
  datamule/mapping_dicts/xml_mapping_dicts.py,sha256=Z22yDVwKYonUfM5foQP00dVDE8EHhhMKp0CLqVKV5OI,438
33
33
  datamule/sec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -50,7 +50,7 @@ datamule/seclibrary/bq.py,sha256=C8sb_rpXTvchprrFLcbRar4Qi0XWW25tnv1YsHSS5o4,180
50
50
  datamule/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
51
  datamule/utils/construct_submissions_data.py,sha256=NB_hvfxlRXPyt4Fgc-5qA8vJRItkLhBedCSTaxwW7Jg,5887
52
52
  datamule/utils/format_accession.py,sha256=60RtqoNqoT9zSKVb1DeOv1gncJxzPTFMNW4SNOVmC_g,476
53
- datamule-2.1.1.dist-info/METADATA,sha256=c7wijhGbi_7q3LNTf1HvZcGJtCgvvO43nxcdqj3tWhs,560
54
- datamule-2.1.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
55
- datamule-2.1.1.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
56
- datamule-2.1.1.dist-info/RECORD,,
53
+ datamule-2.1.2.dist-info/METADATA,sha256=sHCW3Up78hM0SW9WWKf5guhYjgEjjB6F0D6hy_CcNBM,560
54
+ datamule-2.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
55
+ datamule-2.1.2.dist-info/top_level.txt,sha256=iOfgmtSMFVyr7JGl_bYSTDry79JbmsG4p8zKq89ktKk,9
56
+ datamule-2.1.2.dist-info/RECORD,,