seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -99,7 +99,7 @@ def dict_to_df(data):
99
99
 
100
100
 
101
101
  # Most cases appear to be a .tsv file.
102
- def download_df(url, is_tsv=True, dtype={}):
102
+ def download_df(url, is_tsv=True, dtype={}, usecols=None):
103
103
  """
104
104
  Fetches a TSV/CSV file from a URL and returns as a Pandas DataFrame.
105
105
 
@@ -114,6 +114,9 @@ def download_df(url, is_tsv=True, dtype={}):
114
114
  dtype : dict
115
115
  Data type conversion when intaking columns. e.g. {'a': str, 'b': np.float64}
116
116
 
117
+ usecols : list
118
+ Subset of columns to load into the DataFrame. If not specified, all columns are loaded.
119
+
117
120
  Returns
118
121
  -------
119
122
  pandas.core.frame.DataFrame
@@ -139,11 +142,9 @@ def download_df(url, is_tsv=True, dtype={}):
139
142
 
140
143
  if not url:
141
144
  return pd.DataFrame()
142
- url_content = io.StringIO(requests.get(url).content.decode("utf-8"))
143
- if is_tsv:
144
- csv = pd.read_csv(url_content, sep="\t", dtype=dtype)
145
- else:
146
- csv = pd.read_csv(url_content, dtype=dtype)
145
+ csv = pd.read_csv(
146
+ url, dtype=dtype, sep="\t" if is_tsv else ",", usecols=usecols
147
+ )
147
148
  return csv
148
149
 
149
150
 
@@ -679,6 +680,52 @@ def camel_case(s):
679
680
  return "".join([s[0].lower(), s[1:]])
680
681
 
681
682
 
683
def validate_d_zip_file(file):
    """
    Return True if a .d.zip file aligns with Seer requirements for PAS upload.

    The archive is considered valid only when all of the following hold:

    - the path ends with ".d.zip" (case-insensitive);
    - the file can be opened as a zip archive and contains at least one entry;
    - no entry sits at the root level of the archive;
    - every entry lives under a single top-level folder whose name equals
      the zip's base name without the ".zip" suffix (e.g. "sample.d" for
      "sample.d.zip").

    Parameters
    ----------
    file : str
        The name of the zip file.

    Returns
    -------
    bool
        True if the .d.zip file is valid, False otherwise.
    """

    if not file.lower().endswith(".d.zip"):
        return False

    # Remove the .zip extension to get the expected .d folder name
    d_name = os.path.basename(file)[:-4]

    try:
        with zipfile.ZipFile(file, "r") as zf:
            names = zf.namelist()
    except Exception:
        # A missing, unreadable or corrupt archive is simply "not valid".
        # Catching Exception (instead of a bare except) keeps that
        # best-effort behavior while letting KeyboardInterrupt and
        # SystemExit propagate.
        return False

    if not names:
        return False

    # Reject entries at the root level.
    # NOTE(review): the trailing "/" is stripped before the check, so an
    # explicit directory entry for the top-level folder (e.g. "sample.d/")
    # also counts as a root entry and fails validation -- confirm intended.
    if any("/" not in n.rstrip("/") for n in names):
        return False

    # All entries must share exactly one top-level folder named d_name.
    top_level = {n.split("/")[0] for n in names}
    return top_level == {d_name}
727
+
728
+
682
729
  def rename_d_zip_file(source, destination):
683
730
  """
684
731
  Renames a .d.zip file. The function extracts the contents of the source zip file, renames the inner .d folder, and rezips the contents into the destination zip file.