PyPI - seer-pas-sdk - Versions diffs - 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl - Mend

seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

seer_pas_sdk/common/__init__.py CHANGED Viewed

@@ -99,7 +99,7 @@ def dict_to_df(data):
 # Most cases appear to be a .tsv file.
-def download_df(url, is_tsv=True, dtype={}):
+def download_df(url, is_tsv=True, dtype={}, usecols=None):
     """
     Fetches a TSV/CSV file from a URL and returns as a Pandas DataFrame.
@@ -114,6 +114,9 @@ def download_df(url, is_tsv=True, dtype={}):
     dtype : dict
         Data type conversion when intaking columns. e.g. {'a': str, 'b': np.float64}
+    usecols : list
+        Subset of columns to download. If not specified, downloads all columns.
     Returns
     -------
     pandas.core.frame.DataFrame
@@ -139,11 +142,9 @@ def download_df(url, is_tsv=True, dtype={}):
     if not url:
         return pd.DataFrame()
-    url_content = io.StringIO(requests.get(url).content.decode("utf-8"))
-    if is_tsv:
-        csv = pd.read_csv(url_content, sep="\t", dtype=dtype)
-    else:
-        csv = pd.read_csv(url_content, dtype=dtype)
+    csv = pd.read_csv(
+        url, dtype=dtype, sep="\t" if is_tsv else ",", usecols=usecols
+    )
     return csv
@@ -679,6 +680,52 @@ def camel_case(s):
     return "".join([s[0].lower(), s[1:]])
+def validate_d_zip_file(file):
+    """
+    Return True if a .d.zip file aligns with Seer requirements for PAS upload.
+    Parameters
+    ----------
+    file : str
+        The name of the zip file.
+    Returns
+    -------
+    bool
+        True if the .d.zip file is valid, False otherwise.
+    """
+    if not file.lower().endswith(".d.zip"):
+        return False
+    basename = os.path.basename(file)
+    # Remove the .zip extension to get the .d folder name
+    d_name = basename[:-4]
+    try:
+        with zipfile.ZipFile(file, "r") as zf:
+            names = zf.namelist()
+    except:
+        return False
+    if not names:
+        return False
+    # check for files at the root level
+    root_entries = [n for n in names if "/" not in n.rstrip("/")]
+    if root_entries:
+        return False
+    # find folders
+    top_level = {n.split("/")[0] for n in names}
+    if len(top_level) != 1 or d_name not in top_level:
+        return False
+    return True
 def rename_d_zip_file(source, destination):
     """
     Renames a .d.zip file. The function extracts the contents of the source zip file, renames the inner .d folder, and rezips the contents into the destination zip file.

seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl

seer-pas-sdk 1.1.1__py3-none-any.whl → 1.2.1__py3-none-any.whl