PyPI - pyerrors - Versions diffs - 2.16.0__tar.gz → 2.17.0__tar.gz - Mend

pyerrors 2.16.0.tar.gz → 2.17.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

{pyerrors-2.16.0 → pyerrors-2.17.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyerrors
-Version: 2.16.0
+Version: 2.17.0
 Summary: Error propagation and statistical analysis for Monte Carlo simulations
 Home-page: https://github.com/fjosw/pyerrors
 Author: Fabian Joswig

{pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/pandas.py RENAMED Viewed

@@ -145,9 +145,9 @@ def _serialize_df(df, gz=False):
         serialize = _need_to_serialize(out[column])
         if serialize is True:
-            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None)
+            out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if not _is_null(x) else None)
             if gz is True:
-                out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8')))
+                out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')) if not _is_null(x) else gzip.compress(b''))
     return out
@@ -166,37 +166,49 @@ def _deserialize_df(df, auto_gamma=False):
     ------
     In case any column of the DataFrame is gzipped it is gunzipped in the process.
     """
-    for column in df.select_dtypes(include="object"):
-        if isinstance(df[column][0], bytes):
-            if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
-                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
-        if not all([e is None for e in df[column]]):
+    # In pandas 3+, string columns use 'str' dtype instead of 'object'
+    string_like_dtypes = ["object", "str"] if int(pd.__version__.split(".")[0]) >= 3 else ["object"]
+    for column in df.select_dtypes(include=string_like_dtypes):
+        if len(df[column]) == 0:
+            continue
+        if isinstance(df[column].iloc[0], bytes):
+            if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
+                df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')
+        if df[column].notna().any():
             df[column] = df[column].replace({r'^$': None}, regex=True)
             i = 0
-            while df[column][i] is None:
+            while i < len(df[column]) and pd.isna(df[column].iloc[i]):
                 i += 1
-            if isinstance(df[column][i], str):
-                if '"program":' in df[column][i][:20]:
-                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
+            if i < len(df[column]) and isinstance(df[column].iloc[i], str):
+                if '"program":' in df[column].iloc[i][:20]:
+                    df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if not pd.isna(x) else None)
                     if auto_gamma is True:
-                        if isinstance(df[column][i], list):
-                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
+                        if isinstance(df[column].iloc[i], list):
+                            df[column].apply(lambda x: [o.gm() if o is not None else x for o in x] if x is not None else x)
                         else:
                             df[column].apply(lambda x: x.gm() if x is not None else x)
+        # Convert NA values back to Python None for compatibility with `x is None` checks
+        if df[column].isna().any():
+            df[column] = df[column].astype(object).where(df[column].notna(), None)
     return df
 def _need_to_serialize(col):
     serialize = False
     i = 0
-    while i < len(col) and col[i] is None:
+    while i < len(col) and _is_null(col.iloc[i]):
         i += 1
     if i == len(col):
         return serialize
-    if isinstance(col[i], (Obs, Corr)):
+    if isinstance(col.iloc[i], (Obs, Corr)):
         serialize = True
-    elif isinstance(col[i], list):
-        if all(isinstance(o, Obs) for o in col[i]):
+    elif isinstance(col.iloc[i], list):
+        if all(isinstance(o, Obs) for o in col.iloc[i]):
             serialize = True
     return serialize
+def _is_null(val):
+    """Check if a value is null (None or NA), handling list/array values."""
+    return False if isinstance(val, (list, np.ndarray)) else pd.isna(val)

{pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/sfcf.py RENAMED Viewed

@@ -5,6 +5,7 @@ import numpy as np  # Thinly-wrapped numpy
 from ..obs import Obs
 from .utils import sort_names, check_idl
 import itertools
+import warnings
 sep = "/"
@@ -603,42 +604,82 @@ def _read_chunk_data(chunk, start_read, T, corr_line, b2b, pattern, im, single):
     return data
+def _check_append_rep(content, start_list):
+    data_len_list = []
+    header_len_list = []
+    has_regular_len_heads = True
+    for chunk_num in range(len(start_list)):
+        start = start_list[chunk_num]
+        if chunk_num == len(start_list) - 1:
+            stop = len(content)
+        else:
+            stop = start_list[chunk_num + 1]
+        chunk = content[start:stop]
+        for linenumber, line in enumerate(chunk):
+            if line.startswith("[correlator]"):
+                header_len = linenumber
+                break
+        header_len_list.append(header_len)
+        data_len_list.append(len(chunk) - header_len)
+    if len(set(header_len_list)) > 1:
+        warnings.warn("Not all headers have the same length. Data parts do.")
+        has_regular_len_heads = False
+    if len(set(data_len_list)) > 1:
+        raise Exception("Irregularities in file structure found, not all run data are of the same output length")
+    return has_regular_len_heads
+def _read_chunk_structure(chunk, pattern, b2b):
+    start_read = 0
+    for linenumber, line in enumerate(chunk):
+        if line.startswith("gauge_name"):
+            gauge_line = linenumber
+        elif line.startswith("[correlator]"):
+            corr_line = linenumber
+            found_pat = ""
+            for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
+                found_pat += li
+            if re.search(pattern, found_pat):
+                start_read = corr_line + 7 + b2b
+                break
+    if start_read == 0:
+        raise ValueError("Did not find pattern\n", pattern)
+    endline = corr_line + 6 + b2b
+    while not chunk[endline] == "\n":
+        endline += 1
+    T = endline - start_read
+    return gauge_line, corr_line, start_read, T
 def _read_append_rep(filename, pattern, b2b, im, single, idl_func, cfg_func_args):
     with open(filename, 'r') as fp:
         content = fp.readlines()
-        data_starts = []
+        chunk_start_lines = []
         for linenumber, line in enumerate(content):
             if "[run]" in line:
-                data_starts.append(linenumber)
-        if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
-            raise Exception("Irregularities in file structure found, not all runs have the same output length")
-        chunk = content[:data_starts[1]]
-        for linenumber, line in enumerate(chunk):
-            if line.startswith("gauge_name"):
-                gauge_line = linenumber
-            elif line.startswith("[correlator]"):
-                corr_line = linenumber
-                found_pat = ""
-                for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
-                    found_pat += li
-                if re.search(pattern, found_pat):
-                    start_read = corr_line + 7 + b2b
-                    break
-                else:
-                    raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
-        endline = corr_line + 6 + b2b
-        while not chunk[endline] == "\n":
-            endline += 1
-        T = endline - start_read
-        # all other chunks should follow the same structure
+                chunk_start_lines.append(linenumber)
+        has_regular_len_heads = _check_append_rep(content, chunk_start_lines)
+        if has_regular_len_heads:
+            chunk = content[:chunk_start_lines[1]]
+            try:
+                gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
+            except ValueError:
+                raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename, "lines", 1, "to", chunk_start_lines[1] + 1)
+        # if has_regular_len_heads is true, all other chunks should follow the same structure
         rep_idl = []
         rep_data = []
-        for cnfg in range(len(data_starts)):
-            start = data_starts[cnfg]
-            stop = start + data_starts[1]
+        for chunk_num in range(len(chunk_start_lines)):
+            start = chunk_start_lines[chunk_num]
+            if chunk_num == len(chunk_start_lines) - 1:
+                stop = len(content)
+            else:
+                stop = chunk_start_lines[chunk_num + 1]
             chunk = content[start:stop]
+            if not has_regular_len_heads:
+                gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
             try:
                 idl = idl_func(chunk[gauge_line], *cfg_func_args)
             except Exception:

pyerrors-2.17.0/pyerrors/version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "2.17.0"

{pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pyerrors
-Version: 2.16.0
+Version: 2.17.0
 Summary: Error propagation and statistical analysis for Monte Carlo simulations
 Home-page: https://github.com/fjosw/pyerrors
 Author: Fabian Joswig