pyerrors 2.16.0__tar.gz → 2.17.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {pyerrors-2.16.0 → pyerrors-2.17.0}/PKG-INFO +1 -1
  2. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/pandas.py +30 -18
  3. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/sfcf.py +68 -27
  4. pyerrors-2.17.0/pyerrors/version.py +1 -0
  5. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/PKG-INFO +1 -1
  6. pyerrors-2.16.0/pyerrors/version.py +0 -1
  7. {pyerrors-2.16.0 → pyerrors-2.17.0}/LICENSE +0 -0
  8. {pyerrors-2.16.0 → pyerrors-2.17.0}/README.md +0 -0
  9. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/__init__.py +0 -0
  10. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/correlators.py +0 -0
  11. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/covobs.py +0 -0
  12. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/dirac.py +0 -0
  13. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/fits.py +0 -0
  14. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/__init__.py +0 -0
  15. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/bdio.py +0 -0
  16. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/dobs.py +0 -0
  17. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/hadrons.py +0 -0
  18. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/json.py +0 -0
  19. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/misc.py +0 -0
  20. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/openQCD.py +0 -0
  21. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/input/utils.py +0 -0
  22. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/integrate.py +0 -0
  23. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/linalg.py +0 -0
  24. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/misc.py +0 -0
  25. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/mpm.py +0 -0
  26. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/obs.py +0 -0
  27. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/roots.py +0 -0
  28. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors/special.py +0 -0
  29. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/SOURCES.txt +0 -0
  30. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/dependency_links.txt +0 -0
  31. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/requires.txt +0 -0
  32. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyerrors.egg-info/top_level.txt +0 -0
  33. {pyerrors-2.16.0 → pyerrors-2.17.0}/pyproject.toml +0 -0
  34. {pyerrors-2.16.0 → pyerrors-2.17.0}/setup.cfg +0 -0
  35. {pyerrors-2.16.0 → pyerrors-2.17.0}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyerrors
3
- Version: 2.16.0
3
+ Version: 2.17.0
4
4
  Summary: Error propagation and statistical analysis for Monte Carlo simulations
5
5
  Home-page: https://github.com/fjosw/pyerrors
6
6
  Author: Fabian Joswig
@@ -145,9 +145,9 @@ def _serialize_df(df, gz=False):
145
145
  serialize = _need_to_serialize(out[column])
146
146
 
147
147
  if serialize is True:
148
- out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if x is not None else None)
148
+ out[column] = out[column].transform(lambda x: create_json_string(x, indent=0) if not _is_null(x) else None)
149
149
  if gz is True:
150
- out[column] = out[column].transform(lambda x: gzip.compress((x if x is not None else '').encode('utf-8')))
150
+ out[column] = out[column].transform(lambda x: gzip.compress(x.encode('utf-8')) if not _is_null(x) else gzip.compress(b''))
151
151
  return out
152
152
 
153
153
 
@@ -166,37 +166,49 @@ def _deserialize_df(df, auto_gamma=False):
166
166
  ------
167
167
  In case any column of the DataFrame is gzipped it is gunzipped in the process.
168
168
  """
169
- for column in df.select_dtypes(include="object"):
170
- if isinstance(df[column][0], bytes):
171
- if df[column][0].startswith(b"\x1f\x8b\x08\x00"):
172
- df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8'))
173
-
174
- if not all([e is None for e in df[column]]):
169
+ # In pandas 3+, string columns use 'str' dtype instead of 'object'
170
+ string_like_dtypes = ["object", "str"] if int(pd.__version__.split(".")[0]) >= 3 else ["object"]
171
+ for column in df.select_dtypes(include=string_like_dtypes):
172
+ if len(df[column]) == 0:
173
+ continue
174
+ if isinstance(df[column].iloc[0], bytes):
175
+ if df[column].iloc[0].startswith(b"\x1f\x8b\x08\x00"):
176
+ df[column] = df[column].transform(lambda x: gzip.decompress(x).decode('utf-8') if not pd.isna(x) else '')
177
+
178
+ if df[column].notna().any():
175
179
  df[column] = df[column].replace({r'^$': None}, regex=True)
176
180
  i = 0
177
- while df[column][i] is None:
181
+ while i < len(df[column]) and pd.isna(df[column].iloc[i]):
178
182
  i += 1
179
- if isinstance(df[column][i], str):
180
- if '"program":' in df[column][i][:20]:
181
- df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if x is not None else None)
183
+ if i < len(df[column]) and isinstance(df[column].iloc[i], str):
184
+ if '"program":' in df[column].iloc[i][:20]:
185
+ df[column] = df[column].transform(lambda x: import_json_string(x, verbose=False) if not pd.isna(x) else None)
182
186
  if auto_gamma is True:
183
- if isinstance(df[column][i], list):
184
- df[column].apply(lambda x: [o.gm() if o is not None else x for o in x])
187
+ if isinstance(df[column].iloc[i], list):
188
+ df[column].apply(lambda x: [o.gm() if o is not None else x for o in x] if x is not None else x)
185
189
  else:
186
190
  df[column].apply(lambda x: x.gm() if x is not None else x)
191
+ # Convert NA values back to Python None for compatibility with `x is None` checks
192
+ if df[column].isna().any():
193
+ df[column] = df[column].astype(object).where(df[column].notna(), None)
187
194
  return df
188
195
 
189
196
 
190
197
  def _need_to_serialize(col):
191
198
  serialize = False
192
199
  i = 0
193
- while i < len(col) and col[i] is None:
200
+ while i < len(col) and _is_null(col.iloc[i]):
194
201
  i += 1
195
202
  if i == len(col):
196
203
  return serialize
197
- if isinstance(col[i], (Obs, Corr)):
204
+ if isinstance(col.iloc[i], (Obs, Corr)):
198
205
  serialize = True
199
- elif isinstance(col[i], list):
200
- if all(isinstance(o, Obs) for o in col[i]):
206
+ elif isinstance(col.iloc[i], list):
207
+ if all(isinstance(o, Obs) for o in col.iloc[i]):
201
208
  serialize = True
202
209
  return serialize
210
+
211
+
212
+ def _is_null(val):
213
+ """Check if a value is null (None or NA), handling list/array values."""
214
+ return False if isinstance(val, (list, np.ndarray)) else pd.isna(val)
@@ -5,6 +5,7 @@ import numpy as np # Thinly-wrapped numpy
5
5
  from ..obs import Obs
6
6
  from .utils import sort_names, check_idl
7
7
  import itertools
8
+ import warnings
8
9
 
9
10
 
10
11
  sep = "/"
@@ -603,42 +604,82 @@ def _read_chunk_data(chunk, start_read, T, corr_line, b2b, pattern, im, single):
603
604
  return data
604
605
 
605
606
 
607
+ def _check_append_rep(content, start_list):
608
+ data_len_list = []
609
+ header_len_list = []
610
+ has_regular_len_heads = True
611
+ for chunk_num in range(len(start_list)):
612
+ start = start_list[chunk_num]
613
+ if chunk_num == len(start_list) - 1:
614
+ stop = len(content)
615
+ else:
616
+ stop = start_list[chunk_num + 1]
617
+ chunk = content[start:stop]
618
+ for linenumber, line in enumerate(chunk):
619
+ if line.startswith("[correlator]"):
620
+ header_len = linenumber
621
+ break
622
+ header_len_list.append(header_len)
623
+ data_len_list.append(len(chunk) - header_len)
624
+
625
+ if len(set(header_len_list)) > 1:
626
+ warnings.warn("Not all headers have the same length. Data parts do.")
627
+ has_regular_len_heads = False
628
+
629
+ if len(set(data_len_list)) > 1:
630
+ raise Exception("Irregularities in file structure found, not all run data are of the same output length")
631
+ return has_regular_len_heads
632
+
633
+
634
+ def _read_chunk_structure(chunk, pattern, b2b):
635
+ start_read = 0
636
+ for linenumber, line in enumerate(chunk):
637
+ if line.startswith("gauge_name"):
638
+ gauge_line = linenumber
639
+ elif line.startswith("[correlator]"):
640
+ corr_line = linenumber
641
+ found_pat = ""
642
+ for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
643
+ found_pat += li
644
+ if re.search(pattern, found_pat):
645
+ start_read = corr_line + 7 + b2b
646
+ break
647
+ if start_read == 0:
648
+ raise ValueError("Did not find pattern\n", pattern)
649
+ endline = corr_line + 6 + b2b
650
+ while not chunk[endline] == "\n":
651
+ endline += 1
652
+ T = endline - start_read
653
+ return gauge_line, corr_line, start_read, T
654
+
655
+
606
656
  def _read_append_rep(filename, pattern, b2b, im, single, idl_func, cfg_func_args):
607
657
  with open(filename, 'r') as fp:
608
658
  content = fp.readlines()
609
- data_starts = []
659
+ chunk_start_lines = []
610
660
  for linenumber, line in enumerate(content):
611
661
  if "[run]" in line:
612
- data_starts.append(linenumber)
613
- if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
614
- raise Exception("Irregularities in file structure found, not all runs have the same output length")
615
- chunk = content[:data_starts[1]]
616
- for linenumber, line in enumerate(chunk):
617
- if line.startswith("gauge_name"):
618
- gauge_line = linenumber
619
- elif line.startswith("[correlator]"):
620
- corr_line = linenumber
621
- found_pat = ""
622
- for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
623
- found_pat += li
624
- if re.search(pattern, found_pat):
625
- start_read = corr_line + 7 + b2b
626
- break
627
- else:
628
- raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
629
- endline = corr_line + 6 + b2b
630
- while not chunk[endline] == "\n":
631
- endline += 1
632
- T = endline - start_read
633
-
634
- # all other chunks should follow the same structure
662
+ chunk_start_lines.append(linenumber)
663
+ has_regular_len_heads = _check_append_rep(content, chunk_start_lines)
664
+ if has_regular_len_heads:
665
+ chunk = content[:chunk_start_lines[1]]
666
+ try:
667
+ gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
668
+ except ValueError:
669
+ raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename, "lines", 1, "to", chunk_start_lines[1] + 1)
670
+ # if has_regular_len_heads is true, all other chunks should follow the same structure
635
671
  rep_idl = []
636
672
  rep_data = []
637
673
 
638
- for cnfg in range(len(data_starts)):
639
- start = data_starts[cnfg]
640
- stop = start + data_starts[1]
674
+ for chunk_num in range(len(chunk_start_lines)):
675
+ start = chunk_start_lines[chunk_num]
676
+ if chunk_num == len(chunk_start_lines) - 1:
677
+ stop = len(content)
678
+ else:
679
+ stop = chunk_start_lines[chunk_num + 1]
641
680
  chunk = content[start:stop]
681
+ if not has_regular_len_heads:
682
+ gauge_line, corr_line, start_read, T = _read_chunk_structure(chunk, pattern, b2b)
642
683
  try:
643
684
  idl = idl_func(chunk[gauge_line], *cfg_func_args)
644
685
  except Exception:
@@ -0,0 +1 @@
1
+ __version__ = "2.17.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pyerrors
3
- Version: 2.16.0
3
+ Version: 2.17.0
4
4
  Summary: Error propagation and statistical analysis for Monte Carlo simulations
5
5
  Home-page: https://github.com/fjosw/pyerrors
6
6
  Author: Fabian Joswig
@@ -1 +0,0 @@
1
- __version__ = "2.16.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes