libgunshotmatch 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of libgunshotmatch might be problematic. Click here for more details.

libgunshotmatch/__init__.py CHANGED
@@ -29,5 +29,5 @@ Base library for GunShotMatch.
29
29
  __author__: str = "Dominic Davis-Foster"
30
30
  __copyright__: str = "2020-2023 Dominic Davis-Foster"
31
31
  __license__: str = "MIT License"
32
- __version__: str = "0.11.1"
32
+ __version__: str = "0.12.0"
33
33
  __email__: str = "dominic@davis-foster.co.uk"
libgunshotmatch/comparison/projects.py CHANGED
@@ -52,15 +52,18 @@ def filter_alignment_to_consolidate(project: Project) -> Alignment:
52
52
 
53
53
  # Sort expr_code and peakpos into order from datafile_data
54
54
  desired_order = list(project.datafile_data)[::-1]
55
- sort_map = [desired_order.index(code) for code in project.alignment.expr_code]
55
+ sort_map = [project.alignment.expr_code.index(code) for code in desired_order]
56
56
  expr_code = [project.alignment.expr_code[idx] for idx in sort_map]
57
57
  peakpos = [project.alignment.peakpos[idx] for idx in sort_map]
58
+ assert desired_order == expr_code
58
59
 
59
- consolidated_peak_retention_times = [cp.rt_list for cp in project.consolidated_peaks]
60
+ consolidated_peak_retention_times = []
61
+ for cp in project.consolidated_peaks:
62
+ consolidated_peak_retention_times.append([None if numpy.isnan(rt) else rt for rt in cp.rt_list])
60
63
 
61
64
  aligned_peaks_surviving_consolidate = []
62
65
  for aligned_peaks in zip(*peakpos):
63
- aprt = [p.rt for p in reversed(aligned_peaks)]
66
+ aprt = [None if p is None else p.rt for p in reversed(aligned_peaks)]
64
67
  if aprt in consolidated_peak_retention_times:
65
68
  aligned_peaks_surviving_consolidate.append(aligned_peaks)
66
69
 
libgunshotmatch/consolidate/__init__.py CHANGED
@@ -484,9 +484,12 @@ class ConsolidatedPeak:
484
484
  else:
485
485
  ms_list.append(MassSpectrum.from_dict(msd))
486
486
 
487
+ rt_list = [float("nan") if hn == -65535 else hn for hn in d["rt_list"]]
488
+ area_list = [float("nan") if hn == -65535 else hn for hn in d["area_list"]]
489
+
487
490
  return cls(
488
- rt_list=d["rt_list"],
489
- area_list=d["area_list"],
491
+ rt_list=rt_list,
492
+ area_list=area_list,
490
493
  ms_list=ms_list,
491
494
  meta=d["meta"],
492
495
  ms_comparison=d["ms_comparison"],
libgunshotmatch/gzip_util.py CHANGED
@@ -60,16 +60,21 @@ def read_gzip_json(path: PathLike) -> JSONOutput:
60
60
  return sdjson.load(f)
61
61
 
62
62
 
63
- def write_gzip_json(path: PathLike, data: JSONInput, indent: Optional[int] = 2) -> None:
63
+ def write_gzip_json(path: PathLike, data: JSONInput, indent: Optional[int] = 2, mtime: int = 0) -> None:
64
64
  """
65
65
  Write JSON to a gzip file.
66
66
 
67
67
  :param path: The filename to write to.
68
68
  :param data: The JSON-serializable data to output.
69
69
  :param indent: Number of spaces used to indent JSON.
70
+ :param mtime: Modification time for gzip header
71
+
72
+ :rtype:
73
+
74
+ .. versionchanged:: 0.12.0 Added ``mtime`` argument.
70
75
  """
71
76
 
72
77
  json_data = sdjson.dumps(data, indent=indent)
73
78
 
74
- with gzip.open(PathPlus(path), 'w') as f:
79
+ with gzip.GzipFile(PathPlus(path), 'w', mtime=mtime) as f:
75
80
  f.write(json_data.encode("utf-8"))
libgunshotmatch/peak.py CHANGED
@@ -27,13 +27,14 @@ Classes representing peaks, and functions for peak filtering.
27
27
  #
28
28
 
29
29
  # stdlib
30
- from typing import Any, Collection, Dict, List, Mapping, Optional, Sequence, Type, Union
30
+ from typing import TYPE_CHECKING, Any, Collection, Dict, List, Mapping, Optional, Sequence, Type, Union
31
31
 
32
32
  # 3rd party
33
33
  import numpy
34
34
  import pandas # type: ignore[import-untyped]
35
35
  import sdjson
36
36
  from domdf_python_tools.paths import PathPlus
37
+ from domdf_python_tools.stringlist import StringList
37
38
  from domdf_python_tools.typing import PathLike
38
39
  from pyms.BillerBiemann import num_ions_threshold
39
40
  from pyms.DPA.Alignment import exprl2alignment
@@ -42,10 +43,15 @@ from pyms.Experiment import Experiment
42
43
  from pyms.IonChromatogram import IonChromatogram
43
44
  from pyms.Noise.Analysis import window_analyzer
44
45
  from pyms.Peak.Class import AbstractPeak, ICPeak, Peak
46
+ from pyms.Peak.List import composite_peak
45
47
  from pyms.Peak.List.Function import sele_peaks_by_rt
46
48
  from pyms.Spectrum import MassSpectrum
47
49
  from pyms_nist_search import SearchResult
48
50
 
51
+ if TYPE_CHECKING:
52
+ # this package
53
+ from libgunshotmatch.project import Project
54
+
49
55
  __all__ = (
50
56
  "PeakList",
51
57
  "QualifiedPeak",
@@ -55,6 +61,7 @@ __all__ = (
55
61
  "filter_peaks",
56
62
  "peak_from_dict",
57
63
  "write_alignment",
64
+ "write_project_alignment",
58
65
  "base_peak_mass",
59
66
  )
60
67
 
@@ -401,6 +408,107 @@ def align_peaks(
401
408
  return A1
402
409
 
403
410
 
411
+ def _format_rt(rt: Optional[float]) -> str:
412
+ return "NA" if rt is None or numpy.isnan(rt) else f"{rt:.3f}"
413
+
414
+
415
+ def _format_area(area: Optional[float]) -> str:
416
+ return "NA" if area is None else f"{area:.0f}"
417
+
418
+
419
+ def _alignment_write_csv(
420
+ project: "Project",
421
+ output_dir_p: PathPlus,
422
+ ) -> None:
423
+
424
+ # Sort expr_code and peakpos into order from datafile_data
425
+ desired_order = list(project.datafile_data)
426
+ sort_map = [project.alignment.expr_code.index(code) for code in desired_order]
427
+ expr_code = [project.alignment.expr_code[idx] for idx in sort_map]
428
+ peakpos = [project.alignment.peakpos[idx] for idx in sort_map]
429
+ assert desired_order == expr_code
430
+
431
+ # write headers
432
+ header = ["UID", "RTavg", *(f'"{item}"' for item in project.datafile_data)]
433
+
434
+ rt_stringlist = StringList([','.join(header)])
435
+ area_stringlist = StringList([','.join(header)])
436
+
437
+ # for each alignment position write alignment's peak and area
438
+ for peak_idx in range(len(peakpos[0])): # loop through peak lists (rows)
439
+ rts, areas, new_peak_list = [], [], []
440
+
441
+ for row in peakpos:
442
+ peak = row[peak_idx]
443
+
444
+ if peak is None:
445
+ rts.append(None)
446
+ areas.append(None)
447
+ else:
448
+ rts.append(peak.rt / 60)
449
+ areas.append(peak.area)
450
+ new_peak_list.append(peak)
451
+
452
+ compo_peak = composite_peak(new_peak_list)
453
+
454
+ if compo_peak is None:
455
+ continue
456
+
457
+ uid, mean_rt = compo_peak.UID, f"{float(compo_peak.rt / 60):.3f}"
458
+ rt_stringlist.append(','.join([uid, mean_rt, *map(_format_rt, rts)]))
459
+ area_stringlist.append(','.join([uid, mean_rt, *map(_format_area, areas)]))
460
+
461
+ (output_dir_p / f"{project.name}_alignment_rt.csv").write_lines(rt_stringlist)
462
+ (output_dir_p / f"{project.name}_alignment_area.csv").write_lines(area_stringlist)
463
+
464
+
465
+ def write_project_alignment(
466
+ project: "Project",
467
+ output_dir: PathLike,
468
+ require_all_datafiles: bool = False,
469
+ ) -> None:
470
+ """
471
+ Write the alignment data (retention times, peak areas, mass spectra) to disk.
472
+
473
+ The output files are as follows:
474
+
475
+ * :file:`{{project.name}}_alignment_rt.csv`, containing the aligned retention times.
476
+ * :file:`{{project.name}}_alignment_area.csv`, containing the peak areas for the corresponding aligned retention times.
477
+ * :file:`{{project.name}}_alignment_rt.json`, containing the aligned retention times.
478
+ * :file:`{{project.name}}_alignment_area.json`, containing the peak areas for the corresponding aligned retention times.
479
+ * :file:`{{project.name}}_alignment_ms.json`, containing the mass spectra for the corresponding aligned retention times.
480
+
481
+ :param project:
482
+ :param output_dir: Directory to store the output files in.
483
+ :param require_all_datafiles: Whether the peak must be present in all experiments to be included in the data frame.
484
+
485
+ :rtype:
486
+
487
+ .. versionadded:: 0.12.0 Added as an alternative to :func:`~.write_alignment`. This function sorts the columns to match the order of ``project.datafile_data``.
488
+ """
489
+
490
+ output_dir_p = PathPlus(output_dir)
491
+
492
+ _alignment_write_csv(project, output_dir_p)
493
+
494
+ rt_alignment = project.alignment.get_peak_alignment(require_all_expr=require_all_datafiles)
495
+ rt_alignment_filename = output_dir_p / f"{project.name}_alignment_rt.json"
496
+ rt_alignment_filename.write_clean(rt_alignment.to_json(indent=2))
497
+
498
+ area_alignment = project.alignment.get_area_alignment(require_all_expr=require_all_datafiles)
499
+ area_alignment_filename = output_dir_p / f"{project.name}_alignment_area.json"
500
+ area_alignment_filename.write_clean(area_alignment.to_json(indent=2))
501
+
502
+ ms_alignment = project.alignment.get_ms_alignment(require_all_expr=require_all_datafiles)
503
+ # ms_alignment.to_json(output_dir_p / 'alignment_ms.json')
504
+ alignment_ms_filename = (output_dir_p / f"{project.name}_alignment_ms.json")
505
+ alignment_ms_filename.dump_json(
506
+ ms_alignment.to_dict(),
507
+ json_library=sdjson, # type: ignore[arg-type]
508
+ indent=2,
509
+ )
510
+
511
+
404
512
  def write_alignment(
405
513
  alignment: Alignment,
406
514
  project_name: str,
@@ -434,11 +542,11 @@ def write_alignment(
434
542
 
435
543
  rt_alignment = alignment.get_peak_alignment(require_all_expr=require_all_datafiles)
436
544
  rt_alignment_filename = output_dir_p / f"{project_name}_alignment_rt.json"
437
- rt_alignment_filename.write_clean(rt_alignment.to_json())
545
+ rt_alignment_filename.write_clean(rt_alignment.to_json(indent=2))
438
546
 
439
547
  area_alignment = alignment.get_area_alignment(require_all_expr=require_all_datafiles)
440
548
  area_alignment_filename = output_dir_p / f"{project_name}_alignment_area.json"
441
- area_alignment_filename.write_clean(area_alignment.to_json())
549
+ area_alignment_filename.write_clean(area_alignment.to_json(indent=2))
442
550
 
443
551
  ms_alignment = alignment.get_ms_alignment(require_all_expr=require_all_datafiles)
444
552
  # ms_alignment.to_json(output_dir_p / 'alignment_ms.json')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: libgunshotmatch
3
- Version: 0.11.1
3
+ Version: 0.12.0
4
4
  Summary: Base library for GunShotMatch.
5
5
  Author-email: Dominic Davis-Foster <dominic@davis-foster.co.uk>
6
6
  License: MIT
@@ -135,7 +135,7 @@ libgunshotmatch
135
135
  .. |language| image:: https://img.shields.io/github/languages/top/GunShotMatch/libgunshotmatch
136
136
  :alt: GitHub top language
137
137
 
138
- .. |commits-since| image:: https://img.shields.io/github/commits-since/GunShotMatch/libgunshotmatch/v0.11.1
138
+ .. |commits-since| image:: https://img.shields.io/github/commits-since/GunShotMatch/libgunshotmatch/v0.12.0
139
139
  :target: https://github.com/GunShotMatch/libgunshotmatch/pulse
140
140
  :alt: GitHub commits since tagged version
141
141
 
@@ -1,22 +1,22 @@
1
1
  libgunshotmatch/comparison/__init__.py,sha256=g-KDfzG-oyRf2aeXlMaJJ3aWfQtQkr5-bGhT70O44XA,3848
2
2
  libgunshotmatch/comparison/_utils.py,sha256=aZ6fnFM4M_vTsKq5t79V4tfven1eVdG4EUCuYFuXMVs,1895
3
- libgunshotmatch/comparison/projects.py,sha256=Dq294cDndmQI2FnIcVXwleCQF0mF2m8U5wWnO5_aD9A,3356
3
+ libgunshotmatch/comparison/projects.py,sha256=fN1-dbD8LNPKcMerKHQwwOVeOwFPcd6E9hZ8QQFJOi4,3505
4
4
  libgunshotmatch/comparison/unknowns.py,sha256=kvJJDtfpMp-c3hjCOz7YtvuuDR0YksWmyUuaSBC_5Jg,2710
5
- libgunshotmatch/consolidate/__init__.py,sha256=qjY5meI2TMCpMGwzLSo4NZ4HbhH8VJoR1t9ev4ObBmA,24706
5
+ libgunshotmatch/consolidate/__init__.py,sha256=LyXgWTYy2YRmavCzzU4u-b-mLi2xDb1_QYk62lqhV9s,24849
6
6
  libgunshotmatch/consolidate/_fields.py,sha256=0kfPXJ0EG7GhdFiNzvcmd6W4i1x6Y0s2Y58z3RltPiA,2759
7
7
  libgunshotmatch/consolidate/_spectra.py,sha256=24aDoPwGWEyIFCH-fwRa4nifNMPqUyd-qTogk3BMeLY,2319
8
8
  libgunshotmatch/method/__init__.py,sha256=TP_3rvv3WEbV6Y5E_QWd2lcORPFnbXV4YRKGwXDa4ds,9423
9
9
  libgunshotmatch/method/_fields.py,sha256=HBFl0XmHBaAOHYdABs2NRbjtngGEw7kY0xP6D2Fl7h4,4941
10
- libgunshotmatch/__init__.py,sha256=RYV-3nDk5ruAuFXDnZuk5LbiFM_503YfSuKwtGTdddk,1434
10
+ libgunshotmatch/__init__.py,sha256=sLe-ceqI8gWFylKLQsa3-o_f9N4Iz7gfvMxWAlZPj2E,1434
11
11
  libgunshotmatch/datafile.py,sha256=4C4BiR95PBfIUbiUKQzVfh57tgZu_l6rLS8bIgrgPUs,16246
12
- libgunshotmatch/gzip_util.py,sha256=PcfT4QC4TM0KI3uCGetRpjChbTXTGmNEJBr7BghO3i8,2414
13
- libgunshotmatch/peak.py,sha256=jIbuOTj_FWTeb46pZtzSwnGsMK_c71Dd9_T9Gj2DxNk,17616
12
+ libgunshotmatch/gzip_util.py,sha256=bhYUdFmaSNKPV66vTvWkebbLhtMSPiLWBP20JVqgjXw,2562
13
+ libgunshotmatch/peak.py,sha256=w2oWWmMQ9JP2JMK7vaboOTbv83TvgOXuTCxGxGToHGc,21638
14
14
  libgunshotmatch/project.py,sha256=498xoKkDSFDsFodwoRwwEfRL-FOX_hnzyDLLgPNxWr0,6987
15
15
  libgunshotmatch/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  libgunshotmatch/search.py,sha256=GMP7_DMnXFq3TG8AT1WARIn4ObCfOGfTDakpGeKzJYw,3123
17
17
  libgunshotmatch/utils.py,sha256=7WVJPd9wNBFFoAFqh2J6Lsu7BGrGksW32ZqpH7ZIL_I,5776
18
- libgunshotmatch-0.11.1.dist-info/LICENSE,sha256=bFtJt-lyVJHV-88FeFa_r3BEOsmpna5qG2KOl9JUNfU,1064
19
- libgunshotmatch-0.11.1.dist-info/METADATA,sha256=X6IRaQ7QLl3ugK51lSmpa205f0Ds1QmbVICEaFucBtw,6394
20
- libgunshotmatch-0.11.1.dist-info/WHEEL,sha256=pUf8gZsdmDXXTtqZfolZFpfoEwFoEdADIuUvQVl5qAY,83
21
- libgunshotmatch-0.11.1.dist-info/entry_points.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- libgunshotmatch-0.11.1.dist-info/RECORD,,
18
+ libgunshotmatch-0.12.0.dist-info/LICENSE,sha256=bFtJt-lyVJHV-88FeFa_r3BEOsmpna5qG2KOl9JUNfU,1064
19
+ libgunshotmatch-0.12.0.dist-info/METADATA,sha256=DA8EiUE-EPbsq2MQd-8MAKiaAuuGC50L2GXS54WYpxg,6394
20
+ libgunshotmatch-0.12.0.dist-info/WHEEL,sha256=BKn3NsEitvGOujn9xNlxxTBtBPCGWWsB3u3zHbttCmw,83
21
+ libgunshotmatch-0.12.0.dist-info/entry_points.txt,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ libgunshotmatch-0.12.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: whey (0.1.0)
2
+ Generator: whey (0.1.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any