PySAR 2.5.0__tar.gz → 2.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {pysar-2.5.0 → pysar-2.5.1}/PKG-INFO +6 -5
  2. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/PKG-INFO +6 -5
  3. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/requires.txt +0 -1
  4. {pysar-2.5.0 → pysar-2.5.1}/README.md +5 -3
  5. {pysar-2.5.0 → pysar-2.5.1}/docs/conf.py +1 -2
  6. {pysar-2.5.0 → pysar-2.5.1}/pySAR/__init__.py +1 -1
  7. {pysar-2.5.0 → pysar-2.5.1}/pySAR/descriptors.py +0 -1
  8. {pysar-2.5.0 → pysar-2.5.1}/pySAR/evaluate.py +2 -2
  9. {pysar-2.5.0 → pysar-2.5.1}/pySAR/globals_.py +0 -3
  10. {pysar-2.5.0 → pysar-2.5.1}/pySAR/model.py +6 -12
  11. {pysar-2.5.0 → pysar-2.5.1}/pySAR/pySAR.py +10 -11
  12. {pysar-2.5.0 → pysar-2.5.1}/pySAR/utils.py +0 -37
  13. {pysar-2.5.0 → pysar-2.5.1}/pyproject.toml +1 -2
  14. {pysar-2.5.0 → pysar-2.5.1}/tests/test_model.py +2 -2
  15. {pysar-2.5.0 → pysar-2.5.1}/tests/test_pySAR.py +2 -2
  16. {pysar-2.5.0 → pysar-2.5.1}/tests/test_utils.py +0 -38
  17. {pysar-2.5.0 → pysar-2.5.1}/LICENSE +0 -0
  18. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/SOURCES.txt +0 -0
  19. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/dependency_links.txt +0 -0
  20. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/not-zip-safe +0 -0
  21. {pysar-2.5.0 → pysar-2.5.1}/PySAR.egg-info/top_level.txt +0 -0
  22. {pysar-2.5.0 → pysar-2.5.1}/pySAR/encoding.py +0 -0
  23. {pysar-2.5.0 → pysar-2.5.1}/pySAR/plots.py +0 -0
  24. {pysar-2.5.0 → pysar-2.5.1}/pySAR/py.typed +0 -0
  25. {pysar-2.5.0 → pysar-2.5.1}/pySAR/pyDSP.py +0 -0
  26. {pysar-2.5.0 → pysar-2.5.1}/setup.cfg +0 -0
  27. {pysar-2.5.0 → pysar-2.5.1}/tests/test_descriptors.py +0 -0
  28. {pysar-2.5.0 → pysar-2.5.1}/tests/test_encoding.py +0 -0
  29. {pysar-2.5.0 → pysar-2.5.1}/tests/test_evaluate.py +0 -0
  30. {pysar-2.5.0 → pysar-2.5.1}/tests/test_plots.py +0 -0
  31. {pysar-2.5.0 → pysar-2.5.1}/tests/test_pyDSP.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PySAR
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: Analysing Sequence Activity Relationships (SARs) of protein sequences and their mutants using Machine Learning.
5
5
  Author-email: AJ McKenna <amckenna41@qub.ac.uk>
6
6
  Maintainer-email: AJ McKenna <amckenna41@qub.ac.uk>
@@ -33,7 +33,6 @@ License-File: LICENSE
33
33
  Requires-Dist: numpy>=1.21
34
34
  Requires-Dist: pandas>=1.3
35
35
  Requires-Dist: scipy>=1.7
36
- Requires-Dist: delayed>=0.11
37
36
  Requires-Dist: scikit-learn>=1.0
38
37
  Requires-Dist: matplotlib>=3.4
39
38
  Requires-Dist: seaborn>=0.11
@@ -50,7 +49,7 @@ Requires-Dist: sphinx; extra == "docs"
50
49
  Dynamic: license-file
51
50
 
52
51
  <p align="center">
53
- <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="300" width="400"/>
52
+ <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="400" width="350"/>
54
53
  </p>
55
54
 
56
55
  # pySAR - Python Sequence Activity Relationship #
@@ -126,7 +125,6 @@ Requirements
126
125
  * [pandas][pandas] >= 1.3
127
126
  * [scikit-learn][sklearn] >= 1.0
128
127
  * [scipy][scipy] >= 1.7
129
- * [delayed][delayed] >= 0.11
130
128
  * [tqdm][tqdm] >= 4.60
131
129
  * [matplotlib][matplotlib] >= 3.4
132
130
  * [seaborn][seaborn] >= 0.11
@@ -711,6 +709,10 @@ Journal of Chemical Information and Modeling 2020 60 (6), 2773-2790
711
709
  DOI: 10.1021/acs.jcim.0c00073 <br><br>
712
710
  \[8\]: Medina-Ortiz, D., Contreras, S., Amado-Hinojosa, J., Torres-Almonacid, J., Asenjo, J. A., Navarrete, M., & Olivera-Nappa, Á. (2020). Combination of digital signal processing and assembled predictive models facilitates the rational design of proteins. ArXiv [Cs.CE]. <br>
713
711
 
712
+
713
+ [<img src="https://img.shields.io/github/stars/amckenna41/pySAR?color=green&label=star%20it%20on%20GitHub" width="132" height="20" alt="Star it on GitHub">](https://github.com/amckenna41/pySAR)
714
+
715
+
714
716
  <a href="https://www.buymeacoffee.com/amckenna41" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
715
717
 
716
718
  [Back to top](#TOP)
@@ -727,7 +729,6 @@ DOI: 10.1021/acs.jcim.0c00073 <br><br>
727
729
  [tqdm]: https://tqdm.github.io/
728
730
  [seaborn]: https://seaborn.pydata.org/
729
731
  [matplotlib]: https://matplotlib.org/
730
- [delayed]: https://pypi.org/project/delayed/
731
732
  [PyPi]: https://pypi.org/project/pysar/
732
733
  [article]: https://www.sciencedirect.com/science/article/abs/pii/S1532046422000326
733
734
  [pdf]: https://github.com/amckenna41/pySAR/blob/master/pySAR_research.pdf
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PySAR
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: Analysing Sequence Activity Relationships (SARs) of protein sequences and their mutants using Machine Learning.
5
5
  Author-email: AJ McKenna <amckenna41@qub.ac.uk>
6
6
  Maintainer-email: AJ McKenna <amckenna41@qub.ac.uk>
@@ -33,7 +33,6 @@ License-File: LICENSE
33
33
  Requires-Dist: numpy>=1.21
34
34
  Requires-Dist: pandas>=1.3
35
35
  Requires-Dist: scipy>=1.7
36
- Requires-Dist: delayed>=0.11
37
36
  Requires-Dist: scikit-learn>=1.0
38
37
  Requires-Dist: matplotlib>=3.4
39
38
  Requires-Dist: seaborn>=0.11
@@ -50,7 +49,7 @@ Requires-Dist: sphinx; extra == "docs"
50
49
  Dynamic: license-file
51
50
 
52
51
  <p align="center">
53
- <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="300" width="400"/>
52
+ <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="400" width="350"/>
54
53
  </p>
55
54
 
56
55
  # pySAR - Python Sequence Activity Relationship #
@@ -126,7 +125,6 @@ Requirements
126
125
  * [pandas][pandas] >= 1.3
127
126
  * [scikit-learn][sklearn] >= 1.0
128
127
  * [scipy][scipy] >= 1.7
129
- * [delayed][delayed] >= 0.11
130
128
  * [tqdm][tqdm] >= 4.60
131
129
  * [matplotlib][matplotlib] >= 3.4
132
130
  * [seaborn][seaborn] >= 0.11
@@ -711,6 +709,10 @@ Journal of Chemical Information and Modeling 2020 60 (6), 2773-2790
711
709
  DOI: 10.1021/acs.jcim.0c00073 <br><br>
712
710
  \[8\]: Medina-Ortiz, D., Contreras, S., Amado-Hinojosa, J., Torres-Almonacid, J., Asenjo, J. A., Navarrete, M., & Olivera-Nappa, Á. (2020). Combination of digital signal processing and assembled predictive models facilitates the rational design of proteins. ArXiv [Cs.CE]. <br>
713
711
 
712
+
713
+ [<img src="https://img.shields.io/github/stars/amckenna41/pySAR?color=green&label=star%20it%20on%20GitHub" width="132" height="20" alt="Star it on GitHub">](https://github.com/amckenna41/pySAR)
714
+
715
+
714
716
  <a href="https://www.buymeacoffee.com/amckenna41" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
715
717
 
716
718
  [Back to top](#TOP)
@@ -727,7 +729,6 @@ DOI: 10.1021/acs.jcim.0c00073 <br><br>
727
729
  [tqdm]: https://tqdm.github.io/
728
730
  [seaborn]: https://seaborn.pydata.org/
729
731
  [matplotlib]: https://matplotlib.org/
730
- [delayed]: https://pypi.org/project/delayed/
731
732
  [PyPi]: https://pypi.org/project/pysar/
732
733
  [article]: https://www.sciencedirect.com/science/article/abs/pii/S1532046422000326
733
734
  [pdf]: https://github.com/amckenna41/pySAR/blob/master/pySAR_research.pdf
@@ -1,7 +1,6 @@
1
1
  numpy>=1.21
2
2
  pandas>=1.3
3
3
  scipy>=1.7
4
- delayed>=0.11
5
4
  scikit-learn>=1.0
6
5
  matplotlib>=3.4
7
6
  seaborn>=0.11
@@ -1,5 +1,5 @@
1
1
  <p align="center">
2
- <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="300" width="400"/>
2
+ <img src="https://raw.githubusercontent.com/amckenna41/pySAR/master/images/pySAR.png" alt="pySARLogo" height="400" width="350"/>
3
3
  </p>
4
4
 
5
5
  # pySAR - Python Sequence Activity Relationship #
@@ -75,7 +75,6 @@ Requirements
75
75
  * [pandas][pandas] >= 1.3
76
76
  * [scikit-learn][sklearn] >= 1.0
77
77
  * [scipy][scipy] >= 1.7
78
- * [delayed][delayed] >= 0.11
79
78
  * [tqdm][tqdm] >= 4.60
80
79
  * [matplotlib][matplotlib] >= 3.4
81
80
  * [seaborn][seaborn] >= 0.11
@@ -660,6 +659,10 @@ Journal of Chemical Information and Modeling 2020 60 (6), 2773-2790
660
659
  DOI: 10.1021/acs.jcim.0c00073 <br><br>
661
660
  \[8\]: Medina-Ortiz, D., Contreras, S., Amado-Hinojosa, J., Torres-Almonacid, J., Asenjo, J. A., Navarrete, M., & Olivera-Nappa, Á. (2020). Combination of digital signal processing and assembled predictive models facilitates the rational design of proteins. ArXiv [Cs.CE]. <br>
662
661
 
662
+
663
+ [<img src="https://img.shields.io/github/stars/amckenna41/pySAR?color=green&label=star%20it%20on%20GitHub" width="132" height="20" alt="Star it on GitHub">](https://github.com/amckenna41/pySAR)
664
+
665
+
663
666
  <a href="https://www.buymeacoffee.com/amckenna41" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/default-orange.png" alt="Buy Me A Coffee" height="41" width="174"></a>
664
667
 
665
668
  [Back to top](#TOP)
@@ -676,7 +679,6 @@ DOI: 10.1021/acs.jcim.0c00073 <br><br>
676
679
  [tqdm]: https://tqdm.github.io/
677
680
  [seaborn]: https://seaborn.pydata.org/
678
681
  [matplotlib]: https://matplotlib.org/
679
- [delayed]: https://pypi.org/project/delayed/
680
682
  [PyPi]: https://pypi.org/project/pysar/
681
683
  [article]: https://www.sciencedirect.com/science/article/abs/pii/S1532046422000326
682
684
  [pdf]: https://github.com/amckenna41/pySAR/blob/master/pySAR_research.pdf
@@ -15,7 +15,7 @@ sys.path.insert(0, os.path.abspath('..'))
15
15
  project = 'pySAR'
16
16
  copyright = '2026, AJ McKenna'
17
17
  author = 'AJ McKenna'
18
- release = '2.5.0'
18
+ release = '2.5.1'
19
19
 
20
20
  # -- General configuration ---------------------------------------------------
21
21
  # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -36,7 +36,6 @@ autodoc_mock_imports = [
36
36
  'matplotlib',
37
37
  'seaborn',
38
38
  'tqdm',
39
- 'delayed',
40
39
  'aaindex',
41
40
  'protpy',
42
41
  ]
@@ -1,6 +1,6 @@
1
1
  """ pySAR software metadata. """
2
2
  __name__ = 'pySAR'
3
- __version__ = "2.5.0"
3
+ __version__ = "2.5.1"
4
4
  __description__ = 'A Python package used to analysis Sequence Activity Relationships (SARs) of protein sequences and their mutants using Machine Learning.'
5
5
  __author__ = 'AJ McKenna: https://github.com/amckenna41'
6
6
  __authorEmail__ = 'amckenna41@qub.ac.uk'
@@ -8,7 +8,6 @@ import pandas as pd
8
8
  import numpy as np
9
9
  from difflib import get_close_matches
10
10
  import json
11
- from json import JSONDecodeError
12
11
  import itertools
13
12
  import time
14
13
  from tqdm import tqdm
@@ -184,8 +184,8 @@ class Evaluate():
184
184
  :rpd: float
185
185
  the RPD score for the model.
186
186
  """
187
- mse = self.mse_()
188
- return self.Y_true.std() / np.sqrt(mse) if mse > 0 else np.inf
187
+ # reuse already-computed self.mse to avoid a redundant sklearn call
188
+ return self.Y_true.std() / np.sqrt(self.mse) if self.mse > 0 else np.inf
189
189
 
190
190
  def explained_var_(self, multioutput='uniform_average'):
191
191
  """
@@ -8,14 +8,11 @@ from datetime import datetime
8
8
  NOW = datetime.now()
9
9
 
10
10
  #output dir is the default directory used to store all outputs generated
11
- global OUTPUT_DIR
12
11
  OUTPUT_DIR = 'outputs'
13
12
 
14
13
  #current datetime appended to output assets & directories to uniquely identify them
15
- global CURRENT_DATETIME
16
14
  CURRENT_DATETIME = NOW.strftime('%Y-%m-%d_%H-%M-%S')
17
15
 
18
16
  #output folder is the default folder within the OUTPUT_DIR used to store all
19
17
  #outputs generated from one run of the program.
20
- global OUTPUT_FOLDER
21
18
  OUTPUT_FOLDER = os.path.join(OUTPUT_DIR, f'model_output_{CURRENT_DATETIME}')
@@ -92,7 +92,6 @@ class Model():
92
92
  'sgd': SGDRegressor,
93
93
  'stochasticgradientdescent': SGDRegressor,
94
94
  'gbr': GradientBoostingRegressor,
95
- 'gradientboost': GradientBoostingRegressor,
96
95
  'gradientboostingregressor': GradientBoostingRegressor,
97
96
  'svr': SVR,
98
97
  'supportvectorregression': SVR,
@@ -123,15 +122,8 @@ class Model():
123
122
  else:
124
123
  self.parameters = parameters
125
124
 
126
- #list of valid models available to use for this class
127
- self.valid_models = ['plsregression', 'randomforestregressor', 'adaboostregressor',\
128
- 'baggingregressor', 'decisiontreeregressor', 'gbr',
129
- 'gradientboostingregressor', 'linearregression', 'lasso', 'ridge',
130
- 'svr', 'supportvectorregression', 'sgd', 'stochasticgradientdescent',
131
- 'kneighborsregressor', 'knearestneighbors', 'knn', 'elasticnet',
132
- 'extratreesregressor', 'extratrees', 'histgradientboostingregressor',
133
- 'histgradientboosting', 'hgbr', 'gaussianprocessregressor',
134
- 'gaussianprocess', 'gpr']
125
+ #derive valid model names directly from MODEL_CONSTRUCTORS to avoid duplication and sync issues
126
+ self.valid_models = list(self.MODEL_CONSTRUCTORS.keys())
135
127
 
136
128
  #raise error if algorithm parameter isnt string type
137
129
  if not(isinstance(self.algorithm, str)):
@@ -311,8 +303,8 @@ class Model():
311
303
  try:
312
304
  with open(save_path, 'wb') as file:
313
305
  pickle.dump(self.model, file)
314
- except (pickle.PickleError):
315
- print(f"Error pickling model with path: {save_path}.")
306
+ except pickle.PickleError as e:
307
+ raise RuntimeError(f"Error pickling model with path: {save_path}.") from e
316
308
 
317
309
  def hyperparameter_tuning(self, param_grid=None, metric='r2', cv=5, n_jobs=None, verbose=2):
318
310
  """
@@ -365,6 +357,8 @@ class Model():
365
357
 
366
358
  #cv must be of type int and be between 5 and 10, if not then default of 5 is used
367
359
  if not isinstance(cv, int) or cv < 5 or cv > 10:
360
+ import warnings
361
+ warnings.warn(f'Invalid cv value {cv!r}; must be an int between 5 and 10. Defaulting to 5.', UserWarning, stacklevel=2)
368
362
  cv = 5
369
363
 
370
364
  #copy to avoid mutating caller's dict; filter out parameter names invalid for this model
@@ -221,7 +221,7 @@ class PySAR():
221
221
 
222
222
  #verify no invalid amino acids found in sequences, if so then raise error
223
223
  invalid_seqs = valid_sequence(self.sequences)
224
- if (invalid_seqs != None):
224
+ if invalid_seqs is not None:
225
225
  raise ValueError(f'Invalid amino acids found in protein sequence dataset: {invalid_seqs}.')
226
226
 
227
227
  #get closest match for activity column name in dataset
@@ -270,7 +270,7 @@ class PySAR():
270
270
  array of the encoded protein sequences in dataset via user input index/indices.
271
271
  """
272
272
  #validate AAI indices are present in the input parameter, if not raise error
273
- if (aai_indices == None or aai_indices == ""):
273
+ if aai_indices is None or aai_indices == "":
274
274
  raise ValueError(f'AAI indices input parameter cannot be None or empty: {aai_indices}.')
275
275
 
276
276
  #check input indices is of correct type (str/list), if not raise type error
@@ -352,7 +352,7 @@ class PySAR():
352
352
  pandas Dataframe storing metrics and results of encoding.
353
353
  """
354
354
  #validate AAI indices are present in the input parameter
355
- if (aai_indices == None or aai_indices == "" or aai_indices == []):
355
+ if aai_indices is None or aai_indices == "" or aai_indices == []:
356
356
  raise ValueError(f'AAI indices input parameter cannot be None or empty: {aai_indices}.')
357
357
 
358
358
  #check input indices is of correct type (str/list), if not raise type error
@@ -385,7 +385,7 @@ class PySAR():
385
385
  #else use the AAI indices encoding's themselves as the feature/training data (X)
386
386
  if (self.use_dsp):
387
387
  #if input spectrum is none or empty, raise error.
388
- if (self.spectrum == None or self.spectrum == ""):
388
+ if self.spectrum is None or self.spectrum == "":
389
389
  raise ValueError(f'Spectrum cannot be None or empty: {self.spectrum}.')
390
390
  pyDSP = PyDSP(self.config_file, protein_seqs=encoded_seqs)
391
391
  X = pd.DataFrame(pyDSP.spectrum_encoding) #set training data to FFT spectrum encoding
@@ -471,7 +471,7 @@ class PySAR():
471
471
  inputted descriptor(s).
472
472
  """
473
473
  #raise error if no descriptors specified in input
474
- if (descriptors == None or descriptors == "" or descriptors == []):
474
+ if descriptors is None or descriptors == "" or descriptors == []:
475
475
  raise ValueError(f'Descriptors input parameter cannot be None or empty: {descriptors}.')
476
476
 
477
477
  #check input descriptor is of correct type str or list, if not raise type error
@@ -551,7 +551,7 @@ class PySAR():
551
551
  pandas dataframe storing metrics and results of encoding.
552
552
  """
553
553
  #raise error if no descriptor specified in input
554
- if (descriptors == None or descriptors == ""):
554
+ if descriptors is None or descriptors == "" or descriptors == []:
555
555
  raise ValueError(f'Descriptors input parameter cannot be None or empty: {descriptors}.')
556
556
 
557
557
  #check input descriptor is of correct type (str or list), if not raise type error
@@ -633,7 +633,7 @@ class PySAR():
633
633
  desc_df['Group'] = desc_df['Group'].astype(pd.StringDtype())
634
634
 
635
635
  #ensure aai indices attribute doesn't show up in output results
636
- if (self.aai_indices != None):
636
+ if self.aai_indices is not None:
637
637
  self.aai_indices = None
638
638
 
639
639
  #print out results from encoding
@@ -684,8 +684,8 @@ class PySAR():
684
684
  pandas dataframe storing metrics and results of encoding.
685
685
  """
686
686
  #validate AAI indices and Descriptors are present in the input parameters, return error if either is None
687
- if (descriptors == None or descriptors == "") or (aai_indices == None or aai_indices == ""):
688
- raise ValueError('AAI Indices and Descriptor input parameters must not be empty or None.')
687
+ if (descriptors is None or descriptors in ("", [])) or (aai_indices is None or aai_indices in ("", [])):
688
+ raise ValueError('AAI Indices and Descriptor input parameters must not be empty or None.')
689
689
 
690
690
  #check input descriptor & indices are of correct type (str/list), if not raise type error
691
691
  if (not isinstance(aai_indices, str) and (not isinstance(aai_indices, list)) or \
@@ -810,8 +810,7 @@ class PySAR():
810
810
  evaluation.rmse, evaluation.mse, evaluation.mae, evaluation.rpd, evaluation.explained_var]
811
811
 
812
812
  #convert Index, Category, Descriptor and Group from default Object type -> String datatypes
813
- # aai_desc_df['Index'] = aai_desc_df['Index'].astype(pd.StringDtype())
814
- aai_desc_df['Index'] = aai_desc_df['Index'].astype("string")
813
+ aai_desc_df['Index'] = aai_desc_df['Index'].astype(pd.StringDtype())
815
814
  aai_desc_df['Category'] = aai_desc_df['Category'].astype(pd.StringDtype())
816
815
  aai_desc_df['Descriptor'] = aai_desc_df['Descriptor'].astype(pd.StringDtype())
817
816
  aai_desc_df['Group'] = aai_desc_df['Group'].astype(pd.StringDtype())
@@ -149,43 +149,6 @@ def remove_gaps(sequences):
149
149
  cleaned = ''.join(str(c) for c in sequences if str(c) != '-')
150
150
  return [cleaned]
151
151
 
152
- def flatten(array):
153
- """
154
- Lambda function for flattening list of lists or array of lists into one
155
- 1-dimensional array/list. Input must contain an array of arrays of the same
156
- length. Input will be flattened into a 1-dimensional array of size (M * N, 1)
157
- where M = len(array) and N = len(array[0]). The flattened output can then be
158
- reshaped into the required shape and format.
159
-
160
- Parameters
161
- ==========
162
- :array: np.ndarray/list
163
- array of arrays or list of lists to be flattened.
164
-
165
- Returns
166
- =======
167
- :flatten(array/list): np.ndarray/list
168
- flattened 1-dimensional list or array.
169
- """
170
- #if input is a string then return input as cannot be flattened
171
- if (isinstance(array, str)):
172
- return array
173
-
174
- #create flatten lambda function
175
- _flatten = lambda array: [item for sublist in array for item in sublist]
176
-
177
- #flatten array/list
178
- try:
179
- flattened_array = _flatten(array)
180
- except (TypeError, ValueError):
181
- raise TypeError(f'Error flattening array of type: {type(array)} and size {len(array)}.')
182
-
183
- #if input is a numpy array then reshape to 1D numpy array else return list
184
- if (isinstance(array,np.ndarray)):
185
- return (np.array(flattened_array).reshape([-1, 1]))
186
- else:
187
- return flattened_array
188
-
189
152
  def zero_padding(sequences):
190
153
  """
191
154
  Pad sequences in input array with 0's such that every sequence is of the same length
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "PySAR"
7
- version = "2.5.0"
7
+ version = "2.5.1"
8
8
  description = "Analysing Sequence Activity Relationships (SARs) of protein sequences and their mutants using Machine Learning."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -54,7 +54,6 @@ dependencies = [
54
54
  "numpy>=1.21",
55
55
  "pandas>=1.3",
56
56
  "scipy>=1.7",
57
- "delayed>=0.11",
58
57
  "scikit-learn>=1.0",
59
58
  "matplotlib>=3.4",
60
59
  "seaborn>=0.11",
@@ -96,7 +96,7 @@ class ModelTests(unittest.TestCase):
96
96
  aliases = [
97
97
  ('plsreg', 'plsregression', 'PLSRegression'),
98
98
  ('randomfor', 'randomforestregressor', 'RandomForestRegressor'),
99
- ('adaboo', 'adaboostregressor', 'AdaBoostRegressor'),
99
+ ('adaboost', 'adaboostregressor', 'AdaBoostRegressor'),
100
100
  ('bagging', 'baggingregressor', 'BaggingRegressor'),
101
101
  ('decisiontree', 'decisiontreeregressor', 'DecisionTreeRegressor'),
102
102
  ('linear', 'linearregression', 'LinearRegression'),
@@ -299,7 +299,7 @@ class ModelTests(unittest.TestCase):
299
299
  def test_hyperparameter_tuning(self):
300
300
  """ Testing hyperparamter tuning functionality. """
301
301
  #1.)
302
- model = Model(self.dummy_X, self.dummy_Y, algorithm="adaboost")
302
+ model = Model(self.dummy_X, self.dummy_Y, algorithm="adaboostregressor")
303
303
  X_train, X_test, Y_train, Y_test = model.train_test_split(test_split=0.2)
304
304
  model.fit()
305
305
  param_grid = {'n_estimators': [50,100,150], 'learning_rate': [0.5,0.75,1], 'loss': ['linear','exponential']}
@@ -69,8 +69,8 @@ class PySARTests(unittest.TestCase):
69
69
  # @unittest.skip("Skipping metadata tests.")
70
70
  def test_pySAR_metadata(self):
71
71
  """ Testing correct pySAR version and metadata. """
72
- self.assertEqual(pysar_.__version__, "2.5.0",
73
- f"pySAR version is not correct, expected 2.5.0, got {pysar_.__version__}.")
72
+ self.assertEqual(pysar_.__version__, "2.5.1",
73
+ f"pySAR version is not correct, expected 2.5.1, got {pysar_.__version__}.")
74
74
  self.assertEqual(pysar_.__name__, "pySAR",
75
75
  f"pySAR software name is not correct, expected pySAR, got {pysar_.__name__}.")
76
76
  self.assertEqual(pysar_.__author__, "AJ McKenna: https://github.com/amckenna41",
@@ -24,8 +24,6 @@ class UtilsTest(unittest.TestCase):
24
24
  testing correct utils.valid_sequence functionality.
25
25
  test_remove_gaps:
26
26
  testing correct utils.remove_gaps functionality.
27
- test_flatten:
28
- testing correct utils.flatten functionality.
29
27
  test_zero_padding:
30
28
  testing correct utils.zero_padding functionality.
31
29
  test_save_results:
@@ -129,42 +127,6 @@ class UtilsTest(unittest.TestCase):
129
127
  self.assertIsInstance(seq4_test, str, f"Expected output to be of type str, got {type(seq4_test)}.")
130
128
  self.assertNotIn('-', seq4_test, "Expected there to be no gaps (-) in the sequence.")
131
129
 
132
- def test_flatten(self):
133
- """ Test flatten utility function that flattens an array or list. """
134
- seq1 = np.array([[1, 2, 3], [4, 5, 6]], np.int32)
135
- seq2 = np.array([[1, 2, 3], [4, 5, 6],[7, 8, 9]], np.int32)
136
- seq3 = np.random.randint(10,90,(4,5,2))
137
- seq4 = ["A", "B", "C", "D", "E", "F"]
138
- seq5 = "TUVWXYZ"
139
- #1.)
140
- flattened_array = utils.flatten(seq1)
141
- self.assertEqual(flattened_array.shape, (6,1), f"Expected output shape to be (6,1), got {flattened_array.shape}.")
142
- self.assertIsInstance(flattened_array, np.ndarray, f"Expected output to be of type np.ndarray, got {type(flattened_array)}.")
143
- self.assertEqual(flattened_array.ndim, 2, f"Expected 2 output dimensions, got {flattened_array.ndim}.")
144
- self.assertTrue((np.array([[1],[2],[3],[4],[5],[6]]) == flattened_array).all(),
145
- f"Output array doesn't match expected:\n{flattened_array}.")
146
- #2.)
147
- flattened_array_2 = utils.flatten(seq2)
148
- self.assertEqual(flattened_array_2.shape, (9,1), f"Expected output shape to be (9,1), got {flattened_array_2.shape}.")
149
- self.assertIsInstance(flattened_array_2, np.ndarray, f"Expected output to be of type np.ndarray, got {type(flattened_array_2)}.")
150
- self.assertEqual(flattened_array_2.ndim, 2, f"Expected 2 output dimensions, got {flattened_array_2.ndim}.")
151
- self.assertTrue((np.array([[1],[2],[3],[4],[5],[6],[7],[8],[9]]) == flattened_array_2).all(),
152
- f"Output array doesn't match expected:\n{flattened_array_2}.")
153
- #3.)
154
- flattened_array_3 = utils.flatten(seq3)
155
- self.assertEqual(flattened_array_3.shape, (40,1), f"Expected output shape to be (40,1), got {flattened_array_3.shape}.")
156
- self.assertIsInstance(flattened_array_3, np.ndarray, f"Expected output to be of type np.ndarray, got {type(flattened_array_3)}.")
157
- self.assertEqual(flattened_array_3.ndim, 2, f"Expected 2 output dimensions, got {flattened_array_3.ndim}.")
158
- #4.)
159
- flattened_array_4 = utils.flatten(seq4)
160
- self.assertEqual(len(flattened_array_4), 6, f"Expected length of output to be 6, got {len(flattened_array_4)}.")
161
- self.assertIsInstance(flattened_array_4, list, f"Expected output to be of type list, got {type(flattened_array_4)}.")
162
- self.assertEqual(flattened_array_4, seq4, f"Output doesn't match expected sequence {seq4}.")
163
- #5.)
164
- flattened_array_5 = utils.flatten(seq5)
165
- self.assertEqual(flattened_array_5, seq5, f"Output doesn't match expected sequence {seq5}.")
166
- self.assertIsInstance(flattened_array_5, str, f"Expected output to be of type string, got {type(flattened_array_5)}.")
167
-
168
130
  def test_zero_padding(self):
169
131
  """ Test zero padding utility function that pads an array or list with 0's. """
170
132
  seq1 = np.array([[1, 2, 3, 4, 5], [6, 7, 8]], dtype=object)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes