phykit 2.0.0__tar.gz → 2.0.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {phykit-2.0.0 → phykit-2.0.1}/PKG-INFO +1 -1
  2. {phykit-2.0.0 → phykit-2.0.1}/phykit/phykit.py +5 -0
  3. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/pairwise_identity.py +11 -6
  4. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/base.py +9 -0
  5. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/saturation.py +25 -4
  6. phykit-2.0.1/phykit/version.py +1 -0
  7. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/PKG-INFO +1 -1
  8. phykit-2.0.0/phykit/version.py +0 -1
  9. {phykit-2.0.0 → phykit-2.0.1}/LICENSE.md +0 -0
  10. {phykit-2.0.0 → phykit-2.0.1}/README.md +0 -0
  11. {phykit-2.0.0 → phykit-2.0.1}/phykit/__init__.py +0 -0
  12. {phykit-2.0.0 → phykit-2.0.1}/phykit/__main__.py +0 -0
  13. {phykit-2.0.0 → phykit-2.0.1}/phykit/helpers/__init__.py +0 -0
  14. {phykit-2.0.0 → phykit-2.0.1}/phykit/helpers/boolean_argument_parsing.py +0 -0
  15. {phykit-2.0.0 → phykit-2.0.1}/phykit/helpers/files.py +0 -0
  16. {phykit-2.0.0 → phykit-2.0.1}/phykit/helpers/stats_summary.py +0 -0
  17. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/__init__.py +0 -0
  18. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/__init__.py +0 -0
  19. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/alignment_length.py +0 -0
  20. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/alignment_length_no_gaps.py +0 -0
  21. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/alignment_recoding.py +0 -0
  22. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/base.py +0 -0
  23. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/column_score.py +0 -0
  24. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/compositional_bias_per_site.py +0 -0
  25. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/create_concatenation_matrix.py +0 -0
  26. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/dna_threader.py +0 -0
  27. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/evolutionary_rate_per_site.py +0 -0
  28. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/faidx.py +0 -0
  29. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/gc_content.py +0 -0
  30. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/parsimony_informative_sites.py +0 -0
  31. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/rcv.py +0 -0
  32. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/rcvt.py +0 -0
  33. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/rename_fasta_entries.py +0 -0
  34. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/sum_of_pairs_score.py +0 -0
  35. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/alignment/variable_sites.py +0 -0
  36. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/base.py +0 -0
  37. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/__init__.py +0 -0
  38. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/bipartition_support_stats.py +0 -0
  39. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/branch_length_multiplier.py +0 -0
  40. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/collapse_branches.py +0 -0
  41. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/covarying_evolutionary_rates.py +0 -0
  42. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/dvmc.py +0 -0
  43. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/evolutionary_rate.py +0 -0
  44. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/hidden_paralogy_check.py +0 -0
  45. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/internal_branch_stats.py +0 -0
  46. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/internode_labeler.py +0 -0
  47. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/last_common_ancestor_subtree.py +0 -0
  48. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/lb_score.py +0 -0
  49. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/monophyly_check.py +0 -0
  50. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/nearest_neighbor_interchange.py +0 -0
  51. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/patristic_distances.py +0 -0
  52. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/polytomy_test.py +0 -0
  53. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/print_tree.py +0 -0
  54. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/prune_tree.py +0 -0
  55. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/rename_tree_tips.py +0 -0
  56. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/rf_distance.py +0 -0
  57. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/root_tree.py +0 -0
  58. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/spurious_sequence.py +0 -0
  59. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/terminal_branch_stats.py +0 -0
  60. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/tip_labels.py +0 -0
  61. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/tip_to_tip_distance.py +0 -0
  62. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/tip_to_tip_node_distance.py +0 -0
  63. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/total_tree_length.py +0 -0
  64. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/treeness.py +0 -0
  65. {phykit-2.0.0 → phykit-2.0.1}/phykit/services/tree/treeness_over_rcv.py +0 -0
  66. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/SOURCES.txt +0 -0
  67. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/dependency_links.txt +0 -0
  68. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/entry_points.txt +0 -0
  69. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/requires.txt +0 -0
  70. {phykit-2.0.0 → phykit-2.0.1}/phykit.egg-info/top_level.txt +0 -0
  71. {phykit-2.0.0 → phykit-2.0.1}/setup.cfg +0 -0
  72. {phykit-2.0.0 → phykit-2.0.1}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phykit
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -2475,6 +2475,8 @@ class Phykit(object):
2475
2475
 
2476
2476
  -t/--tree a tree file
2477
2477
 
2478
+ -e/--exclude_gaps if a site has a gap, ignore it
2479
+
2478
2480
  -v/--verbose print out patristic distances
2479
2481
  and uncorrected distances used
2480
2482
  to determine saturation
@@ -2490,6 +2492,9 @@ class Phykit(object):
2490
2492
  parser.add_argument(
2491
2493
  "-v", "--verbose", action="store_true", required=False, help=SUPPRESS
2492
2494
  )
2495
+ parser.add_argument(
2496
+ "-e", "--exclude_gaps", action="store_true", required=False, help=SUPPRESS
2497
+ )
2493
2498
  args = parser.parse_args(argv)
2494
2499
  Saturation(args).run()
2495
2500
 
@@ -57,13 +57,18 @@ class PairwiseIdentity(Alignment):
57
57
  total_compared = 0
58
58
 
59
59
  for res_one, res_two in zip(seq_one, seq_two):
60
- if exclude_gaps and (
61
- res_one not in gap_chars or res_two not in gap_chars
62
- ):
63
- continue
60
+ res_one = res_one.upper()
61
+ res_two = res_two.upper()
62
+
64
63
  total_compared += 1
65
- if res_one == res_two:
66
- identities += 1
64
+
65
+ if exclude_gaps:
66
+ if (res_one not in gap_chars or res_two not in gap_chars):
67
+ if res_one == res_two:
68
+ identities += 1
69
+ else:
70
+ if res_one == res_two:
71
+ identities += 1
67
72
 
68
73
  if total_compared > 0:
69
74
  identity_score = identities / total_compared
@@ -1,4 +1,5 @@
1
1
  import sys
2
+ from typing import List
2
3
 
3
4
  from Bio import Phylo
4
5
 
@@ -27,6 +28,7 @@ class Tree(BaseService):
27
28
  tip_2=None,
28
29
  clade=None,
29
30
  keep=None,
31
+ exclude_gaps=None,
30
32
  ):
31
33
  self.tree_file_path = tree_file_path
32
34
  self.tree1_file_path = tree1_file_path
@@ -47,6 +49,7 @@ class Tree(BaseService):
47
49
  self.tip_2 = tip_2
48
50
  self.clade = clade
49
51
  self.keep = keep
52
+ self.exclude_gaps = exclude_gaps
50
53
 
51
54
  def read_tree_file(self):
52
55
  try:
@@ -141,3 +144,9 @@ class Tree(BaseService):
141
144
  return None
142
145
  except BrokenPipeError:
143
146
  pass
147
+
148
+ def get_gap_chars(is_protein: bool) -> List[str]:
149
+ if is_protein:
150
+ return ["-", "?", "*", "X", "x"]
151
+ else:
152
+ return ["-", "?", "*", "X", "x", "N", "n"]
@@ -30,7 +30,7 @@ class Saturation(Tree):
30
30
  super().__init__(**self.process_args(args))
31
31
 
32
32
  def run(self) -> None:
33
- alignment, _, _ = get_alignment_and_format_helper(
33
+ alignment, _, is_protein = get_alignment_and_format_helper(
34
34
  self.alignment_file_path
35
35
  )
36
36
 
@@ -43,7 +43,7 @@ class Saturation(Tree):
43
43
  patristic_distances,
44
44
  uncorrected_distances,
45
45
  ) = self.loop_through_combos_and_calculate_pds_and_pis(
46
- combos, alignment, tree
46
+ combos, alignment, tree, self.exclude_gaps
47
47
  )
48
48
 
49
49
  # calculate slope and fit the y-intercept to zero
@@ -63,6 +63,7 @@ class Saturation(Tree):
63
63
  return dict(
64
64
  tree_file_path=args.tree,
65
65
  alignment_file_path=args.alignment,
66
+ exclude_gaps=args.exclude_gaps,
66
67
  verbose=args.verbose,
67
68
  )
68
69
 
@@ -71,6 +72,7 @@ class Saturation(Tree):
71
72
  combos: List[Tuple[str, str]],
72
73
  alignment: Align.MultipleSeqAlignment,
73
74
  tree: Newick.Tree,
75
+ exclude_gaps: bool,
74
76
  ) -> Tuple[
75
77
  List[float],
76
78
  List[float]
@@ -81,6 +83,7 @@ class Saturation(Tree):
81
83
  """
82
84
  patristic_distances = []
83
85
  uncorrected_distances = []
86
+ gap_chars = self.get_gap_chars()
84
87
  aln_len = alignment.get_alignment_length()
85
88
  seq_dict = {record.name: record.seq for record in alignment}
86
89
  for combo in combos:
@@ -90,8 +93,26 @@ class Saturation(Tree):
90
93
  # calcualte uncorrected distances
91
94
  seq_one = seq_dict[combo[0]]
92
95
  seq_two = seq_dict[combo[1]]
93
- identities = sum(1 for idx in range(aln_len) if seq_one[idx] == seq_two[idx])
94
- uncorrected_distances.append(1 - (identities / aln_len))
96
+ if exclude_gaps:
97
+ valid_positions = [
98
+ idx for idx in range(aln_len)
99
+ if seq_one[idx] not in gap_chars and seq_two[idx] not in gap_chars
100
+ ]
101
+ adjusted_len = len(valid_positions)
102
+ identities = sum(
103
+ 1 for idx in valid_positions if seq_one[idx].upper() == seq_two[idx].upper()
104
+ )
105
+ else:
106
+ adjusted_len = aln_len
107
+ identities = sum(
108
+ 1 for idx in range(aln_len) if seq_one[idx].upper() == seq_two[idx].upper()
109
+ )
110
+
111
+ if adjusted_len > 0:
112
+ uncorrected_distances.append(1 - (identities / adjusted_len))
113
+ else:
114
+ uncorrected_distances.append(float('nan'))
115
+ uncorrected_distances.append(1 - (identities / aln_len))
95
116
 
96
117
  return patristic_distances, uncorrected_distances
97
118
 
@@ -0,0 +1 @@
1
+ __version__ = "2.0.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: phykit
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Home-page: https://github.com/jlsteenwyk/phykit
5
5
  Author: Jacob L. Steenwyk
6
6
  Author-email: jlsteenwyk@gmail.com
@@ -1 +0,0 @@
1
- __version__ = "2.0.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes