sequenzo 0.1.20__cp39-cp39-win_amd64.whl → 0.1.22__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (41):
  1. sequenzo/big_data/clara/utils/get_weighted_diss.c +195 -195
  2. sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
  3. sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
  4. sequenzo/clustering/hierarchical_clustering.py +1 -1
  5. sequenzo/define_sequence_data.py +4 -4
  6. sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
  7. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +172 -173
  8. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
  9. sequenzo/dissimilarity_measures/utils/seqconc.c +234 -234
  10. sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqdss.c +327 -327
  12. sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdur.c +327 -327
  14. sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqlength.c +227 -226
  16. sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
  17. sequenzo/multidomain/association_between_domains.py +1 -1
  18. sequenzo/multidomain/combt.py +4 -4
  19. sequenzo/multidomain/linked_polyad.py +3 -3
  20. sequenzo/prefix_tree/__init__.py +1 -1
  21. sequenzo/prefix_tree/individual_level_indicators.py +2 -2
  22. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
  23. sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
  24. sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
  25. sequenzo/suffix_tree/__init__.py +1 -1
  26. sequenzo/suffix_tree/individual_level_indicators.py +3 -3
  27. sequenzo/visualization/plot_single_medoid.py +2 -2
  28. sequenzo/visualization/plot_transition_matrix.py +3 -2
  29. sequenzo/visualization/utils/utils.py +2 -2
  30. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
  31. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/METADATA +17 -43
  32. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/RECORD +35 -41
  33. sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
  34. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
  35. sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
  36. sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
  37. sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
  38. sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
  39. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/WHEEL +0 -0
  40. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/licenses/LICENSE +0 -0
  41. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/top_level.txt +0 -0
@@ -307,5 +307,5 @@ def get_association_between_domains(seqdata_dom, assoc=("LRT", "V"), rep_method=
307
307
  print(" - v : Cramer's V statistic (0 to 1, measures association strength).")
308
308
  print(" - p(v) : p-value for Cramer's V (based on chi-squared test) + significance stars: * (p<.05), ** (p<.01), *** (p<.001)")
309
309
  print(" - strength : Qualitative label for association strength based on Cramer's V:")
310
- print(" 0.000.09 None, 0.100.29 Weak, 0.300.49 Moderate, 0.50 Strong")
310
+ print(" 0.00-0.09 -> None, 0.10-0.29 -> Weak, 0.30-0.49 -> Moderate, >=0.50 -> Strong")
311
311
 
@@ -253,7 +253,7 @@ def merge_sparse_combt_types(distance_matrix,
253
253
  distance_matrix = distance_matrix.values
254
254
 
255
255
  if distance_matrix.shape[0] != distance_matrix.shape[1]:
256
- raise ValueError("distance_matrix must be square (n × n)")
256
+ raise ValueError("distance_matrix must be square (n x n)")
257
257
 
258
258
  labels = np.array(labels)
259
259
  if len(labels) != distance_matrix.shape[0]:
@@ -323,7 +323,7 @@ def merge_sparse_combt_types(distance_matrix,
323
323
  best_target = target
324
324
  except Exception as e:
325
325
  if verbose:
326
- print(f"[!] Error computing silhouette for merge {small} {target}: {e}")
326
+ print(f"[!] Error computing silhouette for merge {small} -> {target}: {e}")
327
327
  continue
328
328
 
329
329
  # Execute merge if it maintains quality threshold
@@ -346,7 +346,7 @@ def merge_sparse_combt_types(distance_matrix,
346
346
 
347
347
  if verbose:
348
348
  print(
349
- f"[+] Merged {small} ({reverse_map[small]}, size={old_count}) {best_target} ({reverse_map[best_target]}) | New ASW: {current_score:.4f}")
349
+ f"[+] Merged {small} ({reverse_map[small]}, size={old_count}) -> {best_target} ({reverse_map[best_target]}) | New ASW: {current_score:.4f}")
350
350
 
351
351
  merged = True
352
352
  break
@@ -380,7 +380,7 @@ def merge_sparse_combt_types(distance_matrix,
380
380
  print("\n[>] Merge History Details:")
381
381
  for i, merge in enumerate(merge_info["merge_history"]):
382
382
  print(
383
- f" Merge {i + 1}: {merge['source']} (size={merge['source_size']}) {merge['target']} | ASW: {merge['new_asw']:.4f}")
383
+ f" Merge {i + 1}: {merge['source']} (size={merge['source_size']}) -> {merge['target']} | ASW: {merge['new_asw']:.4f}")
384
384
 
385
385
  # Visualize merge process if requested
386
386
  if visualize_process and merge_info["merge_history"]:
@@ -158,7 +158,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
158
158
 
159
159
  print("[Step 3] Computing all pairwise dissimilarities using method:", method)
160
160
  alldist = np.asarray(get_distance_matrix(merged_seqdata, method=method, **distance_parameters))
161
- print(" Dissimilarity matrix shape:", alldist.shape)
161
+ print(" -> Dissimilarity matrix shape:", alldist.shape)
162
162
 
163
163
  cj = np.array([n * p for p in range(P)])
164
164
 
@@ -199,7 +199,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
199
199
  else:
200
200
  raise ValueError("Invalid randomization type 'a'. Should be 1 or 2.")
201
201
 
202
- iterator = tqdm(range(T), desc=" Randomizing polyads") if verbose else range(T)
202
+ iterator = tqdm(range(T), desc="-> Randomizing polyads") if verbose else range(T)
203
203
  random_dists = Parallel(n_jobs=n_jobs)(delayed(random_sample_once)(i) for i in iterator)
204
204
  random_dists = np.array(random_dists)
205
205
 
@@ -231,7 +231,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
231
231
 
232
232
  print(
233
233
  f"[Step 7] Final summary: mean observed = {np.mean(observed_dists):.2f}, mean randomized = {mean_rand_dist:.2f}")
234
- print(f" Significant polyads (V > 0.95): {np.sum(V_95)} / {n}")
234
+ print(f" -> Significant polyads (V > 0.95): {np.sum(V_95)} / {n}")
235
235
 
236
236
  result = {
237
237
  "mean.dist": {"Obs": np.mean(observed_dists), "Rand": mean_rand_dist},
@@ -3,7 +3,7 @@
3
3
  @File : __init__.py
4
4
  @Time : 02/05/2025 11:05
5
5
  @Desc :
6
- Prefix Tree Framework exposes core indicators and utilities for sequence divergence analysis.
6
+ Prefix Tree Framework - exposes core indicators and utilities for sequence divergence analysis.
7
7
  """
8
8
  from .system_level_indicators import (
9
9
  build_prefix_tree,
@@ -609,7 +609,7 @@ class IndividualDivergence:
609
609
  Where z_{i,t} are the year-wise standardized prefix rarity scores using column-wise
610
610
  standardization with sample standard deviation (ddof=1, as computed by pandas).
611
611
 
612
- The standardized scores can be used with a threshold (e.g., z 1.5) to classify
612
+ The standardized scores can be used with a threshold (e.g., z >= 1.5) to classify
613
613
  individuals as diverged/not diverged, and are particularly useful for visualization.
614
614
 
615
615
  Parameters:
@@ -622,7 +622,7 @@ class IndividualDivergence:
622
622
  Returns:
623
623
  --------
624
624
  List[float]
625
- Standardized rarity scores for each individual. Values z_threshold indicate divergence.
625
+ Standardized rarity scores for each individual. Values >= z_threshold indicate divergence.
626
626
 
627
627
  Notes:
628
628
  ------
@@ -31,7 +31,7 @@ def get_cross_sectional_entropy(
31
31
  ...
32
32
 
33
33
  Additional metrics:
34
- - per_time_entropy_norm: If norm=True, normalized by maximum entropy (|S|), range 01
34
+ - per_time_entropy_norm: If norm=True, normalized by maximum entropy (|S|), range 0-1
35
35
  - effective_states (H_effective): exp(H), equivalent "effective number of states"
36
36
  - summary: Key interpretation points (entropy peaks/valleys, dominant state intervals, average entropy, etc.)
37
37
 
@@ -199,7 +199,7 @@ def get_cross_sectional_entropy(
199
199
  if eff_s is not None:
200
200
  out["Effective States"] = eff_s
201
201
  return out
202
- else: # "dict" —— try to be more readable too
202
+ else: # "dict" -- try to be more readable too
203
203
  res = {
204
204
  "Frequencies": freq_df_wide,
205
205
  "ValidStates": valid_s,
@@ -360,7 +360,7 @@ def plot_cross_sectional_characteristics(seqdata,
360
360
  title="Cross-sectional entropy over time",
361
361
  show_title=True,
362
362
  xlabel="Time",
363
- ylabel="Entropy (01)",
363
+ ylabel="Entropy (0-1)",
364
364
  line_color="#74C9B4",
365
365
  save_as=None,
366
366
  dpi=200,
@@ -396,7 +396,7 @@ def plot_cross_sectional_characteristics(seqdata,
396
396
  xlabel : str, optional (default="Time")
397
397
  Label for the x-axis.
398
398
 
399
- ylabel : str, optional (default="Entropy (01)")
399
+ ylabel : str, optional (default="Entropy (0-1)")
400
400
  Label for the y-axis (main entropy axis).
401
401
 
402
402
  line_color : str, optional (default="#74C9B4")
@@ -91,7 +91,7 @@ def get_subsequences_all_sequences(seqdata, dss: bool = True, with_missing: bool
91
91
  Args:
92
92
  seqdata: SequenceData object or pandas DataFrame containing your sequence data
93
93
  dss (bool): Whether to apply distinct state sequence preprocessing.
94
- If True, consecutive identical states are compressed (e.g., [1,1,2,2] [1,2])
94
+ If True, consecutive identical states are compressed (e.g., [1,1,2,2] -> [1,2])
95
95
  with_missing (bool): Whether to include missing values in the calculation
96
96
 
97
97
  Returns:
@@ -266,7 +266,7 @@ def get_number_of_transitions(seqdata, norm=False, pwight=False) -> pd.DataFrame
266
266
  seq_3 2
267
267
 
268
268
  >>> # Example: sequence [1, 1, 2, 2, 1, 3] has 3 transitions:
269
- >>> # 12 (position 3), 21 (position 5), 13 (position 6)
269
+ >>> # 1->2 (position 3), 2->1 (position 5), 1->3 (position 6)
270
270
 
271
271
  Note:
272
272
  Missing values are automatically ignored. Only counts actual state changes
@@ -3,7 +3,7 @@
3
3
  @File : __init__.py
4
4
  @Time : 08/08/2025 15:50
5
5
  @Desc :
6
- Suffix Tree Framework exposes core indicators and utilities for sequence convergence analysis.
6
+ Suffix Tree Framework - exposes core indicators and utilities for sequence convergence analysis.
7
7
  """
8
8
  from .system_level_indicators import (
9
9
  build_suffix_tree,
@@ -706,7 +706,7 @@ class IndividualConvergence:
706
706
  across individuals with sample standard deviation (ddof=1):
707
707
  z_{i,t} = (x_{i,t} - mean_t) / std_t
708
708
 
709
- The standardized scores can be used with a threshold (e.g., z -1.5) to classify
709
+ The standardized scores can be used with a threshold (e.g., z <= -1.5) to classify
710
710
  individuals as converged/not converged, and are particularly useful for visualization.
711
711
 
712
712
  Note: For convergence (suffix tree), we look for LOW rarity (more typical patterns),
@@ -725,7 +725,7 @@ class IndividualConvergence:
725
725
  Returns:
726
726
  --------
727
727
  List[float]
728
- Standardized rarity scores for each individual. Values -z_threshold indicate convergence.
728
+ Standardized rarity scores for each individual. Values <= -z_threshold indicate convergence.
729
729
 
730
730
  Notes:
731
731
  ------
@@ -1539,7 +1539,7 @@ def compute_quantile_thresholds_by_group(scores, group_labels, quantiles=None):
1539
1539
 
1540
1540
  def compute_quantile_thresholds_by_group_year(scores, group_labels, year_labels, quantiles=None, min_group_year_size=30):
1541
1541
  """
1542
- Compute quantile thresholds by group × year for time-drifting distributions.
1542
+ Compute quantile thresholds by group x year for time-drifting distributions.
1543
1543
 
1544
1544
  Parameters
1545
1545
  ----------
@@ -102,7 +102,7 @@ def compute_medoids_from_distance_matrix(distance_matrix: np.ndarray, seqdata: S
102
102
  :return: Tuple containing the medoid sequences and their indices.
103
103
  """
104
104
  if not isinstance(seqdata, SequenceData):
105
- raise TypeError(" seqdata must be a SequenceData object.")
105
+ raise TypeError("[X] seqdata must be a SequenceData object.")
106
106
 
107
107
  # Process weights
108
108
  if isinstance(weights, str) and weights == "auto":
@@ -131,7 +131,7 @@ def compute_medoids_from_distance_matrix(distance_matrix: np.ndarray, seqdata: S
131
131
  medoid_indices = medoid_indices.tolist()
132
132
 
133
133
  if not all(isinstance(idx, int) for idx in medoid_indices):
134
- raise ValueError(" medoid_indices must be a list of integers.")
134
+ raise ValueError("[X] medoid_indices must be a list of integers.")
135
135
 
136
136
  return medoid_sequences, medoid_indices
137
137
 
@@ -112,7 +112,7 @@ def print_transition_matrix(seqdata: SequenceData, transition_rates: np.ndarray)
112
112
  # Print each row
113
113
  for i, from_state in enumerate(state_labels):
114
114
  # Print row label
115
- print(f"{from_state:>{max_label_width}} ", end=" ")
115
+ print(f"{from_state:>{max_label_width}} ->", end=" ")
116
116
 
117
117
  # Print transition rates
118
118
  for prob in transition_rates[i]:
@@ -131,7 +131,7 @@ def plot_transition_matrix(seqdata: SequenceData,
131
131
  fontsize: int = 12,
132
132
  save_as: Optional[str] = None,
133
133
  dpi: int = 200,
134
- format: str = "%.2f") -> None:
134
+ format: str = ".2f") -> None:
135
135
  """
136
136
  Plot state transition rate matrix as a heatmap.
137
137
 
@@ -157,6 +157,7 @@ def plot_transition_matrix(seqdata: SequenceData,
157
157
  # Generate heatmap using pre-formatted annotation strings
158
158
  ax = sns.heatmap(
159
159
  transition_matrix,
160
+ annot=True,
160
161
  fmt=format,
161
162
  cmap=cmap,
162
163
  xticklabels=seqdata.labels,
@@ -35,10 +35,10 @@ def set_up_time_labels_for_x_axis(seqdata: SequenceData,
35
35
  # If 10 or fewer time points, show all labels
36
36
  xtick_positions = np.arange(num_time_steps)
37
37
  elif num_time_steps <= 20:
38
- # If 1020 time points, show every 2nd label
38
+ # If 10-20 time points, show every 2nd label
39
39
  xtick_positions = np.arange(0, num_time_steps, step=2)
40
40
  else:
41
- # More than 20 time points Pick 10 evenly spaced tick positions
41
+ # More than 20 time points -> Pick 10 evenly spaced tick positions
42
42
  xtick_positions = np.linspace(0, num_time_steps - 1, num=10, dtype=int)
43
43
 
44
44
  # Set x-ticks and labels dynamically
@@ -117,7 +117,7 @@ def sequence_analysis_multi_state_model(seqdata: SequenceData, sublength: int, c
117
117
  **What is person-period data?**
118
118
  Instead of having one row per person with all their time points as columns,
119
119
  person-period data has one row for each person-time combination. For example,
120
- if we track 3 people over 5 time periods, we get 15 rows (3 × 5).
120
+ if we track 3 people over 5 time periods, we get 15 rows (3 x 5).
121
121
 
122
122
  **What are subsequences?**
123
123
  At each time point, we look ahead and record what happens in the next few time periods.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sequenzo
3
- Version: 0.1.20
3
+ Version: 0.1.22
4
4
  Summary: A fast, scalable and intuitive Python package for social sequence analysis.
5
5
  Author-email: Yuqi Liang <yuqi.liang.1900@gmail.com>, Xinyi Li <1836724126@qq.com>, Jan Heinrich Ernst Meyerhoff-Liang <jan.meyerhoff1@gmail.com>
6
6
  License: BSD 3-Clause License
@@ -44,7 +44,7 @@ Classifier: Programming Language :: Python :: 3.12
44
44
  Requires-Python: <3.13,>=3.9
45
45
  Description-Content-Type: text/markdown
46
46
  License-File: LICENSE
47
- Requires-Dist: numpy<2.5,>=1.19.5
47
+ Requires-Dist: numpy>=1.21.0
48
48
  Requires-Dist: pandas>=1.2.5
49
49
  Requires-Dist: matplotlib>=3.4.3
50
50
  Requires-Dist: seaborn>=0.11.2
@@ -54,22 +54,23 @@ Requires-Dist: cython>=0.29.21
54
54
  Requires-Dist: scipy>=1.6.3
55
55
  Requires-Dist: scikit-learn>=0.24.2
56
56
  Requires-Dist: fastcluster>=1.2.6
57
- Requires-Dist: rpy2>=3.5.12; python_version >= "3.12"
58
- Requires-Dist: rpy2>=3.5.6; python_version == "3.11"
59
- Requires-Dist: rpy2>=3.5.6; python_version == "3.10"
60
- Requires-Dist: rpy2>=3.5.6; python_version == "3.9"
61
57
  Requires-Dist: joblib>=1.0.1
62
58
  Requires-Dist: docutils>=0.17
63
59
  Requires-Dist: tqdm<5.0.0,>=4.62.3
64
60
  Requires-Dist: missingno<0.6.0,>=0.5.2
65
61
  Requires-Dist: cffi>=1.15.0
62
+ Provides-Extra: r
63
+ Requires-Dist: rpy2>=3.5.12; python_version >= "3.12" and extra == "r"
64
+ Requires-Dist: rpy2>=3.5.6; python_version == "3.11" and extra == "r"
65
+ Requires-Dist: rpy2>=3.5.6; python_version == "3.10" and extra == "r"
66
+ Requires-Dist: rpy2>=3.5.6; python_version == "3.9" and extra == "r"
66
67
  Provides-Extra: dev
67
68
  Requires-Dist: pytest>=6.2.5; extra == "dev"
68
69
  Requires-Dist: flake8>=3.9.2; extra == "dev"
69
70
  Dynamic: license-file
70
71
 
71
72
  <p align="center">
72
- <img src="https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/assets/logo/FullLogo_NoBuffer.jpg" alt="Sequenzo Logo" width="400">
73
+ <img src="https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/assets/logo/FullLogo_NoBuffer.jpg" alt="Sequenzo Logo" width="300">
73
74
  </p>
74
75
 
75
76
  <p align="center">
@@ -133,17 +134,18 @@ Perfect for research, policy, and business, enabling seamless analysis of catego
133
134
 
134
135
  Sequenzo provides pre-built Python wheels for maximum compatibility — no need to compile from source.
135
136
 
136
- | Platform | Architecture | Python Versions | Status |
137
- |------------------|-------------------------------|-----------------------|-------------------|
138
- | **macOS** | `universal2` (Intel + Apple Silicon) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
139
- | **Windows** | `AMD64` (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
140
- | **Linux (glibc)**| `x86_64` (standard Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
141
- | **Linux (musl)** | `x86_64` (Alpine Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
137
+ | Platform | Architecture | Python Versions | Status |
138
+ |------------------|---------------------------------|-----------------------|-------------------|
139
+ | **macOS** | Intel && Apple Silicon (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
140
+ | **Windows** | `AMD64` (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
141
+ | **Linux (glibc)**| `x86_64` (standard Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
142
+ | **Linux (musl)** | `x86_64` (Alpine Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
142
143
 
143
144
 
144
145
  What do these terms mean?
145
- - **universal2 (macOS)**: One wheel supports both Intel (x86_64) and Apple Silicon (arm64) Macs.
146
- - **manylinux2014 (glibc-based Linux)**: Compatible with most mainstream Linux distributions (e.g., Ubuntu, Debian, CentOS).
146
+ - **macosx_arm64 (macOS)**: One wheel supports Apple Silicon Macs.
147
+ - **macosx_x86_64 (macOS)**: One wheel supports Intel Macs.
148
+ - **manylinux2014_x86_64 (glibc-based Linux)**: Compatible with most mainstream Linux distributions (e.g., Ubuntu, Debian, CentOS).
147
149
  - **musllinux_1_2 (musl-based Linux)**: For lightweight Alpine Linux environments, common in Docker containers.
148
150
  - **AMD64 (Windows)**: Standard 64-bit Windows system architecture.
149
151
 
@@ -181,34 +183,6 @@ If you have some issues with the installation, it might because you have both Py
181
183
  pip3 install sequenzo
182
184
  ```
183
185
 
184
- ### ⚠️ Having Installation or Import Issues?
185
-
186
- **Error:** `ImportError: numpy.core.multiarray failed to import`
187
-
188
- **Most likely cause:** NumPy version mismatch (you have NumPy 1.x, but need 2.x)
189
-
190
- **Quick Fix** (copy-paste these commands):
191
- ```bash
192
- # Check your NumPy version first
193
- python -c "import numpy; print(f'NumPy: {numpy.__version__}')"
194
-
195
- # If you see 1.x.x, upgrade to 2.x:
196
- pip install --upgrade "numpy>=2.0.0"
197
- pip uninstall sequenzo -y
198
- pip install --no-cache-dir sequenzo
199
- ```
200
-
201
- 📖 **Still having issues?**
202
- 1. Run our diagnostic tool to identify the problem:
203
- ```bash
204
- curl -O https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/diagnose.py
205
- python diagnose.py
206
- ```
207
- 2. See our detailed guides:
208
- - **[QUICK_FIX.md](QUICK_FIX.md)** - Simple step-by-step solutions
209
- - **[TROUBLESHOOTING.md](TROUBLESHOOTING.md)** - Comprehensive troubleshooting
210
- - **[docs/WHY_IMPORT_FAILS.md](docs/WHY_IMPORT_FAILS.md)** - Technical explanation
211
-
212
186
  ### Optional R Integration
213
187
 
214
188
  Sequenzo now checks the system environment variables before running ward.D hierarchical clustering.
@@ -1,5 +1,5 @@
1
1
  sequenzo/__init__.py,sha256=uGmuqHVR8JDseOnN0tHgbly9nRzCeNY7A9OHBusdMP4,6990
2
- sequenzo/define_sequence_data.py,sha256=xdIekb8nT8dBPyHPpRalIxF44RLYKRXg59Us-tjZu7k,28627
2
+ sequenzo/define_sequence_data.py,sha256=4ON_-Z8AMtqTlezYLKqbn9msBEs2Ba5RoIuJpgWMBpY,28621
3
3
  sequenzo/openmp_setup.py,sha256=f_8SxfcRhdI_uj_2RHFwiEw8iizB35Mv-3UUccRDt6U,7059
4
4
  sequenzo/big_data/__init__.py,sha256=iSZnGboYhbvsFf75uL8D8XDucXRxYypmFNN1uX5MxJo,152
5
5
  sequenzo/big_data/clara/__init__.py,sha256=pDR5_TSDisEhPtsA2gXGaXXBNTmWidJC_nnd9QMkz-U,700
@@ -8,14 +8,13 @@ sequenzo/big_data/clara/visualization.py,sha256=EpSmtAxRHVqcXlcXvSGiUuBjEETR7zK_
8
8
  sequenzo/big_data/clara/utils/__init__.py,sha256=2_o1tz8HFZVKFy8w8oJWdWlVKtwGjGY3z4PQylHKjt0,726
9
9
  sequenzo/big_data/clara/utils/aggregatecases.py,sha256=ul97pbnRlwxbFbX_0M4j-Bkyxkp7zMAKatSo4eanO24,2899
10
10
  sequenzo/big_data/clara/utils/davies_bouldin.py,sha256=4Y6VFjqopG3CaftQ8tDxQPjxxupJ6Hgv-yTXwCvgN7w,3037
11
- sequenzo/big_data/clara/utils/get_weighted_diss.c,sha256=0GbFqWOW8bmlAXqwH9hyBLjtCurgqBTUc-vzHCkGz5A,483771
12
- sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd,sha256=8hyvAHDQOwohaPKwCJzhFXptO_dJV7kaMqDvCcdSMrk,53248
13
- sequenzo/big_data/clara/utils/get_weighted_diss.pyx,sha256=UYR-u8MDQEuWID3inKhSpBsuxu7qTFmEwLrjNPBMmUw,430
11
+ sequenzo/big_data/clara/utils/get_weighted_diss.c,sha256=Nj6fZduxi-tdZ9c22NJBAzZRtaV6yYY1he0QTVIp3uE,483794
12
+ sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd,sha256=ikQ93kVblKCBPoJ_aYjU_28hQWb_PZgujWD3QK7lM9E,53248
14
13
  sequenzo/big_data/clara/utils/wfcmdd.py,sha256=-1H6CbTteTW-CeuQ_ehVDhnKH3ozcCkUobxoCSRIpYg,7074
15
14
  sequenzo/clustering/KMedoids.py,sha256=asktGP0KKgP4TsOH4bSYEWfE8yY5b9BfKa1d54KzaXI,7147
16
15
  sequenzo/clustering/__init__.py,sha256=duEY0Hq0-7Kc_lv0uFDK3D8IEXby-7Z0Rjff0EgO0KM,875
17
- sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd,sha256=YR0DLeo4VTOeehQo08BRy80LAYAvbk3nNPobVwbIuB4,253440
18
- sequenzo/clustering/hierarchical_clustering.py,sha256=3O2ipqqPMQ0O74H2YpLc2mmldWNFtiQI7Bs16hyXoDk,61572
16
+ sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd,sha256=6u1PJJnSKfNQXw3ZYBc_YTZuEDirJsGzCYqamT-22kw,253440
17
+ sequenzo/clustering/hierarchical_clustering.py,sha256=D4m3Wg3tPWBNXtPTsauxntD5ctx3hGXF9bjL-cB7VM0,61568
19
18
  sequenzo/clustering/src/KMedoid.cpp,sha256=Bb4LaRes004T9vyCmUknRS0NUaNr4ZoEpWvtkYGq-jw,9299
20
19
  sequenzo/clustering/src/PAM.cpp,sha256=UFXdTy1wMWheYa-fUoi8ASQPmn0Ew-AO7fqVQVxn_E8,8357
21
20
  sequenzo/clustering/src/PAMonce.cpp,sha256=C9HqGBRenmF2tnQofALjjU1As02dTw2oqEnuvdhoJIk,7943
@@ -47,7 +46,7 @@ sequenzo/datasets/polyadic_samplep1.csv,sha256=-2HvKSmevfqe1rWFVJlbnjousEgJRU_PH
47
46
  sequenzo/datasets/polyadic_seqc1.csv,sha256=ydZ-U8NTszR4lNBN4hhsH_dHfq0w5VZSMM7t9C5Uado,7028
48
47
  sequenzo/datasets/polyadic_seqp1.csv,sha256=sydXCR0JBKJlbMxJyGa46cic9XQstUpYFOtHaLmkp_0,7681
49
48
  sequenzo/dissimilarity_measures/__init__.py,sha256=qkWAQ1sBpS2aayO-FSA8Zha7rQ_vjs0_KIHEB60bVg4,958
50
- sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd,sha256=lJ8RwokDu-iGDu2HB0inKxGNSpIlKmfSf51HStkxOtQ,222208
49
+ sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd,sha256=cCKw6xcnrShAXJAbf-TwhdyOySqXdMSmoU3dQZSBVgw,222208
51
50
  sequenzo/dissimilarity_measures/get_distance_matrix.py,sha256=dNZtqg0aN3vAz5r1sTgjS3jwLAzcXGbG4MGWeBEvhag,29886
52
51
  sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py,sha256=Yed91VyNHkzeYxIduCTVF8hmJiAmltPH4R0lhvKQoKk,9533
53
52
  sequenzo/dissimilarity_measures/src/DHDdistance.cpp,sha256=RbWbSaELxlJiw5ST_JaD-wPx_sD7PGV2VdK4qEhOcxE,4714
@@ -212,43 +211,38 @@ sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp,s
212
211
  sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py,sha256=9kcOfaW5dDqXs9AJBgmHoUx80tCHdGJ3d2Elr9dOkUo,3980
213
212
  sequenzo/dissimilarity_measures/utils/__init__.py,sha256=aZMQJGgJq4GsL1x-pQPLmL7KrJ78cHMH46GVmVE8pJ0,407
214
213
  sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py,sha256=bJjbEQcjENSAdLv2IMRUWJC4avldwCfHrtSEnlDEACY,1470
215
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c,sha256=ymnTDZAawshZIpG4GNvja8v5qxfzMrFWIg7BpPs-hY4,598371
216
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd,sha256=MnN35Jdn9DCyoRiDPDQc-fSagc9Z9zfQW8eL_1qbAHw,74240
217
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx,sha256=pwImh8jgNcbLLb0y1uhYAYNwXP93JPIh-DQ5frVjZUE,3216
218
- sequenzo/dissimilarity_measures/utils/seqconc.c,sha256=9_xI-P5ID0EPKmqY1bY9P7Vnlwyjwx97DTF06jbwP3o,505939
219
- sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd,sha256=ONXmCD6PylG_QrGB1jVqijuXNp-vlloKqHLxCQQf0Uo,56832
220
- sequenzo/dissimilarity_measures/utils/seqconc.pyx,sha256=7X8jv1kXq9o2pCnBNu5X_NZ0aMpWtxhhqnMsUJzqANo,770
221
- sequenzo/dissimilarity_measures/utils/seqdss.c,sha256=_VthgFnq5lCTgXDjcVkaxGyYOywcACKids-xrfeJKA8,620064
222
- sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd,sha256=KdsrH5f5iXoM5mG40TiPE6VLZ8mloLNHHi8TeU72Ch0,82432
223
- sequenzo/dissimilarity_measures/utils/seqdss.pyx,sha256=THl9-bw63NqgXAv9_OhlB6DF92A0moszCoA32XyyA0Q,1232
224
- sequenzo/dissimilarity_measures/utils/seqdur.c,sha256=Ca6dtgx134HyhUUVUp19hnDPR8KGuUwPf-mEvp5C-yM,542361
225
- sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd,sha256=oPaSDV_FtgBCzTCChOwla31Qe0xQuKEWmE4CXIRE2mw,67072
226
- sequenzo/dissimilarity_measures/utils/seqdur.pyx,sha256=RyBqjdO8SdiCYg7cScdT6uEWPjYX_e7-KUrlQkRkBC0,1288
227
- sequenzo/dissimilarity_measures/utils/seqlength.c,sha256=wPG-Oo7qfoY-pcn9jyNXzMI5amOyJGqae5d_niJfyQI,493532
228
- sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd,sha256=ZXVEFQTgqbKO_B157-cXFlsJLNy3fFxCkwGPgFZT5wA,56832
229
- sequenzo/dissimilarity_measures/utils/seqlength.pyx,sha256=y-792z6X1L4zychHJj5IQVHOfM5JibQ_ITFnkYHJO3c,564
214
+ sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c,sha256=4wwXFfZ1rtGinidgo_FEb7VLvNUasC9eITEeklHygH8,598395
215
+ sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd,sha256=jOqxRpZWE7XweIxZAV88mJ8QuWkLL7o14cgMWFg8fbI,74240
216
+ sequenzo/dissimilarity_measures/utils/seqconc.c,sha256=RL_l7FC9pVhmhyV6JOeTE8XC6KRxCYi3_aIhhuNry7g,505901
217
+ sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd,sha256=0ZxbpJdpGkHpStjvLoZJw_5eh3d7cjA4JbR_E8WJgfs,56832
218
+ sequenzo/dissimilarity_measures/utils/seqdss.c,sha256=taO6RoSucXAnicfDvLMUUsKSsGBKCeLfSC6FNLt_5p4,619965
219
+ sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd,sha256=nEK-JzNkZsCoOqy12jkowfvT1WAum1tHOc0ER-YjpvI,82432
220
+ sequenzo/dissimilarity_measures/utils/seqdur.c,sha256=cViNFXq8Abr37BFRvaqSGaUHIaQ6AvUQ22aVtfbEDRU,542262
221
+ sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd,sha256=vh1RcA7En6v8U5vi1BraqaPK_8wH_K2iIKUY2dDEvPo,67072
222
+ sequenzo/dissimilarity_measures/utils/seqlength.c,sha256=64tQJ2WgRnemHCTV4HXYPalBK1g_6C4ESSinCE_hO18,493507
223
+ sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd,sha256=6U7KbGHnXYCVEmtxyCECVsPj2j6fXKltAT9EDsdnb5k,56832
230
224
  sequenzo/multidomain/__init__.py,sha256=bVnbkJXuXj8y5lHreRBQnL1JFcrmlsz2TSt-qFfmWm8,734
231
- sequenzo/multidomain/association_between_domains.py,sha256=V4I_ILAAwf3cfz2i0HdLF8otAqPtF2LfjcCKo1IkfHA,10855
225
+ sequenzo/multidomain/association_between_domains.py,sha256=tncMzsSn0yhRd9C37-GInHw7FlIucOGdb_affCAc_nk,10844
232
226
  sequenzo/multidomain/cat.py,sha256=7QqdEjZSiqDZnXGOWgkOoeRHg0USVAUKPlzXecJeMFQ,18022
233
- sequenzo/multidomain/combt.py,sha256=aE1OsCE5huJI6VqInBpBjT6WjAIZlgFHFtUwSZt9XLU,22079
227
+ sequenzo/multidomain/combt.py,sha256=RCqk1GvJaZiLzI85FuiQ82xUn61xQTTVXyv3JmWSiqg,22075
234
228
  sequenzo/multidomain/dat.py,sha256=ljhc3kbAl8t8tpNh06eu4lAbR36PFUQCkxXBBtx81lo,3477
235
229
  sequenzo/multidomain/idcd.py,sha256=kKSGJfL_c0pBPbbDdR1BJ08GIIErGFj5ft7ieHyhnT8,4526
236
- sequenzo/multidomain/linked_polyad.py,sha256=l2ysDmhKeuHXq2xMVGb-WOq7AStnLd99NZ363gmh3lA,13614
237
- sequenzo/prefix_tree/__init__.py,sha256=YxMzr5UwM22DmpMDMAmKYI-vjhikDI8Gft_cJ9h1gYo,1126
238
- sequenzo/prefix_tree/individual_level_indicators.py,sha256=9JPINoPU50UGh9-D2QOAr7GQsxAkZ6qDmhKQtVwvZsU,52788
230
+ sequenzo/multidomain/linked_polyad.py,sha256=efnPq0Jvr4XF_um3En6mEZxPywvrVmTmb75IUK75lgU,13611
231
+ sequenzo/prefix_tree/__init__.py,sha256=Sp6HmMfzzv4C2jrUzAyS3GMW9dSAFwVM79vHtTGrLgU,1124
232
+ sequenzo/prefix_tree/individual_level_indicators.py,sha256=dpzRBjwQboxZhpwscRdlGr19f7jpofepQF1oaxm1V6E,52786
239
233
  sequenzo/prefix_tree/system_level_indicators.py,sha256=tGnzRRqwzJbGv-vjOMAzdh6arN6QJkdsybe0Yif57ug,17507
240
234
  sequenzo/prefix_tree/utils.py,sha256=7DETf9i_OclRnWel680qD4wO1b8SffJVKq2Kx0zkTaQ,1489
241
235
  sequenzo/sequence_characteristics/__init__.py,sha256=dPdBD7K-dhsuLoVYhDDVUj9_DYBLPxSUh9GPJ8y2P4k,1224
242
236
  sequenzo/sequence_characteristics/complexity_index.py,sha256=KZ9TpaHtSIkbaqiOqEGRoFDYenrcuIzv34Du_RbL-6A,1762
243
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py,sha256=ipyOVZ_o9xC-VYMLIPU7QcMlpJzyzAYG3qKdlBV5HS0,9302
244
- sequenzo/sequence_characteristics/plot_characteristics.py,sha256=LYOCNoS5BnFVbY1r-HEJSrQyACOTkxHuxguS4wVLI9o,25612
245
- sequenzo/sequence_characteristics/simple_characteristics.py,sha256=3dj8R_tDEtC7Wk58PzX4q0nXW9orW4f1hO1Lt2uXeCc,11715
237
+ sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py,sha256=nRRynM11tPD16cOlsw6PaNIvmpiXhLAuWBGLku4AFF4,9296
238
+ sequenzo/sequence_characteristics/plot_characteristics.py,sha256=55I4Hbpko7QR54HeAKBTrW_meOZDezMVW2JpOwuzHOw,25608
239
+ sequenzo/sequence_characteristics/simple_characteristics.py,sha256=pgINV0jvriJ3Zhi6Rm2lzxFZ0Yxzv-JHDuor62m8DjA,11711
246
240
  sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py,sha256=S5vzqUrJF_tadIlgd-S-jHfwWo3agFzc1ptAB6CXfPE,1313
247
241
  sequenzo/sequence_characteristics/turbulence.py,sha256=bixo1wcGNKYhsNmppORwv_fjy7xQxhGHo-OebG_TnYE,6217
248
242
  sequenzo/sequence_characteristics/variance_of_spell_durations.py,sha256=hx00o_ypk3VDUU1VHOI8eN_Fy12CKlOd02NeF_fKl04,3299
249
243
  sequenzo/sequence_characteristics/within_sequence_entropy.py,sha256=K9uhkTcy7SkW5By1lNX1DS6mREGj49ElgdPs1erJ-bI,1500
250
- sequenzo/suffix_tree/__init__.py,sha256=rJTkjwxg2Ub_jGxugYmEYWatTxtKu_BTWNDZbJ-KgsI,1148
251
- sequenzo/suffix_tree/individual_level_indicators.py,sha256=W36tEQEEmzu67gf5BLmau8Ja6-1BEBG5ArzZqbKc-PM,67211
244
+ sequenzo/suffix_tree/__init__.py,sha256=0CSK_oC6xOrkbkScgOxD15HXwnAxaYuvizv0MSbTh-E,1146
245
+ sequenzo/suffix_tree/individual_level_indicators.py,sha256=EQP1dWPsdI3aOedhCxcFIGIDWEt615x1V_SYZIAH_bs,67208
252
246
  sequenzo/suffix_tree/system_level_indicators.py,sha256=N4DrjM9fBHFqqcjDN1TAkwbkoDnlZSK2F8F2ERCizIQ,16962
253
247
  sequenzo/suffix_tree/utils.py,sha256=G1qYVNTTZHoUbY7x-j1FlZ-XfnXpGVHpaaa_tE4hMAE,1637
254
248
  sequenzo/visualization/__init__.py,sha256=JKYTCx4qbXF7oAE0OrfH_Tw5WhzTU_HJEn7XjJncFBw,944
@@ -257,16 +251,16 @@ sequenzo/visualization/plot_modal_state.py,sha256=wTAhdlu4px-dJdxM9LSSgDQioW46r-
257
251
  sequenzo/visualization/plot_most_frequent_sequences.py,sha256=UbkCjPUCKRAVY06Hm8HVSBI9_8iKFxkBdWwoMNmy0BE,6317
258
252
  sequenzo/visualization/plot_relative_frequency.py,sha256=tUNyIxc8C0SPjRJJYormBbTxWjvbox-pjaH9suzjRwU,16423
259
253
  sequenzo/visualization/plot_sequence_index.py,sha256=qc4h6JzQrDeiBsGvK6Cdn3HwDZFLfNzPqyFOon1ZQ6Q,41079
260
- sequenzo/visualization/plot_single_medoid.py,sha256=yqKzUANkmA-f-oreDZuAVAzMrALxn_uGjg815HAjKag,5971
254
+ sequenzo/visualization/plot_single_medoid.py,sha256=K3d8feYV1S2PmjNdlRD961-bdt-An1-SWw8HKWDcBAc,5971
261
255
  sequenzo/visualization/plot_state_distribution.py,sha256=8uH533kwyqxIeGPM4eFJBWJ2eRgqEFPH3EiPJECzvS8,25978
262
- sequenzo/visualization/plot_transition_matrix.py,sha256=aQVJ61UictR5FtjvDvpzIYQZ0EdllJ0Cw_U3ZgxTgiE,6959
256
+ sequenzo/visualization/plot_transition_matrix.py,sha256=ZmLyKniHROsnp4Xp2fggpofaskja7wiQNjOUL-jfkFE,6977
263
257
  sequenzo/visualization/utils/__init__.py,sha256=brrYzeIQm_cEM_TgA8_eRdckzN9WP1pj9g-f1qBzRLY,734
264
- sequenzo/visualization/utils/utils.py,sha256=9Z1L3PVL-Z41fvCW4tLJ5DaUVHO6C6PAJYlT2Q1jY4k,10249
258
+ sequenzo/visualization/utils/utils.py,sha256=P33amescn1FLcfGwzxDrHpvaELzUHRKt06f3Iky23t0,10246
265
259
  sequenzo/with_event_history_analysis/__init__.py,sha256=B2EZhtJ7NEzO8piDwfSbh0l87fQ0ZuesPO5GNJEXKPo,730
266
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py,sha256=wtsKnq-82bJhRH78cy49Nzo3yGJKoFD4RckoZ7D-SS8,37301
260
+ sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py,sha256=DdNqx8MBaxdLu-n-9RH5q2cSr5sWr18LTNF9qdNaKbM,37300
267
261
  sequenzo/with_event_history_analysis/sequence_history_analysis.py,sha256=vv5y2u9cpzhmNJX_fSYgLmFOncPvB7DVhWujljII1vA,10902
268
- sequenzo-0.1.20.dist-info/licenses/LICENSE,sha256=URRMyLHVeGF2kyDLC1xbRKBBIjDHJyWqF4nWpzfBX10,1497
269
- sequenzo-0.1.20.dist-info/METADATA,sha256=MCb19RNQA1wRatJpLP7WFhMc6A1lUOc1db5qKmsogl8,15452
270
- sequenzo-0.1.20.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
271
- sequenzo-0.1.20.dist-info/top_level.txt,sha256=yM8eczbPzqB1bRHMYLptvjjQ3p5tYhY6VjgWHUIi9vw,9
272
- sequenzo-0.1.20.dist-info/RECORD,,
262
+ sequenzo-0.1.22.dist-info/licenses/LICENSE,sha256=URRMyLHVeGF2kyDLC1xbRKBBIjDHJyWqF4nWpzfBX10,1497
263
+ sequenzo-0.1.22.dist-info/METADATA,sha256=wpW8q_ogXcCkfVKqQsOJ1AOzgup4NerTa4Th6IH_UOM,14591
264
+ sequenzo-0.1.22.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
265
+ sequenzo-0.1.22.dist-info/top_level.txt,sha256=yM8eczbPzqB1bRHMYLptvjjQ3p5tYhY6VjgWHUIi9vw,9
266
+ sequenzo-0.1.22.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- import numpy as np
2
- cimport numpy as cnp
3
-
4
- def get_weighted_diss(cnp.ndarray[double, ndim=2] diss,
5
- cnp.ndarray[double, ndim=1] weights):
6
- cdef int n = weights.shape[0]
7
- cdef int i, j
8
- cdef double factor
9
-
10
- for i in range(n):
11
- for j in range(i + 1, n):
12
- factor = (weights[i] * weights[j]) ** 0.5
13
- diss[i, j] *= factor
14
- diss[j, i] = diss[i, j]
15
-
16
- return diss
@@ -1,95 +0,0 @@
1
- # cython: boundscheck=False, wraparound=False
2
- import numpy as np
3
- cimport numpy as np
4
-
5
- import pandas as pd
6
- from libc.math cimport isnan
7
-
8
- def get_sm_trate_substitution_cost_matrix(
9
- object seqdata,
10
- bint time_varying=False,
11
- bint weighted=True,
12
- int lag=1,
13
- bint count=False
14
- ):
15
- """
16
- Compute substitution cost matrix (transition rate matrix)
17
- """
18
-
19
- from sequenzo.define_sequence_data import SequenceData
20
- if not isinstance(seqdata, SequenceData):
21
- raise ValueError("[x] Seqdata must be a pandas DataFrame wrapped in a SequenceData object.")
22
-
23
- cdef np.ndarray[np.float64_t, ndim=1] weights
24
- if weighted:
25
- weights = np.asarray(seqdata.weights, dtype=np.float64)
26
- else:
27
- weights = np.ones(seqdata.seqdata.shape[0], dtype=np.float64)
28
-
29
- states = seqdata.states.copy()
30
- statesMapping = seqdata.state_mapping.copy()
31
-
32
- cdef int _size = len(states) + 1
33
- df = seqdata.seqdata
34
- cdef int n_rows = df.shape[0]
35
- cdef int sdur = df.shape[1]
36
- cdef int i, j, t, sl, state_x, state_y
37
- cdef double PA, PAB
38
-
39
- if lag < 0:
40
- all_transition = list(range(abs(lag), sdur))
41
- else:
42
- all_transition = list(range(sdur - lag))
43
-
44
- cdef int num_transition = len(all_transition)
45
-
46
- # convert df to NumPy 2D array of ints
47
- seq_mat = df.to_numpy(dtype=np.float64)
48
- cdef np.ndarray[np.float64_t, ndim=2] seq_mat_mv = seq_mat
49
-
50
- if time_varying:
51
- tmat = np.zeros((num_transition, _size, _size), dtype=np.float64)
52
-
53
- for idx, sl in enumerate(all_transition):
54
- for state_x in statesMapping.values():
55
- PA = 0.0
56
- for i in range(n_rows):
57
- if seq_mat_mv[i, sl] == state_x and not isnan(seq_mat_mv[i, sl + lag]):
58
- PA += weights[i]
59
-
60
- if PA == 0:
61
- tmat[idx, state_x, :] = 0
62
- else:
63
- for state_y in statesMapping.values():
64
- PAB = 0.0
65
- for i in range(n_rows):
66
- if (seq_mat_mv[i, sl] == state_x and
67
- not isnan(seq_mat_mv[i, sl + lag]) and
68
- seq_mat_mv[i, sl + lag] == state_y):
69
- PAB += weights[i]
70
-
71
- tmat[idx, state_x, state_y] = PAB if count else PAB / PA
72
-
73
- else:
74
- tmat = np.zeros((_size, _size), dtype=np.float64)
75
-
76
- for state_x in statesMapping.values():
77
- PA = 0.0
78
- for i in range(n_rows):
79
- for t in all_transition:
80
- if (seq_mat_mv[i, t] == state_x and not isnan(seq_mat_mv[i, t + lag])):
81
- PA += weights[i]
82
-
83
- if PA == 0:
84
- tmat[state_x, :] = 0
85
- else:
86
- for state_y in statesMapping.values():
87
- PAB = 0.0
88
- for i in range(n_rows):
89
- for t in all_transition:
90
- if (seq_mat_mv[i, t] == state_x and seq_mat_mv[i, t + lag] == state_y):
91
- PAB += weights[i]
92
-
93
- tmat[state_x, state_y] = PAB if count else PAB / PA
94
-
95
- return tmat