sequenzo 0.1.20__cp39-cp39-win_amd64.whl → 0.1.22__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic. Click here for more details.
- sequenzo/big_data/clara/utils/get_weighted_diss.c +195 -195
- sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1 -1
- sequenzo/define_sequence_data.py +4 -4
- sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +172 -173
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.c +234 -234
- sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.c +227 -226
- sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
- sequenzo/multidomain/association_between_domains.py +1 -1
- sequenzo/multidomain/combt.py +4 -4
- sequenzo/multidomain/linked_polyad.py +3 -3
- sequenzo/prefix_tree/__init__.py +1 -1
- sequenzo/prefix_tree/individual_level_indicators.py +2 -2
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
- sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
- sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
- sequenzo/suffix_tree/__init__.py +1 -1
- sequenzo/suffix_tree/individual_level_indicators.py +3 -3
- sequenzo/visualization/plot_single_medoid.py +2 -2
- sequenzo/visualization/plot_transition_matrix.py +3 -2
- sequenzo/visualization/utils/utils.py +2 -2
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/METADATA +17 -43
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/RECORD +35 -41
- sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
- sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
- sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
- sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
- sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/WHEEL +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/licenses/LICENSE +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
@@ -307,5 +307,5 @@ def get_association_between_domains(seqdata_dom, assoc=("LRT", "V"), rep_method=
|
|
|
307
307
|
print(" - v : Cramer's V statistic (0 to 1, measures association strength).")
|
|
308
308
|
print(" - p(v) : p-value for Cramer's V (based on chi-squared test) + significance stars: * (p<.05), ** (p<.01), *** (p<.001)")
|
|
309
309
|
print(" - strength : Qualitative label for association strength based on Cramer's V:")
|
|
310
|
-
print(" 0.00
|
|
310
|
+
print(" 0.00-0.09 -> None, 0.10-0.29 -> Weak, 0.30-0.49 -> Moderate, >=0.50 -> Strong")
|
|
311
311
|
|
sequenzo/multidomain/combt.py
CHANGED
|
@@ -253,7 +253,7 @@ def merge_sparse_combt_types(distance_matrix,
|
|
|
253
253
|
distance_matrix = distance_matrix.values
|
|
254
254
|
|
|
255
255
|
if distance_matrix.shape[0] != distance_matrix.shape[1]:
|
|
256
|
-
raise ValueError("distance_matrix must be square (n
|
|
256
|
+
raise ValueError("distance_matrix must be square (n x n)")
|
|
257
257
|
|
|
258
258
|
labels = np.array(labels)
|
|
259
259
|
if len(labels) != distance_matrix.shape[0]:
|
|
@@ -323,7 +323,7 @@ def merge_sparse_combt_types(distance_matrix,
|
|
|
323
323
|
best_target = target
|
|
324
324
|
except Exception as e:
|
|
325
325
|
if verbose:
|
|
326
|
-
print(f"[!] Error computing silhouette for merge {small}
|
|
326
|
+
print(f"[!] Error computing silhouette for merge {small} -> {target}: {e}")
|
|
327
327
|
continue
|
|
328
328
|
|
|
329
329
|
# Execute merge if it maintains quality threshold
|
|
@@ -346,7 +346,7 @@ def merge_sparse_combt_types(distance_matrix,
|
|
|
346
346
|
|
|
347
347
|
if verbose:
|
|
348
348
|
print(
|
|
349
|
-
f"[+] Merged {small} ({reverse_map[small]}, size={old_count})
|
|
349
|
+
f"[+] Merged {small} ({reverse_map[small]}, size={old_count}) -> {best_target} ({reverse_map[best_target]}) | New ASW: {current_score:.4f}")
|
|
350
350
|
|
|
351
351
|
merged = True
|
|
352
352
|
break
|
|
@@ -380,7 +380,7 @@ def merge_sparse_combt_types(distance_matrix,
|
|
|
380
380
|
print("\n[>] Merge History Details:")
|
|
381
381
|
for i, merge in enumerate(merge_info["merge_history"]):
|
|
382
382
|
print(
|
|
383
|
-
f" Merge {i + 1}: {merge['source']} (size={merge['source_size']})
|
|
383
|
+
f" Merge {i + 1}: {merge['source']} (size={merge['source_size']}) -> {merge['target']} | ASW: {merge['new_asw']:.4f}")
|
|
384
384
|
|
|
385
385
|
# Visualize merge process if requested
|
|
386
386
|
if visualize_process and merge_info["merge_history"]:
|
|
@@ -158,7 +158,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
|
|
|
158
158
|
|
|
159
159
|
print("[Step 3] Computing all pairwise dissimilarities using method:", method)
|
|
160
160
|
alldist = np.asarray(get_distance_matrix(merged_seqdata, method=method, **distance_parameters))
|
|
161
|
-
print("
|
|
161
|
+
print(" -> Dissimilarity matrix shape:", alldist.shape)
|
|
162
162
|
|
|
163
163
|
cj = np.array([n * p for p in range(P)])
|
|
164
164
|
|
|
@@ -199,7 +199,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
|
|
|
199
199
|
else:
|
|
200
200
|
raise ValueError("Invalid randomization type 'a'. Should be 1 or 2.")
|
|
201
201
|
|
|
202
|
-
iterator = tqdm(range(T), desc="
|
|
202
|
+
iterator = tqdm(range(T), desc="-> Randomizing polyads") if verbose else range(T)
|
|
203
203
|
random_dists = Parallel(n_jobs=n_jobs)(delayed(random_sample_once)(i) for i in iterator)
|
|
204
204
|
random_dists = np.array(random_dists)
|
|
205
205
|
|
|
@@ -231,7 +231,7 @@ def linked_polyadic_sequence_analysis(seqlist: List[SequenceData],
|
|
|
231
231
|
|
|
232
232
|
print(
|
|
233
233
|
f"[Step 7] Final summary: mean observed = {np.mean(observed_dists):.2f}, mean randomized = {mean_rand_dist:.2f}")
|
|
234
|
-
print(f"
|
|
234
|
+
print(f" -> Significant polyads (V > 0.95): {np.sum(V_95)} / {n}")
|
|
235
235
|
|
|
236
236
|
result = {
|
|
237
237
|
"mean.dist": {"Obs": np.mean(observed_dists), "Rand": mean_rand_dist},
|
sequenzo/prefix_tree/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
@File : __init__.py
|
|
4
4
|
@Time : 02/05/2025 11:05
|
|
5
5
|
@Desc :
|
|
6
|
-
Prefix Tree Framework
|
|
6
|
+
Prefix Tree Framework - exposes core indicators and utilities for sequence divergence analysis.
|
|
7
7
|
"""
|
|
8
8
|
from .system_level_indicators import (
|
|
9
9
|
build_prefix_tree,
|
|
@@ -609,7 +609,7 @@ class IndividualDivergence:
|
|
|
609
609
|
Where z_{i,t} are the year-wise standardized prefix rarity scores using column-wise
|
|
610
610
|
standardization with sample standard deviation (ddof=1, as computed by pandas).
|
|
611
611
|
|
|
612
|
-
The standardized scores can be used with a threshold (e.g., z
|
|
612
|
+
The standardized scores can be used with a threshold (e.g., z >= 1.5) to classify
|
|
613
613
|
individuals as diverged/not diverged, and are particularly useful for visualization.
|
|
614
614
|
|
|
615
615
|
Parameters:
|
|
@@ -622,7 +622,7 @@ class IndividualDivergence:
|
|
|
622
622
|
Returns:
|
|
623
623
|
--------
|
|
624
624
|
List[float]
|
|
625
|
-
Standardized rarity scores for each individual. Values
|
|
625
|
+
Standardized rarity scores for each individual. Values >= z_threshold indicate divergence.
|
|
626
626
|
|
|
627
627
|
Notes:
|
|
628
628
|
------
|
|
@@ -31,7 +31,7 @@ def get_cross_sectional_entropy(
|
|
|
31
31
|
...
|
|
32
32
|
|
|
33
33
|
Additional metrics:
|
|
34
|
-
- per_time_entropy_norm: If norm=True, normalized by maximum entropy (|S|), range 0
|
|
34
|
+
- per_time_entropy_norm: If norm=True, normalized by maximum entropy (|S|), range 0-1
|
|
35
35
|
- effective_states (H_effective): exp(H), equivalent "effective number of states"
|
|
36
36
|
- summary: Key interpretation points (entropy peaks/valleys, dominant state intervals, average entropy, etc.)
|
|
37
37
|
|
|
@@ -199,7 +199,7 @@ def get_cross_sectional_entropy(
|
|
|
199
199
|
if eff_s is not None:
|
|
200
200
|
out["Effective States"] = eff_s
|
|
201
201
|
return out
|
|
202
|
-
else: # "dict"
|
|
202
|
+
else: # "dict" -- try to be more readable too
|
|
203
203
|
res = {
|
|
204
204
|
"Frequencies": freq_df_wide,
|
|
205
205
|
"ValidStates": valid_s,
|
|
@@ -360,7 +360,7 @@ def plot_cross_sectional_characteristics(seqdata,
|
|
|
360
360
|
title="Cross-sectional entropy over time",
|
|
361
361
|
show_title=True,
|
|
362
362
|
xlabel="Time",
|
|
363
|
-
ylabel="Entropy (0
|
|
363
|
+
ylabel="Entropy (0-1)",
|
|
364
364
|
line_color="#74C9B4",
|
|
365
365
|
save_as=None,
|
|
366
366
|
dpi=200,
|
|
@@ -396,7 +396,7 @@ def plot_cross_sectional_characteristics(seqdata,
|
|
|
396
396
|
xlabel : str, optional (default="Time")
|
|
397
397
|
Label for the x-axis.
|
|
398
398
|
|
|
399
|
-
ylabel : str, optional (default="Entropy (0
|
|
399
|
+
ylabel : str, optional (default="Entropy (0-1)")
|
|
400
400
|
Label for the y-axis (main entropy axis).
|
|
401
401
|
|
|
402
402
|
line_color : str, optional (default="#74C9B4")
|
|
@@ -91,7 +91,7 @@ def get_subsequences_all_sequences(seqdata, dss: bool = True, with_missing: bool
|
|
|
91
91
|
Args:
|
|
92
92
|
seqdata: SequenceData object or pandas DataFrame containing your sequence data
|
|
93
93
|
dss (bool): Whether to apply distinct state sequence preprocessing.
|
|
94
|
-
If True, consecutive identical states are compressed (e.g., [1,1,2,2]
|
|
94
|
+
If True, consecutive identical states are compressed (e.g., [1,1,2,2] -> [1,2])
|
|
95
95
|
with_missing (bool): Whether to include missing values in the calculation
|
|
96
96
|
|
|
97
97
|
Returns:
|
|
@@ -266,7 +266,7 @@ def get_number_of_transitions(seqdata, norm=False, pwight=False) -> pd.DataFrame
|
|
|
266
266
|
seq_3 2
|
|
267
267
|
|
|
268
268
|
>>> # Example: sequence [1, 1, 2, 2, 1, 3] has 3 transitions:
|
|
269
|
-
>>> # 1
|
|
269
|
+
>>> # 1->2 (position 3), 2->1 (position 5), 1->3 (position 6)
|
|
270
270
|
|
|
271
271
|
Note:
|
|
272
272
|
Missing values are automatically ignored. Only counts actual state changes
|
sequenzo/suffix_tree/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
@File : __init__.py
|
|
4
4
|
@Time : 08/08/2025 15:50
|
|
5
5
|
@Desc :
|
|
6
|
-
Suffix Tree Framework
|
|
6
|
+
Suffix Tree Framework - exposes core indicators and utilities for sequence convergence analysis.
|
|
7
7
|
"""
|
|
8
8
|
from .system_level_indicators import (
|
|
9
9
|
build_suffix_tree,
|
|
@@ -706,7 +706,7 @@ class IndividualConvergence:
|
|
|
706
706
|
across individuals with sample standard deviation (ddof=1):
|
|
707
707
|
z_{i,t} = (x_{i,t} - mean_t) / std_t
|
|
708
708
|
|
|
709
|
-
The standardized scores can be used with a threshold (e.g., z
|
|
709
|
+
The standardized scores can be used with a threshold (e.g., z <= -1.5) to classify
|
|
710
710
|
individuals as converged/not converged, and are particularly useful for visualization.
|
|
711
711
|
|
|
712
712
|
Note: For convergence (suffix tree), we look for LOW rarity (more typical patterns),
|
|
@@ -725,7 +725,7 @@ class IndividualConvergence:
|
|
|
725
725
|
Returns:
|
|
726
726
|
--------
|
|
727
727
|
List[float]
|
|
728
|
-
Standardized rarity scores for each individual. Values
|
|
728
|
+
Standardized rarity scores for each individual. Values <= -z_threshold indicate convergence.
|
|
729
729
|
|
|
730
730
|
Notes:
|
|
731
731
|
------
|
|
@@ -1539,7 +1539,7 @@ def compute_quantile_thresholds_by_group(scores, group_labels, quantiles=None):
|
|
|
1539
1539
|
|
|
1540
1540
|
def compute_quantile_thresholds_by_group_year(scores, group_labels, year_labels, quantiles=None, min_group_year_size=30):
|
|
1541
1541
|
"""
|
|
1542
|
-
Compute quantile thresholds by group
|
|
1542
|
+
Compute quantile thresholds by group x year for time-drifting distributions.
|
|
1543
1543
|
|
|
1544
1544
|
Parameters
|
|
1545
1545
|
----------
|
|
@@ -102,7 +102,7 @@ def compute_medoids_from_distance_matrix(distance_matrix: np.ndarray, seqdata: S
|
|
|
102
102
|
:return: Tuple containing the medoid sequences and their indices.
|
|
103
103
|
"""
|
|
104
104
|
if not isinstance(seqdata, SequenceData):
|
|
105
|
-
raise TypeError("
|
|
105
|
+
raise TypeError("[X] seqdata must be a SequenceData object.")
|
|
106
106
|
|
|
107
107
|
# Process weights
|
|
108
108
|
if isinstance(weights, str) and weights == "auto":
|
|
@@ -131,7 +131,7 @@ def compute_medoids_from_distance_matrix(distance_matrix: np.ndarray, seqdata: S
|
|
|
131
131
|
medoid_indices = medoid_indices.tolist()
|
|
132
132
|
|
|
133
133
|
if not all(isinstance(idx, int) for idx in medoid_indices):
|
|
134
|
-
raise ValueError("
|
|
134
|
+
raise ValueError("[X] medoid_indices must be a list of integers.")
|
|
135
135
|
|
|
136
136
|
return medoid_sequences, medoid_indices
|
|
137
137
|
|
|
@@ -112,7 +112,7 @@ def print_transition_matrix(seqdata: SequenceData, transition_rates: np.ndarray)
|
|
|
112
112
|
# Print each row
|
|
113
113
|
for i, from_state in enumerate(state_labels):
|
|
114
114
|
# Print row label
|
|
115
|
-
print(f"{from_state:>{max_label_width}}
|
|
115
|
+
print(f"{from_state:>{max_label_width}} ->", end=" ")
|
|
116
116
|
|
|
117
117
|
# Print transition rates
|
|
118
118
|
for prob in transition_rates[i]:
|
|
@@ -131,7 +131,7 @@ def plot_transition_matrix(seqdata: SequenceData,
|
|
|
131
131
|
fontsize: int = 12,
|
|
132
132
|
save_as: Optional[str] = None,
|
|
133
133
|
dpi: int = 200,
|
|
134
|
-
format: str = "
|
|
134
|
+
format: str = ".2f") -> None:
|
|
135
135
|
"""
|
|
136
136
|
Plot state transition rate matrix as a heatmap.
|
|
137
137
|
|
|
@@ -157,6 +157,7 @@ def plot_transition_matrix(seqdata: SequenceData,
|
|
|
157
157
|
# Generate heatmap using pre-formatted annotation strings
|
|
158
158
|
ax = sns.heatmap(
|
|
159
159
|
transition_matrix,
|
|
160
|
+
annot=True,
|
|
160
161
|
fmt=format,
|
|
161
162
|
cmap=cmap,
|
|
162
163
|
xticklabels=seqdata.labels,
|
|
@@ -35,10 +35,10 @@ def set_up_time_labels_for_x_axis(seqdata: SequenceData,
|
|
|
35
35
|
# If 10 or fewer time points, show all labels
|
|
36
36
|
xtick_positions = np.arange(num_time_steps)
|
|
37
37
|
elif num_time_steps <= 20:
|
|
38
|
-
# If 10
|
|
38
|
+
# If 10-20 time points, show every 2nd label
|
|
39
39
|
xtick_positions = np.arange(0, num_time_steps, step=2)
|
|
40
40
|
else:
|
|
41
|
-
# More than 20 time points
|
|
41
|
+
# More than 20 time points -> Pick 10 evenly spaced tick positions
|
|
42
42
|
xtick_positions = np.linspace(0, num_time_steps - 1, num=10, dtype=int)
|
|
43
43
|
|
|
44
44
|
# Set x-ticks and labels dynamically
|
|
@@ -117,7 +117,7 @@ def sequence_analysis_multi_state_model(seqdata: SequenceData, sublength: int, c
|
|
|
117
117
|
**What is person-period data?**
|
|
118
118
|
Instead of having one row per person with all their time points as columns,
|
|
119
119
|
person-period data has one row for each person-time combination. For example,
|
|
120
|
-
if we track 3 people over 5 time periods, we get 15 rows (3
|
|
120
|
+
if we track 3 people over 5 time periods, we get 15 rows (3 x 5).
|
|
121
121
|
|
|
122
122
|
**What are subsequences?**
|
|
123
123
|
At each time point, we look ahead and record what happens in the next few time periods.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sequenzo
|
|
3
|
-
Version: 0.1.20
|
|
3
|
+
Version: 0.1.22
|
|
4
4
|
Summary: A fast, scalable and intuitive Python package for social sequence analysis.
|
|
5
5
|
Author-email: Yuqi Liang <yuqi.liang.1900@gmail.com>, Xinyi Li <1836724126@qq.com>, Jan Heinrich Ernst Meyerhoff-Liang <jan.meyerhoff1@gmail.com>
|
|
6
6
|
License: BSD 3-Clause License
|
|
@@ -44,7 +44,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
44
44
|
Requires-Python: <3.13,>=3.9
|
|
45
45
|
Description-Content-Type: text/markdown
|
|
46
46
|
License-File: LICENSE
|
|
47
|
-
Requires-Dist: numpy
|
|
47
|
+
Requires-Dist: numpy>=1.21.0
|
|
48
48
|
Requires-Dist: pandas>=1.2.5
|
|
49
49
|
Requires-Dist: matplotlib>=3.4.3
|
|
50
50
|
Requires-Dist: seaborn>=0.11.2
|
|
@@ -54,22 +54,23 @@ Requires-Dist: cython>=0.29.21
|
|
|
54
54
|
Requires-Dist: scipy>=1.6.3
|
|
55
55
|
Requires-Dist: scikit-learn>=0.24.2
|
|
56
56
|
Requires-Dist: fastcluster>=1.2.6
|
|
57
|
-
Requires-Dist: rpy2>=3.5.12; python_version >= "3.12"
|
|
58
|
-
Requires-Dist: rpy2>=3.5.6; python_version == "3.11"
|
|
59
|
-
Requires-Dist: rpy2>=3.5.6; python_version == "3.10"
|
|
60
|
-
Requires-Dist: rpy2>=3.5.6; python_version == "3.9"
|
|
61
57
|
Requires-Dist: joblib>=1.0.1
|
|
62
58
|
Requires-Dist: docutils>=0.17
|
|
63
59
|
Requires-Dist: tqdm<5.0.0,>=4.62.3
|
|
64
60
|
Requires-Dist: missingno<0.6.0,>=0.5.2
|
|
65
61
|
Requires-Dist: cffi>=1.15.0
|
|
62
|
+
Provides-Extra: r
|
|
63
|
+
Requires-Dist: rpy2>=3.5.12; python_version >= "3.12" and extra == "r"
|
|
64
|
+
Requires-Dist: rpy2>=3.5.6; python_version == "3.11" and extra == "r"
|
|
65
|
+
Requires-Dist: rpy2>=3.5.6; python_version == "3.10" and extra == "r"
|
|
66
|
+
Requires-Dist: rpy2>=3.5.6; python_version == "3.9" and extra == "r"
|
|
66
67
|
Provides-Extra: dev
|
|
67
68
|
Requires-Dist: pytest>=6.2.5; extra == "dev"
|
|
68
69
|
Requires-Dist: flake8>=3.9.2; extra == "dev"
|
|
69
70
|
Dynamic: license-file
|
|
70
71
|
|
|
71
72
|
<p align="center">
|
|
72
|
-
<img src="https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/assets/logo/FullLogo_NoBuffer.jpg" alt="Sequenzo Logo" width="
|
|
73
|
+
<img src="https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/assets/logo/FullLogo_NoBuffer.jpg" alt="Sequenzo Logo" width="300">
|
|
73
74
|
</p>
|
|
74
75
|
|
|
75
76
|
<p align="center">
|
|
@@ -133,17 +134,18 @@ Perfect for research, policy, and business, enabling seamless analysis of catego
|
|
|
133
134
|
|
|
134
135
|
Sequenzo provides pre-built Python wheels for maximum compatibility — no need to compile from source.
|
|
135
136
|
|
|
136
|
-
| Platform | Architecture
|
|
137
|
-
|
|
138
|
-
| **macOS** |
|
|
139
|
-
| **Windows** | `AMD64` (64-bit)
|
|
140
|
-
| **Linux (glibc)**| `x86_64` (standard Linux)
|
|
141
|
-
| **Linux (musl)** | `x86_64` (Alpine Linux)
|
|
137
|
+
| Platform | Architecture | Python Versions | Status |
|
|
138
|
+
|------------------|---------------------------------|-----------------------|-------------------|
|
|
139
|
+
| **macOS** | Intel && Apple Silicon (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
|
|
140
|
+
| **Windows** | `AMD64` (64-bit) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
|
|
141
|
+
| **Linux (glibc)**| `x86_64` (standard Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
|
|
142
|
+
| **Linux (musl)** | `x86_64` (Alpine Linux) | 3.9, 3.10, 3.11, 3.12 | ✅ Pre-built wheel |
|
|
142
143
|
|
|
143
144
|
|
|
144
145
|
What do these terms mean?
|
|
145
|
-
- **
|
|
146
|
-
- **
|
|
146
|
+
- **macosx_arm64 (macOS)**: One wheel supports Apple Silicon Macs.
|
|
147
|
+
- **macosx_x86_64 (macOS)**: One wheel supports Intel Macs.
|
|
148
|
+
- **manylinux2014_x86_64 (glibc-based Linux)**: Compatible with most mainstream Linux distributions (e.g., Ubuntu, Debian, CentOS).
|
|
147
149
|
- **musllinux_1_2 (musl-based Linux)**: For lightweight Alpine Linux environments, common in Docker containers.
|
|
148
150
|
- **AMD64 (Windows)**: Standard 64-bit Windows system architecture.
|
|
149
151
|
|
|
@@ -181,34 +183,6 @@ If you have some issues with the installation, it might because you have both Py
|
|
|
181
183
|
pip3 install sequenzo
|
|
182
184
|
```
|
|
183
185
|
|
|
184
|
-
### ⚠️ Having Installation or Import Issues?
|
|
185
|
-
|
|
186
|
-
**Error:** `ImportError: numpy.core.multiarray failed to import`
|
|
187
|
-
|
|
188
|
-
**Most likely cause:** NumPy version mismatch (you have NumPy 1.x, but need 2.x)
|
|
189
|
-
|
|
190
|
-
**Quick Fix** (copy-paste these commands):
|
|
191
|
-
```bash
|
|
192
|
-
# Check your NumPy version first
|
|
193
|
-
python -c "import numpy; print(f'NumPy: {numpy.__version__}')"
|
|
194
|
-
|
|
195
|
-
# If you see 1.x.x, upgrade to 2.x:
|
|
196
|
-
pip install --upgrade "numpy>=2.0.0"
|
|
197
|
-
pip uninstall sequenzo -y
|
|
198
|
-
pip install --no-cache-dir sequenzo
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
📖 **Still having issues?**
|
|
202
|
-
1. Run our diagnostic tool to identify the problem:
|
|
203
|
-
```bash
|
|
204
|
-
curl -O https://raw.githubusercontent.com/Liang-Team/Sequenzo/main/diagnose.py
|
|
205
|
-
python diagnose.py
|
|
206
|
-
```
|
|
207
|
-
2. See our detailed guides:
|
|
208
|
-
- **[QUICK_FIX.md](QUICK_FIX.md)** - Simple step-by-step solutions
|
|
209
|
-
- **[TROUBLESHOOTING.md](TROUBLESHOOTING.md)** - Comprehensive troubleshooting
|
|
210
|
-
- **[docs/WHY_IMPORT_FAILS.md](docs/WHY_IMPORT_FAILS.md)** - Technical explanation
|
|
211
|
-
|
|
212
186
|
### Optional R Integration
|
|
213
187
|
|
|
214
188
|
Sequenzo now checks the system environment variables before running ward.D hierarchical clustering.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
sequenzo/__init__.py,sha256=uGmuqHVR8JDseOnN0tHgbly9nRzCeNY7A9OHBusdMP4,6990
|
|
2
|
-
sequenzo/define_sequence_data.py,sha256=
|
|
2
|
+
sequenzo/define_sequence_data.py,sha256=4ON_-Z8AMtqTlezYLKqbn9msBEs2Ba5RoIuJpgWMBpY,28621
|
|
3
3
|
sequenzo/openmp_setup.py,sha256=f_8SxfcRhdI_uj_2RHFwiEw8iizB35Mv-3UUccRDt6U,7059
|
|
4
4
|
sequenzo/big_data/__init__.py,sha256=iSZnGboYhbvsFf75uL8D8XDucXRxYypmFNN1uX5MxJo,152
|
|
5
5
|
sequenzo/big_data/clara/__init__.py,sha256=pDR5_TSDisEhPtsA2gXGaXXBNTmWidJC_nnd9QMkz-U,700
|
|
@@ -8,14 +8,13 @@ sequenzo/big_data/clara/visualization.py,sha256=EpSmtAxRHVqcXlcXvSGiUuBjEETR7zK_
|
|
|
8
8
|
sequenzo/big_data/clara/utils/__init__.py,sha256=2_o1tz8HFZVKFy8w8oJWdWlVKtwGjGY3z4PQylHKjt0,726
|
|
9
9
|
sequenzo/big_data/clara/utils/aggregatecases.py,sha256=ul97pbnRlwxbFbX_0M4j-Bkyxkp7zMAKatSo4eanO24,2899
|
|
10
10
|
sequenzo/big_data/clara/utils/davies_bouldin.py,sha256=4Y6VFjqopG3CaftQ8tDxQPjxxupJ6Hgv-yTXwCvgN7w,3037
|
|
11
|
-
sequenzo/big_data/clara/utils/get_weighted_diss.c,sha256=
|
|
12
|
-
sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd,sha256=
|
|
13
|
-
sequenzo/big_data/clara/utils/get_weighted_diss.pyx,sha256=UYR-u8MDQEuWID3inKhSpBsuxu7qTFmEwLrjNPBMmUw,430
|
|
11
|
+
sequenzo/big_data/clara/utils/get_weighted_diss.c,sha256=Nj6fZduxi-tdZ9c22NJBAzZRtaV6yYY1he0QTVIp3uE,483794
|
|
12
|
+
sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd,sha256=ikQ93kVblKCBPoJ_aYjU_28hQWb_PZgujWD3QK7lM9E,53248
|
|
14
13
|
sequenzo/big_data/clara/utils/wfcmdd.py,sha256=-1H6CbTteTW-CeuQ_ehVDhnKH3ozcCkUobxoCSRIpYg,7074
|
|
15
14
|
sequenzo/clustering/KMedoids.py,sha256=asktGP0KKgP4TsOH4bSYEWfE8yY5b9BfKa1d54KzaXI,7147
|
|
16
15
|
sequenzo/clustering/__init__.py,sha256=duEY0Hq0-7Kc_lv0uFDK3D8IEXby-7Z0Rjff0EgO0KM,875
|
|
17
|
-
sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd,sha256=
|
|
18
|
-
sequenzo/clustering/hierarchical_clustering.py,sha256=
|
|
16
|
+
sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd,sha256=6u1PJJnSKfNQXw3ZYBc_YTZuEDirJsGzCYqamT-22kw,253440
|
|
17
|
+
sequenzo/clustering/hierarchical_clustering.py,sha256=D4m3Wg3tPWBNXtPTsauxntD5ctx3hGXF9bjL-cB7VM0,61568
|
|
19
18
|
sequenzo/clustering/src/KMedoid.cpp,sha256=Bb4LaRes004T9vyCmUknRS0NUaNr4ZoEpWvtkYGq-jw,9299
|
|
20
19
|
sequenzo/clustering/src/PAM.cpp,sha256=UFXdTy1wMWheYa-fUoi8ASQPmn0Ew-AO7fqVQVxn_E8,8357
|
|
21
20
|
sequenzo/clustering/src/PAMonce.cpp,sha256=C9HqGBRenmF2tnQofALjjU1As02dTw2oqEnuvdhoJIk,7943
|
|
@@ -47,7 +46,7 @@ sequenzo/datasets/polyadic_samplep1.csv,sha256=-2HvKSmevfqe1rWFVJlbnjousEgJRU_PH
|
|
|
47
46
|
sequenzo/datasets/polyadic_seqc1.csv,sha256=ydZ-U8NTszR4lNBN4hhsH_dHfq0w5VZSMM7t9C5Uado,7028
|
|
48
47
|
sequenzo/datasets/polyadic_seqp1.csv,sha256=sydXCR0JBKJlbMxJyGa46cic9XQstUpYFOtHaLmkp_0,7681
|
|
49
48
|
sequenzo/dissimilarity_measures/__init__.py,sha256=qkWAQ1sBpS2aayO-FSA8Zha7rQ_vjs0_KIHEB60bVg4,958
|
|
50
|
-
sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd,sha256=
|
|
49
|
+
sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd,sha256=cCKw6xcnrShAXJAbf-TwhdyOySqXdMSmoU3dQZSBVgw,222208
|
|
51
50
|
sequenzo/dissimilarity_measures/get_distance_matrix.py,sha256=dNZtqg0aN3vAz5r1sTgjS3jwLAzcXGbG4MGWeBEvhag,29886
|
|
52
51
|
sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py,sha256=Yed91VyNHkzeYxIduCTVF8hmJiAmltPH4R0lhvKQoKk,9533
|
|
53
52
|
sequenzo/dissimilarity_measures/src/DHDdistance.cpp,sha256=RbWbSaELxlJiw5ST_JaD-wPx_sD7PGV2VdK4qEhOcxE,4714
|
|
@@ -212,43 +211,38 @@ sequenzo/dissimilarity_measures/src/xsimd/test/doc/writing_vectorized_code.cpp,s
|
|
|
212
211
|
sequenzo/dissimilarity_measures/src/xsimd/test/test_wasm/test_wasm_playwright.py,sha256=9kcOfaW5dDqXs9AJBgmHoUx80tCHdGJ3d2Elr9dOkUo,3980
|
|
213
212
|
sequenzo/dissimilarity_measures/utils/__init__.py,sha256=aZMQJGgJq4GsL1x-pQPLmL7KrJ78cHMH46GVmVE8pJ0,407
|
|
214
213
|
sequenzo/dissimilarity_measures/utils/get_LCP_length_for_2_seq.py,sha256=bJjbEQcjENSAdLv2IMRUWJC4avldwCfHrtSEnlDEACY,1470
|
|
215
|
-
sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c,sha256=
|
|
216
|
-
sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd,sha256=
|
|
217
|
-
sequenzo/dissimilarity_measures/utils/
|
|
218
|
-
sequenzo/dissimilarity_measures/utils/seqconc.
|
|
219
|
-
sequenzo/dissimilarity_measures/utils/
|
|
220
|
-
sequenzo/dissimilarity_measures/utils/
|
|
221
|
-
sequenzo/dissimilarity_measures/utils/
|
|
222
|
-
sequenzo/dissimilarity_measures/utils/
|
|
223
|
-
sequenzo/dissimilarity_measures/utils/
|
|
224
|
-
sequenzo/dissimilarity_measures/utils/
|
|
225
|
-
sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd,sha256=oPaSDV_FtgBCzTCChOwla31Qe0xQuKEWmE4CXIRE2mw,67072
|
|
226
|
-
sequenzo/dissimilarity_measures/utils/seqdur.pyx,sha256=RyBqjdO8SdiCYg7cScdT6uEWPjYX_e7-KUrlQkRkBC0,1288
|
|
227
|
-
sequenzo/dissimilarity_measures/utils/seqlength.c,sha256=wPG-Oo7qfoY-pcn9jyNXzMI5amOyJGqae5d_niJfyQI,493532
|
|
228
|
-
sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd,sha256=ZXVEFQTgqbKO_B157-cXFlsJLNy3fFxCkwGPgFZT5wA,56832
|
|
229
|
-
sequenzo/dissimilarity_measures/utils/seqlength.pyx,sha256=y-792z6X1L4zychHJj5IQVHOfM5JibQ_ITFnkYHJO3c,564
|
|
214
|
+
sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c,sha256=4wwXFfZ1rtGinidgo_FEb7VLvNUasC9eITEeklHygH8,598395
|
|
215
|
+
sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd,sha256=jOqxRpZWE7XweIxZAV88mJ8QuWkLL7o14cgMWFg8fbI,74240
|
|
216
|
+
sequenzo/dissimilarity_measures/utils/seqconc.c,sha256=RL_l7FC9pVhmhyV6JOeTE8XC6KRxCYi3_aIhhuNry7g,505901
|
|
217
|
+
sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd,sha256=0ZxbpJdpGkHpStjvLoZJw_5eh3d7cjA4JbR_E8WJgfs,56832
|
|
218
|
+
sequenzo/dissimilarity_measures/utils/seqdss.c,sha256=taO6RoSucXAnicfDvLMUUsKSsGBKCeLfSC6FNLt_5p4,619965
|
|
219
|
+
sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd,sha256=nEK-JzNkZsCoOqy12jkowfvT1WAum1tHOc0ER-YjpvI,82432
|
|
220
|
+
sequenzo/dissimilarity_measures/utils/seqdur.c,sha256=cViNFXq8Abr37BFRvaqSGaUHIaQ6AvUQ22aVtfbEDRU,542262
|
|
221
|
+
sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd,sha256=vh1RcA7En6v8U5vi1BraqaPK_8wH_K2iIKUY2dDEvPo,67072
|
|
222
|
+
sequenzo/dissimilarity_measures/utils/seqlength.c,sha256=64tQJ2WgRnemHCTV4HXYPalBK1g_6C4ESSinCE_hO18,493507
|
|
223
|
+
sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd,sha256=6U7KbGHnXYCVEmtxyCECVsPj2j6fXKltAT9EDsdnb5k,56832
|
|
230
224
|
sequenzo/multidomain/__init__.py,sha256=bVnbkJXuXj8y5lHreRBQnL1JFcrmlsz2TSt-qFfmWm8,734
|
|
231
|
-
sequenzo/multidomain/association_between_domains.py,sha256=
|
|
225
|
+
sequenzo/multidomain/association_between_domains.py,sha256=tncMzsSn0yhRd9C37-GInHw7FlIucOGdb_affCAc_nk,10844
|
|
232
226
|
sequenzo/multidomain/cat.py,sha256=7QqdEjZSiqDZnXGOWgkOoeRHg0USVAUKPlzXecJeMFQ,18022
|
|
233
|
-
sequenzo/multidomain/combt.py,sha256=
|
|
227
|
+
sequenzo/multidomain/combt.py,sha256=RCqk1GvJaZiLzI85FuiQ82xUn61xQTTVXyv3JmWSiqg,22075
|
|
234
228
|
sequenzo/multidomain/dat.py,sha256=ljhc3kbAl8t8tpNh06eu4lAbR36PFUQCkxXBBtx81lo,3477
|
|
235
229
|
sequenzo/multidomain/idcd.py,sha256=kKSGJfL_c0pBPbbDdR1BJ08GIIErGFj5ft7ieHyhnT8,4526
|
|
236
|
-
sequenzo/multidomain/linked_polyad.py,sha256=
|
|
237
|
-
sequenzo/prefix_tree/__init__.py,sha256=
|
|
238
|
-
sequenzo/prefix_tree/individual_level_indicators.py,sha256=
|
|
230
|
+
sequenzo/multidomain/linked_polyad.py,sha256=efnPq0Jvr4XF_um3En6mEZxPywvrVmTmb75IUK75lgU,13611
|
|
231
|
+
sequenzo/prefix_tree/__init__.py,sha256=Sp6HmMfzzv4C2jrUzAyS3GMW9dSAFwVM79vHtTGrLgU,1124
|
|
232
|
+
sequenzo/prefix_tree/individual_level_indicators.py,sha256=dpzRBjwQboxZhpwscRdlGr19f7jpofepQF1oaxm1V6E,52786
|
|
239
233
|
sequenzo/prefix_tree/system_level_indicators.py,sha256=tGnzRRqwzJbGv-vjOMAzdh6arN6QJkdsybe0Yif57ug,17507
|
|
240
234
|
sequenzo/prefix_tree/utils.py,sha256=7DETf9i_OclRnWel680qD4wO1b8SffJVKq2Kx0zkTaQ,1489
|
|
241
235
|
sequenzo/sequence_characteristics/__init__.py,sha256=dPdBD7K-dhsuLoVYhDDVUj9_DYBLPxSUh9GPJ8y2P4k,1224
|
|
242
236
|
sequenzo/sequence_characteristics/complexity_index.py,sha256=KZ9TpaHtSIkbaqiOqEGRoFDYenrcuIzv34Du_RbL-6A,1762
|
|
243
|
-
sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py,sha256=
|
|
244
|
-
sequenzo/sequence_characteristics/plot_characteristics.py,sha256=
|
|
245
|
-
sequenzo/sequence_characteristics/simple_characteristics.py,sha256=
|
|
237
|
+
sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py,sha256=nRRynM11tPD16cOlsw6PaNIvmpiXhLAuWBGLku4AFF4,9296
|
|
238
|
+
sequenzo/sequence_characteristics/plot_characteristics.py,sha256=55I4Hbpko7QR54HeAKBTrW_meOZDezMVW2JpOwuzHOw,25608
|
|
239
|
+
sequenzo/sequence_characteristics/simple_characteristics.py,sha256=pgINV0jvriJ3Zhi6Rm2lzxFZ0Yxzv-JHDuor62m8DjA,11711
|
|
246
240
|
sequenzo/sequence_characteristics/state_frequencies_and_entropy_per_sequence.py,sha256=S5vzqUrJF_tadIlgd-S-jHfwWo3agFzc1ptAB6CXfPE,1313
|
|
247
241
|
sequenzo/sequence_characteristics/turbulence.py,sha256=bixo1wcGNKYhsNmppORwv_fjy7xQxhGHo-OebG_TnYE,6217
|
|
248
242
|
sequenzo/sequence_characteristics/variance_of_spell_durations.py,sha256=hx00o_ypk3VDUU1VHOI8eN_Fy12CKlOd02NeF_fKl04,3299
|
|
249
243
|
sequenzo/sequence_characteristics/within_sequence_entropy.py,sha256=K9uhkTcy7SkW5By1lNX1DS6mREGj49ElgdPs1erJ-bI,1500
|
|
250
|
-
sequenzo/suffix_tree/__init__.py,sha256=
|
|
251
|
-
sequenzo/suffix_tree/individual_level_indicators.py,sha256=
|
|
244
|
+
sequenzo/suffix_tree/__init__.py,sha256=0CSK_oC6xOrkbkScgOxD15HXwnAxaYuvizv0MSbTh-E,1146
|
|
245
|
+
sequenzo/suffix_tree/individual_level_indicators.py,sha256=EQP1dWPsdI3aOedhCxcFIGIDWEt615x1V_SYZIAH_bs,67208
|
|
252
246
|
sequenzo/suffix_tree/system_level_indicators.py,sha256=N4DrjM9fBHFqqcjDN1TAkwbkoDnlZSK2F8F2ERCizIQ,16962
|
|
253
247
|
sequenzo/suffix_tree/utils.py,sha256=G1qYVNTTZHoUbY7x-j1FlZ-XfnXpGVHpaaa_tE4hMAE,1637
|
|
254
248
|
sequenzo/visualization/__init__.py,sha256=JKYTCx4qbXF7oAE0OrfH_Tw5WhzTU_HJEn7XjJncFBw,944
|
|
@@ -257,16 +251,16 @@ sequenzo/visualization/plot_modal_state.py,sha256=wTAhdlu4px-dJdxM9LSSgDQioW46r-
|
|
|
257
251
|
sequenzo/visualization/plot_most_frequent_sequences.py,sha256=UbkCjPUCKRAVY06Hm8HVSBI9_8iKFxkBdWwoMNmy0BE,6317
|
|
258
252
|
sequenzo/visualization/plot_relative_frequency.py,sha256=tUNyIxc8C0SPjRJJYormBbTxWjvbox-pjaH9suzjRwU,16423
|
|
259
253
|
sequenzo/visualization/plot_sequence_index.py,sha256=qc4h6JzQrDeiBsGvK6Cdn3HwDZFLfNzPqyFOon1ZQ6Q,41079
|
|
260
|
-
sequenzo/visualization/plot_single_medoid.py,sha256=
|
|
254
|
+
sequenzo/visualization/plot_single_medoid.py,sha256=K3d8feYV1S2PmjNdlRD961-bdt-An1-SWw8HKWDcBAc,5971
|
|
261
255
|
sequenzo/visualization/plot_state_distribution.py,sha256=8uH533kwyqxIeGPM4eFJBWJ2eRgqEFPH3EiPJECzvS8,25978
|
|
262
|
-
sequenzo/visualization/plot_transition_matrix.py,sha256=
|
|
256
|
+
sequenzo/visualization/plot_transition_matrix.py,sha256=ZmLyKniHROsnp4Xp2fggpofaskja7wiQNjOUL-jfkFE,6977
|
|
263
257
|
sequenzo/visualization/utils/__init__.py,sha256=brrYzeIQm_cEM_TgA8_eRdckzN9WP1pj9g-f1qBzRLY,734
|
|
264
|
-
sequenzo/visualization/utils/utils.py,sha256=
|
|
258
|
+
sequenzo/visualization/utils/utils.py,sha256=P33amescn1FLcfGwzxDrHpvaELzUHRKt06f3Iky23t0,10246
|
|
265
259
|
sequenzo/with_event_history_analysis/__init__.py,sha256=B2EZhtJ7NEzO8piDwfSbh0l87fQ0ZuesPO5GNJEXKPo,730
|
|
266
|
-
sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py,sha256=
|
|
260
|
+
sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py,sha256=DdNqx8MBaxdLu-n-9RH5q2cSr5sWr18LTNF9qdNaKbM,37300
|
|
267
261
|
sequenzo/with_event_history_analysis/sequence_history_analysis.py,sha256=vv5y2u9cpzhmNJX_fSYgLmFOncPvB7DVhWujljII1vA,10902
|
|
268
|
-
sequenzo-0.1.
|
|
269
|
-
sequenzo-0.1.
|
|
270
|
-
sequenzo-0.1.
|
|
271
|
-
sequenzo-0.1.
|
|
272
|
-
sequenzo-0.1.
|
|
262
|
+
sequenzo-0.1.22.dist-info/licenses/LICENSE,sha256=URRMyLHVeGF2kyDLC1xbRKBBIjDHJyWqF4nWpzfBX10,1497
|
|
263
|
+
sequenzo-0.1.22.dist-info/METADATA,sha256=wpW8q_ogXcCkfVKqQsOJ1AOzgup4NerTa4Th6IH_UOM,14591
|
|
264
|
+
sequenzo-0.1.22.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
|
|
265
|
+
sequenzo-0.1.22.dist-info/top_level.txt,sha256=yM8eczbPzqB1bRHMYLptvjjQ3p5tYhY6VjgWHUIi9vw,9
|
|
266
|
+
sequenzo-0.1.22.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
cimport numpy as cnp
|
|
3
|
-
|
|
4
|
-
def get_weighted_diss(cnp.ndarray[double, ndim=2] diss,
|
|
5
|
-
cnp.ndarray[double, ndim=1] weights):
|
|
6
|
-
cdef int n = weights.shape[0]
|
|
7
|
-
cdef int i, j
|
|
8
|
-
cdef double factor
|
|
9
|
-
|
|
10
|
-
for i in range(n):
|
|
11
|
-
for j in range(i + 1, n):
|
|
12
|
-
factor = (weights[i] * weights[j]) ** 0.5
|
|
13
|
-
diss[i, j] *= factor
|
|
14
|
-
diss[j, i] = diss[i, j]
|
|
15
|
-
|
|
16
|
-
return diss
|
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
# cython: boundscheck=False, wraparound=False
|
|
2
|
-
import numpy as np
|
|
3
|
-
cimport numpy as np
|
|
4
|
-
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from libc.math cimport isnan
|
|
7
|
-
|
|
8
|
-
def get_sm_trate_substitution_cost_matrix(
|
|
9
|
-
object seqdata,
|
|
10
|
-
bint time_varying=False,
|
|
11
|
-
bint weighted=True,
|
|
12
|
-
int lag=1,
|
|
13
|
-
bint count=False
|
|
14
|
-
):
|
|
15
|
-
"""
|
|
16
|
-
Compute substitution cost matrix (transition rate matrix)
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
from sequenzo.define_sequence_data import SequenceData
|
|
20
|
-
if not isinstance(seqdata, SequenceData):
|
|
21
|
-
raise ValueError("[x] Seqdata must be a pandas DataFrame wrapped in a SequenceData object.")
|
|
22
|
-
|
|
23
|
-
cdef np.ndarray[np.float64_t, ndim=1] weights
|
|
24
|
-
if weighted:
|
|
25
|
-
weights = np.asarray(seqdata.weights, dtype=np.float64)
|
|
26
|
-
else:
|
|
27
|
-
weights = np.ones(seqdata.seqdata.shape[0], dtype=np.float64)
|
|
28
|
-
|
|
29
|
-
states = seqdata.states.copy()
|
|
30
|
-
statesMapping = seqdata.state_mapping.copy()
|
|
31
|
-
|
|
32
|
-
cdef int _size = len(states) + 1
|
|
33
|
-
df = seqdata.seqdata
|
|
34
|
-
cdef int n_rows = df.shape[0]
|
|
35
|
-
cdef int sdur = df.shape[1]
|
|
36
|
-
cdef int i, j, t, sl, state_x, state_y
|
|
37
|
-
cdef double PA, PAB
|
|
38
|
-
|
|
39
|
-
if lag < 0:
|
|
40
|
-
all_transition = list(range(abs(lag), sdur))
|
|
41
|
-
else:
|
|
42
|
-
all_transition = list(range(sdur - lag))
|
|
43
|
-
|
|
44
|
-
cdef int num_transition = len(all_transition)
|
|
45
|
-
|
|
46
|
-
# convert df to NumPy 2D array of ints
|
|
47
|
-
seq_mat = df.to_numpy(dtype=np.float64)
|
|
48
|
-
cdef np.ndarray[np.float64_t, ndim=2] seq_mat_mv = seq_mat
|
|
49
|
-
|
|
50
|
-
if time_varying:
|
|
51
|
-
tmat = np.zeros((num_transition, _size, _size), dtype=np.float64)
|
|
52
|
-
|
|
53
|
-
for idx, sl in enumerate(all_transition):
|
|
54
|
-
for state_x in statesMapping.values():
|
|
55
|
-
PA = 0.0
|
|
56
|
-
for i in range(n_rows):
|
|
57
|
-
if seq_mat_mv[i, sl] == state_x and not isnan(seq_mat_mv[i, sl + lag]):
|
|
58
|
-
PA += weights[i]
|
|
59
|
-
|
|
60
|
-
if PA == 0:
|
|
61
|
-
tmat[idx, state_x, :] = 0
|
|
62
|
-
else:
|
|
63
|
-
for state_y in statesMapping.values():
|
|
64
|
-
PAB = 0.0
|
|
65
|
-
for i in range(n_rows):
|
|
66
|
-
if (seq_mat_mv[i, sl] == state_x and
|
|
67
|
-
not isnan(seq_mat_mv[i, sl + lag]) and
|
|
68
|
-
seq_mat_mv[i, sl + lag] == state_y):
|
|
69
|
-
PAB += weights[i]
|
|
70
|
-
|
|
71
|
-
tmat[idx, state_x, state_y] = PAB if count else PAB / PA
|
|
72
|
-
|
|
73
|
-
else:
|
|
74
|
-
tmat = np.zeros((_size, _size), dtype=np.float64)
|
|
75
|
-
|
|
76
|
-
for state_x in statesMapping.values():
|
|
77
|
-
PA = 0.0
|
|
78
|
-
for i in range(n_rows):
|
|
79
|
-
for t in all_transition:
|
|
80
|
-
if (seq_mat_mv[i, t] == state_x and not isnan(seq_mat_mv[i, t + lag])):
|
|
81
|
-
PA += weights[i]
|
|
82
|
-
|
|
83
|
-
if PA == 0:
|
|
84
|
-
tmat[state_x, :] = 0
|
|
85
|
-
else:
|
|
86
|
-
for state_y in statesMapping.values():
|
|
87
|
-
PAB = 0.0
|
|
88
|
-
for i in range(n_rows):
|
|
89
|
-
for t in all_transition:
|
|
90
|
-
if (seq_mat_mv[i, t] == state_x and seq_mat_mv[i, t + lag] == state_y):
|
|
91
|
-
PAB += weights[i]
|
|
92
|
-
|
|
93
|
-
tmat[state_x, state_y] = PAB if count else PAB / PA
|
|
94
|
-
|
|
95
|
-
return tmat
|