sequenzo 0.1.20__cp39-cp39-win_amd64.whl → 0.1.22__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic. Click here for more details.
- sequenzo/big_data/clara/utils/get_weighted_diss.c +195 -195
- sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1 -1
- sequenzo/define_sequence_data.py +4 -4
- sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +172 -173
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.c +234 -234
- sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.c +227 -226
- sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
- sequenzo/multidomain/association_between_domains.py +1 -1
- sequenzo/multidomain/combt.py +4 -4
- sequenzo/multidomain/linked_polyad.py +3 -3
- sequenzo/prefix_tree/__init__.py +1 -1
- sequenzo/prefix_tree/individual_level_indicators.py +2 -2
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
- sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
- sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
- sequenzo/suffix_tree/__init__.py +1 -1
- sequenzo/suffix_tree/individual_level_indicators.py +3 -3
- sequenzo/visualization/plot_single_medoid.py +2 -2
- sequenzo/visualization/plot_transition_matrix.py +3 -2
- sequenzo/visualization/utils/utils.py +2 -2
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/METADATA +17 -43
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/RECORD +35 -41
- sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
- sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
- sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
- sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
- sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/WHEEL +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/licenses/LICENSE +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
Binary file
|
|
@@ -281,7 +281,7 @@ def _warn_ward_usage_once(matrix, method):
|
|
|
281
281
|
if not _WARD_WARNING_SHOWN and method.lower() in ["ward", "ward_d", "ward_d2"]:
|
|
282
282
|
if not _check_euclidean_compatibility(matrix, method):
|
|
283
283
|
warnings.warn(
|
|
284
|
-
"\n
|
|
284
|
+
"\n[!] Ward linkage method detected with potentially non-Euclidean distance matrix!\n"
|
|
285
285
|
" Ward clustering (both Ward D and Ward D2) assumes Euclidean distances for theoretical validity.\n"
|
|
286
286
|
" \n"
|
|
287
287
|
" Ward method variants:\n"
|
sequenzo/define_sequence_data.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
However, in this implementation, we require the user to explicitly provide the set of `states`. This explicit control
|
|
15
15
|
is essential for ensuring consistent ordering of states, reproducibility of visualizations, and compatibility across
|
|
16
|
-
sequence datasets
|
|
16
|
+
sequence datasets - especially when certain states may not appear in a given subset of the data.
|
|
17
17
|
|
|
18
18
|
As a result, `alphabet` is automatically set to `states` upon initialization, and kept as a semantic alias for clarity
|
|
19
19
|
and potential compatibility. Users should treat `states` as the definitive state space and are not required to provide
|
|
@@ -170,7 +170,7 @@ class SequenceData:
|
|
|
170
170
|
raise ValueError(
|
|
171
171
|
f"[!] You must specify a valid `id_col` parameter that exists in your dataset.\n"
|
|
172
172
|
f" ID is required to uniquely identify each sequence (e.g., individuals).\n"
|
|
173
|
-
f"
|
|
173
|
+
f" -> Hint: If your data does not have an ID column yet, you can use the helper function:\n\n"
|
|
174
174
|
f" from sequenzo.utils import assign_unique_ids\n"
|
|
175
175
|
f" df = assign_unique_ids(df, id_col_name='Entity ID')\n"
|
|
176
176
|
f" df.to_csv('your_dataset_with_ids.csv', index=False)\n\n"
|
|
@@ -245,7 +245,7 @@ class SequenceData:
|
|
|
245
245
|
|
|
246
246
|
print(
|
|
247
247
|
"[!] Detected missing values (empty cells) in the sequence data.\n"
|
|
248
|
-
f"
|
|
248
|
+
f" -> Automatically added {example_missing} to `states` and `labels` for compatibility.\n"
|
|
249
249
|
" However, it's strongly recommended to manually include it when defining `states` and `labels`.\n"
|
|
250
250
|
" For example:\n\n"
|
|
251
251
|
f" states = [{quote}At Home{quote}, {quote}Left Home{quote}, {example_missing}]\n"
|
|
@@ -519,7 +519,7 @@ class SequenceData:
|
|
|
519
519
|
def flatten_weights(self) -> np.ndarray:
|
|
520
520
|
"""
|
|
521
521
|
Repeat weights across sequence length for 1D alignment with flatten().
|
|
522
|
-
E.g., 5 sequences
|
|
522
|
+
E.g., 5 sequences x 10 steps -> repeat each weight 10 times.
|
|
523
523
|
"""
|
|
524
524
|
return np.repeat(self.weights, self.n_steps)
|
|
525
525
|
|
|
Binary file
|