PyPI - sequenzo - Versions diffs - 0.1.19__cp312-cp312-win_amd64.whl → 0.1.21__cp312-cp312-win_amd64.whl - Mend

sequenzo 0.1.19__cp312-cp312-win_amd64.whl → 0.1.21__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sequenzo might be problematic. Click here for more details.

Files changed (45)

sequenzo/big_data/clara/utils/get_weighted_diss.cp312-win_amd64.pyd CHANGED Viewed

Binary file

sequenzo/clustering/clustering_c_code.cp312-win_amd64.pyd CHANGED Viewed

Binary file

sequenzo/clustering/hierarchical_clustering.py CHANGED Viewed

@@ -281,7 +281,7 @@ def _warn_ward_usage_once(matrix, method):
     if not _WARD_WARNING_SHOWN and method.lower() in ["ward", "ward_d", "ward_d2"]:
         if not _check_euclidean_compatibility(matrix, method):
             warnings.warn(
-                "\n⚠️  Ward linkage method detected with potentially non-Euclidean distance matrix!\n"
+                "\n[!] Ward linkage method detected with potentially non-Euclidean distance matrix!\n"
                 "   Ward clustering (both Ward D and Ward D2) assumes Euclidean distances for theoretical validity.\n"
                 "   \n"
                 "   Ward method variants:\n"
@@ -1350,7 +1350,7 @@ if __name__ == '__main__':
     labels = ['further education', 'higher education', 'employment', 'joblessness', 'school', 'training']
     # TODO: write a try and error: if no such a parameter, then ask to pass the right ones
-    # sequence_data = SequenceData(df, time=time, time_type="year", id_col="country", ids=df['country'].values, states=states)
+    # sequence_data = SequenceData(df, time=time, id_col="country", ids=df['country'].values, states=states)
     sequence_data = SequenceData(df,
                                  time=time_list,

sequenzo/define_sequence_data.py CHANGED Viewed

@@ -13,7 +13,7 @@
     However, in this implementation, we require the user to explicitly provide the set of `states`. This explicit control
     is essential for ensuring consistent ordering of states, reproducibility of visualizations, and compatibility across
-    sequence datasets — especially when certain states may not appear in a given subset of the data.
+    sequence datasets - especially when certain states may not appear in a given subset of the data.
     As a result, `alphabet` is automatically set to `states` upon initialization, and kept as a semantic alias for clarity
     and potential compatibility. Users should treat `states` as the definitive state space and are not required to provide
@@ -170,7 +170,7 @@ class SequenceData:
             raise ValueError(
                 f"[!] You must specify a valid `id_col` parameter that exists in your dataset.\n"
                 f"    ID is required to uniquely identify each sequence (e.g., individuals).\n"
-                f"    → Hint: If your data does not have an ID column yet, you can use the helper function:\n\n"
+                f"    -> Hint: If your data does not have an ID column yet, you can use the helper function:\n\n"
                 f"        from sequenzo.utils import assign_unique_ids\n"
                 f"        df = assign_unique_ids(df, id_col_name='Entity ID')\n"
                 f"        df.to_csv('your_dataset_with_ids.csv', index=False)\n\n"
@@ -245,7 +245,7 @@ class SequenceData:
                 print(
                     "[!] Detected missing values (empty cells) in the sequence data.\n"
-                    f"    → Automatically added {example_missing} to `states` and `labels` for compatibility.\n"
+                    f"    -> Automatically added {example_missing} to `states` and `labels` for compatibility.\n"
                     "    However, it's strongly recommended to manually include it when defining `states` and `labels`.\n"
                     "    For example:\n\n"
                     f"        states = [{quote}At Home{quote}, {quote}Left Home{quote}, {example_missing}]\n"
@@ -519,7 +519,7 @@ class SequenceData:
     def flatten_weights(self) -> np.ndarray:
         """
         Repeat weights across sequence length for 1D alignment with flatten().
-        E.g., 5 sequences × 10 steps → repeat each weight 10 times.
+        E.g., 5 sequences x 10 steps -> repeat each weight 10 times.
         """
         return np.repeat(self.weights, self.n_steps)

sequenzo/dissimilarity_measures/c_code.cp312-win_amd64.pyd CHANGED Viewed

Binary file

sequenzo/dissimilarity_measures/get_distance_matrix.py CHANGED Viewed

@@ -641,7 +641,7 @@ if __name__ == '__main__':
     # states = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
     # # states = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
     # labels = ['FT+WC', 'FT+BC', 'PT+WC', 'PT+BC', 'U', 'OLF']
-    # sequence_data = SequenceData(df, time=time_list, time_type="age", states=states, labels=labels, id_col="PID")
+    # sequence_data = SequenceData(df, time=time_list, states=states, labels=labels, id_col="PID")
     # om = get_distance_matrix(sequence_data, method="OM", sm="TRATE", indel="auto")
     # om.to_csv("D:/college/research/QiQi/sequenzo/files/sequenzo_Sohee_string_OM_TRATE.csv", index=True)

sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py CHANGED Viewed

@@ -231,7 +231,7 @@ if __name__ == "__main__":
     states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']
-    sequence_data = SequenceData(df, time=time, time_type="year", id_col="country", states=states)
+    sequence_data = SequenceData(df, time=time, id_col="country", states=states)
     sm = get_substitution_cost_matrix(sequence_data,
                                       method="CONSTANT",