sequenzo 0.1.19__cp312-cp312-win_amd64.whl → 0.1.21__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic.

Files changed (45)
  1. sequenzo/big_data/clara/utils/get_weighted_diss.c +192 -191
  2. sequenzo/big_data/clara/utils/get_weighted_diss.cp312-win_amd64.pyd +0 -0
  3. sequenzo/clustering/clustering_c_code.cp312-win_amd64.pyd +0 -0
  4. sequenzo/clustering/hierarchical_clustering.py +2 -2
  5. sequenzo/define_sequence_data.py +4 -4
  6. sequenzo/dissimilarity_measures/c_code.cp312-win_amd64.pyd +0 -0
  7. sequenzo/dissimilarity_measures/get_distance_matrix.py +1 -1
  8. sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +1 -1
  9. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +169 -169
  10. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp312-win_amd64.pyd +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqconc.c +231 -230
  12. sequenzo/dissimilarity_measures/utils/seqconc.cp312-win_amd64.pyd +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdss.c +324 -323
  14. sequenzo/dissimilarity_measures/utils/seqdss.cp312-win_amd64.pyd +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqdur.c +324 -323
  16. sequenzo/dissimilarity_measures/utils/seqdur.cp312-win_amd64.pyd +0 -0
  17. sequenzo/dissimilarity_measures/utils/seqlength.c +224 -222
  18. sequenzo/dissimilarity_measures/utils/seqlength.cp312-win_amd64.pyd +0 -0
  19. sequenzo/multidomain/association_between_domains.py +1 -1
  20. sequenzo/multidomain/combt.py +4 -4
  21. sequenzo/multidomain/dat.py +11 -3
  22. sequenzo/multidomain/idcd.py +0 -2
  23. sequenzo/multidomain/linked_polyad.py +3 -4
  24. sequenzo/prefix_tree/__init__.py +1 -1
  25. sequenzo/prefix_tree/individual_level_indicators.py +2 -2
  26. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
  27. sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
  28. sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
  29. sequenzo/suffix_tree/__init__.py +1 -1
  30. sequenzo/suffix_tree/individual_level_indicators.py +3 -3
  31. sequenzo/visualization/plot_single_medoid.py +2 -2
  32. sequenzo/visualization/plot_transition_matrix.py +1 -1
  33. sequenzo/visualization/utils/utils.py +2 -2
  34. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
  35. {sequenzo-0.1.19.dist-info → sequenzo-0.1.21.dist-info}/METADATA +37 -6
  36. {sequenzo-0.1.19.dist-info → sequenzo-0.1.21.dist-info}/RECORD +39 -45
  37. sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
  38. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
  39. sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
  40. sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
  41. sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
  42. sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
  43. {sequenzo-0.1.19.dist-info → sequenzo-0.1.21.dist-info}/WHEEL +0 -0
  44. {sequenzo-0.1.19.dist-info → sequenzo-0.1.21.dist-info}/licenses/LICENSE +0 -0
  45. {sequenzo-0.1.19.dist-info → sequenzo-0.1.21.dist-info}/top_level.txt +0 -0
@@ -281,7 +281,7 @@ def _warn_ward_usage_once(matrix, method):
281
281
  if not _WARD_WARNING_SHOWN and method.lower() in ["ward", "ward_d", "ward_d2"]:
282
282
  if not _check_euclidean_compatibility(matrix, method):
283
283
  warnings.warn(
284
- "\n⚠️ Ward linkage method detected with potentially non-Euclidean distance matrix!\n"
284
+ "\n[!] Ward linkage method detected with potentially non-Euclidean distance matrix!\n"
285
285
  " Ward clustering (both Ward D and Ward D2) assumes Euclidean distances for theoretical validity.\n"
286
286
  " \n"
287
287
  " Ward method variants:\n"
@@ -1350,7 +1350,7 @@ if __name__ == '__main__':
1350
1350
  labels = ['further education', 'higher education', 'employment', 'joblessness', 'school', 'training']
1351
1351
 
1352
1352
  # TODO: write a try and error: if no such a parameter, then ask to pass the right ones
1353
- # sequence_data = SequenceData(df, time=time, time_type="year", id_col="country", ids=df['country'].values, states=states)
1353
+ # sequence_data = SequenceData(df, time=time, id_col="country", ids=df['country'].values, states=states)
1354
1354
 
1355
1355
  sequence_data = SequenceData(df,
1356
1356
  time=time_list,
@@ -13,7 +13,7 @@
13
13
 
14
14
  However, in this implementation, we require the user to explicitly provide the set of `states`. This explicit control
15
15
  is essential for ensuring consistent ordering of states, reproducibility of visualizations, and compatibility across
16
- sequence datasets especially when certain states may not appear in a given subset of the data.
16
+ sequence datasets - especially when certain states may not appear in a given subset of the data.
17
17
 
18
18
  As a result, `alphabet` is automatically set to `states` upon initialization, and kept as a semantic alias for clarity
19
19
  and potential compatibility. Users should treat `states` as the definitive state space and are not required to provide
@@ -170,7 +170,7 @@ class SequenceData:
170
170
  raise ValueError(
171
171
  f"[!] You must specify a valid `id_col` parameter that exists in your dataset.\n"
172
172
  f" ID is required to uniquely identify each sequence (e.g., individuals).\n"
173
- f" Hint: If your data does not have an ID column yet, you can use the helper function:\n\n"
173
+ f" -> Hint: If your data does not have an ID column yet, you can use the helper function:\n\n"
174
174
  f" from sequenzo.utils import assign_unique_ids\n"
175
175
  f" df = assign_unique_ids(df, id_col_name='Entity ID')\n"
176
176
  f" df.to_csv('your_dataset_with_ids.csv', index=False)\n\n"
@@ -245,7 +245,7 @@ class SequenceData:
245
245
 
246
246
  print(
247
247
  "[!] Detected missing values (empty cells) in the sequence data.\n"
248
- f" Automatically added {example_missing} to `states` and `labels` for compatibility.\n"
248
+ f" -> Automatically added {example_missing} to `states` and `labels` for compatibility.\n"
249
249
  " However, it's strongly recommended to manually include it when defining `states` and `labels`.\n"
250
250
  " For example:\n\n"
251
251
  f" states = [{quote}At Home{quote}, {quote}Left Home{quote}, {example_missing}]\n"
@@ -519,7 +519,7 @@ class SequenceData:
519
519
  def flatten_weights(self) -> np.ndarray:
520
520
  """
521
521
  Repeat weights across sequence length for 1D alignment with flatten().
522
- E.g., 5 sequences × 10 steps repeat each weight 10 times.
522
+ E.g., 5 sequences x 10 steps -> repeat each weight 10 times.
523
523
  """
524
524
  return np.repeat(self.weights, self.n_steps)
525
525
 
@@ -641,7 +641,7 @@ if __name__ == '__main__':
641
641
  # states = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
642
642
  # # states = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
643
643
  # labels = ['FT+WC', 'FT+BC', 'PT+WC', 'PT+BC', 'U', 'OLF']
644
- # sequence_data = SequenceData(df, time=time_list, time_type="age", states=states, labels=labels, id_col="PID")
644
+ # sequence_data = SequenceData(df, time=time_list, states=states, labels=labels, id_col="PID")
645
645
  # om = get_distance_matrix(sequence_data, method="OM", sm="TRATE", indel="auto")
646
646
 
647
647
  # om.to_csv("D:/college/research/QiQi/sequenzo/files/sequenzo_Sohee_string_OM_TRATE.csv", index=True)
@@ -231,7 +231,7 @@ if __name__ == "__main__":
231
231
 
232
232
  states = ['Very Low', 'Low', 'Middle', 'High', 'Very High']
233
233
 
234
- sequence_data = SequenceData(df, time=time, time_type="year", id_col="country", states=states)
234
+ sequence_data = SequenceData(df, time=time, id_col="country", states=states)
235
235
 
236
236
  sm = get_substitution_cost_matrix(sequence_data,
237
237
  method="CONSTANT",