sequenzo 0.1.20__cp39-cp39-win_amd64.whl → 0.1.22__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sequenzo might be problematic. Click here for more details.

Files changed (41)
  1. sequenzo/big_data/clara/utils/get_weighted_diss.c +195 -195
  2. sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
  3. sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
  4. sequenzo/clustering/hierarchical_clustering.py +1 -1
  5. sequenzo/define_sequence_data.py +4 -4
  6. sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
  7. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +172 -173
  8. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
  9. sequenzo/dissimilarity_measures/utils/seqconc.c +234 -234
  10. sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
  11. sequenzo/dissimilarity_measures/utils/seqdss.c +327 -327
  12. sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
  13. sequenzo/dissimilarity_measures/utils/seqdur.c +327 -327
  14. sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
  15. sequenzo/dissimilarity_measures/utils/seqlength.c +227 -226
  16. sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
  17. sequenzo/multidomain/association_between_domains.py +1 -1
  18. sequenzo/multidomain/combt.py +4 -4
  19. sequenzo/multidomain/linked_polyad.py +3 -3
  20. sequenzo/prefix_tree/__init__.py +1 -1
  21. sequenzo/prefix_tree/individual_level_indicators.py +2 -2
  22. sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
  23. sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
  24. sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
  25. sequenzo/suffix_tree/__init__.py +1 -1
  26. sequenzo/suffix_tree/individual_level_indicators.py +3 -3
  27. sequenzo/visualization/plot_single_medoid.py +2 -2
  28. sequenzo/visualization/plot_transition_matrix.py +3 -2
  29. sequenzo/visualization/utils/utils.py +2 -2
  30. sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
  31. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/METADATA +17 -43
  32. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/RECORD +35 -41
  33. sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
  34. sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
  35. sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
  36. sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
  37. sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
  38. sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
  39. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/WHEEL +0 -0
  40. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/licenses/LICENSE +0 -0
  41. {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/top_level.txt +0 -0
@@ -1,26 +0,0 @@ sequenzo/dissimilarity_measures/utils/seqconc.pyx
1
- import numpy as np
2
- cimport numpy as cnp
3
- from libc.stdint cimport int32_t
4
- from libc.string cimport strcmp
5
-
6
- cdef str sconc_np(cnp.ndarray[int32_t, ndim=1] seqdata, str sep):
7
- cdef int i, size = seqdata.shape[0]
8
- cdef list valid_values = []
9
- cdef bytes result = b""
10
-
11
- for i in range(size):
12
- if seqdata[i] >= 0:
13
- valid_values.append(str(seqdata[i]))
14
-
15
- if valid_values:
16
- result = sep.join(valid_values).encode('utf-8')
17
-
18
- return result.decode('utf-8')
19
-
20
- def seqconc(cnp.ndarray[int32_t, ndim=2] data, str sep="-"):
21
- if data.ndim == 1:
22
- return sconc_np(data, sep)
23
- elif data.ndim == 2:
24
- return np.array([sconc_np(row, sep) for row in data])
25
- else:
26
- raise ValueError("Only 1D and 2D arrays are supported.")
@@ -1,33 +0,0 @@ sequenzo/dissimilarity_measures/utils/seqdss.pyx
1
- import numpy as np
2
- cimport numpy as cnp
3
- from sequenzo.define_sequence_data import SequenceData
4
- from libc.stdint cimport int32_t
5
-
6
- def seqdss(seqdata):
7
- if not isinstance(seqdata, SequenceData):
8
- raise ValueError("[!] data is NOT a sequence object, see SequenceData to create one.")
9
-
10
- cdef cnp.ndarray[int32_t, ndim=2] seqdatanum = seqdata.values.astype(np.int32, copy=False)
11
- cdef int n = seqdatanum.shape[0], m = seqdatanum.shape[1]
12
- cdef int i, j
13
-
14
- cdef cnp.ndarray[int32_t, ndim=2] ffill = seqdatanum
15
- cdef cnp.ndarray[char, ndim=2] boundaries = np.ones((n, 1), dtype=bool) # Fixed here
16
-
17
- for j in range(1, m):
18
- mask = (ffill[:, j] < 0)
19
- ffill[mask, j] = ffill[mask, j - 1]
20
-
21
- boundaries = np.concatenate([boundaries, ffill[:, 1:] != ffill[:, :-1]], axis=1)
22
-
23
- cdef list groups = [row[boundary & (row >= 0)] for row, boundary in zip(ffill, boundaries)]
24
- cdef int max_groups = max(len(g) for g in groups)
25
-
26
- # 使用int32能表示的缺失值标记,避免np.nan转换为int32时的警告
27
- cdef cnp.ndarray[int32_t, ndim=2] result = np.full((n, max_groups), -999, dtype=np.int32)
28
-
29
- for i in range(n):
30
- g = groups[i]
31
- result[i, :len(g)] = g
32
-
33
- return result
@@ -1,34 +0,0 @@ sequenzo/dissimilarity_measures/utils/seqdur.pyx
1
- import numpy as np
2
- cimport numpy as cnp
3
- from sequenzo.define_sequence_data import SequenceData
4
- from libc.stdint cimport int32_t
5
-
6
- def seqdur(seqdata):
7
- if not isinstance(seqdata, SequenceData):
8
- raise ValueError("data is not a sequence object, see SequenceData to create one")
9
-
10
- cdef cnp.ndarray[int32_t, ndim=2] seqdatanum = seqdata.values.copy().astype(np.int32, copy=False)
11
-
12
- cdef int n = seqdatanum.shape[0], m = seqdatanum.shape[1]
13
- cdef int i, j
14
-
15
- cdef cnp.ndarray[int32_t, ndim=2] ffill = seqdatanum.copy()
16
-
17
- for j in range(1, m):
18
- ffill[:, j] = np.where(seqdatanum[:, j] < 0, ffill[:, j - 1], seqdatanum[:, j])
19
-
20
- cdef cnp.ndarray[char, ndim=2] boundaries = np.concatenate(
21
- [np.ones((n, 1), dtype=bool), ffill[:, 1:] != ffill[:, :-1]], axis=1
22
- )
23
-
24
- cdef cnp.ndarray[int32_t, ndim=2] group_ids = np.cumsum(boundaries, axis=1).astype(np.int32, copy=False)
25
- cdef cnp.ndarray[char, ndim=2] valid = seqdatanum >= 0
26
-
27
- cdef cnp.ndarray[int32_t, ndim=2] group_durations = np.zeros((n, m), dtype=np.int32)
28
-
29
- for i in range(n):
30
- counts = np.bincount(group_ids[i][valid[i]])
31
- counts = counts[1:] if counts.size > 0 else np.array([], dtype=int)
32
- group_durations[i, :len(counts)] = counts
33
-
34
- return group_durations
@@ -1,19 +0,0 @@ sequenzo/dissimilarity_measures/utils/seqlength.pyx
1
- import pandas as pd
2
- import numpy as np
3
- cimport numpy as cnp
4
- from sequenzo.define_sequence_data import SequenceData
5
- from libc.stdint cimport int32_t
6
-
7
- def seqlength(seqdata):
8
- if isinstance(seqdata, SequenceData):
9
- seqdata = seqdata.seqdata.replace(np.nan, -99)
10
-
11
- cdef cnp.ndarray[int32_t, ndim=2] seqarray_long
12
-
13
- if isinstance(seqdata, pd.DataFrame):
14
- seqarray_long = seqdata.to_numpy(dtype=np.int32)
15
- return np.sum(seqarray_long > 0, axis=1)
16
-
17
- else:
18
- seqarray_long = seqdata
19
- return np.sum(seqarray_long > 0, axis=1)