sequenzo 0.1.20__cp39-cp39-win_amd64.whl → 0.1.22__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sequenzo might be problematic. Click here for more details.
- sequenzo/big_data/clara/utils/get_weighted_diss.c +195 -195
- sequenzo/big_data/clara/utils/get_weighted_diss.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/clustering_c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/clustering/hierarchical_clustering.py +1 -1
- sequenzo/define_sequence_data.py +4 -4
- sequenzo/dissimilarity_measures/c_code.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.c +172 -173
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqconc.c +234 -234
- sequenzo/dissimilarity_measures/utils/seqconc.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdss.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdss.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqdur.c +327 -327
- sequenzo/dissimilarity_measures/utils/seqdur.cp39-win_amd64.pyd +0 -0
- sequenzo/dissimilarity_measures/utils/seqlength.c +227 -226
- sequenzo/dissimilarity_measures/utils/seqlength.cp39-win_amd64.pyd +0 -0
- sequenzo/multidomain/association_between_domains.py +1 -1
- sequenzo/multidomain/combt.py +4 -4
- sequenzo/multidomain/linked_polyad.py +3 -3
- sequenzo/prefix_tree/__init__.py +1 -1
- sequenzo/prefix_tree/individual_level_indicators.py +2 -2
- sequenzo/sequence_characteristics/overall_cross_sectional_entropy.py +2 -2
- sequenzo/sequence_characteristics/plot_characteristics.py +2 -2
- sequenzo/sequence_characteristics/simple_characteristics.py +2 -2
- sequenzo/suffix_tree/__init__.py +1 -1
- sequenzo/suffix_tree/individual_level_indicators.py +3 -3
- sequenzo/visualization/plot_single_medoid.py +2 -2
- sequenzo/visualization/plot_transition_matrix.py +3 -2
- sequenzo/visualization/utils/utils.py +2 -2
- sequenzo/with_event_history_analysis/sequence_analysis_multi_state_model.py +1 -1
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/METADATA +17 -43
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/RECORD +35 -41
- sequenzo/big_data/clara/utils/get_weighted_diss.pyx +0 -16
- sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.pyx +0 -95
- sequenzo/dissimilarity_measures/utils/seqconc.pyx +0 -26
- sequenzo/dissimilarity_measures/utils/seqdss.pyx +0 -33
- sequenzo/dissimilarity_measures/utils/seqdur.pyx +0 -34
- sequenzo/dissimilarity_measures/utils/seqlength.pyx +0 -19
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/WHEEL +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/licenses/LICENSE +0 -0
- {sequenzo-0.1.20.dist-info → sequenzo-0.1.22.dist-info}/top_level.txt +0 -0
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
cimport numpy as cnp
|
|
3
|
-
from libc.stdint cimport int32_t
|
|
4
|
-
from libc.string cimport strcmp
|
|
5
|
-
|
|
6
|
-
cdef str sconc_np(cnp.ndarray[int32_t, ndim=1] seqdata, str sep):
    """Join the non-negative entries of one sequence into a single string.

    Parameters
    ----------
    seqdata : 1-D int32 array
        One sequence. Negative entries are skipped.
    sep : str
        Separator placed between consecutive kept entries.

    Returns
    -------
    str
        The kept entries joined by ``sep``; the empty string when no entry
        is non-negative.
    """
    cdef Py_ssize_t i, size = seqdata.shape[0]
    cdef list valid_values = []

    for i in range(size):
        if seqdata[i] >= 0:
            valid_values.append(str(seqdata[i]))

    # str.join already returns "" for an empty list, so no special case and
    # no encode/decode round trip through bytes is needed.
    return sep.join(valid_values)


def seqconc(cnp.ndarray data, str sep="-"):
    """Concatenate the states of one sequence or of every row of a matrix.

    The argument is deliberately typed as a plain ``cnp.ndarray`` instead of
    a 2-D buffer: a ``ndim=2`` buffer declaration rejects 1-D input before
    the function body runs, which made the 1-D branch and the error branch
    below unreachable.

    Parameters
    ----------
    data : numpy.ndarray
        1-D or 2-D array of int32 values. The dtype is checked when the data
        is handed to ``sconc_np``.
    sep : str, default "-"
        Separator placed between consecutive states.

    Returns
    -------
    str or numpy.ndarray
        A single string for 1-D input; for 2-D input, an array holding one
        joined string per row.

    Raises
    ------
    ValueError
        If ``data`` is neither 1-D nor 2-D.
    """
    if data.ndim == 1:
        return sconc_np(data, sep)

    if data.ndim == 2:
        return np.array([sconc_np(row, sep) for row in data])

    raise ValueError("Only 1D and 2D arrays are supported.")
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
cimport numpy as cnp
|
|
3
|
-
from sequenzo.define_sequence_data import SequenceData
|
|
4
|
-
from libc.stdint cimport int32_t
|
|
5
|
-
|
|
6
|
-
def seqdss(seqdata):
    """Extract the distinct successive states of every sequence.

    Negative entries are treated as missing: from the second column onwards
    they are replaced by the value to their left, so they never start a new
    spell. A value is kept when it starts a spell (first column, or differs
    from its left neighbour after filling) and is non-negative.

    Parameters
    ----------
    seqdata : SequenceData
        Sequence object whose ``values`` attribute holds the 2-D state matrix.

    Returns
    -------
    numpy.ndarray
        int32 array with one row per sequence and as many columns as the
        longest list of kept states; shorter rows are padded with -999.
        With zero sequences the result has shape (0, 0).

    Raises
    ------
    ValueError
        If ``seqdata`` is not a ``SequenceData`` instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise ValueError("[!] data is NOT a sequence object, see SequenceData to create one.")

    # astype() with its default copy=True always returns a fresh array, so the
    # in-place forward fill below can never write into the caller's data.
    cdef cnp.ndarray[int32_t, ndim=2] ffill = seqdata.values.astype(np.int32)
    cdef int n = ffill.shape[0], m = ffill.shape[1]
    cdef int i, j
    cdef int max_groups = 0

    # Forward-fill missing (negative) entries column by column.
    for j in range(1, m):
        mask = (ffill[:, j] < 0)
        ffill[mask, j] = ffill[mask, j - 1]

    # The first column always starts a spell; later columns start one when
    # the filled value differs from the one to its left.
    boundaries = np.concatenate(
        [np.ones((n, 1), dtype=bool), ffill[:, 1:] != ffill[:, :-1]], axis=1
    )

    cdef list groups = [row[boundary & (row >= 0)] for row, boundary in zip(ffill, boundaries)]

    # Explicit loop instead of max(): an empty list of sequences yields 0
    # rather than raising ValueError.
    for g in groups:
        if len(g) > max_groups:
            max_groups = len(g)

    # Use a missing-value marker that int32 can represent, which avoids the
    # warning raised when np.nan is converted to int32.
    cdef cnp.ndarray[int32_t, ndim=2] result = np.full((n, max_groups), -999, dtype=np.int32)

    for i in range(n):
        g = groups[i]
        result[i, :len(g)] = g

    return result
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
cimport numpy as cnp
|
|
3
|
-
from sequenzo.define_sequence_data import SequenceData
|
|
4
|
-
from libc.stdint cimport int32_t
|
|
5
|
-
|
|
6
|
-
def seqdur(seqdata):
    """Count the observed positions in each spell of every sequence.

    Negative entries are treated as missing. From the second column onwards
    they are forward-filled from the left so they do not open a new spell,
    but they are not counted towards the spell's duration.

    NOTE(review): a row that starts with negative entries gets a first spell
    with no observed positions, reported as a duration of 0 -- confirm that
    callers expect this.

    Parameters
    ----------
    seqdata : SequenceData
        Sequence object whose ``values`` attribute holds the 2-D state matrix.

    Returns
    -------
    numpy.ndarray
        int32 array with the same shape as the input matrix; row ``i`` holds
        the spell durations of sequence ``i`` followed by zeros.

    Raises
    ------
    ValueError
        If ``seqdata`` is not a ``SequenceData`` instance.
    """
    if not isinstance(seqdata, SequenceData):
        raise ValueError("data is not a sequence object, see SequenceData to create one")

    cdef cnp.ndarray[int32_t, ndim=2] observed = seqdata.values.copy().astype(np.int32, copy=False)
    cdef int n_rows = observed.shape[0], n_cols = observed.shape[1]
    cdef int row, col

    # Work on a separate copy so `observed` keeps the original missing marks.
    cdef cnp.ndarray[int32_t, ndim=2] filled = observed.copy()
    for col in range(1, n_cols):
        gaps = filled[:, col] < 0
        filled[gaps, col] = filled[gaps, col - 1]

    # A spell starts in the first column and wherever the filled value changes.
    first_column = np.ones((n_rows, 1), dtype=bool)
    changes = filled[:, 1:] != filled[:, :-1]
    cdef cnp.ndarray[char, ndim=2] starts = np.concatenate([first_column, changes], axis=1)

    # Running count of spell starts gives every position a 1-based spell id.
    cdef cnp.ndarray[int32_t, ndim=2] spell_ids = np.cumsum(starts, axis=1).astype(np.int32, copy=False)
    cdef cnp.ndarray[char, ndim=2] observed_mask = observed >= 0
    cdef cnp.ndarray[int32_t, ndim=2] durations = np.zeros((n_rows, n_cols), dtype=np.int32)

    for row in range(n_rows):
        per_spell = np.bincount(spell_ids[row][observed_mask[row]])
        if per_spell.size > 0:
            # Ids start at 1, so bin 0 is always empty and is dropped.
            per_spell = per_spell[1:]
        else:
            per_spell = np.array([], dtype=int)
        durations[row, :len(per_spell)] = per_spell

    return durations
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import numpy as np
|
|
3
|
-
cimport numpy as cnp
|
|
4
|
-
from sequenzo.define_sequence_data import SequenceData
|
|
5
|
-
from libc.stdint cimport int32_t
|
|
6
|
-
|
|
7
|
-
def seqlength(seqdata):
    """Count, for every sequence, the entries that are strictly positive.

    Parameters
    ----------
    seqdata : SequenceData, pandas.DataFrame or 2-D int32 numpy.ndarray
        For a ``SequenceData`` object the ``seqdata`` attribute is used, with
        NaN replaced by -99 so that those cells are not counted. A DataFrame
        is converted to an int32 array; anything else is used as given and
        must already be a 2-D int32 array.

    Returns
    -------
    numpy.ndarray
        One count per row.
    """
    cdef cnp.ndarray[int32_t, ndim=2] states

    if isinstance(seqdata, SequenceData):
        seqdata = seqdata.seqdata.replace(np.nan, -99)

    if isinstance(seqdata, pd.DataFrame):
        states = seqdata.to_numpy(dtype=np.int32)
    else:
        states = seqdata

    return np.sum(states > 0, axis=1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|