sequenzo 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sequenzo-0.1.0/.gitattributes +69 -0
- sequenzo-0.1.0/.github/workflows/python-app.yml +212 -0
- sequenzo-0.1.0/.gitignore +37 -0
- sequenzo-0.1.0/LICENSE +28 -0
- sequenzo-0.1.0/PKG-INFO +141 -0
- sequenzo-0.1.0/README.md +81 -0
- sequenzo-0.1.0/Tutorials/01_quickstart.ipynb +830 -0
- sequenzo-0.1.0/Tutorials/test.ipynb +3911 -0
- sequenzo-0.1.0/__init__.py +49 -0
- sequenzo-0.1.0/original_datasets_and_cleaning/country_co2_emissions_missing.csv +195 -0
- sequenzo-0.1.0/original_datasets_and_cleaning/country_co2_gdp_gapminder_data.ipynb +353 -0
- sequenzo-0.1.0/original_datasets_and_cleaning/data_sources/gapminder/Output_CO2 Long Series 1800 - 2022 - Output.csv +43263 -0
- sequenzo-0.1.0/original_datasets_and_cleaning/data_sources/gapminder/co2_pcap_cons.csv +195 -0
- sequenzo-0.1.0/original_datasets_and_cleaning/examples_output_display_for_documents.ipynb +4223 -0
- sequenzo-0.1.0/pyproject.toml +68 -0
- sequenzo-0.1.0/requirements-3.10.txt +10 -0
- sequenzo-0.1.0/requirements-3.11.txt +10 -0
- sequenzo-0.1.0/requirements-3.9.txt +10 -0
- sequenzo-0.1.0/requirements-dev.txt +7 -0
- sequenzo-0.1.0/sequenzo/__init__.py +36 -0
- sequenzo-0.1.0/sequenzo/big_data/__init__.py +6 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/__init__.py +22 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/clara.py +412 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/utils/__init__.py +26 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/utils/k_medoids_once.py +77 -0
- sequenzo-0.1.0/sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
- sequenzo-0.1.0/sequenzo/clustering/__init__.py +28 -0
- sequenzo-0.1.0/sequenzo/clustering/hierarchical_clustering.py +596 -0
- sequenzo-0.1.0/sequenzo/clustering/utils/__init__.py +27 -0
- sequenzo-0.1.0/sequenzo/clustering/utils/disscenter.py +118 -0
- sequenzo-0.1.0/sequenzo/datasets/__init__.py +41 -0
- sequenzo-0.1.0/sequenzo/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- sequenzo-0.1.0/sequenzo/datasets/chinese_colonial_territories.csv +12 -0
- sequenzo-0.1.0/sequenzo/datasets/country_co2_emissions.csv +195 -0
- sequenzo-0.1.0/sequenzo/datasets/country_gdp_per_capita.csv +194 -0
- sequenzo-0.1.0/sequenzo/define_sequence_data.py +248 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/__init__.py +29 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/lib.win-amd64-cpython-38/example.cp38-win_amd64.pyd +0 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/example.cp38-win_amd64.exp +0 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/example.cp38-win_amd64.lib +0 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/module.obj +0 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/get_distance_matrix.py +582 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +179 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/DHDdistance.cpp +141 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/OMdistance.cpp +214 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +244 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/PAMonce.cpp +166 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/dist2matrix.cpp +54 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/module.cpp +38 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/weightedinertia.cpp +72 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/__init__.py +15 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.py +134 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqconc.py +42 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqdss.py +57 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqdur.py +70 -0
- sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqlength.py +28 -0
- sequenzo-0.1.0/sequenzo/visualization/__init__.py +39 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_mean_time.py +142 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_modal_state.py +215 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_most_frequent_sequences.py +104 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_relative_frequency.py +331 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_sequence_index.py +256 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_single_medoid.py +124 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_state_distribution.py +340 -0
- sequenzo-0.1.0/sequenzo/visualization/plot_transition_matrix.py +171 -0
- sequenzo-0.1.0/sequenzo/visualization/utils/__init__.py +17 -0
- sequenzo-0.1.0/sequenzo/visualization/utils/utils.py +182 -0
- sequenzo-0.1.0/sequenzo.egg-info/PKG-INFO +141 -0
- sequenzo-0.1.0/sequenzo.egg-info/SOURCES.txt +78 -0
- sequenzo-0.1.0/sequenzo.egg-info/dependency_links.txt +1 -0
- sequenzo-0.1.0/sequenzo.egg-info/requires.txt +15 -0
- sequenzo-0.1.0/sequenzo.egg-info/top_level.txt +1 -0
- sequenzo-0.1.0/setup.cfg +4 -0
- sequenzo-0.1.0/setup.py +61 -0
- sequenzo-0.1.0/tests/__init__.py +8 -0
- sequenzo-0.1.0/tests/test_basic.py +11 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# Default behavior: all files use LF line endings and UTF-8 encoding
|
|
2
|
+
* text=auto eol=lf encoding=utf-8
|
|
3
|
+
|
|
4
|
+
# Source code files
|
|
5
|
+
*.py text diff=python encoding=utf-8
|
|
6
|
+
*.c text diff=c encoding=utf-8
|
|
7
|
+
*.cpp text diff=cpp encoding=utf-8
|
|
8
|
+
*.h text diff=c encoding=utf-8
|
|
9
|
+
*.hpp text diff=cpp encoding=utf-8
|
|
10
|
+
*.pyx text diff=python encoding=utf-8
|
|
11
|
+
*.pxd text diff=python encoding=utf-8
|
|
12
|
+
|
|
13
|
+
# Configuration files
|
|
14
|
+
*.toml text encoding=utf-8
|
|
15
|
+
*.ini text encoding=utf-8
|
|
16
|
+
*.yaml text encoding=utf-8
|
|
17
|
+
*.yml text encoding=utf-8
|
|
18
|
+
*.json text encoding=utf-8
|
|
19
|
+
*.cfg text encoding=utf-8
|
|
20
|
+
*.conf text encoding=utf-8
|
|
21
|
+
requirements*.txt text encoding=utf-8
|
|
22
|
+
pyproject.toml text encoding=utf-8
|
|
23
|
+
setup.py text encoding=utf-8
|
|
24
|
+
setup.cfg text encoding=utf-8
|
|
25
|
+
|
|
26
|
+
# Documentation files
|
|
27
|
+
*.txt text encoding=utf-8
|
|
28
|
+
*.md text encoding=utf-8
|
|
29
|
+
*.rst text encoding=utf-8
|
|
30
|
+
LICENSE text encoding=utf-8
|
|
31
|
+
README* text encoding=utf-8
|
|
32
|
+
CHANGELOG* text encoding=utf-8
|
|
33
|
+
|
|
34
|
+
# Windows script files keep CRLF line endings
|
|
35
|
+
*.bat text eol=crlf encoding=utf-8
|
|
36
|
+
*.cmd text eol=crlf encoding=utf-8
|
|
37
|
+
*.ps1 text eol=crlf encoding=utf-8
|
|
38
|
+
|
|
39
|
+
# Special file handling
|
|
40
|
+
Makefile text eol=lf encoding=utf-8
|
|
41
|
+
makefile text eol=lf encoding=utf-8
|
|
42
|
+
|
|
43
|
+
# Data files (treated as binary; no conversion is applied)
|
|
44
|
+
*.db binary
|
|
45
|
+
*.p binary
|
|
46
|
+
*.pkl binary
|
|
47
|
+
*.pickle binary
|
|
48
|
+
*.pyc binary
|
|
49
|
+
*.pyd binary
|
|
50
|
+
*.pyo binary
|
|
51
|
+
|
|
52
|
+
# Image files (binary, except SVG which is text)
|
|
53
|
+
*.png binary
|
|
54
|
+
*.jpg binary
|
|
55
|
+
*.jpeg binary
|
|
56
|
+
*.gif binary
|
|
57
|
+
*.ico binary
|
|
58
|
+
*.svg text encoding=utf-8
|
|
59
|
+
|
|
60
|
+
# Spreadsheet files (CSV/TSV are treated as text with explicit encoding; Excel workbooks are binary)
|
|
61
|
+
*.csv text encoding=utf-8
|
|
62
|
+
*.tsv text encoding=utf-8
|
|
63
|
+
*.xls binary
|
|
64
|
+
*.xlsx binary
|
|
65
|
+
|
|
66
|
+
# Compressed archives (binary)
|
|
67
|
+
*.gz binary
|
|
68
|
+
*.zip binary
|
|
69
|
+
*.7z binary
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
# GitHub Actions workflow for Sequenzo package
|
|
2
|
+
name: Sequenzo Package CI
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
push:
|
|
6
|
+
branches: [ "main" ]
|
|
7
|
+
pull_request:
|
|
8
|
+
branches: [ "main" ]
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
build:
|
|
15
|
+
runs-on: ${{ matrix.os }}
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
20
|
+
python-version: ['3.9', '3.10', '3.11']
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- name: Checkout repository
|
|
24
|
+
uses: actions/checkout@v4
|
|
25
|
+
|
|
26
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
27
|
+
uses: actions/setup-python@v4
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
|
|
31
|
+
# Clear the `pip` cache to prevent conflicts
|
|
32
|
+
- name: Clear pip cache
|
|
33
|
+
run: python -m pip cache purge
|
|
34
|
+
|
|
35
|
+
# macOS: Handle dependency installation, using different strategies for different Python versions
|
|
36
|
+
- name: Install dependencies (macOS)
|
|
37
|
+
if: runner.os == 'macOS'
|
|
38
|
+
run: |
|
|
39
|
+
python -m pip install --upgrade pip
|
|
40
|
+
pip install Cython==0.29.36
|
|
41
|
+
|
|
42
|
+
if [ "${{ matrix.python-version }}" == "3.11" ]; then
|
|
43
|
+
pip install "setuptools>=60.0.0"
|
|
44
|
+
else
|
|
45
|
+
pip install "setuptools==58.1.0"
|
|
46
|
+
fi
|
|
47
|
+
|
|
48
|
+
if [ "${{ matrix.python-version }}" == "3.11" ]; then
|
|
49
|
+
pip install "numpy>=1.23.2,<2.0" --only-binary numpy
|
|
50
|
+
elif [ "${{ matrix.python-version }}" == "3.10" ]; then
|
|
51
|
+
pip install "numpy>=1.22.4,<2.0" --only-binary numpy
|
|
52
|
+
else
|
|
53
|
+
pip install "numpy==1.22.4" --only-binary numpy || pip install "numpy>=1.21.0,<2.0" --only-binary numpy
|
|
54
|
+
fi
|
|
55
|
+
|
|
56
|
+
pip install pybind11
|
|
57
|
+
|
|
58
|
+
if [ "${{ matrix.python-version }}" == "3.11" ]; then
|
|
59
|
+
pip install "scipy>=1.8.0"
|
|
60
|
+
else
|
|
61
|
+
pip install "scipy==1.7.3" || pip install "scipy>=1.7.0"
|
|
62
|
+
fi
|
|
63
|
+
|
|
64
|
+
python -m pip install --upgrade wheel
|
|
65
|
+
pip install fastcluster==1.2.6 || pip install fastcluster
|
|
66
|
+
|
|
67
|
+
if [ -f "requirements-${{ matrix.python-version }}.txt" ]; then
|
|
68
|
+
pip install -r requirements-${{ matrix.python-version }}.txt --no-build-isolation || echo "⚠️ Some requirements may not be installed"
|
|
69
|
+
else
|
|
70
|
+
pip install -r requirements-3.9.txt --no-build-isolation || echo "⚠️ Some requirements may not be installed"
|
|
71
|
+
echo "⚠️ Using requirements-3.9.txt as fallback"
|
|
72
|
+
fi
|
|
73
|
+
|
|
74
|
+
# macOS: Ensure that Xcode CLI tools are installed – required for all Python versions
|
|
75
|
+
- name: Install Xcode Command Line Tools
|
|
76
|
+
if: runner.os == 'macOS'
|
|
77
|
+
run: |
|
|
78
|
+
xcode-select --install || echo "Xcode CLI already installed"
|
|
79
|
+
|
|
80
|
+
# Windows: Handle numpy & fastcluster version issues, install the complete MSVC toolchain
|
|
81
|
+
- name: Install dependencies (Windows)
|
|
82
|
+
if: runner.os == 'Windows'
|
|
83
|
+
shell: pwsh # Run this step explicitly in PowerShell (pwsh)
|
|
84
|
+
run: |
|
|
85
|
+
choco install visualstudio2022buildtools --version=17.0.0 -y
|
|
86
|
+
choco install visualstudio2022-workload-vctools -y
|
|
87
|
+
|
|
88
|
+
python -m pip install --upgrade pip
|
|
89
|
+
|
|
90
|
+
if ("${{ matrix.python-version }}" -eq "3.11") {
|
|
91
|
+
pip install "numpy>=1.23.2,<2.0"
|
|
92
|
+
} else {
|
|
93
|
+
pip install numpy==1.22.4 --only-binary numpy
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
pip install wheel setuptools cmake Cython
|
|
97
|
+
|
|
98
|
+
pip uninstall -y pybind11
|
|
99
|
+
pip install pybind11==2.10.4
|
|
100
|
+
|
|
101
|
+
pip install --force-reinstall fastcluster==1.2.6
|
|
102
|
+
|
|
103
|
+
if (Test-Path "requirements-${{ matrix.python-version }}.txt") {
|
|
104
|
+
pip install -r requirements-${{ matrix.python-version }}.txt --use-deprecated=legacy-resolver
|
|
105
|
+
} else {
|
|
106
|
+
pip install -r requirements-3.9.txt --use-deprecated=legacy-resolver
|
|
107
|
+
Write-Host "⚠️ Using requirements-3.9.txt as fallback"
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# Linux: Handle fastcluster version issues, ensure numpy is installed first
|
|
111
|
+
- name: Install dependencies (Linux)
|
|
112
|
+
if: runner.os == 'Linux'
|
|
113
|
+
run: |
|
|
114
|
+
python -m pip install --upgrade pip
|
|
115
|
+
pip install wheel setuptools
|
|
116
|
+
|
|
117
|
+
sudo apt-get update
|
|
118
|
+
sudo apt-get install -y build-essential
|
|
119
|
+
|
|
120
|
+
if [ "${{ matrix.python-version }}" == "3.11" ]; then
|
|
121
|
+
pip install "numpy>=1.23.2,<2.0" --only-binary numpy
|
|
122
|
+
else
|
|
123
|
+
pip install numpy==1.22.4 --only-binary numpy
|
|
124
|
+
fi
|
|
125
|
+
|
|
126
|
+
pip install pybind11
|
|
127
|
+
pip install fastcluster==1.2.6
|
|
128
|
+
|
|
129
|
+
if [ -f "requirements-${{ matrix.python-version }}.txt" ]; then
|
|
130
|
+
pip install -r requirements-${{ matrix.python-version }}.txt --no-build-isolation
|
|
131
|
+
else
|
|
132
|
+
pip install -r requirements-3.9.txt --no-build-isolation
|
|
133
|
+
echo "⚠️ Using requirements-3.9.txt as fallback"
|
|
134
|
+
fi
|
|
135
|
+
|
|
136
|
+
# List the workspace and the `sequenzo` package directory to confirm the expected layout
|
|
137
|
+
- name: Verify package path
|
|
138
|
+
shell: bash
|
|
139
|
+
run: |
|
|
140
|
+
echo "📂 Current directory:"
|
|
141
|
+
ls -lah
|
|
142
|
+
echo "📂 Sequenzo package directory:"
|
|
143
|
+
ls -lah sequenzo || echo "⚠️ Directory 'sequenzo' not found!"
|
|
144
|
+
|
|
145
|
+
# Windows: Fix Unicode encoding issues
|
|
146
|
+
- name: Set Windows Encoding
|
|
147
|
+
if: runner.os == 'Windows'
|
|
148
|
+
run: chcp 65001
|
|
149
|
+
|
|
150
|
+
- name: Setup Windows C++ compilation
|
|
151
|
+
if: runner.os == 'Windows'
|
|
152
|
+
shell: pwsh
|
|
153
|
+
run: |
|
|
154
|
+
choco install visualstudio2019buildtools -y
|
|
155
|
+
choco install visualstudio2019-workload-vctools -y
|
|
156
|
+
|
|
157
|
+
& "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
|
|
158
|
+
|
|
159
|
+
$Env:CL = "/EHsc /MD /bigobj /Ox"
|
|
160
|
+
$Env:DISTUTILS_USE_SDK = "1"
|
|
161
|
+
$Env:MSSdk = "1"
|
|
162
|
+
|
|
163
|
+
# Consolidated installation steps for all platforms
|
|
164
|
+
- name: Install local package
|
|
165
|
+
if: runner.os != 'Windows'
|
|
166
|
+
run: |
|
|
167
|
+
pip install --no-cache-dir -e . --no-build-isolation
|
|
168
|
+
env:
|
|
169
|
+
PYTHONUTF8: 1
|
|
170
|
+
PYTHONPATH: ${{ github.workspace }}
|
|
171
|
+
|
|
172
|
+
- name: Install local package (Windows)
|
|
173
|
+
if: runner.os == 'Windows'
|
|
174
|
+
shell: pwsh
|
|
175
|
+
run: |
|
|
176
|
+
$editable_install = $false
|
|
177
|
+
try {
|
|
178
|
+
pip install --no-cache-dir -e . --no-build-isolation --verbose
|
|
179
|
+
$editable_install = $true
|
|
180
|
+
} catch {
|
|
181
|
+
Write-Host "ERROR DETAILS: $_"
|
|
182
|
+
Write-Host "Editable install failed, trying regular install..."
|
|
183
|
+
pip install --no-cache-dir . --no-build-isolation --verbose
|
|
184
|
+
}
|
|
185
|
+
env:
|
|
186
|
+
PYTHONUTF8: 1
|
|
187
|
+
PYTHONPATH: ${{ github.workspace }}
|
|
188
|
+
DISTUTILS_USE_SDK: 1
|
|
189
|
+
MSSdk: 1
|
|
190
|
+
|
|
191
|
+
# Display installed packages
|
|
192
|
+
- name: Show installed packages
|
|
193
|
+
run: pip list
|
|
194
|
+
|
|
195
|
+
# Test whether the module is installed correctly
|
|
196
|
+
- name: Test imports
|
|
197
|
+
run: |
|
|
198
|
+
python -c "import sequenzo.dissimilarity_measures; print('Dissimilarity measures module imported successfully')"
|
|
199
|
+
python -c "import sequenzo.clustering; print('Clustering module imported successfully')"
|
|
200
|
+
env:
|
|
201
|
+
PYTHONPATH: ${{ github.workspace }}
|
|
202
|
+
|
|
203
|
+
# Build the wheel
|
|
204
|
+
- name: Build wheel
|
|
205
|
+
run: python setup.py bdist_wheel
|
|
206
|
+
|
|
207
|
+
# Upload build artifacts
|
|
208
|
+
- name: Upload wheel
|
|
209
|
+
uses: actions/upload-artifact@v4
|
|
210
|
+
with:
|
|
211
|
+
name: wheels-${{ matrix.os }}-${{ matrix.python-version }}
|
|
212
|
+
path: dist/*.whl
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
.idea
|
|
2
|
+
.vscode
|
|
3
|
+
.DS_Store
|
|
4
|
+
|
|
5
|
+
# Python build artifacts
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
# Packaging metadata generated by setuptools
|
|
9
|
+
*.egg-info/
|
|
10
|
+
|
|
11
|
+
# Ignore compiled Python files
|
|
12
|
+
**/__pycache__/
|
|
13
|
+
# Ignore optimized bytecode files (.pyo)
|
|
14
|
+
**/*.pyo
|
|
15
|
+
# Ignore all compiled bytecode files (.pyc)
|
|
16
|
+
**/*.pyc
|
|
17
|
+
|
|
18
|
+
# Intermediate files or system-specific binary files generated during compilation
|
|
19
|
+
|
|
20
|
+
# Shared object (.so) files are Python extension modules on Linux/macOS
|
|
21
|
+
**/*.so
|
|
22
|
+
|
|
23
|
+
# Python dynamic library (.pyd) files are Python extension modules on Windows,
|
|
24
|
+
# similar to Windows DLL files but specifically for Python
|
|
25
|
+
**/*.pyd
|
|
26
|
+
|
|
27
|
+
# Windows dynamic link library (.dll)
|
|
28
|
+
**/*.dll
|
|
29
|
+
|
|
30
|
+
# Intermediate object files, which are later linked to generate the final .so shared library.
|
|
31
|
+
# These files are usually intermediate products of the compilation process and are not directly used by Python.
|
|
32
|
+
**/*.o
|
|
33
|
+
|
|
34
|
+
# Static library files on Linux/macOS, used for statically linking C/C++ code
|
|
35
|
+
**/*.a
|
|
36
|
+
|
|
37
|
+
raw_requirements.txt
|
sequenzo-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, Yuqi Liang
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
sequenzo-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: sequenzo
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A fast, scalable and intuitive Python package for social sequence analysis.
|
|
5
|
+
Author-email: Yuqi Liang <yuqi.liang.1900@gmail.com>, Xinyi Li <1836724126@qq.com>, Jan Heinrich Ernst Meyerhoff-Liang <jan.meyerhoff1@gmail.com>
|
|
6
|
+
License: BSD 3-Clause License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025, Yuqi Liang
|
|
9
|
+
|
|
10
|
+
Redistribution and use in source and binary forms, with or without
|
|
11
|
+
modification, are permitted provided that the following conditions are met:
|
|
12
|
+
|
|
13
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
14
|
+
list of conditions and the following disclaimer.
|
|
15
|
+
|
|
16
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
17
|
+
this list of conditions and the following disclaimer in the documentation
|
|
18
|
+
and/or other materials provided with the distribution.
|
|
19
|
+
|
|
20
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
21
|
+
contributors may be used to endorse or promote products derived from
|
|
22
|
+
this software without specific prior written permission.
|
|
23
|
+
|
|
24
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
25
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
26
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
27
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
28
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
29
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
30
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
31
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
32
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
33
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
34
|
+
|
|
35
|
+
Project-URL: Homepage, https://github.com/Liang-Team/Sequenzo
|
|
36
|
+
Project-URL: Documentation, https://sequenzo.yuqi-liang.tech
|
|
37
|
+
Classifier: Development Status :: 3 - Alpha
|
|
38
|
+
Classifier: Intended Audience :: Developers
|
|
39
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
43
|
+
Requires-Python: <3.12,>=3.9
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
License-File: LICENSE
|
|
46
|
+
Requires-Dist: numpy<2.0,>=1.19.5
|
|
47
|
+
Requires-Dist: pandas>=1.2.5
|
|
48
|
+
Requires-Dist: matplotlib>=3.4.3
|
|
49
|
+
Requires-Dist: seaborn>=0.11.2
|
|
50
|
+
Requires-Dist: Pillow>=8.3.2
|
|
51
|
+
Requires-Dist: pybind11>=2.6.0
|
|
52
|
+
Requires-Dist: scipy>=1.6.3
|
|
53
|
+
Requires-Dist: scikit-learn>=0.24.2
|
|
54
|
+
Requires-Dist: fastcluster>=1.2.6
|
|
55
|
+
Requires-Dist: joblib>=1.0.1
|
|
56
|
+
Provides-Extra: dev
|
|
57
|
+
Requires-Dist: pytest>=6.2.5; extra == "dev"
|
|
58
|
+
Requires-Dist: flake8>=3.9.2; extra == "dev"
|
|
59
|
+
Requires-Dist: memory-profiler>=0.58.0; extra == "dev"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Sequenzo: Fast, scalable, and intuitive social sequence analysis
|
|
63
|
+
|
|
64
|
+
Sequenzo is a high-performance Python package designed for social sequence analysis. It is built to analyze **any sequence of categorical events**, from individual career paths and migration patterns to corporate growth and urban development.
|
|
65
|
+
Whether you are working with **people, places, or policies**, Sequenzo helps uncover meaningful patterns efficiently.
|
|
66
|
+
|
|
67
|
+
Sequenzo outperforms traditional R-based tools in social sequence analysis, delivering faster processing and superior efficiency, especially for large-scale datasets. **No big data? No problem. You don’t need big data to benefit as Sequenzo is designed to enhance sequence analysis at any scale, making complex methods accessible to everyone.**
|
|
68
|
+
|
|
69
|
+
## Why Choose Sequenzo?
|
|
70
|
+
|
|
71
|
+
🚀 **High Performance**
|
|
72
|
+
|
|
73
|
+
Leverages Python’s computational power to achieve 10× faster processing than traditional R-based tools like TraMineR.
|
|
74
|
+
|
|
75
|
+
🎯 **Easy-to-Use API**
|
|
76
|
+
|
|
77
|
+
Designed with simplicity in mind: intuitive functions streamline complex sequence analysis without compromising flexibility.
|
|
78
|
+
|
|
79
|
+
🌍 **Flexible for Any Scenario**
|
|
80
|
+
|
|
81
|
+
Perfect for research, policy, and business, enabling seamless analysis of categorical data and its evolution over time.
|
|
82
|
+
|
|
83
|
+
## Documentation
|
|
84
|
+
|
|
85
|
+
Explore the full Sequenzo documentation [here](https://sequenzo.yuqi-liang.tech).
|
|
86
|
+
|
|
87
|
+
**Where to start on the documentation website?**
|
|
88
|
+
* New to Sequenzo or social sequence analysis? Begin with "About Sequenzo" → "Quickstart Guide" for a smooth introduction.
|
|
89
|
+
* Got your own data? After going through "About Sequenzo" and "Quickstart Guide", you are ready to dive in and start analyzing.
|
|
90
|
+
* Looking for more? Check out our example datasets and tutorials to deepen your understanding.
|
|
91
|
+
|
|
92
|
+
For Chinese users, additional tutorials are available on [Yuqi's video tutorials on Bilibili](https://space.bilibili.com/263594713/lists/4147974).
|
|
93
|
+
|
|
94
|
+
## Installation
|
|
95
|
+
|
|
96
|
+
The latest stable release and required dependencies can be installed from PyPI. You can type the following line in your terminal:
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
pip install sequenzo
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Join the Community
|
|
103
|
+
|
|
104
|
+
💬 **Have a question or found a bug?**
|
|
105
|
+
|
|
106
|
+
Please submit an issue on [GitHub Issues](https://github.com/Liang-Team/Sequenzo/issues).
|
|
107
|
+
|
|
108
|
+
For an effective bug report, please include the following:
|
|
109
|
+
* A reproducible code example that clearly demonstrates the issue.
|
|
110
|
+
* The output you’re seeing, such as an error message or an image of the plot.
|
|
111
|
+
* A brief explanation of why you believe this is a bug.
|
|
112
|
+
|
|
113
|
+
Providing these details will help us diagnose and resolve the issue more efficiently. We are always happy to help and will address it as soon as possible.
|
|
114
|
+
|
|
115
|
+
🌟 **Enjoying Sequenzo?**
|
|
116
|
+
|
|
117
|
+
Support the project by starring ⭐ the GitHub repo and spreading the word!
|
|
118
|
+
|
|
119
|
+
🛠 **Interested in contributing?**
|
|
120
|
+
|
|
121
|
+
Check out our contribution guide for more details.
|
|
122
|
+
|
|
123
|
+
* Write code? Submit a pull request to enhance Sequenzo.
|
|
124
|
+
* Testing? Try Sequenzo and share your feedback. Every suggestion counts!
|
|
125
|
+
|
|
126
|
+
## Team
|
|
127
|
+
|
|
128
|
+
**Authors**
|
|
129
|
+
* Yuqi Liang, University of Oxford
|
|
130
|
+
* Xinyi Li, Heilongjiang University
|
|
131
|
+
* Jan Heinrich Ernst Meyerhoff-Liang, Institute for New Economic Thinking Oxford
|
|
132
|
+
|
|
133
|
+
**Acknowledgements**
|
|
134
|
+
* Technical advisor in sequence analysis: Tim Liao, University of Illinois Urbana-Champaign
|
|
135
|
+
* Website and related technical support: Mactavish
|
|
136
|
+
* Logo and color design: Changyu Yi
|
|
137
|
+
* Sequence data sources compilation
|
|
138
|
+
* Economics: Jan Meyerhoff-Liang
|
|
139
|
+
* History: Jingrui Chen
|
|
140
|
+
* Public health: Yuelu Yin
|
|
141
|
+
* Testing
|
sequenzo-0.1.0/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
|
|
2
|
+
# Sequenzo: Fast, scalable, and intuitive social sequence analysis
|
|
3
|
+
|
|
4
|
+
Sequenzo is a high-performance Python package designed for social sequence analysis. It is built to analyze **any sequence of categorical events**, from individual career paths and migration patterns to corporate growth and urban development.
|
|
5
|
+
Whether you are working with **people, places, or policies**, Sequenzo helps uncover meaningful patterns efficiently.
|
|
6
|
+
|
|
7
|
+
Sequenzo outperforms traditional R-based tools in social sequence analysis, delivering faster processing and superior efficiency, especially for large-scale datasets. **No big data? No problem. You don’t need big data to benefit as Sequenzo is designed to enhance sequence analysis at any scale, making complex methods accessible to everyone.**
|
|
8
|
+
|
|
9
|
+
## Why Choose Sequenzo?
|
|
10
|
+
|
|
11
|
+
🚀 **High Performance**
|
|
12
|
+
|
|
13
|
+
Leverages Python’s computational power to achieve 10× faster processing than traditional R-based tools like TraMineR.
|
|
14
|
+
|
|
15
|
+
🎯 **Easy-to-Use API**
|
|
16
|
+
|
|
17
|
+
Designed with simplicity in mind: intuitive functions streamline complex sequence analysis without compromising flexibility.
|
|
18
|
+
|
|
19
|
+
🌍 **Flexible for Any Scenario**
|
|
20
|
+
|
|
21
|
+
Perfect for research, policy, and business, enabling seamless analysis of categorical data and its evolution over time.
|
|
22
|
+
|
|
23
|
+
## Documentation
|
|
24
|
+
|
|
25
|
+
Explore the full Sequenzo documentation [here](https://sequenzo.yuqi-liang.tech).
|
|
26
|
+
|
|
27
|
+
**Where to start on the documentation website?**
|
|
28
|
+
* New to Sequenzo or social sequence analysis? Begin with "About Sequenzo" → "Quickstart Guide" for a smooth introduction.
|
|
29
|
+
* Got your own data? After going through "About Sequenzo" and "Quickstart Guide", you are ready to dive in and start analyzing.
|
|
30
|
+
* Looking for more? Check out our example datasets and tutorials to deepen your understanding.
|
|
31
|
+
|
|
32
|
+
For Chinese users, additional tutorials are available on [Yuqi's video tutorials on Bilibili](https://space.bilibili.com/263594713/lists/4147974).
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
The latest stable release and required dependencies can be installed from PyPI. You can type the following line in your terminal:
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
pip install sequenzo
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Join the Community
|
|
43
|
+
|
|
44
|
+
💬 **Have a question or found a bug?**
|
|
45
|
+
|
|
46
|
+
Please submit an issue on [GitHub Issues](https://github.com/Liang-Team/Sequenzo/issues).
|
|
47
|
+
|
|
48
|
+
For an effective bug report, please include the following:
|
|
49
|
+
* A reproducible code example that clearly demonstrates the issue.
|
|
50
|
+
* The output you’re seeing, such as an error message or an image of the plot.
|
|
51
|
+
* A brief explanation of why you believe this is a bug.
|
|
52
|
+
|
|
53
|
+
Providing these details will help us diagnose and resolve the issue more efficiently. We are always happy to help and will address it as soon as possible.
|
|
54
|
+
|
|
55
|
+
🌟 **Enjoying Sequenzo?**
|
|
56
|
+
|
|
57
|
+
Support the project by starring ⭐ the GitHub repo and spreading the word!
|
|
58
|
+
|
|
59
|
+
🛠 **Interested in contributing?**
|
|
60
|
+
|
|
61
|
+
Check out our contribution guide for more details.
|
|
62
|
+
|
|
63
|
+
* Write code? Submit a pull request to enhance Sequenzo.
|
|
64
|
+
* Testing? Try Sequenzo and share your feedback. Every suggestion counts!
|
|
65
|
+
|
|
66
|
+
## Team
|
|
67
|
+
|
|
68
|
+
**Authors**
|
|
69
|
+
* Yuqi Liang, University of Oxford
|
|
70
|
+
* Xinyi Li, Heilongjiang University
|
|
71
|
+
* Jan Heinrich Ernst Meyerhoff-Liang, Institute for New Economic Thinking Oxford
|
|
72
|
+
|
|
73
|
+
**Acknowledgements**
|
|
74
|
+
* Technical advisor in sequence analysis: Tim Liao, University of Illinois Urbana-Champaign
|
|
75
|
+
* Website and related technical support: Mactavish
|
|
76
|
+
* Logo and color design: Changyu Yi
|
|
77
|
+
* Sequence data sources compilation
|
|
78
|
+
* Economics: Jan Meyerhoff-Liang
|
|
79
|
+
* History: Jingrui Chen
|
|
80
|
+
* Public health: Yuelu Yin
|
|
81
|
+
* Testing
|