sequenzo 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. sequenzo-0.1.0/.gitattributes +69 -0
  2. sequenzo-0.1.0/.github/workflows/python-app.yml +212 -0
  3. sequenzo-0.1.0/.gitignore +37 -0
  4. sequenzo-0.1.0/LICENSE +28 -0
  5. sequenzo-0.1.0/PKG-INFO +141 -0
  6. sequenzo-0.1.0/README.md +81 -0
  7. sequenzo-0.1.0/Tutorials/01_quickstart.ipynb +830 -0
  8. sequenzo-0.1.0/Tutorials/test.ipynb +3911 -0
  9. sequenzo-0.1.0/__init__.py +49 -0
  10. sequenzo-0.1.0/original_datasets_and_cleaning/country_co2_emissions_missing.csv +195 -0
  11. sequenzo-0.1.0/original_datasets_and_cleaning/country_co2_gdp_gapminder_data.ipynb +353 -0
  12. sequenzo-0.1.0/original_datasets_and_cleaning/data_sources/gapminder/Output_CO2 Long Series 1800 - 2022 - Output.csv +43263 -0
  13. sequenzo-0.1.0/original_datasets_and_cleaning/data_sources/gapminder/co2_pcap_cons.csv +195 -0
  14. sequenzo-0.1.0/original_datasets_and_cleaning/examples_output_display_for_documents.ipynb +4223 -0
  15. sequenzo-0.1.0/pyproject.toml +68 -0
  16. sequenzo-0.1.0/requirements-3.10.txt +10 -0
  17. sequenzo-0.1.0/requirements-3.11.txt +10 -0
  18. sequenzo-0.1.0/requirements-3.9.txt +10 -0
  19. sequenzo-0.1.0/requirements-dev.txt +7 -0
  20. sequenzo-0.1.0/sequenzo/__init__.py +36 -0
  21. sequenzo-0.1.0/sequenzo/big_data/__init__.py +6 -0
  22. sequenzo-0.1.0/sequenzo/big_data/clara/__init__.py +22 -0
  23. sequenzo-0.1.0/sequenzo/big_data/clara/clara.py +412 -0
  24. sequenzo-0.1.0/sequenzo/big_data/clara/utils/__init__.py +26 -0
  25. sequenzo-0.1.0/sequenzo/big_data/clara/utils/aggregatecases.py +92 -0
  26. sequenzo-0.1.0/sequenzo/big_data/clara/utils/davies_bouldin.py +91 -0
  27. sequenzo-0.1.0/sequenzo/big_data/clara/utils/k_medoids_once.py +77 -0
  28. sequenzo-0.1.0/sequenzo/big_data/clara/utils/wfcmdd.py +205 -0
  29. sequenzo-0.1.0/sequenzo/clustering/__init__.py +28 -0
  30. sequenzo-0.1.0/sequenzo/clustering/hierarchical_clustering.py +596 -0
  31. sequenzo-0.1.0/sequenzo/clustering/utils/__init__.py +27 -0
  32. sequenzo-0.1.0/sequenzo/clustering/utils/disscenter.py +118 -0
  33. sequenzo-0.1.0/sequenzo/datasets/__init__.py +41 -0
  34. sequenzo-0.1.0/sequenzo/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
  35. sequenzo-0.1.0/sequenzo/datasets/chinese_colonial_territories.csv +12 -0
  36. sequenzo-0.1.0/sequenzo/datasets/country_co2_emissions.csv +195 -0
  37. sequenzo-0.1.0/sequenzo/datasets/country_gdp_per_capita.csv +194 -0
  38. sequenzo-0.1.0/sequenzo/define_sequence_data.py +248 -0
  39. sequenzo-0.1.0/sequenzo/dissimilarity_measures/__init__.py +29 -0
  40. sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/lib.win-amd64-cpython-38/example.cp38-win_amd64.pyd +0 -0
  41. sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/example.cp38-win_amd64.exp +0 -0
  42. sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/example.cp38-win_amd64.lib +0 -0
  43. sequenzo-0.1.0/sequenzo/dissimilarity_measures/build/temp.win-amd64-cpython-38/module.obj +0 -0
  44. sequenzo-0.1.0/sequenzo/dissimilarity_measures/get_distance_matrix.py +582 -0
  45. sequenzo-0.1.0/sequenzo/dissimilarity_measures/get_substitution_cost_matrix.py +179 -0
  46. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/DHDdistance.cpp +141 -0
  47. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/OMdistance.cpp +214 -0
  48. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/OMspellDistance.cpp +244 -0
  49. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/PAMonce.cpp +166 -0
  50. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/dist2matrix.cpp +54 -0
  51. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/module.cpp +38 -0
  52. sequenzo-0.1.0/sequenzo/dissimilarity_measures/src/weightedinertia.cpp +72 -0
  53. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/__init__.py +15 -0
  54. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/get_sm_trate_substitution_cost_matrix.py +134 -0
  55. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqconc.py +42 -0
  56. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqdss.py +57 -0
  57. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqdur.py +70 -0
  58. sequenzo-0.1.0/sequenzo/dissimilarity_measures/utils/seqlength.py +28 -0
  59. sequenzo-0.1.0/sequenzo/visualization/__init__.py +39 -0
  60. sequenzo-0.1.0/sequenzo/visualization/plot_mean_time.py +142 -0
  61. sequenzo-0.1.0/sequenzo/visualization/plot_modal_state.py +215 -0
  62. sequenzo-0.1.0/sequenzo/visualization/plot_most_frequent_sequences.py +104 -0
  63. sequenzo-0.1.0/sequenzo/visualization/plot_relative_frequency.py +331 -0
  64. sequenzo-0.1.0/sequenzo/visualization/plot_sequence_index.py +256 -0
  65. sequenzo-0.1.0/sequenzo/visualization/plot_single_medoid.py +124 -0
  66. sequenzo-0.1.0/sequenzo/visualization/plot_state_distribution.py +340 -0
  67. sequenzo-0.1.0/sequenzo/visualization/plot_transition_matrix.py +171 -0
  68. sequenzo-0.1.0/sequenzo/visualization/utils/__init__.py +17 -0
  69. sequenzo-0.1.0/sequenzo/visualization/utils/utils.py +182 -0
  70. sequenzo-0.1.0/sequenzo.egg-info/PKG-INFO +141 -0
  71. sequenzo-0.1.0/sequenzo.egg-info/SOURCES.txt +78 -0
  72. sequenzo-0.1.0/sequenzo.egg-info/dependency_links.txt +1 -0
  73. sequenzo-0.1.0/sequenzo.egg-info/requires.txt +15 -0
  74. sequenzo-0.1.0/sequenzo.egg-info/top_level.txt +1 -0
  75. sequenzo-0.1.0/setup.cfg +4 -0
  76. sequenzo-0.1.0/setup.py +61 -0
  77. sequenzo-0.1.0/tests/__init__.py +8 -0
  78. sequenzo-0.1.0/tests/test_basic.py +11 -0
@@ -0,0 +1,69 @@
1
+ # 设置默认行为,所有文件都采用LF行结束符和UTF-8编码
2
+ * text=auto eol=lf encoding=utf-8
3
+
4
+ # 源代码文件
5
+ *.py text diff=python encoding=utf-8
6
+ *.c text diff=c encoding=utf-8
7
+ *.cpp text diff=cpp encoding=utf-8
8
+ *.h text diff=c encoding=utf-8
9
+ *.hpp text diff=cpp encoding=utf-8
10
+ *.pyx text diff=python encoding=utf-8
11
+ *.pxd text diff=python encoding=utf-8
12
+
13
+ # 配置文件
14
+ *.toml text encoding=utf-8
15
+ *.ini text encoding=utf-8
16
+ *.yaml text encoding=utf-8
17
+ *.yml text encoding=utf-8
18
+ *.json text encoding=utf-8
19
+ *.cfg text encoding=utf-8
20
+ *.conf text encoding=utf-8
21
+ requirements*.txt text encoding=utf-8
22
+ pyproject.toml text encoding=utf-8
23
+ setup.py text encoding=utf-8
24
+ setup.cfg text encoding=utf-8
25
+
26
+ # 文档文件
27
+ *.txt text encoding=utf-8
28
+ *.md text encoding=utf-8
29
+ *.rst text encoding=utf-8
30
+ LICENSE text encoding=utf-8
31
+ README* text encoding=utf-8
32
+ CHANGELOG* text encoding=utf-8
33
+
34
+ # 在Windows系统上保留CRLF行尾
35
+ *.bat text eol=crlf encoding=utf-8
36
+ *.cmd text eol=crlf encoding=utf-8
37
+ *.ps1 text eol=crlf encoding=utf-8
38
+
39
+ # 特殊文件处理
40
+ Makefile text eol=lf encoding=utf-8
41
+ makefile text eol=lf encoding=utf-8
42
+
43
+ # 数据文件(作为二进制处理,不进行任何转换)
44
+ *.db binary
45
+ *.p binary
46
+ *.pkl binary
47
+ *.pickle binary
48
+ *.pyc binary
49
+ *.pyd binary
50
+ *.pyo binary
51
+
52
+ # 图像文件(二进制)
53
+ *.png binary
54
+ *.jpg binary
55
+ *.jpeg binary
56
+ *.gif binary
57
+ *.ico binary
58
+ *.svg text encoding=utf-8
59
+
60
+ # Excel和CSV文件(作为文本文件处理,但特别注意编码)
61
+ *.csv text encoding=utf-8
62
+ *.tsv text encoding=utf-8
63
+ *.xls binary
64
+ *.xlsx binary
65
+
66
+ # 压缩文件(二进制)
67
+ *.gz binary
68
+ *.zip binary
69
+ *.7z binary
@@ -0,0 +1,212 @@
1
+ # GitHub Actions workflow for Sequenzo package
2
+ name: Sequenzo Package CI
3
+
4
+ on:
5
+ push:
6
+ branches: [ "main" ]
7
+ pull_request:
8
+ branches: [ "main" ]
9
+
10
+ permissions:
11
+ contents: read
12
+
13
+ jobs:
14
+ build:
15
+ runs-on: ${{ matrix.os }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ os: [ubuntu-latest, windows-latest, macos-latest]
20
+ python-version: ['3.9', '3.10', '3.11']
21
+
22
+ steps:
23
+ - name: Checkout repository
24
+ uses: actions/checkout@v4
25
+
26
+ - name: Set up Python ${{ matrix.python-version }}
27
+ uses: actions/setup-python@v4
28
+ with:
29
+ python-version: ${{ matrix.python-version }}
30
+
31
+ # Clear the `pip` cache to prevent conflicts
32
+ - name: Clear pip cache
33
+ run: python -m pip cache purge
34
+
35
+ # macOS: Handle dependency installation, using different strategies for different Python versions
36
+ - name: Install dependencies (macOS)
37
+ if: runner.os == 'macOS'
38
+ run: |
39
+ python -m pip install --upgrade pip
40
+ pip install Cython==0.29.36
41
+
42
+ if [ "${{ matrix.python-version }}" == "3.11" ]; then
43
+ pip install "setuptools>=60.0.0"
44
+ else
45
+ pip install "setuptools==58.1.0"
46
+ fi
47
+
48
+ if [ "${{ matrix.python-version }}" == "3.11" ]; then
49
+ pip install "numpy>=1.23.2,<2.0" --only-binary numpy
50
+ elif [ "${{ matrix.python-version }}" == "3.10" ]; then
51
+ pip install "numpy>=1.22.4,<2.0" --only-binary numpy
52
+ else
53
+ pip install "numpy==1.22.4" --only-binary numpy || pip install "numpy>=1.21.0,<2.0" --only-binary numpy
54
+ fi
55
+
56
+ pip install pybind11
57
+
58
+ if [ "${{ matrix.python-version }}" == "3.11" ]; then
59
+ pip install "scipy>=1.8.0"
60
+ else
61
+ pip install "scipy==1.7.3" || pip install "scipy>=1.7.0"
62
+ fi
63
+
64
+ python -m pip install --upgrade wheel
65
+ pip install fastcluster==1.2.6 || pip install fastcluster
66
+
67
+ if [ -f "requirements-${{ matrix.python-version }}.txt" ]; then
68
+ pip install -r requirements-${{ matrix.python-version }}.txt --no-build-isolation || echo "⚠️ Some requirements may not be installed"
69
+ else
70
+ pip install -r requirements-3.9.txt --no-build-isolation || echo "⚠️ Some requirements may not be installed"
71
+ echo "⚠️ Using requirements-3.9.txt as fallback"
72
+ fi
73
+
74
+ # macOS: Ensure that Xcode CLI tools are installed – required for all Python versions
75
+ - name: Install Xcode Command Line Tools
76
+ if: runner.os == 'macOS'
77
+ run: |
78
+ xcode-select --install || echo "Xcode CLI already installed"
79
+
80
+ # Windows: Handle numpy & fastcluster version issues, install the complete MSVC toolchain
81
+ - name: Install dependencies (Windows)
82
+ if: runner.os == 'Windows'
83
+ shell: pwsh # Make sure this step runs in PowerShell rather than cmd.exe
84
+ run: |
85
+ choco install visualstudio2022buildtools --version=17.0.0 -y
86
+ choco install visualstudio2022-workload-vctools -y
87
+
88
+ python -m pip install --upgrade pip
89
+
90
+ if ("${{ matrix.python-version }}" -eq "3.11") {
91
+ pip install "numpy>=1.23.2,<2.0"
92
+ } else {
93
+ pip install numpy==1.22.4 --only-binary numpy
94
+ }
95
+
96
+ pip install wheel setuptools cmake Cython
97
+
98
+ pip uninstall -y pybind11
99
+ pip install pybind11==2.10.4
100
+
101
+ pip install --force-reinstall fastcluster==1.2.6
102
+
103
+ if (Test-Path "requirements-${{ matrix.python-version }}.txt") {
104
+ pip install -r requirements-${{ matrix.python-version }}.txt --use-deprecated=legacy-resolver
105
+ } else {
106
+ pip install -r requirements-3.9.txt --use-deprecated=legacy-resolver
107
+ Write-Host "⚠️ Using requirements-3.9.txt as fallback"
108
+ }
109
+
110
+ # Linux: Handle fastcluster version issues, ensure numpy is installed first
111
+ - name: Install dependencies (Linux)
112
+ if: runner.os == 'Linux'
113
+ run: |
114
+ python -m pip install --upgrade pip
115
+ pip install wheel setuptools
116
+
117
+ sudo apt-get update
118
+ sudo apt-get install -y build-essential
119
+
120
+ if [ "${{ matrix.python-version }}" == "3.11" ]; then
121
+ pip install "numpy>=1.23.2,<2.0" --only-binary numpy
122
+ else
123
+ pip install numpy==1.22.4 --only-binary numpy
124
+ fi
125
+
126
+ pip install pybind11
127
+ pip install fastcluster==1.2.6
128
+
129
+ if [ -f "requirements-${{ matrix.python-version }}.txt" ]; then
130
+ pip install -r requirements-${{ matrix.python-version }}.txt --no-build-isolation
131
+ else
132
+ pip install -r requirements-3.9.txt --no-build-isolation
133
+ echo "⚠️ Using requirements-3.9.txt as fallback"
134
+ fi
135
+
136
+ # List the workspace and the `sequenzo` package directory to verify the checkout layout
137
+ - name: Verify package path
138
+ shell: bash
139
+ run: |
140
+ echo "📂 Current directory:"
141
+ ls -lah
142
+ echo "📂 Sequenzo package directory:"
143
+ ls -lah sequenzo || echo "⚠️ Directory 'sequenzo' not found!"
144
+
145
+ # Windows: Fix Unicode encoding issues
146
+ - name: Set Windows Encoding
147
+ if: runner.os == 'Windows'
148
+ run: chcp 65001
149
+
150
+ - name: Setup Windows C++ compilation
151
+ if: runner.os == 'Windows'
152
+ shell: pwsh
153
+ run: |
154
+ choco install visualstudio2019buildtools -y
155
+ choco install visualstudio2019-workload-vctools -y
156
+
157
+ & "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvars64.bat"
158
+
159
+ $Env:CL = "/EHsc /MD /bigobj /Ox"
160
+ $Env:DISTUTILS_USE_SDK = "1"
161
+ $Env:MSSdk = "1"
162
+
163
+ # Install the local package on Linux and macOS (Windows uses the dedicated step below)
164
+ - name: Install local package
165
+ if: runner.os != 'Windows'
166
+ run: |
167
+ pip install --no-cache-dir -e . --no-build-isolation
168
+ env:
169
+ PYTHONUTF8: 1
170
+ PYTHONPATH: ${{ github.workspace }}
171
+
172
+ - name: Install local package (Windows)
173
+ if: runner.os == 'Windows'
174
+ shell: pwsh
175
+ run: |
176
+ $editable_install = $false
177
+ try {
178
+ pip install --no-cache-dir -e . --no-build-isolation --verbose
179
+ $editable_install = $true
180
+ } catch {
181
+ Write-Host "ERROR DETAILS: $_"
182
+ Write-Host "Editable install failed, trying regular install..."
183
+ pip install --no-cache-dir . --no-build-isolation --verbose
184
+ }
185
+ env:
186
+ PYTHONUTF8: 1
187
+ PYTHONPATH: ${{ github.workspace }}
188
+ DISTUTILS_USE_SDK: 1
189
+ MSSdk: 1
190
+
191
+ # Display installed packages
192
+ - name: Show installed packages
193
+ run: pip list
194
+
195
+ # Test whether the module is installed correctly
196
+ - name: Test imports
197
+ run: |
198
+ python -c "import sequenzo.dissimilarity_measures; print('Dissimilarity measures module imported successfully')"
199
+ python -c "import sequenzo.clustering; print('Clustering module imported successfully')"
200
+ env:
201
+ PYTHONPATH: ${{ github.workspace }}
202
+
203
+ # Build the wheel
204
+ - name: Build wheel
205
+ run: python setup.py bdist_wheel
206
+
207
+ # Upload build artifacts
208
+ - name: Upload wheel
209
+ uses: actions/upload-artifact@v4
210
+ with:
211
+ name: wheels-${{ matrix.os }}-${{ matrix.python-version }}
212
+ path: dist/*.whl
@@ -0,0 +1,37 @@
1
+ .idea
2
+ .vscode
3
+ .DS_Store
4
+
5
+ # Python build artifacts
6
+ build/
7
+ dist/
8
+ # Packaging metadata generated by setuptools
9
+ *.egg-info/
10
+
11
+ # Ignore compiled Python files
12
+ **/__pycache__/
13
+ # Ignore optimized bytecode files (.pyo)
14
+ **/*.pyo
15
+ # Ignore all compiled bytecode files (.pyc)
16
+ **/*.pyc
17
+
18
+ # Intermediate files or system-specific binary files generated during compilation
19
+
20
+ # Shared object (.so) files are Python extension modules on Linux/macOS
21
+ **/*.so
22
+
23
+ # Python dynamic library (.pyd) files are Python extension modules on Windows,
24
+ # similar to Windows DLL files but specifically for Python
25
+ **/*.pyd
26
+
27
+ # Windows dynamic link library (.dll)
28
+ **/*.dll
29
+
30
+ # Intermediate object files, which are later linked to generate the final .so shared library.
31
+ # These files are usually intermediate products of the compilation process and are not directly used by Python.
32
+ **/*.o
33
+
34
+ # Static library files on Linux/macOS, used for statically linking C/C++ code
35
+ **/*.a
36
+
37
+ raw_requirements.txt
sequenzo-0.1.0/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2025, Yuqi Liang
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.2
2
+ Name: sequenzo
3
+ Version: 0.1.0
4
+ Summary: A fast, scalable and intuitive Python package for social sequence analysis.
5
+ Author-email: Yuqi Liang <yuqi.liang.1900@gmail.com>, Xinyi Li <1836724126@qq.com>, Jan Heinrich Ernst Meyerhoff-Liang <jan.meyerhoff1@gmail.com>
6
+ License: BSD 3-Clause License
7
+
8
+ Copyright (c) 2025, Yuqi Liang
9
+
10
+ Redistribution and use in source and binary forms, with or without
11
+ modification, are permitted provided that the following conditions are met:
12
+
13
+ 1. Redistributions of source code must retain the above copyright notice, this
14
+ list of conditions and the following disclaimer.
15
+
16
+ 2. Redistributions in binary form must reproduce the above copyright notice,
17
+ this list of conditions and the following disclaimer in the documentation
18
+ and/or other materials provided with the distribution.
19
+
20
+ 3. Neither the name of the copyright holder nor the names of its
21
+ contributors may be used to endorse or promote products derived from
22
+ this software without specific prior written permission.
23
+
24
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
28
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+
35
+ Project-URL: Homepage, https://github.com/Liang-Team/Sequenzo
36
+ Project-URL: Documentation, https://sequenzo.yuqi-liang.tech
37
+ Classifier: Development Status :: 3 - Alpha
38
+ Classifier: Intended Audience :: Developers
39
+ Classifier: License :: OSI Approved :: BSD License
40
+ Classifier: Programming Language :: Python :: 3.9
41
+ Classifier: Programming Language :: Python :: 3.10
42
+ Classifier: Programming Language :: Python :: 3.11
43
+ Requires-Python: <3.12,>=3.9
44
+ Description-Content-Type: text/markdown
45
+ License-File: LICENSE
46
+ Requires-Dist: numpy<2.0,>=1.19.5
47
+ Requires-Dist: pandas>=1.2.5
48
+ Requires-Dist: matplotlib>=3.4.3
49
+ Requires-Dist: seaborn>=0.11.2
50
+ Requires-Dist: Pillow>=8.3.2
51
+ Requires-Dist: pybind11>=2.6.0
52
+ Requires-Dist: scipy>=1.6.3
53
+ Requires-Dist: scikit-learn>=0.24.2
54
+ Requires-Dist: fastcluster>=1.2.6
55
+ Requires-Dist: joblib>=1.0.1
56
+ Provides-Extra: dev
57
+ Requires-Dist: pytest>=6.2.5; extra == "dev"
58
+ Requires-Dist: flake8>=3.9.2; extra == "dev"
59
+ Requires-Dist: memory-profiler>=0.58.0; extra == "dev"
60
+
61
+
62
+ # Sequenzo: Fast, scalable, and intuitive social sequence analysis
63
+
64
+ Sequenzo is a high-performance Python package designed for social sequence analysis. It is built to analyze **any sequence of categorical events**, from individual career paths and migration patterns to corporate growth and urban development.
65
+ Whether you are working with **people, places, or policies**, Sequenzo helps uncover meaningful patterns efficiently.
66
+
67
+ Sequenzo outperforms traditional R-based tools in social sequence analysis, delivering faster processing and superior efficiency, especially for large-scale datasets. **No big data? No problem. You don’t need big data to benefit as Sequenzo is designed to enhance sequence analysis at any scale, making complex methods accessible to everyone.**
68
+
69
+ ## Why Choose Sequenzo?
70
+
71
+ 🚀 **High Performance**
72
+
73
+ Leverages Python’s computational power to achieve 10× faster processing than traditional R-based tools like TraMineR.
74
+
75
+ 🎯 **Easy-to-Use API**
76
+
77
+ Designed with simplicity in mind: intuitive functions streamline complex sequence analysis without compromising flexibility.
78
+
79
+ 🌍 **Flexible for Any Scenario**
80
+
81
+ Perfect for research, policy, and business, enabling seamless analysis of categorical data and its evolution over time.
82
+
83
+ ## Documentation
84
+
85
+ Explore the full Sequenzo documentation [here](https://sequenzo.yuqi-liang.tech).
86
+
87
+ **Where to start on the documentation website?**
88
+ * New to Sequenzo or social sequence analysis? Begin with "About Sequenzo" → "Quickstart Guide" for a smooth introduction.
89
+ * Got your own data? After going through "About Sequenzo" and "Quickstart Guide", you are ready to dive in and start analyzing.
90
+ * Looking for more? Check out our example datasets and tutorials to deepen your understanding.
91
+
92
+ For Chinese users, additional tutorials are available on [Yuqi's video tutorials on Bilibili](https://space.bilibili.com/263594713/lists/4147974).
93
+
94
+ ## Installation
95
+
96
+ The latest stable release and required dependencies can be installed from PyPI. You can type the following line in your terminal:
97
+
98
+ ```
99
+ pip install sequenzo
100
+ ```
101
+
102
+ ## Join the Community
103
+
104
+ 💬 **Have a question or found a bug?**
105
+
106
+ Please submit an issue on [GitHub Issues](https://github.com/Liang-Team/Sequenzo/issues).
107
+
108
+ For an effective bug report, please include the following:
109
+ * A reproducible code example that clearly demonstrates the issue.
110
+ * The output you’re seeing, such as an error message or an image of the plot.
111
+ * A brief explanation of why you believe this is a bug.
112
+
113
+ Providing these details will help us diagnose and resolve the issue more efficiently. We are always happy to help and will address it as soon as possible.
114
+
115
+ 🌟 **Enjoying Sequenzo?**
116
+
117
+ Support the project by starring ⭐ the GitHub repo and spreading the word!
118
+
119
+ 🛠 **Interested in contributing?**
120
+
121
+ Check out our [contribution guide]() for more details.
122
+
123
+ * Write code? Submit a pull request to enhance Sequenzo.
124
+ * Testing? Try Sequenzo and share your feedback. Every suggestion counts!
125
+
126
+ ## Team
127
+
128
+ **Authors**
129
+ * Yuqi Liang, University of Oxford
130
+ * Xinyi Li, Heilongjiang University
131
+ * Jan Heinrich Ernst Meyerhoff-Liang, Institute for New Economic Thinking Oxford
132
+
133
+ **Acknowledgements**
134
+ * Technical advisor in sequence analysis: Tim Liao, University of Illinois Urbana-Champaign
135
+ * Website and related technical support: Mactavish
136
+ * Logo and color design: Changyu Yi
137
+ * Sequence data sources compilation
138
+ * Economics: Jan Meyerhoff-Liang
139
+ * History: Jingrui Chen
140
+ * Public health: Yuelu Yin
141
+ * Testing
@@ -0,0 +1,81 @@
1
+
2
+ # Sequenzo: Fast, scalable, and intuitive social sequence analysis
3
+
4
+ Sequenzo is a high-performance Python package designed for social sequence analysis. It is built to analyze **any sequence of categorical events**, from individual career paths and migration patterns to corporate growth and urban development.
5
+ Whether you are working with **people, places, or policies**, Sequenzo helps uncover meaningful patterns efficiently.
6
+
7
+ Sequenzo outperforms traditional R-based tools in social sequence analysis, delivering faster processing and superior efficiency, especially for large-scale datasets. **No big data? No problem. You don’t need big data to benefit as Sequenzo is designed to enhance sequence analysis at any scale, making complex methods accessible to everyone.**
8
+
9
+ ## Why Choose Sequenzo?
10
+
11
+ 🚀 **High Performance**
12
+
13
+ Leverages Python’s computational power to achieve 10× faster processing than traditional R-based tools like TraMineR.
14
+
15
+ 🎯 **Easy-to-Use API**
16
+
17
+ Designed with simplicity in mind: intuitive functions streamline complex sequence analysis without compromising flexibility.
18
+
19
+ 🌍 **Flexible for Any Scenario**
20
+
21
+ Perfect for research, policy, and business, enabling seamless analysis of categorical data and its evolution over time.
22
+
23
+ ## Documentation
24
+
25
+ Explore the full Sequenzo documentation [here](https://sequenzo.yuqi-liang.tech).
26
+
27
+ **Where to start on the documentation website?**
28
+ * New to Sequenzo or social sequence analysis? Begin with "About Sequenzo" → "Quickstart Guide" for a smooth introduction.
29
+ * Got your own data? After going through "About Sequenzo" and "Quickstart Guide", you are ready to dive in and start analyzing.
30
+ * Looking for more? Check out our example datasets and tutorials to deepen your understanding.
31
+
32
+ For Chinese users, additional tutorials are available on [Yuqi's video tutorials on Bilibili](https://space.bilibili.com/263594713/lists/4147974).
33
+
34
+ ## Installation
35
+
36
+ The latest stable release and required dependencies can be installed from PyPI. You can type the following line in your terminal:
37
+
38
+ ```
39
+ pip install sequenzo
40
+ ```
41
+
42
+ ## Join the Community
43
+
44
+ 💬 **Have a question or found a bug?**
45
+
46
+ Please submit an issue on [GitHub Issues](https://github.com/Liang-Team/Sequenzo/issues).
47
+
48
+ For an effective bug report, please include the following:
49
+ * A reproducible code example that clearly demonstrates the issue.
50
+ * The output you’re seeing, such as an error message or an image of the plot.
51
+ * A brief explanation of why you believe this is a bug.
52
+
53
+ Providing these details will help us diagnose and resolve the issue more efficiently. We are always happy to help and will address it as soon as possible.
54
+
55
+ 🌟 **Enjoying Sequenzo?**
56
+
57
+ Support the project by starring ⭐ the GitHub repo and spreading the word!
58
+
59
+ 🛠 **Interested in contributing?**
60
+
61
+ Check out our [contribution guide]() for more details.
62
+
63
+ * Write code? Submit a pull request to enhance Sequenzo.
64
+ * Testing? Try Sequenzo and share your feedback. Every suggestion counts!
65
+
66
+ ## Team
67
+
68
+ **Authors**
69
+ * Yuqi Liang, University of Oxford
70
+ * Xinyi Li, Heilongjiang University
71
+ * Jan Heinrich Ernst Meyerhoff-Liang, Institute for New Economic Thinking Oxford
72
+
73
+ **Acknowledgements**
74
+ * Technical advisor in sequence analysis: Tim Liao, University of Illinois Urbana-Champaign
75
+ * Website and related technical support: Mactavish
76
+ * Logo and color design: Changyu Yi
77
+ * Sequence data sources compilation
78
+ * Economics: Jan Meyerhoff-Liang
79
+ * History: Jingrui Chen
80
+ * Public health: Yuelu Yin
81
+ * Testing