pelican-nlp 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp-0.1.3/MANIFEST.in +27 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/PKG-INFO +15 -14
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/README.rst +10 -9
- pelican_nlp-0.1.3/examples/PyPI_testing_discourse/subjects/sub-01/interview/sub-01_interview_schizophrenia_run-01.rtf +40 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-01/image-description/sub-01_ses-01_image-description_drug.docx +0 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-01/image-description/sub-01_ses-01_image-description_placebo.docx +0 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-02/image-description/sub-01_ses-02_image-description_drug.docx +0 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-02/image-description/sub-01_ses-02_image-description_placebo.docx +0 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-02/ses-01/image-description/sub-02_ses-01_image-description_drug.docx +0 -0
- pelican_nlp-0.1.3/examples/PyPI_testing_image-descriptions/subjects/sub-02/ses-01/image-description/sub-02_ses-01_image-description_placebo.docx +0 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/__init__.py +0 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/extract_acoustic_features.py +274 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/__init__.py +0 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/aggregate_fluency_results.py +186 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/behavioral_data.py +42 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/check_duplicates.py +169 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/coherence.py +653 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/config.py +231 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/main.py +182 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/optimality_without_tsa.py +466 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/plot_fluency.py +573 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/plotting_utils.py +170 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/questionnaires_data.py +43 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/stats_fluency.py +930 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/fluency/utils.py +41 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/speaker_diarization_Nils.py +328 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/__init__.py +0 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/annotation_tool.py +1001 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/annotation_tool_boundaries.py +1122 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/annotation_tool_sandbox.py +985 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/output/holmes_control_nova_all_outputs.json +7948 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/test.json +1 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/transcribe_audio.py +314 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/transcribe_audio_chunked.py +695 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/transcription.py +801 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/transcription_gui.py +955 -0
- pelican_nlp-0.1.3/pelican_nlp/Nils_backup/transcription/word_boundaries.py +190 -0
- pelican_nlp-0.1.3/pelican_nlp/Silvia_files/Opensmile/opensmile_feature_extraction.py +66 -0
- pelican_nlp-0.1.3/pelican_nlp/Silvia_files/prosogram/prosogram.py +104 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/__init__.py +1 -1
- pelican_nlp-0.1.3/pelican_nlp/_version.py +1 -0
- pelican_nlp-0.1.3/pelican_nlp/configuration_files/config_audio.yml +150 -0
- pelican_nlp-0.1.3/pelican_nlp/configuration_files/config_discourse.yml +104 -0
- pelican_nlp-0.1.3/pelican_nlp/configuration_files/config_fluency.yml +108 -0
- pelican_nlp-0.1.3/pelican_nlp/configuration_files/config_general.yml +131 -0
- pelican_nlp-0.1.3/pelican_nlp/configuration_files/config_morteza.yml +103 -0
- pelican_nlp-0.1.3/pelican_nlp/praat/__init__.py +29 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp.egg-info/PKG-INFO +15 -14
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp.egg-info/SOURCES.txt +36 -21
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pyproject.toml +9 -6
- pelican_nlp-0.1.1/.DS_Store +0 -0
- pelican_nlp-0.1.1/.gitignore +0 -17
- pelican_nlp-0.1.1/.idea/PELICAN-nlp.iml +0 -17
- pelican_nlp-0.1.1/.idea/inspectionProfiles/Project_Default.xml +0 -13
- pelican_nlp-0.1.1/.idea/inspectionProfiles/profiles_settings.xml +0 -6
- pelican_nlp-0.1.1/.idea/misc.xml +0 -7
- pelican_nlp-0.1.1/.idea/modules.xml +0 -8
- pelican_nlp-0.1.1/.idea/vcs.xml +0 -6
- pelican_nlp-0.1.1/.idea/workspace.xml +0 -59
- pelican_nlp-0.1.1/MANIFEST.in +0 -3
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/.gitignore +0 -3
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/inspectionProfiles/Project_Default.xml +0 -13
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/inspectionProfiles/profiles_settings.xml +0 -6
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/misc.xml +0 -6
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/modules.xml +0 -8
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/.idea/pelican_testing.iml +0 -8
- pelican_nlp-0.1.1/examples/PyPI_testing_discourse/subjects/sub-01/interview/sub-01_interview_schizophrenia_run-01.rtf +0 -97
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/.gitignore +0 -3
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/inspectionProfiles/Project_Default.xml +0 -13
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/inspectionProfiles/profiles_settings.xml +0 -6
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/misc.xml +0 -6
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/modules.xml +0 -8
- pelican_nlp-0.1.1/examples/PyPI_testing_fluency/.idea/pelican_testing.iml +0 -8
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-01/image-description/sub-01_ses-01_image-description_drug.docx +0 -0
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-01/image-description/sub-01_ses-01_image-description_placebo.docx +0 -0
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-02/image-description/sub-01_ses-02_image-description_drug.docx +0 -0
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-01/ses-02/image-description/sub-01_ses-02_image-description_placebo.docx +0 -0
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-02/ses-01/image-description/sub-02_ses-01_image-description_drug.docx +0 -0
- pelican_nlp-0.1.1/examples/PyPI_testing_image-descriptions/subjects/sub-02/ses-01/image-description/sub-02_ses-01_image-description_placebo.docx +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/LICENSE +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_discourse/config_discourse.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/config_fluency.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-01/fluency/sub-01_fluency_sem_animals.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-01/fluency/sub-01_fluency_sem_clothes.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-01/fluency/sub-01_fluency_sem_food.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-02/fluency/sub-02_fluency_sem_animals.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-02/fluency/sub-02_fluency_sem_clothes.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_fluency/subjects/sub-02/fluency/sub-02_fluency_sem_food.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/examples/PyPI_testing_image-descriptions/config_image-descriptions.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/core/__init__.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/core/audio_document.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/core/corpus.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/core/document.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/core/subject.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/__init__.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/acoustic_feature_extraction.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/distance_from_randomness.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/extract_embeddings.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/extract_logits.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/language_model.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/semantic_similarity.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/test_documents/test_features.csv +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/test_documents/wallace_1.15_3.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/test_documents/wallace_1.1_3.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/extraction/test_documents/wallace_1_4.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/main.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/metrics_statistics/embeddings_metrics_statistics.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/LPDS.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/__init__.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/pipeline.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/speaker_diarization.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/text_cleaner.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/text_importer.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/text_normalizer.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/preprocessing/text_tokenizer.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/sample_configuration_files/config_discourse.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/sample_configuration_files/config_fluency.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/sample_configuration_files/config_general.yml +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/utils/__init__.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/utils/csv_functions.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/utils/sample_usage.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp/utils/setup_functions.py +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp.egg-info/dependency_links.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp.egg-info/requires.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/pelican_nlp.egg-info/top_level.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/requirements.txt +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/setup.cfg +0 -0
- {pelican_nlp-0.1.1 → pelican_nlp-0.1.3}/tests/__init__.py +0 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
include README.rst
|
2
|
+
include LICENSE
|
3
|
+
include requirements.txt
|
4
|
+
include pyproject.toml
|
5
|
+
|
6
|
+
# Include documentation
|
7
|
+
include CONTRIBUTING.md
|
8
|
+
include CHANGES
|
9
|
+
recursive-include docs *
|
10
|
+
|
11
|
+
# Include examples
|
12
|
+
recursive-include examples *.py *.yml *.yaml *.txt
|
13
|
+
|
14
|
+
# Include test files
|
15
|
+
recursive-include tests *.py
|
16
|
+
|
17
|
+
# Include package data
|
18
|
+
recursive-include pelican_nlp *.py *.json *.yml *.yaml
|
19
|
+
|
20
|
+
# Exclude cache and build files
|
21
|
+
global-exclude __pycache__
|
22
|
+
global-exclude *.py[cod]
|
23
|
+
global-exclude *.so
|
24
|
+
global-exclude .git*
|
25
|
+
global-exclude .DS_Store
|
26
|
+
|
27
|
+
exclude .gitignore
|
@@ -1,13 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pelican_nlp
|
3
|
-
Version: 0.1.1
|
3
|
+
Version: 0.1.3
|
4
4
|
Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
|
5
5
|
Author-email: Yves Pauli <yves.pauli@gmail.com>
|
6
6
|
License-Expression: CC-BY-NC-4.0
|
7
|
-
Project-URL: Homepage, https://github.com/ypauli/
|
8
|
-
Project-URL: Repository, https://github.com/ypauli/
|
9
|
-
Project-URL: Documentation, https://github.com/ypauli/
|
10
|
-
Project-URL: Bug Tracker, https://github.com/ypauli/
|
7
|
+
Project-URL: Homepage, https://github.com/ypauli/pelican_nlp
|
8
|
+
Project-URL: Repository, https://github.com/ypauli/pelican_nlp
|
9
|
+
Project-URL: Documentation, https://github.com/ypauli/pelican_nlp#readme
|
10
|
+
Project-URL: Bug Tracker, https://github.com/ypauli/pelican_nlp/issues
|
11
11
|
Keywords: nlp,linguistics,preprocessing,language-processing,text-analysis
|
12
12
|
Classifier: Development Status :: 1 - Planning
|
13
13
|
Classifier: Intended Audience :: Science/Research
|
@@ -48,18 +48,18 @@ Dynamic: license-file
|
|
48
48
|
PELICAN_nlp
|
49
49
|
====================================
|
50
50
|
|
51
|
-
|
51
|
+
pelican_nlp stands for "Preprocessing and Extraction of Linguistic Information for Computational Analysis - Natural Language Processing". This package enables the creation of standardized and reproducible language processing pipelines, extracting linguistic features from various tasks like discourse, fluency, and image descriptions.
|
52
52
|
|
53
53
|
.. image:: https://img.shields.io/pypi/v/pelican_nlp.svg
|
54
|
-
:target: https://pypi.org/project/
|
54
|
+
:target: https://pypi.org/project/pelican_nlp/
|
55
55
|
:alt: PyPI version
|
56
56
|
|
57
57
|
.. image:: https://img.shields.io/github/license/ypauli/pelican_nlp.svg
|
58
|
-
:target: https://github.com/ypauli/
|
58
|
+
:target: https://github.com/ypauli/pelican_nlp/blob/main/LICENSE
|
59
59
|
:alt: License
|
60
60
|
|
61
61
|
.. image:: https://img.shields.io/pypi/pyversions/pelican_nlp.svg
|
62
|
-
:target: https://pypi.org/project/
|
62
|
+
:target: https://pypi.org/project/pelican_nlp/
|
63
63
|
:alt: Supported Python Versions
|
64
64
|
|
65
65
|
Installation
|
@@ -75,7 +75,7 @@ For the latest development version:
|
|
75
75
|
|
76
76
|
.. code-block:: bash
|
77
77
|
|
78
|
-
pip install
|
78
|
+
pip install git+https://github.com/ypauli/pelican_nlp.git
|
79
79
|
|
80
80
|
Usage
|
81
81
|
=====
|
@@ -107,7 +107,8 @@ Text and audio files should follow this naming convention:
|
|
107
107
|
- extension: file extension (e.g., txt / pdf / docx / rtf), mandatory
|
108
108
|
|
109
109
|
Example filenames:
|
110
|
-
|
110
|
+
|
111
|
+
- sub-01_interview_schizophrenia.rtf
|
111
112
|
- sub-03_ses-02_fluency_semantic_animals.docx
|
112
113
|
|
113
114
|
To optimize performance, close other programs and limit GPU usage during language processing.
|
@@ -124,15 +125,15 @@ Features
|
|
124
125
|
Examples
|
125
126
|
========
|
126
127
|
|
127
|
-
You can find example setups in the [`examples/`](https://github.com/ypauli/
|
128
|
+
You can find example setups in the `examples/ <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder.
|
128
129
|
ALWAYS change the path to the project folder specified in the configuration file to your specific project location.
|
129
130
|
|
130
131
|
Contributing
|
131
132
|
============
|
132
133
|
|
133
|
-
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/
|
134
|
+
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/pelican_nlp/blob/main/CONTRIBUTING.md>`_.
|
134
135
|
|
135
136
|
License
|
136
137
|
=======
|
137
138
|
|
138
|
-
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/
|
139
|
+
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/pelican_nlp/blob/main/LICENSE>`_ file for details.
|
@@ -2,18 +2,18 @@
|
|
2
2
|
PELICAN_nlp
|
3
3
|
====================================
|
4
4
|
|
5
|
-
|
5
|
+
pelican_nlp stands for "Preprocessing and Extraction of Linguistic Information for Computational Analysis - Natural Language Processing". This package enables the creation of standardized and reproducible language processing pipelines, extracting linguistic features from various tasks like discourse, fluency, and image descriptions.
|
6
6
|
|
7
7
|
.. image:: https://img.shields.io/pypi/v/pelican_nlp.svg
|
8
|
-
:target: https://pypi.org/project/
|
8
|
+
:target: https://pypi.org/project/pelican_nlp/
|
9
9
|
:alt: PyPI version
|
10
10
|
|
11
11
|
.. image:: https://img.shields.io/github/license/ypauli/pelican_nlp.svg
|
12
|
-
:target: https://github.com/ypauli/
|
12
|
+
:target: https://github.com/ypauli/pelican_nlp/blob/main/LICENSE
|
13
13
|
:alt: License
|
14
14
|
|
15
15
|
.. image:: https://img.shields.io/pypi/pyversions/pelican_nlp.svg
|
16
|
-
:target: https://pypi.org/project/
|
16
|
+
:target: https://pypi.org/project/pelican_nlp/
|
17
17
|
:alt: Supported Python Versions
|
18
18
|
|
19
19
|
Installation
|
@@ -29,7 +29,7 @@ For the latest development version:
|
|
29
29
|
|
30
30
|
.. code-block:: bash
|
31
31
|
|
32
|
-
pip install
|
32
|
+
pip install git+https://github.com/ypauli/pelican_nlp.git
|
33
33
|
|
34
34
|
Usage
|
35
35
|
=====
|
@@ -61,7 +61,8 @@ Text and audio files should follow this naming convention:
|
|
61
61
|
- extension: file extension (e.g., txt / pdf / docx / rtf), mandatory
|
62
62
|
|
63
63
|
Example filenames:
|
64
|
-
|
64
|
+
|
65
|
+
- sub-01_interview_schizophrenia.rtf
|
65
66
|
- sub-03_ses-02_fluency_semantic_animals.docx
|
66
67
|
|
67
68
|
To optimize performance, close other programs and limit GPU usage during language processing.
|
@@ -78,15 +79,15 @@ Features
|
|
78
79
|
Examples
|
79
80
|
========
|
80
81
|
|
81
|
-
You can find example setups in the [`examples/`](https://github.com/ypauli/
|
82
|
+
You can find example setups in the `examples/ <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder.
|
82
83
|
ALWAYS change the path to the project folder specified in the configuration file to your specific project location.
|
83
84
|
|
84
85
|
Contributing
|
85
86
|
============
|
86
87
|
|
87
|
-
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/
|
88
|
+
Contributions are welcome! Please check out the `contributing guide <https://github.com/ypauli/pelican_nlp/blob/main/CONTRIBUTING.md>`_.
|
88
89
|
|
89
90
|
License
|
90
91
|
=======
|
91
92
|
|
92
|
-
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/
|
93
|
+
This project is licensed under Attribution-NonCommercial 4.0 International. See the `LICENSE <https://github.com/ypauli/pelican_nlp/blob/main/LICENSE>`_ file for details.
|
@@ -0,0 +1,40 @@
|
|
1
|
+
{\rtf1\ansi\deff3\adeflang1025
|
2
|
+
{\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\froman\fprq2\fcharset2 Symbol;}{\f2\fswiss\fprq2\fcharset0 Arial;}{\f3\froman\fprq2\fcharset0 Liberation Serif{\*\falt Times New Roman};}{\f4\froman\fprq2\fcharset0 Arial;}{\f5\froman\fprq2\fcharset0 Liberation Sans{\*\falt Arial};}{\f6\fnil\fprq2\fcharset0 Noto Sans CJK SC;}{\f7\fnil\fprq2\fcharset0 0;}{\f8\fnil\fprq2\fcharset0 Noto Sans Devanagari;}}
|
3
|
+
{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}
|
4
|
+
{\stylesheet{\s0\snext0\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\nowidctlpar\hyphpar1\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052 Normal;}
|
5
|
+
{\*\cs15\snext15 Footnote Characters;}
|
6
|
+
{\*\cs16\snext16\rtlch\ab \ltrch\loch\b Strong;}
|
7
|
+
{\s17\sbasedon0\snext18\rtlch\af8\afs28\alang1081 \ltrch\lang1033\langfe2052\hich\af5\loch\ql\nowidctlpar\hyphpar1\sb240\sa120\keepn\ltrpar\cf0\f5\fs28\lang1033\kerning1\dbch\af6\langfe2052 Heading;}
|
8
|
+
{\s18\sbasedon0\snext18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052 Body Text;}
|
9
|
+
{\s19\sbasedon18\snext19\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052 List;}
|
10
|
+
{\s20\sbasedon0\snext20\rtlch\af8\afs24\alang1081\ai \ltrch\lang1033\langfe2052\hich\af3\loch\ql\nowidctlpar\hyphpar1\sb120\sa120\ltrpar\cf0\f3\fs24\lang1033\i\kerning1\dbch\af7\langfe2052 Caption;}
|
11
|
+
{\s21\sbasedon0\snext21\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\nowidctlpar\hyphpar1\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052 Index;}
|
12
|
+
}{\*\generator LibreOffice/24.2.7.2$Linux_X86_64 LibreOffice_project/420$Build-2}{\info{\title 648866ebdbd870441d179a92}{\author Thomas Luthi-Bhatti}{\creatim\yr2023\mo6\dy14\hr17\min6}{\revtim\yr2025\mo4\dy8\hr13\min51}{\printim\yr0\mo0\dy0\hr0\min0}}{\*\userprops{\propname Operator}\proptype30{\staticval Ulrike Rachner}}\deftab720
|
13
|
+
\hyphauto1\viewscale100\formshade\paperh16838\paperw11906\margl1417\margr1417\margt1417\margb1398\sectd\sbknone\sftnnar\saftnnrlc\sectunlocked1\pgwsxn11906\pghsxn16838\marglsxn1417\margrsxn1417\margtsxn1417\margbsxn1398\ftnbj\ftnstart1\ftnrestart\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc
|
14
|
+
{\*\ftnsep\chftnsep}\pgndec\pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052\sl100\slmult0\qc\hyphpar0\fi0\li0\lin0\ri0\rin0\sb238\sa0{\hich\af4\loch\cs16\rtlch\ab \ltrch\loch\b\fs22\lang1031\f4\loch
|
15
|
+
Interview with Interviewee}
|
16
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052\sl100\slmult0\qc\hyphpar0\fi0\li0\lin0\ri0\rin0\sb238\sa0\loch
|
17
|
+
|
18
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
19
|
+
I: Das ist f\u252\'fcr mich. Ich m\u246\'f6chte, dass Sie \u252\'fcber ein paar Dinge aus Ihrem t\u228\'e4glichen Leben sprechen. Sie m\u252\'fcssen (keinerlei?) Namen nennen, w\u228\'e4hrend Sie dieses Ereignis beschreiben. K\u246\'f6nnen Sie mir ein wenig \u252\'fcber sich erz\u228\'e4hlen? #00:00:14-00#\line B: (In Schriftsprache.) Ja, nat\u252\'fcrlich. Jeden Morgen beginne ich den Tag mit einer Tasse Tee. Ich bin jemand, der viel Wert auf eine ruhige Morgenroutine legt. Es ist f\u252\'fcr mich sehr wichtig, dass der Start in den Tag entspannt und nicht hektisch ist. Oft lese ich auch ein paar Seiten in einem Buch, das ich gerade lese. Danach gehe ich meistens zur Arbeit, entweder ins B\u252\'fcro oder arbeite von zu Hause aus. Mein Job ist sehr abwechslungsreich, und es gef\u228\'e4llt mir, immer neue Herausforderungen zu haben. Am Nachmittag gehe ich oft spazieren oder treffe mich mit Freunden. Ein gutes Gespr\u228\'e4ch oder eine kleine Wanderung in der Natur tut mir immer sehr gut. Am Abend koche ich gerne etwas Leckeres und entspanne mich beim Fernsehen oder h\u246\'f6re Musik. #00:00:51-00#}
|
20
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
21
|
+
I: Wenn Sie zur\u252\'fcckdenken, k\u246\'f6nnen Sie mir eine Geschichte \u252\'fcber etwas Wichtiges erz\u228\'e4hlen, das in Ihrem Leben passiert ist? Die Geschichte kann aus einer beliebigen Zeit Ihres Lebens stammen, aus Ihrer Kindheit oder auch vor Kurzem. Sie brauchen keine Namen zu nennen, wenn Sie dieses Ereignis beschreiben. #00:04:19-00#\line B: Ich erinnere mich an eine Zeit, als ich mit meiner Familie in einem kleinen Dorf auf einem Berg war. Es war ein Winterwochenende, und wir hatten viel Schnee. An diesem Tag sind wir alle zusammen mit Schlitten den Hang hinuntergefahren. Es war eine sehr lustige Erfahrung, weil wir alle wie Kinder waren, trotz des Alters. Aber was mir wirklich in Erinnerung geblieben ist, war, dass ich mich nach diesem Tag viel n\u228\'e4her mit meiner Familie verbunden f\u252\'fchlte. Es war ein Moment, in dem wir uns alle unterst\u252\'fctzt und gemeinsam gelacht haben, was damals sehr wichtig f\u252\'fcr mich war. Diese Momente mit der Familie sind f\u252\'fcr mich unersetzlich. #00:05:42-00#}
|
22
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
23
|
+
I: Ich m\u246\'f6chte Sie nun bitten, dass Sie ein wenig \u252\'fcber Ihre Gesundheit sprechen. Sie brauchen keine Namen zu nennen. Glauben Sie, dass Sie eine psychische Krankheit haben? Und wenn ja, worum handelt es sich Ihrer Meinung nach? #00:06:03-00#\line B: In letzter Zeit habe ich mich mehr mit meiner mentalen Gesundheit besch\u228\'e4ftigt. Ich w\u252\'fcrde sagen, dass ich in einer stabilen psychischen Verfassung bin, aber es gibt Momente, in denen ich mich \u252\'fcberfordert f\u252\'fchle. Gerade in stressigen Phasen merke ich, dass es schwieriger f\u252\'fcr mich ist, den Kopf klar zu behalten. Aber ich versuche, mir Hilfe zu suchen und achte sehr darauf, auf mich selbst zu h\u246\'f6ren. Es gibt Phasen, in denen ich das Gef\u252\'fchl habe, dass ich eine kurze Auszeit brauche, um mich wieder zu sortieren. Aber insgesamt denke ich, dass ich psychisch gesund bin, solange ich mir genug Zeit f\u252\'fcr mich nehme. #00:06:48-00#}
|
24
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
25
|
+
I: Und wurde Ihnen eine Diagnose gestellt? #00:07:03-00#\line B: Nein, bisher nicht. #00:07:04-00#}
|
26
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
27
|
+
I: Danke. Okay. Ich werde Ihnen jetzt drei Bilder zeigen, und eins nach dem anderen. Jedes Mal, wenn ich das Bild vor Sie lege, m\u246\'f6chte ich Sie bitten, das Bild so vollst\u228\'e4ndig wie m\u246\'f6glich zu beschreiben. Sagen Sie mir, was Sie auf dem Bild sehen und was Ihrer Meinung nach passieren k\u246\'f6nnte. Bitte sprechen Sie, bis ich Stopp sage. (...) Bild Nummer zwei. Bitte sagen Sie, was Sie auf diesem Bild sehen. #00:09:10-00#\line B: (Startet in Schriftsprache.) Auf diesem Bild sieht man eine Gruppe von Personen, die auf einem Markt stehen. Es ist ein lebhafter Ort, mit vielen St\u228\'e4nden und bunten Waren. In der Mitte sieht man eine \u228\'e4ltere Frau, die gerade eine Melone ausw\u228\'e4hlt. Sie tr\u228\'e4gt eine einfache, aber stilvolle Kleidung. Links sieht man einen jungen Mann, der mit einem Verk\u228\'e4ufer spricht, der gerade Tomaten in eine T\u252\'fcte packt. Im Hintergrund sieht man weitere Marktst\u228\'e4nde, die mit Obst und Gem\u252\'fcse voll sind. Der Himmel ist bew\u246\'f6lkt, und es sieht aus, als w\u252\'fcrde es bald regnen. Es scheint ein sch\u246\'f6ner, aber auch sehr besch\u228\'e4ftigter Tag zu sein. #00:10:37-00#}
|
28
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
29
|
+
I: Danke sch\u246\'f6n. Bild Nummer vier. Was passiert auf diesem Bild? Oder was sehen Sie auf diesem Bild? #00:10:46-00#\line B: Auf diesem Bild sieht man einen Mann und eine Frau, die zusammen auf einer Bank sitzen. Der Mann ist in einem Anzug und schaut auf sein Handy. Die Frau tr\u228\'e4gt ein sommerliches Kleid und schaut nachdenklich in die Ferne. Sie scheint in einer anderen Welt zu sein, w\u228\'e4hrend der Mann abgelenkt ist. Im Hintergrund ist ein Park zu sehen, mit B\u228\'e4umen und einem kleinen See. Die Stimmung wirkt ein bisschen melancholisch, als ob beide Menschen in Gedanken versunken sind. Es scheint, als ob sie ein Gespr\u228\'e4ch f\u252\'fchren, aber jeder ist in seiner eigenen Welt. #00:12:00-00#}
|
30
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
31
|
+
I: Danke sch\u246\'f6n. Und Bild Nummer 17GF, was sehen Sie auf diesem Bild? #00:12:09-00#\line B: Auf diesem Bild sieht man einen alten Leuchtturm, der auf einem Felsen \u252\'fcber dem Meer thront. Der Himmel ist dramatisch, mit dunklen Wolken und einer Art Sturmstimmung. Das Meer ist unruhig und st\u252\'fcrmisch, und man sieht die Wellen gegen den Felsen schlagen. In der N\u228\'e4he des Leuchtturms ist ein kleiner, alter Kutter zu sehen, der versucht, gegen die Wellen anzukommen. Es wirkt wie eine dramatische Szene, bei der der Leuchtturm als Rettungsanker in dieser st\u252\'fcrmischen See dient. Der Leuchtturm strahlt ein warmes Licht aus, das den Kutter zu f\u252\'fchren scheint. #00:13:23-00#}
|
32
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
33
|
+
I: Danke. Gut, ich werde Ihnen nun einige Bilder aus einer Geschichte zeigen. Sie k\u246\'f6nnen sich so viel Zeit nehmen, wie Sie brauchen, um die Bilder anzuschauen. Nachdem Sie alle Bilder der Reihe nach angesehen haben, m\u246\'f6chte ich Sie bitten, mir die Geschichten auf den Bildern in Ihren eigenen Worten zu erz\u228\'e4hlen. Das ist die Geschichte. #00:13:47-00#\line B: Ich habe in der Kindheit oft getr\u228\'e4umt, dass ich in einem Wald unterwegs war. #00:15:59-00#}
|
34
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
35
|
+
I: Im Wald? #00:15:56-00#\line B: Ja, genau. Ich war als Kind oft drau\u223\'dfen in den W\u228\'e4ldern, und in meinen Tr\u228\'e4umen bin ich immer tiefer in den Wald gegangen. Eines Tages kam ich an einen kleinen Bach, der durch den Wald floss. Der Bach war klar und das Wasser funkelte im Sonnenlicht. Ich sa\u223\'df dort und beobachtete die Fische, die durch das Wasser schwammen. Es war sehr ruhig, und ich f\u252\'fchlte mich v\u246\'f6llig friedlich. In diesem Moment hatte ich das Gef\u252\'fchl, dass ich ein Teil der Natur war und mit der Welt um mich herum eins. Es war ein sch\u246\'f6ner, friedlicher Traum, der mir auch als Erwachsener oft in den Sinn kommt. #00:16:44-00#}
|
36
|
+
\par \pard\plain \s18\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\sl276\slmult1\nowidctlpar\hyphpar1\sb0\sa140\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052{\loch
|
37
|
+
I: Gut. Jetzt haben Sie eine Minute Zeit, um den Text durchzugehen, danach bitte ich Sie, das Blatt wegzulegen und mir die Geschichte in eigenen Worten zu erz\u228\'e4hlen. #00:17:51-00#\line B: Okay. #00:17:52-00#\line (Stille. B liest. #00:17:52-00# - #00:19:13-00#)\line B: (in Schriftsprache.) Die Geschichte handelt von einer kleinen Katze, die an einem sehr hei\u223\'dfen Tag im Schatten eines Baumes schl\u228\'e4ft. Sie tr\u228\'e4umt von einem k\u252\'fchlen Teich, an dem sie trinken kann. Als sie aufwacht, ist der Teich nicht mehr weit, und die Katze folgt einem Schmetterling, der sie zu einem geheimen, versteckten Ort f\u252\'fchrt. Der Teich ist klar, und die Katze kann endlich ihren Durst l\u246\'f6schen. Sie ist sehr zufrieden und kehrt sp\u228\'e4ter zur\u252\'fcck zu ihrem Baum, um sich wieder auszuruhen. #00:20:09-00#\line I: Danke sch\u246\'f6n.}
|
38
|
+
\par \pard\plain \s0\rtlch\af8\afs24\alang1081 \ltrch\lang1033\langfe2052\hich\af3\loch\ql\nowidctlpar\hyphpar1\ltrpar\cf0\f3\fs24\lang1033\kerning1\dbch\af7\langfe2052\sl100\slmult0\qc\hyphpar0\fi0\li0\lin0\ri0\rin0\sb238\sa0\loch
|
39
|
+
|
40
|
+
\par }
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
File without changes
|
@@ -0,0 +1,274 @@
|
|
1
|
+
#import pandas as pd
|
2
|
+
#import numpy as np
|
3
|
+
#from pydub import AudioSegment
|
4
|
+
#from pyannote.audio import Model, Inference
|
5
|
+
|
6
|
+
class AudioFeatureExtractor:
    """
    Extracts embeddings from audio files with a pretrained pyannote model.

    The third-party packages this class relies on (pyannote.audio, pydub,
    numpy, pandas) are imported inside the methods that use them, because the
    module has no active module-level imports.
    """

    def __init__(self, model_name, token, device="cpu"):
        """
        Initializes the AudioFeatureExtractor class.

        Parameters:
        - model_name: str, name of the pretrained model from pyannote
        - token: str, the Hugging Face authentication token for downloading the model
        - device: str, device to run the model on (default is "cpu")
        """
        from pyannote.audio import Model

        self.model = Model.from_pretrained(model_name, use_auth_token=token).to(device)

    def extract_audio_window(self, audio, start_time=0, duration=60000):
        """
        Extract a segment from the audio starting at `start_time` with a specified duration.

        Parameters:
        - audio: AudioSegment object (or any sliceable sequence), the input audio
        - start_time: int, starting point of the window in milliseconds (default is 0)
        - duration: int, duration of the window to extract in milliseconds (default is 60000)

        Returns:
        - The slice `audio[start_time:start_time + duration]`
        """
        end_time = start_time + duration
        return audio[start_time:end_time]

    def extract_embeddings(self, inference, file_path):
        """
        Extract embeddings from an audio file using the inference object.

        Parameters:
        - inference: callable (pyannote Inference object) applied to `file_path`
        - file_path: str, path to the audio file

        Returns:
        - numpy array built from whatever `inference(file_path)` returns
        """
        import numpy as np

        return np.asarray(inference(file_path))

    def process_audio(self, file_path, mode="whole", start_time=0, duration=60000, window_step=None):
        """
        Process an audio file, extracting either whole or windowed embeddings based on mode.

        Parameters:
        - file_path: str, path to the audio file
        - mode: str, "whole" for whole file extraction or "window" for windowed extraction (default is "whole")
        - start_time: int, offset of the first window in milliseconds (only for "window" mode, default is 0)
        - duration: int, duration of each window in milliseconds (only for "window" mode, default is 60000)
        - window_step: int, step between window starts in milliseconds (required for "window" mode)

        Returns:
        - numpy array of embeddings; in "window" mode the per-window results are stacked row-wise

        Raises:
        - ValueError: if `mode` is unknown, if `duration` or `window_step` is missing or
          not positive in "window" mode, or if no window could be extracted
        """
        # Validate arguments first so bad calls fail before any audio is decoded.
        if mode not in ("whole", "window"):
            raise ValueError("Invalid mode. Use 'whole' or 'window'.")
        if mode == "window":
            if window_step is None or window_step <= 0:
                raise ValueError("window_step must be a positive number of milliseconds for 'window' mode.")
            if duration is None or duration <= 0:
                raise ValueError("duration must be a positive number of milliseconds for 'window' mode.")

        from pyannote.audio import Inference

        if mode == "whole":
            # The file is handed to pyannote directly; no decoding through pydub is needed.
            inference = Inference(self.model, window="whole")
            return self.extract_embeddings(inference, file_path)

        import os
        import tempfile

        import numpy as np
        from pydub import AudioSegment

        audio = AudioSegment.from_file(file_path)

        # pydub slices in milliseconds, whereas pyannote expects seconds.
        inference = Inference(
            self.model,
            window="sliding",
            duration=duration / 1000.0,
            step=window_step / 1000.0,
        )

        window_embeddings = []
        # A private temporary directory avoids name clashes in the working
        # directory and is removed even when export or inference raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            for offset in range(start_time, len(audio), window_step):
                window_audio = self.extract_audio_window(audio, start_time=offset, duration=duration)
                temp_path = os.path.join(temp_dir, f"temp_window_{offset}.wav")
                window_audio.export(temp_path, format="wav")
                window_embeddings.append(self.extract_embeddings(inference, temp_path))

        if not window_embeddings:
            raise ValueError("No audio windows could be extracted; check start_time against the audio length.")

        return np.vstack(window_embeddings)  # Stack all window embeddings

    def save_embeddings(self, embeddings, output_path):
        """
        Save the embeddings to a CSV file.

        Parameters:
        - embeddings: numpy array of embeddings
        - output_path: str, path to save the CSV file
        """
        import pandas as pd

        pd.DataFrame(embeddings).to_csv(output_path, index=False)
|
94
|
+
|
95
|
+
# Example usage:
|
96
|
+
if __name__ == "__main__":
    import os

    # SECURITY: never commit access tokens to source control. The token is read
    # from the HF_TOKEN environment variable instead; any token that was
    # previously hard-coded here must be treated as leaked and revoked.
    # If HF_TOKEN is unset, None is passed and no token is supplied.
    extractor = AudioFeatureExtractor(
        model_name="pyannote/embedding",
        token=os.environ.get("HF_TOKEN"),
        # NOTE(review): "mps" presumably targets Apple-silicon GPUs; use "cpu"
        # on machines without that backend.
        device="mps",
    )

    # Process a whole file
    whole_embeddings = extractor.process_audio(
        file_path="path/to/audio_file.wav",
        mode="whole",
    )

    # Process a file using sliding window extraction
    window_embeddings = extractor.process_audio(
        file_path="path/to/audio_file.wav",
        mode="window",
        start_time=0,
        duration=10000,  # e.g., 10 seconds window
        window_step=5000,  # e.g., 5 seconds step
    )

    # Save the embeddings
    extractor.save_embeddings(whole_embeddings, "path/to/output_whole.csv")
    extractor.save_embeddings(window_embeddings, "path/to/output_window.csv")
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
'''import os
|
126
|
+
import numpy as np
|
127
|
+
from pydub import AudioSegment
|
128
|
+
from pyannote.audio import Model, Inference'''
|
129
|
+
|
130
|
+
|
131
|
+
class AudioFeatureExtractor:
    """
    Extracts embeddings from audio files with a pretrained pyannote model.

    The third-party packages this class relies on (pyannote.audio, pydub,
    numpy) are imported inside the methods that use them, because the module
    has no active module-level imports.
    """

    def __init__(self, model_name_or_instance, device="cpu", use_auth_token=None):
        """
        Initializes the AudioFeatureExtractor class.

        Parameters:
        - model_name_or_instance: str or Model, the name of the pretrained model from pyannote
          or an already constructed model object exposing `.to(device)`
        - device: str, device to run the model on (default is "cpu")
        - use_auth_token: str, Hugging Face authentication token if required
        """
        if isinstance(model_name_or_instance, str):
            # Only a model *name* requires pyannote to be importable here.
            from pyannote.audio import Model

            self.model = Model.from_pretrained(
                model_name_or_instance, use_auth_token=use_auth_token
            ).to(device)
        else:
            self.model = model_name_or_instance.to(device)
        self.device = device

    def extract_audio_window(self, audio, start_time=0, duration=None):
        """
        Extract a segment from the audio starting at 'start_time' with a specified 'duration'.

        Parameters:
        - audio: AudioSegment object (or any sliceable sequence), the input audio
        - start_time: int, starting point of the window in milliseconds (default is 0)
        - duration: int, duration of the window to extract in milliseconds (default is None, till the end)

        Returns:
        - The slice `audio[start_time:start_time + duration]`
        """
        if duration is None:
            duration = len(audio) - start_time
        end_time = start_time + duration
        return audio[start_time:end_time]

    def extract_embeddings(self, inference, file_path):
        """
        Extract embeddings from the audio file using the specified inference object.

        Parameters:
        - inference: callable (pyannote Inference object) applied to `file_path`
        - file_path: str, path to the audio file

        Returns:
        - numpy array built from whatever `inference(file_path)` returns
        """
        import numpy as np

        return np.asarray(inference(file_path))

    def process_audio(self, file_path, mode="whole", window_duration=None, window_step=None, start_time=0, end_time=None):
        """
        Process an audio file, extracting embeddings based on the specified mode.

        Parameters:
        - file_path: str, path to the audio file
        - mode: str, "whole" for whole file extraction or "windowed" for windowed extraction (default is "whole")
        - window_duration: int, duration of the window in milliseconds (required for "windowed" mode)
        - window_step: int, step size in milliseconds between windows (required for "windowed" mode)
        - start_time: int, start time in milliseconds for processing (default is 0)
        - end_time: int, end time in milliseconds for processing (default is None, till the end)

        Returns:
        - numpy array of embeddings

        Raises:
        - ValueError: if `mode` is unknown, or if `window_duration` / `window_step`
          is missing in "windowed" mode
        """
        # Validate arguments first so bad calls fail before any audio is decoded
        # or any temporary file is written.
        if mode not in ("whole", "windowed"):
            raise ValueError("Invalid mode. Use 'whole' or 'windowed'.")
        if mode == "windowed" and (window_duration is None or window_step is None):
            raise ValueError("window_duration and window_step must be specified for 'windowed' mode.")

        import os
        import tempfile

        from pyannote.audio import Inference
        from pydub import AudioSegment

        # Load and optionally trim the audio file
        audio = AudioSegment.from_file(file_path)
        if end_time is None or end_time > len(audio):
            end_time = len(audio)
        audio = audio[start_time:end_time]

        if mode == "whole":
            inference = Inference(self.model, window="whole")
        else:
            # Convert milliseconds to seconds for pyannote
            inference = Inference(
                self.model,
                window="sliding",
                duration=window_duration / 1000.0,
                step=window_step / 1000.0,
            )

        # Export the (possibly trimmed) audio to a private temporary directory.
        # A unique directory prevents concurrent calls from overwriting each
        # other's file, and the context manager removes it even when export or
        # inference raises.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, "temp_audio.wav")
            audio.export(temp_path, format="wav")
            return self.extract_embeddings(inference, temp_path)

    def save_embeddings(self, embeddings, output_path):
        """
        Save the embeddings to a file.

        Parameters:
        - embeddings: numpy array, the embeddings to save
        - output_path: str, the path where embeddings will be saved
        """
        import numpy as np

        np.save(output_path, embeddings)
        # Alternatively, to save as CSV:
        # np.savetxt(output_path, embeddings, delimiter=",")
|
242
|
+
|
243
|
+
|
244
|
+
# Example usage:
|
245
|
+
if __name__ == "__main__":
    # Demonstration: embed one audio file twice (whole-file and sliding-window)
    # and store each result under its own .npy name.
    source_wav = "path/to/your/audio_file.wav"

    demo = AudioFeatureExtractor(
        model_name_or_instance="pyannote/embedding",
        device="cpu",
        use_auth_token="YOUR_HUGGING_FACE_TOKEN",  # Replace with your token if necessary
    )

    # Each job pairs an output file with the keyword arguments that select the
    # extraction mode; window sizes are given in milliseconds.
    jobs = (
        ("whole_embeddings.npy", {"mode": "whole"}),
        (
            "windowed_embeddings.npy",
            {
                "mode": "windowed",
                "window_duration": 5000,  # 5000 ms = 5 seconds
                "window_step": 1000,  # 1000 ms = 1 second
            },
        ),
    )

    for target, options in jobs:
        result = demo.process_audio(file_path=source_wav, **options)
        demo.save_embeddings(result, target)
|
File without changes
|