PyPI - pelican-nlp - Versions diffs - 0.2.6__tar.gz → 0.3.0__tar.gz - Mend

pelican-nlp 0.2.6tar.gz → 0.3.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

{pelican_nlp-0.2.6/pelican_nlp.egg-info → pelican_nlp-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pelican_nlp
-Version: 0.2.6
+Version: 0.3.0
 Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
 Author-email: Yves Pauli <yves.pauli@gmail.com>
 License-Expression: CC-BY-NC-4.0
@@ -69,7 +69,7 @@ Create conda environment
 .. code-block:: bash
-    conda create -n pelican-nlp python=3.10
+    conda create -n pelican-nlp -c defaults python=3.10
 Activate environment
@@ -157,7 +157,7 @@ Features
 Examples
 ========
-You can find example setups on the github repository in the 'examples` folder: https://github.com/ypauli/pelican_nlp/tree/main/examples
+You can find example setups on the GitHub repository in the `examples <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder.
 Contributing
 ============

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/README.rst RENAMED Viewed

@@ -23,7 +23,7 @@ Create conda environment
 .. code-block:: bash
-    conda create -n pelican-nlp python=3.10
+    conda create -n pelican-nlp -c defaults python=3.10
 Activate environment
@@ -111,7 +111,7 @@ Features
 Examples
 ========
-You can find example setups on the github repository in the 'examples` folder: https://github.com/ypauli/pelican_nlp/tree/main/examples
+You can find example setups on the GitHub repository in the `examples <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder.
 Contributing
 ============

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/examples/PyPI_testing_discourse/config_discourse.yml RENAMED Viewed

@@ -5,7 +5,6 @@ discourse: &discourse_flag true
 #=====================================
 #general configurations; always adapt
-PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/PyPI_testing_discourse"
 language: "german" # Possibly add options for German and English
 task_name: "interview" # Give name of task used for creation of the input file (e.g., ['fluency', 'interview'])

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/examples/PyPI_testing_image-descriptions/config_image-descriptions.yml RENAMED Viewed

@@ -4,7 +4,6 @@
 # Basic Settings
 # -------------
 input_file: "text"  # Options: 'text' or 'audio'
-PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/PyPI_testing_image-descriptions"
 language: "german"  # Options: 'german', 'english'
 recompute_everything: true  # If false, reuses previously computed results

pelican_nlp-0.3.0/pelican_nlp/_version.py ADDED Viewed

@@ -0,0 +1 @@
+__version__ = "0.3.0"

pelican_nlp-0.3.0/pelican_nlp/config.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+Global configuration settings for the Pelican project.
+This is not the configuration.yml file that users create and adapt for their own project.
+For consistency of pipeline, DO NOT CHANGE.
+"""
+# Debug flag
+DEBUG_MODE = False
+def debug_print(*args, **kwargs):
+    """Forward the arguments to ``print`` when ``DEBUG_MODE`` is true; otherwise do nothing."""
+    if not DEBUG_MODE:
+        return
+    print(*args, **kwargs)

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/core/corpus.py RENAMED Viewed

@@ -15,20 +15,24 @@ import os
 import pandas as pd
 import re
+from pelican_nlp.config import debug_print
 class Corpus:
     def __init__(self, corpus_name, documents, configuration_settings, project_folder):
+        """Store the corpus documents and settings and build the preprocessing pipeline.
+
+        ``corpus_name`` is expected to look like ``"<key>-<value>"``. It is split
+        at the first dash only, so a value that itself contains a dash is kept
+        whole. A name without a dash yields the whole name as ``key`` and an
+        empty ``value`` instead of raising ``IndexError``.
+        """
         self.name = corpus_name
+        self.key, _, self.value = corpus_name.partition('-')
         self.documents = documents
         self.config = configuration_settings
         self.project_folder = project_folder
-        self.derivative_dir = project_folder / 'derivatives'
+        self.derivatives_dir = project_folder / 'derivatives'
         self.pipeline = TextPreprocessingPipeline(self.config)
         self.task = configuration_settings['task_name']
         self.results_path = None
     def preprocess_all_documents(self):
         """Preprocess all documents"""
-        print('Preprocessing all documents...')
+        print(f'Preprocessing all documents of corpus {self.name}...')
         for document in self.documents:
             document.detect_sections()
             document.process_document(self.pipeline)
@@ -43,21 +47,15 @@ class Corpus:
         """Create separate aggregated results CSV files for each metric."""
         print("Creating aggregated results files per metric...")
-        try:
-            derivatives_path = os.path.dirname(os.path.dirname(self.documents[0].results_path))
-        except (AttributeError, IndexError):
-            print("Error: No valid results path found in documents")
-            return
         # Create aggregations folder
-        aggregation_path = os.path.join(derivatives_path, 'aggregations')
+        aggregation_path = os.path.join(self.derivatives_dir, 'aggregations')
         os.makedirs(aggregation_path, exist_ok=True)
         # Initialize results dictionary with metrics as keys
         results_by_metric = {}
         # Walk through all directories in derivatives
-        for root, dirs, files in os.walk(derivatives_path):
+        for root, dirs, files in os.walk(self.derivatives_dir):
             # Skip the aggregations directory itself
             if 'aggregations' in root:
                 continue
@@ -115,6 +113,7 @@ class Corpus:
         logits_options = self.config['options_logits']
         print('logits extraction in progress')
         model_name = logits_options['model_name']
         logitsExtractor = LogitsExtractor(logits_options,
                                           self.pipeline,
@@ -144,7 +143,7 @@ class Corpus:
                     #'logits' list of dictionaries; keys token, logprob_actual, logprob_max, entropy, most_likely_token
                     store_features_to_csv(logits,
-                                          self.derivative_dir,
+                                          self.derivatives_dir,
                                           self.documents[i],
                                           metric='logits')
@@ -154,9 +153,12 @@ class Corpus:
         embedding_options = self.config['options_embeddings']
         print('Embeddings extraction in progress...')
         embeddingsExtractor = EmbeddingsExtractor(embedding_options, self.project_folder)
+        debug_print(len(self.documents))
         for i in range(len(self.documents)):
+            debug_print(f'cleaned sections: {self.documents[i].cleaned_sections}')
             for key, section in self.documents[i].cleaned_sections.items():
-                print(f'Processing section {key}')
+                debug_print(f'Processing section {key}')
                 if self.config['discourse']:
                     section = TextDiarizer.parse_speaker(section, self.config['subject_speakertag'], embedding_options['keep_speakertags'])
@@ -175,7 +177,7 @@ class Corpus:
                         from pelican_nlp.extraction.semantic_similarity import calculate_semantic_similarity, \
                             get_semantic_similarity_windows
                         consecutive_similarities, mean_similarity = calculate_semantic_similarity(utterance)
-                        print(f'Mean semantic similarity: {mean_similarity:.4f}')
+                        debug_print(f'Mean semantic similarity: {mean_similarity:.4f}')
                         for window_size in self.config['options_semantic-similarity']['window_sizes']:
                             window_stats = get_semantic_similarity_windows(utterance, window_size)
@@ -187,7 +189,7 @@ class Corpus:
                                     'std_of_window_stds': window_stats[3],
                                     'mean_of_window_medians': window_stats[4]
                                 }
-                                print(f'Window {window_size} stats - mean: {window_stats[0]:.4f}, std: {window_stats[1]:.4f}, median: {window_stats[4]:.4f}')
+                                debug_print(f'Window {window_size} stats - mean: {window_stats[0]:.4f}, std: {window_stats[1]:.4f}, median: {window_stats[4]:.4f}')
                             else:
                                 window_data = {
                                     'mean': window_stats[0] if isinstance(window_stats, tuple) else window_stats,
@@ -195,16 +197,16 @@ class Corpus:
                                 }
                             store_features_to_csv(window_data,
-                                                  self.derivative_dir,
+                                                  self.derivatives_dir,
                                                   self.documents[i],
                                                   metric=f'semantic-similarity-window-{window_size}')
                     if self.config['options_embeddings']['distance-from-randomness']:
                         from pelican_nlp.extraction.distance_from_randomness import get_distance_from_randomness
                         divergence = get_distance_from_randomness(utterance, self.config["options_dis_from_randomness"])
-                        print(f'Divergence from optimality metrics: {divergence}')
+                        debug_print(f'Divergence from optimality metrics: {divergence}')
                         store_features_to_csv(divergence,
-                                              self.derivative_dir,
+                                              self.derivatives_dir,
                                               self.documents[i],
                                               metric='distance-from-randomness')
@@ -230,7 +232,7 @@ class Corpus:
                         cleaned_embeddings = utterance if isinstance(utterance, list) else [(k, v) for k, v in utterance.items()]
                     store_features_to_csv(cleaned_embeddings,
-                                          self.derivative_dir,
+                                          self.derivatives_dir,
                                           self.documents[i],
                                           metric='embeddings')
         return
@@ -241,11 +243,11 @@ class Corpus:
             results, recording_length = AudioFeatureExtraction.opensmile_extraction(self.documents[i].file, self.config['opensmile_configurations'])
             self.documents[i].recording_length = recording_length  # Store the recording length
             results['subject_ID'] = self.documents[i].subject_ID  # Set the subject ID
-            print('results obtained')
+            print('opensmile results obtained')
             store_features_to_csv(results,
-                                self.derivative_dir,
-                                self.documents[i],
-                                metric='opensmile-features')
+                                  self.derivatives_dir,
+                                  self.documents[i],
+                                  metric='opensmile-features')
     def extract_prosogram(self):
         from pelican_nlp.extraction.acoustic_feature_extraction import AudioFeatureExtraction
@@ -257,14 +259,8 @@ class Corpus:
         """Create CSV file with summarized document parameters based on config specifications."""
         print("Creating document information summary...")
-        try:
-            derivatives_path = os.path.dirname(os.path.dirname(self.documents[0].results_path))
-        except (AttributeError, IndexError):
-            print("Error: No valid results path found in documents")
-            return
         # Create document_information folder inside aggregations
-        doc_info_path = os.path.join(derivatives_path, 'aggregations', 'document_information')
+        doc_info_path = os.path.join(self.derivatives_dir, 'aggregations', 'document_information')
         os.makedirs(doc_info_path, exist_ok=True)
         # Define output file path
@@ -293,4 +289,4 @@ class Corpus:
         # Convert to DataFrame and save to CSV
         df = pd.DataFrame(document_info)
         df.to_csv(output_file, index=False)
-        print(f"Document information saved to: {output_file}")
+        debug_print(f"Document information saved to: {output_file}")

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/core/subject.py RENAMED Viewed

@@ -4,12 +4,12 @@ The Subject class stores all subject specific information and a list of correspo
 """
 class Subject:
-    def __init__(self, subjectID, description=None):
+    def __init__(self, name, description=None):
-        self.subjectID = subjectID
+        self.name = name
+        self.subjectID = None
         self.gender = None
         self.age = None
-        self.name = None
         self.description = description  # Description of the subject
         self.documents = []  # List of TextDocument instances
         self.numberOfSessions = None

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/extraction/acoustic_feature_extraction.py RENAMED Viewed

@@ -49,7 +49,7 @@ class AudioFeatureExtraction:
             profile (DataFrame): Prosogram analysis results
         """
         import parselmouth
-        from pelican.praat import PROSOGRAM_SCRIPT
+        from pelican_nlp.praat import PROSOGRAM_SCRIPT
         try:
             sound = parselmouth.Sound(file)
             # Common Prosogram parameters

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/extraction/extract_embeddings.py RENAMED Viewed

@@ -1,6 +1,8 @@
 from pelican_nlp.extraction.language_model import Model
 from pelican_nlp.preprocessing.text_tokenizer import TextTokenizer
+from pelican_nlp.config import debug_print
 class EmbeddingsExtractor:
     def __init__(self, embeddings_configurations, project_path):
         self.embeddings_configurations = embeddings_configurations
@@ -22,7 +24,7 @@ class EmbeddingsExtractor:
             # Tokenize the input text
             inputs = self.Tokenizer.tokenize_text(text)
-            print(f'inputs are: {inputs}')
+            debug_print(f'inputs are: {inputs}')
             if self.embeddings_configurations['pytorch_based_model']:
                 #e.g. RoBERTa Model or Llama Model

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/extraction/extract_logits.py RENAMED Viewed

@@ -2,6 +2,8 @@ import torch
 import torch.nn.functional as F
 from tqdm import tqdm
+from pelican_nlp.config import debug_print
 class LogitsExtractor:
     def __init__(self, options, pipeline, project_path):
@@ -13,9 +15,9 @@ class LogitsExtractor:
     def extract_features(self, section, tokenizer, model):
-        print(f'section to tokenize: {section}')
+        debug_print(f'section to tokenize: {section}')
         tokens = tokenizer.tokenize_text(section)
-        print(tokens)
+        debug_print(tokens)
         chunk_size = self.options['chunk_size']
         overlap_size = self.options['overlap_size']

pelican_nlp-0.3.0/pelican_nlp/extraction/language_model.py ADDED Viewed

@@ -0,0 +1,125 @@
+import torch
+import psutil
+import os
+import shutil
+from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
+from transformers import AutoModelForCausalLM
+class Model:
+    """Load one of the language models supported by the pipeline.
+
+    Supported ``model_name`` values are ``'fastText'``, ``'xlm-roberta-base'``
+    and ``'DiscoResearch/Llama3-German-8B-32k'``; any other name makes
+    :meth:`load_model` raise ``ValueError``.
+    """
+
+    def __init__(self, model_name, project_path):
+        self.model_name = model_name
+        self.model_instance = None  # set by load_model()
+        self.device_map = None  # set by device_map_creation()
+        self.PROJECT_PATH = project_path
+
+    def load_model(self, empty_weights=False, trust_remote_code=False):
+        """Load the model named by ``self.model_name`` into ``self.model_instance``.
+
+        Args:
+            empty_weights: Only used for the Llama model; when true the model is
+                instantiated inside ``init_empty_weights()``.
+            trust_remote_code: Forwarded to ``from_pretrained`` for the
+                transformers-based models.
+
+        Raises:
+            ValueError: If the model name is not supported, or if the fastText
+                file still cannot be loaded after it has been downloaded.
+        """
+        if self.model_name == 'fastText':
+            self._load_fasttext()
+            return
+        if self.model_name == 'xlm-roberta-base':
+            from transformers import AutoModel
+            self.model_instance = AutoModel.from_pretrained(
+                self.model_name,
+                trust_remote_code=trust_remote_code,
+                use_safetensors=True
+            )
+            print('RoBERTa model loaded.')
+        elif self.model_name == 'DiscoResearch/Llama3-German-8B-32k':
+            from contextlib import nullcontext
+            # Same from_pretrained call either way; only the surrounding context differs.
+            loading_context = init_empty_weights() if empty_weights else nullcontext()
+            with loading_context:
+                self.model_instance = AutoModelForCausalLM.from_pretrained(
+                    self.model_name,
+                    trust_remote_code=trust_remote_code,
+                    use_safetensors=True
+                )
+            print('Llama3-German-8B-32k loaded')
+        else:
+            raise ValueError("Invalid model name.")
+        # Both transformers-based models are dispatched according to the inferred device map.
+        self.device_map_creation()
+        self.model_instance = dispatch_model(self.model_instance, device_map=self.device_map)
+        print('Model dispatched to appropriate devices.')
+
+    def _load_fasttext(self):
+        """Load the German fastText vectors from ``~/.fasttext``, downloading them if needed."""
+        import fasttext
+        # Create a model directory if it doesn't exist
+        model_dir = os.path.join(os.path.expanduser('~'), '.fasttext')
+        os.makedirs(model_dir, exist_ok=True)
+        model_path = os.path.join(model_dir, 'cc.de.300.bin')
+        # Download only if the model doesn't exist or cannot be loaded
+        need_download = True
+        if os.path.exists(model_path):
+            try:
+                self.model_instance = fasttext.load_model(model_path)
+                need_download = False
+            except ValueError:
+                print("Existing model file is corrupted, re-downloading...")
+                os.remove(model_path)
+        if need_download:
+            print("Downloading FastText model...")
+            self._download_fasttext(model_path)
+            # Verify the downloaded model
+            try:
+                self.model_instance = fasttext.load_model(model_path)
+            except ValueError as e:
+                raise ValueError(f"Failed to load downloaded model: {str(e)}. Please try removing {model_path} and running again.") from e
+        print(f'FastText model loaded successfully from {model_path}')
+
+    @staticmethod
+    def _download_fasttext(model_path):
+        """Fetch ``cc.de.300.bin`` to ``model_path``, falling back to a direct download."""
+        import fasttext.util
+        try:
+            # Try the built-in FastText downloader first
+            fasttext.util.download_model('de', if_exists='ignore')
+            # The file is looked for in the current working directory
+            downloaded_file = 'cc.de.300.bin'
+            if not os.path.exists(downloaded_file):
+                raise FileNotFoundError("FastText downloader didn't create the expected file")
+            shutil.move(downloaded_file, model_path)
+        except (OSError, ValueError) as e:  # FileNotFoundError is an OSError
+            print(f"FastText downloader failed, using direct download: {str(e)}")
+            import gzip
+            import urllib.request
+            url = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz'
+            print(f"Downloading from {url}...")
+            temp_gz_path = model_path + '.gz'
+            try:
+                urllib.request.urlretrieve(url, temp_gz_path)
+                print("Decompressing model file...")
+                # Stream the archive instead of reading the whole model into memory.
+                with gzip.open(temp_gz_path, 'rb') as f_in, open(model_path, 'wb') as f_out:
+                    shutil.copyfileobj(f_in, f_out)
+            finally:
+                # Never leave the temporary archive behind, even on failure.
+                if os.path.exists(temp_gz_path):
+                    os.remove(temp_gz_path)
+            print("Model decompressed successfully")
+
+    @staticmethod
+    def _memory_budget(total_bytes):
+        """Return ``total_bytes`` as a whole-GB string with 3 GB held back (at least ``'1GB'``)."""
+        return str(max(int(total_bytes / (1024 ** 3)) - 3, 1)) + 'GB'
+
+    def device_map_creation(self):
+        """Infer ``self.device_map`` for ``self.model_instance`` from the available memory.
+
+        GPU 0 is only added to the memory budget when CUDA is available, so
+        CPU-only machines no longer fail while querying a missing CUDA device.
+        """
+        max_memory = {}
+        if torch.cuda.is_available():
+            print(f'{torch.cuda.get_device_name(0)} available.')
+            max_memory[0] = self._memory_budget(torch.cuda.get_device_properties(0).total_memory)
+        else:
+            print('Careful: Cuda not available, using CPU. This can be slow. Consider running pipeline on different device')
+        # Keep the GPU entry (if any) first, followed by cpu and disk.
+        max_memory['cpu'] = self._memory_budget(psutil.virtual_memory().total)
+        max_memory['disk'] = '200GB'
+        self.device_map = infer_auto_device_map(self.model_instance, max_memory=max_memory)

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/main.py RENAMED Viewed

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """
-Pelican Project
-===============
+Pelican-nlp Project
+===================
-Pelican is a tool developed to enable consistent and reproducible language processing.
+Pelican-nlp is a tool developed to enable consistent and reproducible language processing.
 Main entry point for the Pelican project handling document processing and metric extraction.
 Author: Yves Pauli
@@ -23,6 +23,9 @@ import sys
 from pelican_nlp.core import Corpus
 from pelican_nlp.utils.setup_functions import subject_instantiator, load_config, remove_previous_derivative_dir
 from pelican_nlp.preprocessing import LPDS
+from pelican_nlp.utils.filename_parser import parse_lpds_filename
+from pelican_nlp.config import debug_print
 project_path = '/home/yvespauli/PycharmProjects/PyPI_testing_fluency/config_fluency.yml'
@@ -30,7 +33,8 @@ class Pelican:
     """Main class for the Pelican project handling document processing and metric extraction."""
-    def __init__(self, config_path: str = None, dev_mode: bool = True) -> None:
+    def __init__(self, config_path: str = None, dev_mode: bool = False) -> None:
         self.dev_mode = dev_mode
         # If no config path is provided, use the default config from package; used for dev-mode
@@ -83,23 +87,25 @@ class Pelican:
         subjects = subject_instantiator(self.config, self.project_path)
         # Process each corpus
-        for corpus_name in self.config['corpus_names']:
-            self._process_corpus(corpus_name, subjects)
-    def _process_corpus(self, corpus_name: str, subjects: List) -> None:
+        for corpus_value in self.config['corpus_values']:
+            self._process_corpus(self.config['corpus_key'], corpus_value, subjects)
+    def _process_corpus(self, corpus_key: str, corpus_value: str, subjects: List) -> None:
         """Process a single corpus including preprocessing and metric extraction."""
-        print(f'Processing corpus: {corpus_name}')
-        corpus_documents = self._identify_corpus_files(subjects, corpus_name)
-        corpus = Corpus(corpus_name, corpus_documents[corpus_name], self.config, self.project_path)
+        corpus_entity = corpus_key + '-' + corpus_value
+        print(f'Processing corpus: {corpus_entity}')
+        debug_print(subjects, corpus_entity)
+        corpus_documents = self._identify_corpus_files(subjects, corpus_entity)
+        debug_print(len(corpus_documents))
+        corpus = Corpus(corpus_entity, corpus_documents[corpus_entity], self.config, self.project_path)
-        for document in corpus_documents[corpus_name]:
-            document.corpus_name = corpus_name
+        for document in corpus_documents[corpus_entity]:
+            document.corpus_name = corpus_entity
         if self.config['input_file']=='text':
             corpus.preprocess_all_documents()
-            print(f'Corpus {corpus_name} is preprocessed')
+            print(f'Corpus {corpus_key} is preprocessed')
             self._extract_metrics(corpus)
@@ -140,18 +146,34 @@ class Pelican:
         self._clear_gpu_memory()
-    def _identify_corpus_files(self, subjects: List, corpus: str) -> Dict:
-        """Identify and group files belonging to a specific corpus."""
-        corpus_dict = {corpus: []}
-        for subject in subjects:
-            for document in subject.documents:
-                name = Path(document.name)
-                document.extension = name.suffix
-                # Split by both '_' and '.' to get all parts
-                parts = name.stem.replace('.', '_').split('_')
-                # Check if corpus name appears in any part
-                if corpus in parts:
-                    corpus_dict[corpus].append(document)
+    def _identify_corpus_files(self, subjects: List, entity: str) -> Dict:
+        """Group the documents whose parsed filename entities match ``entity``.
+
+        Args:
+            subjects: Subjects whose ``documents`` are searched.
+            entity: Either ``"<key>-<value>"`` (only that key is compared) or a
+                bare value (compared against the value of every key).
+
+        Returns:
+            A one-entry dict mapping ``entity`` to the list of matching documents.
+        """
+        debug_print(f'identifying corpus files')
+        corpus_dict = {entity: []}
+        debug_print(len(subjects))
+        # Split once, at the first dash; an empty separator means a bare value.
+        key, separator, value = entity.partition('-')
+        for subject in subjects:
+            debug_print(subject.documents)
+            for document in subject.documents:
+                entities = parse_lpds_filename(document.name)
+                debug_print(entities)
+                if separator:
+                    matches = key in entities and str(entities[key]) == value
+                else:
+                    # Convert all values to strings for comparison
+                    matches = any(str(val) == entity for val in entities.values())
+                if matches:
+                    corpus_dict[entity].append(document)
         return corpus_dict
     def _handle_output_directory(self) -> None:
@@ -207,4 +229,4 @@ class Pelican:
 if __name__ == '__main__':
-    Pelican(project_path).run()
+    Pelican(project_path, dev_mode=True).run()

{pelican_nlp-0.2.6 → pelican_nlp-0.3.0}/pelican_nlp/preprocessing/LPDS.py RENAMED Viewed

@@ -1,6 +1,8 @@
 import re
 import os
+from pelican_nlp.config import debug_print
 class LPDS:
     def __init__(self, project_folder, multiple_sessions):
         self.project_folder = project_folder
@@ -18,7 +20,7 @@ class LPDS:
         suggested_files = ["dataset_description.json", "README", "CHANGES", "participants.tsv"]
         for file in suggested_files:
             if not os.path.isfile(os.path.join(self.project_folder, file)):
-                print(f"Warning: Missing suggested file '{file}' in the project folder.")
+                debug_print(f"Warning: Missing suggested file '{file}' in the project folder.")
         # Check for the 'subjects' folder
         if not os.path.isdir(self.subjects_folder):
@@ -38,15 +40,16 @@ class LPDS:
             if self.multiple_sessions:
                 session_folders = [f for f in os.listdir(subject_path) if
                                    os.path.isdir(os.path.join(subject_path, f))]
-                if not session_folders:
+                if session_folders:
+                    if 'ses-01' not in session_folders:
+                        print(f"Warning: Ideally, the session folders should follow the naming convention 'ses-x'.")
+                else:
                     print(f"Warning: No session folders found in '{subject_folder}'.")
-                if 'ses-01' not in session_folders:
-                    print(f"Warning: Ideally, the session folders should follow the naming convention 'ses-x'.")
             # Check for optional subject_metadata file
             metadata_file = os.path.join(subject_path, "subject_metadata")
             if not os.path.isfile(metadata_file):
-                #print(f"Note: Optional 'subject_metadata' file is missing in '{subject_folder}'.")
+                debug_print(f"Note: Optional 'subject_metadata' file is missing in '{subject_folder}'.")
                 continue
             session_folders = subject_folder
@@ -68,7 +71,7 @@ class LPDS:
                         else:
                             pattern = fr"^{subject_folder}_{task_folder}.*"
                         if not re.match(pattern, file):
-                            print(f"Warning: File '{file}' in '{task_folder}' does not follow the LPDS naming conventions")
+                            debug_print(f"Warning: File '{file}' in '{task_folder}' does not follow the LPDS naming conventions")
     def derivative_dir_creator(self):
         # Create the 'derivatives' folder if it doesn't exist

{pelican_nlp-0.2.6/examples/PyPI_testing_fluency → pelican_nlp-0.3.0/pelican_nlp/sample_configuration_files}/config_fluency.yml RENAMED Viewed

@@ -5,7 +5,6 @@ fluency_task: &fluency_flag true
 #========================================
 #general configurations; always adapt
-PATH_TO_PROJECT_FOLDER: "/home/yvespauli/PycharmProjects/pelican_testing"
 language: "german"
 multiple_sessions: &session_flag false

pelican-nlp 0.2.6__tar.gz → 0.3.0__tar.gz

pelican-nlp 0.2.6tar.gz → 0.3.0tar.gz