nkululeko 0.94.2__py3-none-any.whl → 0.95.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
nkululeko/glob_conf.py CHANGED
@@ -1,5 +1,14 @@
 # glob_conf.py
 
+# Initialize global variables
+config = None
+label_encoder = None
+util = None
+module = None
+report = None
+labels = None
+target = None
+
 
 def init_config(config_obj):
     global config
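Pre-initializing these names means any module that does `import nkululeko.glob_conf as glob_conf` can probe them safely instead of raising `AttributeError` before `init_config` has run. A minimal sketch of the guard pattern this enables (the `report_labels` helper is illustrative, not part of the package):

```python
import nkululeko.glob_conf as glob_conf

def report_labels():
    # The attribute now always exists; it may simply still be None
    if glob_conf.label_encoder is None:
        print("label encoder not initialized yet")
        return
    print(f"classes: {list(glob_conf.label_encoder.classes_)}")
```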
nkululeko/modelrunner.py CHANGED
@@ -279,9 +279,18 @@ class Modelrunner:
         self.util.debug(
             f"balanced with: {balancing}, new size: {X_res.shape[0]} (was {orig_size})"
         )
-        le = glob_conf.label_encoder
-        res = y_res.value_counts()
-        resd = {}
-        for i, e in enumerate(le.inverse_transform(res.index.values)):
-            resd[e] = res.values[i]
-        self.util.debug(f"{resd})")
+        # Check if label encoder is available before using it
+        if (
+            hasattr(glob_conf, "label_encoder")
+            and glob_conf.label_encoder is not None
+        ):
+            le = glob_conf.label_encoder
+            res = y_res.value_counts()
+            resd = {}
+            for i, e in enumerate(le.inverse_transform(res.index.values)):
+                resd[e] = res.values[i]
+            self.util.debug(f"class distribution after balancing: {resd}")
+        else:
+            self.util.debug(
+                "Label encoder not available, skipping class distribution report"
+            )
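The guarded block decodes class indices back to their label names before logging. A self-contained sketch of the same decoding with scikit-learn and pandas (the sample labels are made up):

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_res = pd.Series(le.fit_transform(["happy", "sad", "happy", "neutral"]))

res = y_res.value_counts()
resd = {
    label: int(count)
    for label, count in zip(le.inverse_transform(res.index.values), res.values)
}
print(resd)  # e.g. {'happy': 2, 'sad': 1, 'neutral': 1}
```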
@@ -84,6 +84,17 @@ class TunedModel(BaseModel):
         # print finetuning information via debug
         self.util.debug(f"Finetuning from model: {pretrained_model}")
 
+        if any(
+            emotion_model in pretrained_model
+            for emotion_model in ["emotion2vec", "iic/emotion2vec"]
+        ):
+            self._init_emotion2vec_model(pretrained_model)
+            return
+
+        self._init_huggingface_model(pretrained_model)
+
+    def _init_huggingface_model(self, pretrained_model):
+        """Initialize HuggingFace transformer model for finetuning."""
         # create dataset
         dataset = {}
         target_name = glob_conf.target
@@ -99,41 +110,9 @@ class TunedModel(BaseModel):
             df = y.reset_index()
             df.start = df.start.dt.total_seconds()
             df.end = df.end.dt.total_seconds()
-            # ds = datasets.Dataset.from_pandas(df)
-            # dataset[split] = ds
 
-        # self.dataset = datasets.DatasetDict(dataset)
             if split == "train" and self.balancing:
-                if self.balancing == "ros":
-                    from imblearn.over_sampling import RandomOverSampler
-
-                    sampler = RandomOverSampler(random_state=42)
-                elif self.balancing == "smote":
-                    from imblearn.over_sampling import SMOTE
-
-                    sampler = SMOTE(random_state=42)
-                elif self.balancing == "adasyn":
-                    from imblearn.over_sampling import ADASYN
-
-                    sampler = ADASYN(random_state=42)
-                else:
-                    self.util.error(f"Unknown balancing algorithm: {self.balancing}")
-
-                X_resampled, y_resampled = sampler.fit_resample(
-                    df[["start", "end"]], df["targets"]
-                )
-                df = pd.DataFrame(
-                    {
-                        "start": X_resampled["start"],
-                        "end": X_resampled["end"],
-                        "targets": y_resampled,
-                    }
-                )
-
-                # print the before and after class distribution
-                self.util.debug(
-                    f"balanced with: {self.balancing}, new size: {len(df)}, was {len(data_sources[split])}"
-                )
+                df = self._apply_balancing(df, data_sources[split])
 
             ds = datasets.Dataset.from_pandas(df)
             dataset[split] = ds
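The balancing branch now delegates to `_apply_balancing` (defined in a later hunk), which resamples only the `(start, end)` segment boundaries plus targets rather than the audio itself. The same idea in isolation, with toy data and `RandomOverSampler`:

```python
import pandas as pd
from imblearn.over_sampling import RandomOverSampler

df = pd.DataFrame(
    {
        "start": [0.0, 1.0, 2.0, 3.0],
        "end": [1.0, 2.0, 3.0, 4.0],
        "targets": [0, 0, 0, 1],  # imbalanced: three 0s, one 1
    }
)

sampler = RandomOverSampler(random_state=42)
X_res, y_res = sampler.fit_resample(df[["start", "end"]], df["targets"])
balanced = pd.DataFrame(
    {"start": X_res["start"], "end": X_res["end"], "targets": y_res}
)
print(balanced["targets"].value_counts())  # both classes now have 3 rows
```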
@@ -144,6 +123,13 @@ class TunedModel(BaseModel):
         if self.is_classifier:
             self.util.debug("Task is classification.")
             le = glob_conf.label_encoder
+            if le is None:
+                self.util.error(
+                    "Label encoder is not available. Make sure to set up data loading properly."
+                )
+                raise ValueError(
+                    "Label encoder is missing. Initialization cannot proceed. Ensure data loading is correctly configured."
+                )
             mapping = dict(zip(le.classes_, range(len(le.classes_))))
             target_mapping = {k: int(v) for k, v in mapping.items()}
             target_mapping_reverse = {
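The mapping built right after this guard is what ends up in the HuggingFace `label2id`/`id2label` config fields. A sketch with toy classes:

```python
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder().fit(["anger", "happiness", "neutral"])
mapping = dict(zip(le.classes_, range(len(le.classes_))))
target_mapping = {k: int(v) for k, v in mapping.items()}
target_mapping_reverse = {v: k for k, v in target_mapping.items()}
print(target_mapping)          # {'anger': 0, 'happiness': 1, 'neutral': 2}
print(target_mapping_reverse)  # {0: 'anger', 1: 'happiness', 2: 'neutral'}
```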
@@ -191,16 +177,136 @@ class TunedModel(BaseModel):
             feature_extractor=feature_extractor,
             tokenizer=tokenizer,
         )
-        assert self.processor.feature_extractor.sampling_rate == self.sampling_rate
+        assert self.processor.feature_extractor.sampling_rate == self.sampling_rate  # type: ignore
 
-        self.model = Model.from_pretrained(
+        self.model = Model.from_pretrained(  # type: ignore
             pretrained_model,
             config=self.config,
         )
-        self.model.freeze_feature_extractor()
+        self.model.freeze_feature_extractor()  # type: ignore
+        self.model.train()  # type: ignore
+        self.model_initialized = True
+
+    def _init_emotion2vec_model(self, pretrained_model):
+        """Initialize emotion2vec model for finetuning."""
+        try:
+            from funasr import AutoModel
+        except ImportError:
+            self.util.error(
+                "FunASR is required for emotion2vec finetuning. "
+                "Please install with: pip install funasr"
+            )
+            return
+
+        model_mapping = {
+            "emotion2vec": "emotion2vec/emotion2vec_base",
+            "emotion2vec-base": "emotion2vec/emotion2vec_base",
+            "emotion2vec-seed": "emotion2vec/emotion2vec_plus_seed",
+            "emotion2vec-large": "emotion2vec/emotion2vec_plus_large",
+        }
+
+        if pretrained_model in model_mapping:
+            model_path = model_mapping[pretrained_model]
+        else:
+            model_path = pretrained_model
+
+        self._create_emotion2vec_dataset()
+
+        self.emotion2vec_backbone = AutoModel(
+            model=model_path,
+            hub="hf",  # Use HuggingFace Hub instead of ModelScope
+        )
+
+        if self.is_classifier:
+            le = glob_conf.label_encoder
+            if le is None:
+                self.util.error("Label encoder not available for classification")
+                return
+            num_labels = len(le.classes_)
+            label_mapping = dict(zip(le.classes_, range(len(le.classes_))))
+            self.config = EmotionVecConfig(
+                num_labels=num_labels,
+                label2id=label_mapping,
+                id2label={v: k for k, v in label_mapping.items()},
+                is_classifier=True,
+                sampling_rate=self.sampling_rate,
+                final_dropout=self.drop,
+                model_name=pretrained_model,
+            )
+        else:
+            self.config = EmotionVecConfig(
+                num_labels=1,
+                is_classifier=False,
+                sampling_rate=self.sampling_rate,
+                final_dropout=self.drop,
+                model_name=pretrained_model,
+            )
+
+        self.model = Emotion2vecModel(self.emotion2vec_backbone, self.config)
         self.model.train()
         self.model_initialized = True
 
+        self.processor = None
+
+    def _create_emotion2vec_dataset(self):
+        """Create dataset for emotion2vec training."""
+        dataset = {}
+        target_name = glob_conf.target
+        data_sources = {
+            "train": pd.DataFrame(self.df_train[target_name]),
+            "dev": pd.DataFrame(self.df_test[target_name]),
+        }
+
+        for split in ["train", "dev"]:
+            df = data_sources[split]
+            y = df[target_name].astype("float")
+            y.name = "targets"
+            df = y.reset_index()
+            df.start = df.start.dt.total_seconds()
+            df.end = df.end.dt.total_seconds()
+
+            if split == "train" and self.balancing:
+                df = self._apply_balancing(df, data_sources[split])
+
+            ds = datasets.Dataset.from_pandas(df)
+            dataset[split] = ds
+
+        self.dataset = datasets.DatasetDict(dataset)
+
+    def _apply_balancing(self, df, original_df):
+        """Apply data balancing to training dataset."""
+        if self.balancing == "ros":
+            from imblearn.over_sampling import RandomOverSampler
+
+            sampler = RandomOverSampler(random_state=42)
+        elif self.balancing == "smote":
+            from imblearn.over_sampling import SMOTE
+
+            sampler = SMOTE(random_state=42)
+        elif self.balancing == "adasyn":
+            from imblearn.over_sampling import ADASYN
+
+            sampler = ADASYN(random_state=42)
+        else:
+            self.util.error(f"Unknown balancing algorithm: {self.balancing}")
+            return df
+
+        X_resampled, y_resampled = sampler.fit_resample(
+            df[["start", "end"]], df["targets"]
+        )
+        df = pd.DataFrame(
+            {
+                "start": X_resampled["start"],
+                "end": X_resampled["end"],
+                "targets": y_resampled,
+            }
+        )
+
+        self.util.debug(
+            f"balanced with: {self.balancing}, new size: {len(df)}, was {len(original_df)}"
+        )
+        return df
+
     def set_model_type(self, type):
         self.model_type = type
 
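For orientation, the FunASR calls used above (and again in `_process_single_audio` below) follow this shape. A minimal sketch of pulling an utterance-level embedding, assuming `funasr` is installed and `example.wav` is a 16 kHz mono file (both placeholders):

```python
from funasr import AutoModel

# hub="hf" fetches the checkpoint from the HuggingFace Hub instead of ModelScope
backbone = AutoModel(model="emotion2vec/emotion2vec_base", hub="hf")

res = backbone.generate(
    "example.wav",  # placeholder path
    granularity="utterance",
    extract_embedding=True,
)
feats = res[0]["feats"]  # utterance embedding, 768-dim for the base model
print(len(feats))
```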
@@ -224,11 +330,7 @@ class TunedModel(BaseModel):
         targets = [d["targets"] for d in data]
 
         signals = []
-        for file, start, end in zip(
-            files,
-            starts,
-            ends,
-        ):
+        for file, start, end in zip(files, starts, ends):
             offset = start
             duration = end - offset
             if self.max_duration_sec is not None:
@@ -240,18 +342,37 @@ class TunedModel(BaseModel):
             )
             signals.append(signal.squeeze())
 
-        input_values = self.processor(
-            signals,
-            sampling_rate=self.sampling_rate,
-            padding=True,
-        )
-        batch = self.processor.pad(
-            input_values,
-            padding=True,
-            return_tensors="pt",
-        )
-
-        batch["labels"] = torch.Tensor(targets)
+        if hasattr(self, "emotion2vec_backbone"):
+            max_length = max(len(s) for s in signals)
+            padded_signals = []
+            for s in signals:
+                if len(s) < max_length:
+                    padded = np.pad(s, (0, max_length - len(s)), mode="constant")
+                else:
+                    padded = s[:max_length]
+                padded_signals.append(padded)
+
+            batch = {
+                "input_values": torch.stack(
+                    [torch.tensor(s, dtype=torch.float32) for s in padded_signals]
+                ),
+                "labels": torch.tensor(
+                    targets,
+                    dtype=torch.float32 if not self.is_classifier else torch.long,
+                ),
+            }
+        else:
+            input_values = self.processor(
+                signals,
+                sampling_rate=self.sampling_rate,
+                padding=True,
+            )
+            batch = self.processor.pad(
+                input_values,
+                padding=True,
+                return_tensors="pt",
+            )
+            batch["labels"] = torch.Tensor(targets)
 
         return batch
 
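Because the emotion2vec path has no HuggingFace processor, the collator pads raw signals to the batch maximum by hand. The padding logic in isolation (toy signals):

```python
import numpy as np
import torch

signals = [np.ones(3, dtype=np.float32), np.ones(5, dtype=np.float32)]

max_length = max(len(s) for s in signals)
padded = [
    np.pad(s, (0, max_length - len(s)), mode="constant")
    if len(s) < max_length
    else s[:max_length]
    for s in signals
]
batch = torch.stack([torch.tensor(s, dtype=torch.float32) for s in padded])
print(batch.shape)  # torch.Size([2, 5])
```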
@@ -270,7 +391,22 @@ class TunedModel(BaseModel):
             # truth = p.label_ids[:, 0].astype(int)
             truth = p.label_ids
             preds = p.predictions
-            preds = np.argmax(preds, axis=1)
+
+            if isinstance(preds, tuple):
+                if len(preds) > 0:
+                    preds = preds[0]  # Extract logits from tuple
+                else:
+                    raise ValueError(f"Empty predictions tuple received: {preds}")
+
+            if hasattr(preds, "numpy"):
+                preds = preds.numpy()
+            elif hasattr(preds, "detach"):
+                preds = preds.detach().numpy()
+
+            if len(preds.shape) > 1 and preds.shape[1] > 1:
+                preds = np.argmax(preds, axis=1)
+            else:
+                preds = preds.flatten()
             scores = {}
             if self.is_classifier:
                 for name, metric in metrics.items():
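`compute_metrics` now tolerates the different shapes the `Trainer` can hand back: tuples of outputs, torch tensors, 2-D logits, or flat regression values. The normalization path as a standalone helper (the function name is illustrative):

```python
import numpy as np
import torch

def normalize_predictions(preds):
    # Trainer may return a tuple of outputs; the first element holds the logits
    if isinstance(preds, tuple):
        preds = preds[0]
    # Convert torch tensors to numpy
    if isinstance(preds, torch.Tensor):
        preds = preds.detach().cpu().numpy()
    # 2-D logits become class indices; 1-D outputs are flattened as-is
    if preds.ndim > 1 and preds.shape[1] > 1:
        return np.argmax(preds, axis=1)
    return preds.flatten()

print(normalize_predictions(torch.tensor([[0.1, 0.9], [0.8, 0.2]])))  # [1 0]
```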
@@ -329,12 +465,16 @@ class TunedModel(BaseModel):
                 model,
                 inputs,
                 return_outputs=False,
+                num_items_in_batch=None,
             ):
                 targets = inputs.pop("labels").squeeze()
                 targets = targets.type(torch.long)
 
                 outputs = model(**inputs)
-                logits = outputs[0].squeeze()
+                if hasattr(outputs, "logits"):
+                    logits = outputs.logits.squeeze()
+                else:
+                    logits = outputs[0].squeeze()
 
                 loss = criterion(logits, targets)
 
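Recent `transformers` releases pass an extra `num_items_in_batch` argument to `compute_loss` overrides, so the signature accepts (and ignores) it; the `hasattr(outputs, "logits")` check likewise covers both attribute-style model outputs and plain tuples, matching the hand-rolled `ModelOutput` classes later in this diff.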
@@ -365,7 +505,7 @@ class TunedModel(BaseModel):
             per_device_train_batch_size=self.batch_size,
             per_device_eval_batch_size=self.batch_size,
             gradient_accumulation_steps=self.accumulation_steps,
-            evaluation_strategy="steps",
+            eval_strategy="steps",
             num_train_epochs=self.epoch_num,
             fp16=self.device != "cpu",
             use_cpu=self.device == "cpu",
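The rename tracks `transformers`, where `evaluation_strategy` was deprecated in favor of `eval_strategy` and newer releases reject the old name. If both old and new versions had to be supported, a version-guarded kwarg would be one option (the 4.41.0 cutover here is an assumption; check the installed release):

```python
import transformers
from packaging import version

# Assumption: eval_strategy is accepted from transformers 4.41.0 onward
strategy_key = (
    "eval_strategy"
    if version.parse(transformers.__version__) >= version.parse("4.41.0")
    else "evaluation_strategy"
)
extra_args = {strategy_key: "steps"}
```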
@@ -385,16 +525,20 @@ class TunedModel(BaseModel):
             overwrite_output_dir=True,
         )
 
-        trainer = Trainer(
-            model=self.model,
-            data_collator=self.data_collator,
-            args=training_args,
-            compute_metrics=self.compute_metrics,
-            train_dataset=self.dataset["train"],
-            eval_dataset=self.dataset["dev"],
-            tokenizer=self.processor.feature_extractor,
-            callbacks=[transformers.integrations.TensorBoardCallback()],
-        )
+        trainer_kwargs = {
+            "model": self.model,
+            "data_collator": self.data_collator,
+            "args": training_args,
+            "compute_metrics": self.compute_metrics,
+            "train_dataset": self.dataset["train"],
+            "eval_dataset": self.dataset["dev"],
+            "callbacks": [transformers.integrations.TensorBoardCallback()],
+        }
+
+        if self.processor is not None:
+            trainer_kwargs["tokenizer"] = self.processor.feature_extractor
+
+        trainer = Trainer(**trainer_kwargs)
 
         trainer.train()
         trainer.save_model(self.torch_root)
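Building the kwargs dict lets one call site serve both back ends: the HuggingFace path passes its feature extractor as the tokenizer, while the emotion2vec path, whose `self.processor` is `None`, simply omits the argument.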
@@ -421,7 +565,7 @@ class TunedModel(BaseModel):
                 file, duration=end - start, offset=start, always_2d=True
             )
             assert sr == self.sampling_rate
-            prediction = self.model.predict(signal)
+            prediction = self.model.predict(signal)  # type: ignore
             results.append(prediction)
             # results.append(predictions.argmax())
         predictions = np.asarray(results)
@@ -483,13 +627,13 @@ class TunedModel(BaseModel):
         prediction = {}
         if self.is_classifier:
             # get the class probabilities
-            predictions = self.model.predict(signal)
+            predictions = self.model.predict(signal)  # type: ignore
             # pred = self.clf.predict(features)
             for i in range(len(self.labels)):
                 cat = self.labels[i]
                 prediction[cat] = predictions[i]
         else:
-            predictions = self.model.predict(signal)
+            predictions = self.model.predict(signal)  # type: ignore
             prediction = predictions
         return prediction
 
@@ -498,10 +642,16 @@ class TunedModel(BaseModel):
 
     def load(self, run, epoch):
         self.set_id(run, epoch)
-        self.model = Model.from_pretrained(
-            self.torch_root,
-            config=self.config,
-        )
+        if hasattr(self, "emotion2vec_backbone"):
+            model_path = os.path.join(self.torch_root, "pytorch_model.bin")
+            if os.path.exists(model_path):
+                self.model.load_state_dict(torch.load(model_path))
+                self.model.eval()
+        else:
+            self.model = Model.from_pretrained(
+                self.torch_root,
+                config=self.config,
+            )
         # print(f"loaded model type {type(self.model)}")
 
     def load_path(self, path, run, epoch):
@@ -511,20 +661,70 @@ class TunedModel(BaseModel):
 
 
 @dataclasses.dataclass
-class ModelOutput(transformers.file_utils.ModelOutput):
-    logits: torch.FloatTensor = None
-    hidden_states: typing.Tuple[torch.FloatTensor] = None
-    cnn_features: torch.FloatTensor = None
+class ModelOutput:
+    logits: typing.Optional[torch.Tensor] = None
+    hidden_states: typing.Optional[torch.Tensor] = None
+    cnn_features: typing.Optional[torch.Tensor] = None
+
+    def __getitem__(self, index):
+        """Make ModelOutput subscriptable for HuggingFace compatibility."""
+        if isinstance(index, slice):
+            items = [self.logits, self.hidden_states, self.cnn_features]
+            result = items[index]
+            filtered_result = [item for item in result if item is not None]
+
+            if not filtered_result and self.logits is not None:
+                return (self.logits,)
+
+            return tuple(filtered_result)
+        elif index == 0:
+            return self.logits
+        elif index == 1:
+            return self.hidden_states
+        elif index == 2:
+            return self.cnn_features
+        else:
+            raise IndexError(f"Index {index} out of range for ModelOutput")
+
+    def __len__(self):
+        """Return the number of available outputs."""
+        return 3
 
 
 @dataclasses.dataclass
-class ModelOutputReg(transformers.file_utils.ModelOutput):
-    logits: torch.FloatTensor
-    hidden_states: typing.Tuple[torch.FloatTensor] = None
-    attentions: typing.Tuple[torch.FloatTensor] = None
-    logits_framewise: torch.FloatTensor = None
-    hidden_states_framewise: torch.FloatTensor = None
-    cnn_features: torch.FloatTensor = None
+class ModelOutputReg:
+    logits: torch.Tensor
+    hidden_states: typing.Optional[torch.Tensor] = None
+    attentions: typing.Optional[torch.Tensor] = None
+    logits_framewise: typing.Optional[torch.Tensor] = None
+    hidden_states_framewise: typing.Optional[torch.Tensor] = None
+    cnn_features: typing.Optional[torch.Tensor] = None
+
+    def __getitem__(self, index):
+        """Make ModelOutputReg subscriptable for HuggingFace compatibility."""
+        if isinstance(index, slice):
+            items = [
+                self.logits,
+                self.hidden_states,
+                self.attentions,
+                self.logits_framewise,
+                self.hidden_states_framewise,
+                self.cnn_features,
+            ]
+            result = items[index]
+            return tuple(item for item in result if item is not None)
+        elif index == 0:
+            return self.logits
+        elif index == 1:
+            return self.hidden_states
+        elif index == 2:
+            return self.attentions
+        elif index == 3:
+            return self.logits_framewise
+        elif index == 4:
+            return self.hidden_states_framewise
+        elif index == 5:
+            return self.cnn_features
+        else:
+            raise IndexError(f"Index {index} out of range for ModelOutputReg")
+
+    def __len__(self):
+        """Return the number of available outputs."""
+        return 6
 
 
 class ModelHead(torch.nn.Module):
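Dropping the `transformers.file_utils.ModelOutput` base class (a deprecated import path in recent `transformers`) means tuple-style access has to be reimplemented, which the `__getitem__`/`__len__` methods above do. A quick behavioral check, assuming the `ModelOutput` dataclass above is in scope:

```python
import torch

out = ModelOutput(logits=torch.zeros(2, 4))
print(out[0].shape)  # torch.Size([2, 4])
print(len(out))      # 3 fields declared
print(out[:])        # (tensor(...),) -- slicing keeps only non-None fields
```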
@@ -637,6 +837,138 @@ class Model(Wav2Vec2PreTrainedModel):
         return result
 
 
+class EmotionVecConfig:
+    """Configuration class for emotion2vec models."""
+
+    def __init__(
+        self,
+        num_labels,
+        is_classifier=True,
+        sampling_rate=16000,
+        final_dropout=0.1,
+        model_name=None,
+        **kwargs,
+    ):
+        self.num_labels = num_labels
+        self.is_classifier = is_classifier
+        self.sampling_rate = sampling_rate
+        self.final_dropout = final_dropout
+        self.model_name = model_name
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+    def to_json_string(self):
+        """Convert config to JSON string for HuggingFace compatibility."""
+        import json
+
+        config_dict = {
+            "num_labels": self.num_labels,
+            "is_classifier": self.is_classifier,
+            "sampling_rate": self.sampling_rate,
+            "final_dropout": self.final_dropout,
+        }
+        for key, value in self.__dict__.items():
+            if key not in config_dict:
+                config_dict[key] = value
+        return json.dumps(config_dict, indent=2)
+
+
+class Emotion2vecModel(torch.nn.Module):
+    """Wrapper class for emotion2vec finetuning."""
+
+    def __init__(self, emotion2vec_backbone, config):
+        super().__init__()
+        self.emotion2vec_backbone = emotion2vec_backbone
+        self.config = config
+        self.is_classifier = config.is_classifier
+
+        # Determine embedding dimension based on model variant (hardcoded)
+        embedding_dim = self._get_embedding_dim_by_model()
+        self.head = torch.nn.Sequential(
+            torch.nn.Dropout(config.final_dropout),
+            torch.nn.Linear(embedding_dim, config.num_labels),
+        )
+
+    def _get_embedding_dim_by_model(self):
+        """Get embedding dimension based on model variant."""
+        model_name = getattr(self.config, "model_name", "")
+
+        # Large models have 1024 dimensions
+        if "large" in model_name.lower():
+            return 1024
+        # Base, seed, and other models have 768 dimensions
+        else:
+            return 768
+
+    def forward(self, input_values, labels=None, **kwargs):
+        embeddings = self._extract_embeddings(input_values)
+
+        logits = self.head(embeddings)
+
+        if not self.training and self.is_classifier:
+            logits = torch.softmax(logits, dim=1)
+
+        if self.is_classifier:
+            return ModelOutput(logits=logits)
+        else:
+            return ModelOutputReg(logits=logits)
+
+    def _extract_embeddings(self, input_values):
+        batch_embeddings = []
+        device = next(self.parameters()).device  # Get the device of the model
+        for audio_tensor in input_values:
+            embedding = self._process_single_audio(audio_tensor)
+            # Ensure embedding is on the same device as the model
+            embedding = embedding.to(device)
+            batch_embeddings.append(embedding)
+        return torch.stack(batch_embeddings)
+
+    def _process_single_audio(self, audio_tensor):
+        import tempfile
+
+        import soundfile as sf
+
+        signal_np = audio_tensor.squeeze().cpu().numpy()
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+            sf.write(tmp_file.name, signal_np, self.config.sampling_rate)
+
+        try:
+            res = self.emotion2vec_backbone.generate(
+                tmp_file.name, granularity="utterance", extract_embedding=True
+            )
+
+            if isinstance(res, list) and len(res) > 0:
+                embeddings = res[0].get("feats", None)
+                if embeddings is not None:
+                    if isinstance(embeddings, list):
+                        embeddings = np.array(embeddings)
+                    return torch.tensor(embeddings.flatten(), dtype=torch.float32)
+
+            # Fallback: zero vector sized for the model variant
+            model_name = getattr(self.config, "model_name", "")
+            if "large" in model_name.lower():
+                return torch.zeros(1024, dtype=torch.float32)
+            else:
+                return torch.zeros(768, dtype=torch.float32)
+        finally:
+            os.unlink(tmp_file.name)
+
+    def predict(self, signal):
+        """Predict method for compatibility with nkululeko prediction pipeline."""
+        if isinstance(signal, np.ndarray):
+            signal_tensor = torch.from_numpy(signal).unsqueeze(0)
+        else:
+            signal_tensor = signal.unsqueeze(0) if signal.dim() == 1 else signal
+
+        with torch.no_grad():
+            result = self(signal_tensor)
+
+        logits = result.logits
+
+        return logits.detach().cpu().numpy()[0]
+
+
 class ConcordanceCorCoeff(torch.nn.Module):
     def __init__(self):
         super().__init__()
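Taken together, the emotion2vec additions keep the model's public surface unchanged: `predict` still accepts a raw signal and returns class probabilities or a regression value. A usage sketch, assuming a `backbone` and `config` built as in `_init_emotion2vec_model` above:

```python
import numpy as np

signal = np.zeros(16000, dtype=np.float32)  # one second of silence at 16 kHz
model = Emotion2vecModel(backbone, config)  # names from the hunks above
model.eval()  # eval mode makes forward() apply softmax for classifiers
probs = model.predict(signal)
print(probs)
```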