PyPI - celldetective - Versions diffs - 1.0.2.post1__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

celldetective 1.0.2.post1__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

celldetective/__main__.py +7 -21
celldetective/events.py +2 -44
celldetective/extra_properties.py +62 -52
celldetective/filters.py +4 -5
celldetective/gui/__init__.py +1 -1
celldetective/gui/analyze_block.py +37 -10
celldetective/gui/btrack_options.py +24 -23
celldetective/gui/classifier_widget.py +62 -19
celldetective/gui/configure_new_exp.py +32 -35
celldetective/gui/control_panel.py +120 -81
celldetective/gui/gui_utils.py +674 -396
celldetective/gui/json_readers.py +7 -6
celldetective/gui/layouts.py +756 -0
celldetective/gui/measurement_options.py +98 -513
celldetective/gui/neighborhood_options.py +322 -270
celldetective/gui/plot_measurements.py +1114 -0
celldetective/gui/plot_signals_ui.py +21 -20
celldetective/gui/process_block.py +449 -169
celldetective/gui/retrain_segmentation_model_options.py +27 -26
celldetective/gui/retrain_signal_model_options.py +25 -24
celldetective/gui/seg_model_loader.py +31 -27
celldetective/gui/signal_annotator.py +2326 -2295
celldetective/gui/signal_annotator_options.py +18 -16
celldetective/gui/styles.py +16 -1
celldetective/gui/survival_ui.py +67 -39
celldetective/gui/tableUI.py +337 -48
celldetective/gui/thresholds_gui.py +75 -71
celldetective/gui/viewers.py +743 -0
celldetective/io.py +247 -27
celldetective/measure.py +43 -263
celldetective/models/segmentation_effectors/primNK_cfse/config_input.json +29 -0
celldetective/models/segmentation_effectors/primNK_cfse/cp-cfse-transfer +0 -0
celldetective/models/segmentation_effectors/primNK_cfse/training_instructions.json +37 -0
celldetective/neighborhood.py +498 -27
celldetective/preprocessing.py +1023 -0
celldetective/scripts/analyze_signals.py +7 -0
celldetective/scripts/measure_cells.py +12 -0
celldetective/scripts/segment_cells.py +20 -4
celldetective/scripts/track_cells.py +11 -0
celldetective/scripts/train_segmentation_model.py +35 -34
celldetective/segmentation.py +14 -9
celldetective/signals.py +234 -329
celldetective/tracking.py +2 -2
celldetective/utils.py +602 -49
celldetective-1.1.1.dist-info/METADATA +305 -0
celldetective-1.1.1.dist-info/RECORD +84 -0
{celldetective-1.0.2.post1.dist-info → celldetective-1.1.1.dist-info}/top_level.txt +1 -0
tests/__init__.py +0 -0
tests/test_events.py +28 -0
tests/test_filters.py +24 -0
tests/test_io.py +70 -0
tests/test_measure.py +141 -0
tests/test_neighborhood.py +70 -0
tests/test_preprocessing.py +37 -0
tests/test_segmentation.py +93 -0
tests/test_signals.py +135 -0
tests/test_tracking.py +164 -0
tests/test_utils.py +118 -0
celldetective-1.0.2.post1.dist-info/METADATA +0 -221
celldetective-1.0.2.post1.dist-info/RECORD +0 -66
{celldetective-1.0.2.post1.dist-info → celldetective-1.1.1.dist-info}/LICENSE +0 -0
{celldetective-1.0.2.post1.dist-info → celldetective-1.1.1.dist-info}/WHEEL +0 -0
{celldetective-1.0.2.post1.dist-info → celldetective-1.1.1.dist-info}/entry_points.txt +0 -0

celldetective/signals.py CHANGED Viewed

@@ -6,7 +6,7 @@ import json
 from tensorflow.keras.optimizers import Adam
 from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau, CSVLogger
 from tensorflow.keras.losses import CategoricalCrossentropy, MeanSquaredError, MeanAbsoluteError
-from tensorflow.keras.metrics import Precision, Recall
+from tensorflow.keras.metrics import Precision, Recall, MeanIoU
 from tensorflow.keras.models import load_model,clone_model
 from tensorflow.config.experimental import list_physical_devices, set_memory_growth
 from tensorflow.keras.utils import to_categorical, plot_model
@@ -92,6 +92,7 @@ class TimeHistory(Callback):
 def analyze_signals(trajectories, model, interpolate_na=True,
 					selected_signals=None,
+					model_path=None,
 					column_labels = {'track': "TRACK_ID", 'time': 'FRAME', 'x': 'POSITION_X', 'y': 'POSITION_Y'},
 					plot_outcome=False, output_dir=None):
@@ -139,7 +140,7 @@ def analyze_signals(trajectories, model, interpolate_na=True,
 	"""
-	model_path = locate_signal_model(model)
+	model_path = locate_signal_model(model, path=model_path)
 	complete_path = model_path #+model
 	complete_path = rf"{complete_path}"
 	model_config_path = os.sep.join([complete_path,'config_input.json'])
@@ -196,6 +197,7 @@ def analyze_signals(trajectories, model, interpolate_na=True,
 		for j,col in enumerate(selected_signals):
 			signal = group[col].to_numpy()
 			signals[i,frames,j] = signal
+			signals[i,max(frames):,j] = signal[-1]
 	# for i in range(5):
 	# 	print('pre model')
@@ -422,6 +424,7 @@ class SignalDetectionModel(object):
 		self.dense_collection = dense_collection
 		self.dropout_rate = dropout_rate
 		self.label = label
+		self.show_plots = True
 		if self.pretrained is not None:
@@ -430,6 +433,7 @@ class SignalDetectionModel(object):
 		else:
 			print("Create models from scratch...")
 			self.create_models_from_scratch()
+			print("Models successfully created.")
 	def load_pretrained_model(self):
@@ -545,10 +549,10 @@ class SignalDetectionModel(object):
 		except:
 			pass
-	def fit_from_directory(self, ds_folders, normalize=True, normalization_percentile=None, normalization_values = None,
+	def fit_from_directory(self, datasets, normalize=True, normalization_percentile=None, normalization_values = None,
 						  normalization_clip = None, channel_option=["live_nuclei_channel"], model_name=None, target_directory=None,
 						  augment=True, augmentation_factor=2, validation_split=0.20, test_split=0.0, batch_size = 64, epochs=300,
-						  recompile_pretrained=False, learning_rate=0.01, loss_reg="mse", loss_class = CategoricalCrossentropy(from_logits=False)):
+						  recompile_pretrained=False, learning_rate=0.01, loss_reg="mse", loss_class = CategoricalCrossentropy(from_logits=False), show_plots=True):
 		"""
 		Trains the model using data from specified directories.
@@ -600,19 +604,17 @@ class SignalDetectionModel(object):
 		- The method automatically splits the dataset into training, validation, and test sets according to the specified splits.
 		"""
 		if not hasattr(self, 'normalization_percentile'):
 			self.normalization_percentile = normalization_percentile
 		if not hasattr(self, 'normalization_values'):
 			self.normalization_values = normalization_values
 		if not hasattr(self, 'normalization_clip'):
 			self.normalization_clip = normalization_clip
-		print('Actual clip option:', self.normalization_clip)
 		self.normalize = normalize
 		self.normalization_percentile, self. normalization_values, self.normalization_clip =  _interpret_normalization_parameters(self.n_channels, self.normalization_percentile, self.normalization_values, self.normalization_clip)
-		self.ds_folders = [rf'{d}' for d in ds_folders]
+		self.datasets = [rf'{d}' if isinstance(d,str) else d for d in datasets]
 		self.batch_size = batch_size
 		self.epochs = epochs
 		self.validation_split = validation_split
@@ -626,29 +628,24 @@ class SignalDetectionModel(object):
 		self.learning_rate = learning_rate
 		self.loss_reg = loss_reg
 		self.loss_class = loss_class
-		if not os.path.exists(self.model_folder):
-			#shutil.rmtree(self.model_folder)
-			os.mkdir(self.model_folder)
+		self.show_plots = show_plots
 		self.channel_option = channel_option
 		assert self.n_channels==len(self.channel_option), f'Mismatch between the channel option and the number of channels of the model...'
-		self.list_of_sets = []
-		print(self.ds_folders)
-		for f in self.ds_folders:
-			self.list_of_sets.extend(glob(os.sep.join([f,"*.npy"])))
-		print(f"Found {len(self.list_of_sets)} annotation files...")
-		self.generate_sets()
+		if isinstance(self.datasets[0], dict):
+			self.datasets = [self.datasets]
-		self.train_classifier()
-		self.train_regressor()
+		self.list_of_sets = []
+		for ds in self.datasets:
+			if isinstance(ds,str):
+				self.list_of_sets.extend(glob(os.sep.join([ds,"*.npy"])))
+			else:
+				self.list_of_sets.append(ds)
+		print(f"Found {len(self.list_of_sets)} datasets...")
-		config_input = {"channels": self.channel_option, "model_signal_length": self.model_signal_length, 'label': self.label, 'normalize': self.normalize, 'normalization_percentile': self.normalization_percentile, 'normalization_values': self.normalization_values, 'normalization_clip': self.normalization_clip}
-		json_string = json.dumps(config_input)
-		with open(os.sep.join([self.model_folder,"config_input.json"]), 'w') as outfile:
-			outfile.write(json_string)
+		self.prepare_sets()
+		self.train_generic()
 	def fit(self, x_train, y_time_train, y_class_train, normalize=True, normalization_percentile=None, normalization_values = None, normalization_clip = None, pad=True, validation_data=None, test_data=None, channel_option=["live_nuclei_channel","dead_nuclei_channel"], model_name=None,
 			target_directory=None, augment=True, augmentation_factor=3, validation_split=0.25, batch_size = 64, epochs=300,
@@ -691,7 +688,7 @@ class SignalDetectionModel(object):
 		# If y-class is not one-hot encoded, encode it
 		if self.y_class_train.shape[-1] != self.n_classes:
-			self.class_weights = compute_weights(self.y_class_train)
+			self.class_weights = compute_weights(y=self.y_class_train,class_weight="balanced", classes=np.unique(self.y_class_train))
 			self.y_class_train = to_categorical(self.y_class_train)
 		if self.normalize:
@@ -753,13 +750,21 @@ class SignalDetectionModel(object):
 		self.loss_reg = loss_reg
 		self.loss_class = loss_class
-		if os.path.exists(self.model_folder):
-			shutil.rmtree(self.model_folder)
-		os.mkdir(self.model_folder)
+		self.train_generic()
+	def train_generic(self):
+		if not os.path.exists(self.model_folder):
+			os.mkdir(self.model_folder)
 		self.train_classifier()
 		self.train_regressor()
+		config_input = {"channels": self.channel_option, "model_signal_length": self.model_signal_length, 'label': self.label, 'normalize': self.normalize, 'normalization_percentile': self.normalization_percentile, 'normalization_values': self.normalization_values, 'normalization_clip': self.normalization_clip}
+		json_string = json.dumps(config_input)
+		with open(os.sep.join([self.model_folder,"config_input.json"]), 'w') as outfile:
+			outfile.write(json_string)
 	def predict_class(self, x, normalize=True, pad=True, return_one_hot=False, interpolate=True):
 		"""
@@ -936,21 +941,21 @@ class SignalDetectionModel(object):
 				self.model_class.set_weights(clone_model(self.model_class).get_weights())
 				self.model_class.compile(optimizer=Adam(learning_rate=self.learning_rate),
 							  loss=self.loss_class,
-							  metrics=['accuracy', Precision(), Recall()])
+							  metrics=['accuracy', Precision(), Recall(), MeanIoU(num_classes=self.n_classes, name='iou', dtype=float, sparse_y_true=False, sparse_y_pred=False)])
 			else:
 				self.initial_model = clone_model(self.model_class)
 				self.model_class.set_weights(self.initial_model.get_weights())
 				# Recompile to avoid crash
 				self.model_class.compile(optimizer=Adam(learning_rate=self.learning_rate),
 							  loss=self.loss_class,
-							  metrics=['accuracy', Precision(), Recall()])
+							  metrics=['accuracy', Precision(), Recall(),MeanIoU(num_classes=self.n_classes, name='iou', dtype=float, sparse_y_true=False, sparse_y_pred=False)])
 				# Reset weights
 				self.model_class.set_weights(self.initial_model.get_weights())
 		else:
 			print("Compiling the classifier...")
 			self.model_class.compile(optimizer=Adam(learning_rate=self.learning_rate),
 						  loss=self.loss_class,
-						  metrics=['accuracy', Precision(), Recall()])
+						  metrics=['accuracy', Precision(), Recall(),MeanIoU(num_classes=self.n_classes, name='iou', dtype=float, sparse_y_true=False, sparse_y_pred=False)])
 		self.gather_callbacks("classifier")
@@ -979,7 +984,8 @@ class SignalDetectionModel(object):
 								validation_split = self.validation_split,
 								verbose=1)
-		self.plot_model_history(mode="classifier")
+		if self.show_plots:
+			self.plot_model_history(mode="classifier")
 		# Set current classification model as the best model
 		self.model_class = load_model(os.sep.join([self.model_folder,"classifier.h5"]))
@@ -1008,10 +1014,12 @@ class SignalDetectionModel(object):
 			results = confusion_matrix(ground_truth,predictions)
 			self.dico.update({"test_IoU": IoU_score, "test_balanced_accuracy": balanced_accuracy, "test_confusion": results, 'test_precision': precision, 'test_recall': recall})
-			try:
-				plot_confusion_matrix(results, ["dead","alive","miscellaneous"], output_dir=self.model_folder+os.sep, title=title)
-			except:
-				pass
+			if self.show_plots:
+				try:
+					plot_confusion_matrix(results, ["dead","alive","miscellaneous"], output_dir=self.model_folder+os.sep, title=title)
+				except Exception as e:
+					print(e)
+					pass
 			print("Test set: ",classification_report(ground_truth,predictions))
 		if hasattr(self, 'x_val'):
@@ -1035,10 +1043,11 @@ class SignalDetectionModel(object):
 			results = confusion_matrix(ground_truth,predictions)
 			self.dico.update({"val_IoU": IoU_score, "val_balanced_accuracy": balanced_accuracy, "val_confusion": results, 'val_precision': precision, 'val_recall': recall})
-			try:
-				plot_confusion_matrix(results, ["dead","alive","miscellaneous"], output_dir=self.model_folder+os.sep, title=title)
-			except:
-				pass
+			if self.show_plots:
+				try:
+					plot_confusion_matrix(results, ["dead","alive","miscellaneous"], output_dir=self.model_folder+os.sep, title=title)
+				except:
+					pass
 			print("Validation set: ",classification_report(ground_truth,predictions))
@@ -1110,7 +1119,8 @@ class SignalDetectionModel(object):
 								validation_split = self.validation_split,
 								verbose=1)
-		self.plot_model_history(mode="regressor")
+		if self.show_plots:
+			self.plot_model_history(mode="regressor")
 		self.dico.update({"history_regressor": self.history_regressor, "execution_time_regressor": self.cb[-1].times})
@@ -1200,7 +1210,8 @@ class SignalDetectionModel(object):
 			test_mae = mae(ground_truth, predictions).numpy()
 			print(f"MSE on test set: {test_mse}...")
 			print(f"MAE on test set: {test_mae}...")
-			regression_plot(predictions, ground_truth, savepath=os.sep.join([self.model_folder,"test_regression.png"]))
+			if self.show_plots:
+				regression_plot(predictions, ground_truth, savepath=os.sep.join([self.model_folder,"test_regression.png"]))
 			self.dico.update({"test_mse": test_mse, "test_mae": test_mae})
 		if hasattr(self, 'x_val'):
@@ -1212,7 +1223,8 @@ class SignalDetectionModel(object):
 			val_mse = mse(ground_truth, predictions).numpy()
 			val_mae = mae(ground_truth, predictions).numpy()
-			regression_plot(predictions, ground_truth, savepath=os.sep.join([self.model_folder,"validation_regression.png"]))
+			if self.show_plots:
+				regression_plot(predictions, ground_truth, savepath=os.sep.join([self.model_folder,"validation_regression.png"]))
 			print(f"MSE on validation set: {val_mse}...")
 			print(f"MAE on validation set: {val_mae}...")
@@ -1239,17 +1251,17 @@ class SignalDetectionModel(object):
 		if mode=="classifier":
-			reduce_lr = ReduceLROnPlateau(monitor='val_precision', factor=0.5, patience=30,
+			reduce_lr = ReduceLROnPlateau(monitor='val_iou', factor=0.5, patience=30,
 										  cooldown=10, min_lr=5e-10, min_delta=1.0E-10,
 										  verbose=1,mode="max")
 			self.cb.append(reduce_lr)
 			csv_logger = CSVLogger(os.sep.join([self.model_folder,'log_classifier.csv']), append=True, separator=';')
 			self.cb.append(csv_logger)
 			checkpoint_path = os.sep.join([self.model_folder,"classifier.h5"])
-			cp_callback = ModelCheckpoint(checkpoint_path,monitor="val_precision",mode="max",verbose=1,save_best_only=True,save_weights_only=False,save_freq="epoch")
+			cp_callback = ModelCheckpoint(checkpoint_path,monitor="val_iou",mode="max",verbose=1,save_best_only=True,save_weights_only=False,save_freq="epoch")
 			self.cb.append(cp_callback)
-			callback_stop = EarlyStopping(monitor='val_precision', patience=100)
+			callback_stop = EarlyStopping(monitor='val_iou', patience=100)
 			self.cb.append(callback_stop)
 		elif mode=="regressor":
@@ -1278,7 +1290,7 @@ class SignalDetectionModel(object):
-	def generate_sets(self):
+	def prepare_sets(self):
 		"""
 		Generates and preprocesses training, validation, and test sets from loaded annotations.
@@ -1297,8 +1309,30 @@ class SignalDetectionModel(object):
 		self.y_time_set = []
 		self.y_class_set = []
-		for s in self.list_of_sets:
-			self.load_and_normalize(s)
+		if isinstance(self.list_of_sets[0],str):
+			# Case 1: a list of npy files to be loaded
+			for s in self.list_of_sets:
+				signal_dataset = self.load_set(s)
+				selected_signals, max_length = self.find_best_signal_match(signal_dataset)
+				signals_recast, classes, times_of_interest = self.cast_signals_into_training_data(signal_dataset, selected_signals, max_length)
+				signals_recast, times_of_interest = self.normalize_signals(signals_recast, times_of_interest)
+				self.x_set.extend(signals_recast)
+				self.y_time_set.extend(times_of_interest)
+				self.y_class_set.extend(classes)
+		elif isinstance(self.list_of_sets[0],list):
+			# Case 2: a list of sets (already loaded)
+			for signal_dataset in self.list_of_sets:
+				selected_signals, max_length = self.find_best_signal_match(signal_dataset)
+				signals_recast, classes, times_of_interest = self.cast_signals_into_training_data(signal_dataset, selected_signals, max_length)
+				signals_recast, times_of_interest = self.normalize_signals(signals_recast, times_of_interest)
+				self.x_set.extend(signals_recast)
+				self.y_time_set.extend(times_of_interest)
+				self.y_class_set.extend(classes)
 		self.x_set = np.array(self.x_set).astype(np.float32)
 		self.x_set = self.interpolate_signals(self.x_set)
@@ -1325,7 +1359,6 @@ class SignalDetectionModel(object):
 		self.x_train = ds["x_train"]
 		self.x_val = ds["x_val"]
 		self.y_time_train = ds["y1_train"].astype(np.float32)
-		print(np.amax(self.y_time_train),np.amin(self.y_time_train))
 		self.y_time_val = ds["y1_val"].astype(np.float32)
 		self.y_class_train = ds["y2_train"]
 		self.y_class_val = ds["y2_val"]
@@ -1357,13 +1390,24 @@ class SignalDetectionModel(object):
 		nbr_augment = self.augmentation_factor*len(self.x_train)
 		randomize = np.arange(len(self.x_train))
-		indices = random.choices(randomize,k=nbr_augment)
+		unique, counts = np.unique(self.y_class_train.argmax(axis=1),return_counts=True)
+		frac = counts/sum(counts)
+		weights = [frac[0]/f for f in frac]
+		weights[0] = weights[0]*3
+		self.pre_augment_weights = weights/sum(weights)
+		weights_array = [self.pre_augment_weights[a.argmax()] for a in self.y_class_train]
+		indices = random.choices(randomize,k=nbr_augment, weights=weights_array)
 		x_train_aug = []
 		y_time_train_aug = []
 		y_class_train_aug = []
+		counts = [0.,0.,0.]
 		for k in indices:
+			counts[self.y_class_train[k].argmax()] += 1
 			aug = augmenter(self.x_train[k],
 							self.y_time_train[k],
 							self.y_class_train[k],
@@ -1372,36 +1416,23 @@ class SignalDetectionModel(object):
 			x_train_aug.append(aug[0])
 			y_time_train_aug.append(aug[1])
 			y_class_train_aug.append(aug[2])
+		print('per class counts ',counts)
 		# Save augmented training set
 		self.x_train = np.array(x_train_aug)
 		self.y_time_train = np.array(y_time_train_aug)
 		self.y_class_train = np.array(y_class_train_aug)
-	def load_and_normalize(self, subset):
+		self.class_weights = compute_weights(self.y_class_train.argmax(axis=1))
+		print(f"New class weights: {self.class_weights}...")
-		"""
-		Loads a subset of signal data from an annotation file and applies normalization.
-		Parameters
-		----------
-		subset : str
-			The file path to the .npy annotation file containing signal data for a subset of observations.
-		Notes
-		-----
-		- The method extracts required signal channels from the annotation file and applies specified normalization
-		  and interpolation steps.
-		- Preprocessed signals are added to the global dataset for model training.
-		"""
-		set_k = np.load(subset,allow_pickle=True)
-		### here do a mapping between channel option and existing signals
+	def load_set(self, signal_dataset):
+		return np.load(signal_dataset,allow_pickle=True)
+	def find_best_signal_match(self, signal_dataset):
 		required_signals = self.channel_option
-		available_signals = list(set_k[0].keys())
+		available_signals = list(signal_dataset[0].keys())
 		selected_signals = []
 		for s in required_signals:
@@ -1421,47 +1452,134 @@ class SignalDetectionModel(object):
 			else:
 				return None
 		key_to_check = selected_signals[0] #self.channel_option[0]
-		signal_lengths = [len(l[key_to_check]) for l in set_k]
-		max_length = np.amax(signal_lengths)
+		signal_lengths = [len(l[key_to_check]) for l in signal_dataset]
+		max_length = np.amax(signal_lengths)
-		fluo = np.zeros((len(set_k),max_length,self.n_channels))
-		classes = np.zeros(len(set_k))
-		times_of_interest = np.zeros(len(set_k))
+		return selected_signals, max_length
+	def cast_signals_into_training_data(self, signal_dataset, selected_signals, max_length):
+		signals_recast = np.zeros((len(signal_dataset),max_length,self.n_channels))
+		classes = np.zeros(len(signal_dataset))
+		times_of_interest = np.zeros(len(signal_dataset))
-		for k in range(len(set_k)):
+		for k in range(len(signal_dataset)):
 			for i in range(self.n_channels):
 				try:
 					# take into account timeline for accurate time regression
-					timeline = set_k[k]['FRAME'].astype(int)
-					fluo[k,timeline,i] = set_k[k][selected_signals[i]]
+					timeline = signal_dataset[k]['FRAME'].astype(int)
+					signals_recast[k,timeline,i] = signal_dataset[k][selected_signals[i]]
 				except:
 					print(f"Attribute {selected_signals[i]} matched to {self.channel_option[i]} not found in annotation...")
 					pass
-			classes[k] = set_k[k]["class"]
-			times_of_interest[k] = set_k[k]["time_of_interest"]
+			classes[k] = signal_dataset[k]["class"]
+			times_of_interest[k] = signal_dataset[k]["time_of_interest"]
 		# Correct absurd times of interest
 		times_of_interest[np.nonzero(classes)] = -1
 		times_of_interest[(times_of_interest<=0.0)] = -1
-		# Attempt per-set normalization
-		fluo = pad_to_model_length(fluo, self.model_signal_length)
+		return signals_recast, classes, times_of_interest
+	def normalize_signals(self, signals_recast, times_of_interest):
+		signals_recast = pad_to_model_length(signals_recast, self.model_signal_length)
 		if self.normalize:
-			fluo = normalize_signal_set(fluo, self.channel_option, normalization_percentile=self.normalization_percentile,
+			signals_recast = normalize_signal_set(signals_recast, self.channel_option, normalization_percentile=self.normalization_percentile,
 										normalization_values=self.normalization_values, normalization_clip=self.normalization_clip,
 										)
 		# Trivial normalization for time of interest
 		times_of_interest /= self.model_signal_length
+		return signals_recast, times_of_interest
+	# def load_and_normalize(self, subset):
+	# 	"""
+	# 	Loads a subset of signal data from an annotation file and applies normalization.
+	# 	Parameters
+	# 	----------
+	# 	subset : str
+	# 		The file path to the .npy annotation file containing signal data for a subset of observations.
+	# 	Notes
+	# 	-----
+	# 	- The method extracts required signal channels from the annotation file and applies specified normalization
+	# 	  and interpolation steps.
+	# 	- Preprocessed signals are added to the global dataset for model training.
+	# 	"""
+	# 	set_k = np.load(subset,allow_pickle=True)
+	# 	### here do a mapping between channel option and existing signals
+	# 	required_signals = self.channel_option
+	# 	available_signals = list(set_k[0].keys())
+	# 	selected_signals = []
+	# 	for s in required_signals:
+	# 		pattern_test = [s in a for a in available_signals]
+	# 		if np.any(pattern_test):
+	# 			valid_columns = np.array(available_signals)[np.array(pattern_test)]
+	# 			if len(valid_columns)==1:
+	# 				selected_signals.append(valid_columns[0])
+	# 			else:
+	# 				print(f'Found several candidate signals: {valid_columns}')
+	# 				for vc in natsorted(valid_columns):
+	# 					if 'circle' in vc:
+	# 						selected_signals.append(vc)
+	# 						break
+	# 				else:
+	# 					selected_signals.append(valid_columns[0])
+	# 		else:
+	# 			return None
-		# Add to global dataset
-		self.x_set.extend(fluo)
-		self.y_time_set.extend(times_of_interest)
-		self.y_class_set.extend(classes)
+	# 	key_to_check = selected_signals[0] #self.channel_option[0]
+	# 	signal_lengths = [len(l[key_to_check]) for l in set_k]
+	# 	max_length = np.amax(signal_lengths)
+	# 	fluo = np.zeros((len(set_k),max_length,self.n_channels))
+	# 	classes = np.zeros(len(set_k))
+	# 	times_of_interest = np.zeros(len(set_k))
+	# 	for k in range(len(set_k)):
+	# 		for i in range(self.n_channels):
+	# 			try:
+	# 				# take into account timeline for accurate time regression
+	# 				timeline = set_k[k]['FRAME'].astype(int)
+	# 				fluo[k,timeline,i] = set_k[k][selected_signals[i]]
+	# 			except:
+	# 				print(f"Attribute {selected_signals[i]} matched to {self.channel_option[i]} not found in annotation...")
+	# 				pass
+	# 		classes[k] = set_k[k]["class"]
+	# 		times_of_interest[k] = set_k[k]["time_of_interest"]
+	# 	# Correct absurd times of interest
+	# 	times_of_interest[np.nonzero(classes)] = -1
+	# 	times_of_interest[(times_of_interest<=0.0)] = -1
+	# 	# Attempt per-set normalization
+	# 	fluo = pad_to_model_length(fluo, self.model_signal_length)
+	# 	if self.normalize:
+	# 		fluo = normalize_signal_set(fluo, self.channel_option, normalization_percentile=self.normalization_percentile,
+	# 									normalization_values=self.normalization_values, normalization_clip=self.normalization_clip,
+	# 									)
+	# 	# Trivial normalization for time of interest
+	# 	times_of_interest /= self.model_signal_length
+	# 	# Add to global dataset
+	# 	self.x_set.extend(fluo)
+	# 	self.y_time_set.extend(times_of_interest)
+	# 	self.y_class_set.extend(classes)
 def _interpret_normalization_parameters(n_channels, normalization_percentile, normalization_values, normalization_clip):
@@ -1655,7 +1773,7 @@ def pad_to_model_length(signal_set, model_signal_length):
 	"""
-	padded = np.pad(signal_set, [(0,0),(0,model_signal_length - signal_set.shape[1]),(0,0)])
+	padded = np.pad(signal_set, [(0,0),(0,model_signal_length - signal_set.shape[1]),(0,0)],mode="edge")
 	return padded
@@ -1767,13 +1885,16 @@ def random_time_shift(signal, time_of_interest, cclass, model_signal_length):
 	"""
+	min_time = 3
 	max_time = model_signal_length
 	return_target = False
 	if time_of_interest != -1:
 		return_target = True
-		max_time = model_signal_length - 3 # to prevent approaching too much to the edge
+		max_time = model_signal_length + 1/3*model_signal_length # bias to have a third of event class becoming no event
+		min_time = -model_signal_length*1/3
-	times = np.linspace(-max_time,max_time,2000) # symmetrize to create left-censored events
+	times = np.linspace(min_time,max_time,2000) # symmetrize to create left-censored events
 	target_time = np.random.choice(times)
 	delta_t = target_time - time_of_interest
@@ -1782,13 +1903,16 @@ def random_time_shift(signal, time_of_interest, cclass, model_signal_length):
 	if target_time<=0 and np.argmax(cclass)==0:
 		target_time = -1
 		cclass = np.array([0.,0.,1.]).astype(np.float32)
+	if target_time>=model_signal_length and np.argmax(cclass)==0:
+		target_time = -1
+		cclass = np.array([0.,1.,0.]).astype(np.float32)
 	if return_target:
 		return signal,target_time, cclass
 	else:
 		return signal, time_of_interest, cclass
-def augmenter(signal, time_of_interest, cclass, model_signal_length, time_shift=True, probability=0.8):
+def augmenter(signal, time_of_interest, cclass, model_signal_length, time_shift=True, probability=0.95):
 	"""
 	Randomly augments single-cell signals to simulate variations in noise, intensity ratios, and event times.
@@ -1839,9 +1963,8 @@ def augmenter(signal, time_of_interest, cclass, model_signal_length, time_shift=
 		if time_shift:
 			# do not time shift miscellaneous cells
-			if cclass.argmax()!=2.:
-				assert time_of_interest is not None, f"Please provide valid lysis times"
-				signal,time_of_interest,cclass = random_time_shift(signal, time_of_interest, cclass, model_signal_length)
+			assert time_of_interest is not None, f"Please provide valid lysis times"
+			signal,time_of_interest,cclass = random_time_shift(signal, time_of_interest, cclass, model_signal_length)
 		#signal = random_intensity_change(signal) #maybe bad idea for non percentile-normalized signals
 		signal = gauss_noise(signal)
@@ -2135,231 +2258,6 @@ def train_signal_model(config):
 	cmd = f'python "{script_path}" --config "{config}"'
 	subprocess.call(cmd, shell=True)
-def derivative(x, timeline, window, mode='bi'):
-	"""
-	Compute the derivative of a given array of values with respect to time using a specified numerical differentiation method.
-	Parameters
-	----------
-	x : array_like
-		The input array of values.
-	timeline : array_like
-		The array representing the time points corresponding to the input values.
-	window : int
-		The size of the window used for numerical differentiation. Must be a positive odd integer.
-	mode : {'bi', 'forward', 'backward'}, optional
-		The numerical differentiation method to be used:
-		- 'bi' (default): Bidirectional differentiation using a symmetric window.
-		- 'forward': Forward differentiation using a one-sided window.
-		- 'backward': Backward differentiation using a one-sided window.
-	Returns
-	-------
-	dxdt : ndarray
-		The computed derivative values of the input array with respect to time.
-	Raises
-	------
-	AssertionError
-		If the window size is not an odd integer and mode is 'bi'.
-	Notes
-	-----
-	- For 'bi' mode, the window size must be an odd number.
-	- For 'forward' mode, the derivative at the edge points may not be accurate due to the one-sided window.
-	- For 'backward' mode, the derivative at the first few points may not be accurate due to the one-sided window.
-	Examples
-	--------
-	>>> import numpy as np
-	>>> x = np.array([1, 2, 4, 7, 11])
-	>>> timeline = np.array([0, 1, 2, 3, 4])
-	>>> window = 3
-	>>> derivative(x, timeline, window, mode='bi')
-	array([3., 3., 3.])
-	>>> derivative(x, timeline, window, mode='forward')
-	array([1., 2., 3.])
-	>>> derivative(x, timeline, window, mode='backward')
-	array([3., 3., 3., 3.])
-	"""
-	# modes = bi, forward, backward
-	dxdt = np.zeros(len(x))
-	dxdt[:] = np.nan
-	if mode=='bi':
-		assert window%2==1,'Please set an odd window for the bidirectional mode'
-		lower_bound = window//2
-		upper_bound = len(x) - window//2 - 1
-	elif mode=='forward':
-		lower_bound = 0
-		upper_bound = len(x) - window
-	elif mode=='backward':
-		lower_bound = window
-		upper_bound = len(x)
-	for t in range(lower_bound,upper_bound):
-		if mode=='bi':
-			dxdt[t] = (x[t+window//2+1] - x[t-window//2]) / (timeline[t+window//2+1] - timeline[t-window//2])
-		elif mode=='forward':
-			dxdt[t] = (x[t+window] - x[t]) /  (timeline[t+window] - timeline[t])
-		elif mode=='backward':
-			dxdt[t] = (x[t] - x[t-window]) /  (timeline[t] - timeline[t-window])
-	return dxdt
-def velocity(x,y,timeline,window,mode='bi'):
-	"""
-	Compute the velocity vector of a given 2D trajectory represented by arrays of x and y coordinates
-	with respect to time using a specified numerical differentiation method.
-	Parameters
-	----------
-	x : array_like
-		The array of x-coordinates of the trajectory.
-	y : array_like
-		The array of y-coordinates of the trajectory.
-	timeline : array_like
-		The array representing the time points corresponding to the x and y coordinates.
-	window : int
-		The size of the window used for numerical differentiation. Must be a positive odd integer.
-	mode : {'bi', 'forward', 'backward'}, optional
-		The numerical differentiation method to be used:
-		- 'bi' (default): Bidirectional differentiation using a symmetric window.
-		- 'forward': Forward differentiation using a one-sided window.
-		- 'backward': Backward differentiation using a one-sided window.
-	Returns
-	-------
-	v : ndarray
-		The computed velocity vector of the 2D trajectory with respect to time.
-		The first column represents the x-component of velocity, and the second column represents the y-component.
-	Raises
-	------
-	AssertionError
-		If the window size is not an odd integer and mode is 'bi'.
-	Notes
-	-----
-	- For 'bi' mode, the window size must be an odd number.
-	- For 'forward' mode, the velocity at the edge points may not be accurate due to the one-sided window.
-	- For 'backward' mode, the velocity at the first few points may not be accurate due to the one-sided window.
-	Examples
-	--------
-	>>> import numpy as np
-	>>> x = np.array([1, 2, 4, 7, 11])
-	>>> y = np.array([0, 3, 5, 8, 10])
-	>>> timeline = np.array([0, 1, 2, 3, 4])
-	>>> window = 3
-	>>> velocity(x, y, timeline, window, mode='bi')
-	array([[3., 3.],
-		   [3., 3.]])
-	>>> velocity(x, y, timeline, window, mode='forward')
-	array([[2., 2.],
-		   [3., 3.]])
-	>>> velocity(x, y, timeline, window, mode='backward')
-	array([[3., 3.],
-		   [3., 3.]])
-	"""
-	v = np.zeros((len(x),2))
-	v[:,:] = np.nan
-	v[:,0] = derivative(x, timeline, window, mode=mode)
-	v[:,1] = derivative(y, timeline, window, mode=mode)
-	return v
-def magnitude_velocity(v_matrix):
-	"""
-	Compute the magnitude of velocity vectors given a matrix representing 2D velocity vectors.
-	Parameters
-	----------
-	v_matrix : array_like
-		The matrix where each row represents a 2D velocity vector with the first column
-		being the x-component and the second column being the y-component.
-	Returns
-	-------
-	magnitude : ndarray
-		The computed magnitudes of the input velocity vectors.
-	Notes
-	-----
-	- If a velocity vector has NaN components, the corresponding magnitude will be NaN.
-	- The function handles NaN values in the input matrix gracefully.
-	Examples
-	--------
-	>>> import numpy as np
-	>>> v_matrix = np.array([[3, 4],
-	...                      [2, 2],
-	...                      [3, 3]])
-	>>> magnitude_velocity(v_matrix)
-	array([5., 2.82842712, 4.24264069])
-	>>> v_matrix_with_nan = np.array([[3, 4],
-	...                               [np.nan, 2],
-	...                               [3, np.nan]])
-	>>> magnitude_velocity(v_matrix_with_nan)
-	array([5., nan, nan])
-	"""
-	magnitude = np.zeros(len(v_matrix))
-	magnitude[:] = np.nan
-	for i in range(len(v_matrix)):
-		if v_matrix[i,0]==v_matrix[i,0]:
-			magnitude[i] = np.sqrt(v_matrix[i,0]**2 + v_matrix[i,1]**2)
-	return magnitude
-def orientation(v_matrix):
-	"""
-	Compute the orientation angles (in radians) of 2D velocity vectors given a matrix representing velocity vectors.
-	Parameters
-	----------
-	v_matrix : array_like
-		The matrix where each row represents a 2D velocity vector with the first column
-		being the x-component and the second column being the y-component.
-	Returns
-	-------
-	orientation_array : ndarray
-		The computed orientation angles of the input velocity vectors in radians.
-		If a velocity vector has NaN components, the corresponding orientation angle will be NaN.
-	Examples
-	--------
-	>>> import numpy as np
-	>>> v_matrix = np.array([[3, 4],
-	...                      [2, 2],
-	...                      [-3, -3]])
-	>>> orientation(v_matrix)
-	array([0.92729522, 0.78539816, -2.35619449])
-	>>> v_matrix_with_nan = np.array([[3, 4],
-	...                               [np.nan, 2],
-	...                               [3, np.nan]])
-	>>> orientation(v_matrix_with_nan)
-	array([0.92729522, nan, nan])
-	"""
-	orientation_array = np.zeros(len(v_matrix))
-	for t in range(len(orientation_array)):
-		if v_matrix[t,0]==v_matrix[t,0]:
-			orientation_array[t] = np.arctan2(v_matrix[t,0],v_matrix[t,1])
-	return orientation_array
 def T_MSD(x,y,dt):
 	"""
@@ -2736,14 +2634,14 @@ def columnwise_mean(matrix, min_nbr_values = 1):
 	for k in range(matrix.shape[1]):
 		values = matrix[:,k]
-		values = values[values!=0]
+		values = values[values==values]
 		if len(values[values==values])>min_nbr_values:
 			mean_line[k] = np.nanmean(values)
 			mean_line_std[k] = np.nanstd(values)
 	return mean_line, mean_line_std
-def mean_signal(df, signal_name, class_col, time_col=None, class_value=[0], return_matrix=False, forced_max_duration=None, min_nbr_values=2):
+def mean_signal(df, signal_name, class_col, time_col=None, class_value=[0], return_matrix=False, forced_max_duration=None, min_nbr_values=2,conflict_mode='mean'):
 	"""
 	Calculate the mean and standard deviation of a specified signal for tracks of a given class in the input DataFrame.
@@ -2789,12 +2687,13 @@ def mean_signal(df, signal_name, class_col, time_col=None, class_value=[0], retu
 	else:
 		max_duration = forced_max_duration
 	n_tracks = len(df.groupby(['position','TRACK_ID']))
-	signal_matrix = np.zeros((n_tracks,max_duration*2 + 1))
+	signal_matrix = np.zeros((n_tracks,int(max_duration)*2 + 1))
 	signal_matrix[:,:] = np.nan
+	df = df.sort_values(by=['position','TRACK_ID','FRAME'])
 	trackid=0
 	for track,track_group in df.loc[df[class_col].isin(class_value)].groupby(['position','TRACK_ID']):
-		track_group = track_group.sort_values(by='FRAME')
 		cclass = track_group[class_col].to_numpy()[0]
 		if cclass != 0:
 			ref_time = 0
@@ -2803,8 +2702,14 @@ def mean_signal(df, signal_name, class_col, time_col=None, class_value=[0], retu
 				ref_time = floor(track_group[time_col].to_numpy()[0])
 			except:
 				continue
-		signal = track_group[signal_name].to_numpy()
-		timeline = track_group['FRAME'].to_numpy().astype(int)
+		if conflict_mode=='mean':
+			signal = track_group.groupby('FRAME')[signal_name].mean().to_numpy()
+		elif conflict_mode=='first':
+			signal = track_group.groupby('FRAME')[signal_name].first().to_numpy()
+		else:
+			signal = track_group[signal_name].to_numpy()
+		timeline = track_group['FRAME'].unique().astype(int)
 		timeline_shifted = timeline - ref_time + max_duration
 		signal_matrix[trackid,timeline_shifted] = signal
 		trackid+=1

celldetective 1.0.2.post1__py3-none-any.whl → 1.1.1__py3-none-any.whl

celldetective 1.0.2.post1py3-none-any.whl → 1.1.1py3-none-any.whl