boris-behav-obs 9.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of boris-behav-obs might be problematic. Click here for more details.
- boris/__init__.py +26 -0
- boris/__main__.py +25 -0
- boris/about.py +143 -0
- boris/add_modifier.py +635 -0
- boris/add_modifier_ui.py +303 -0
- boris/advanced_event_filtering.py +455 -0
- boris/analysis_plugins/__init__.py +0 -0
- boris/analysis_plugins/_latency.py +59 -0
- boris/analysis_plugins/irr_cohen_kappa.py +109 -0
- boris/analysis_plugins/irr_cohen_kappa_with_modifiers.py +112 -0
- boris/analysis_plugins/irr_weighted_cohen_kappa.py +157 -0
- boris/analysis_plugins/irr_weighted_cohen_kappa_with_modifiers.py +162 -0
- boris/analysis_plugins/list_of_dataframe_columns.py +22 -0
- boris/analysis_plugins/number_of_occurences.py +22 -0
- boris/analysis_plugins/number_of_occurences_by_independent_variable.py +54 -0
- boris/analysis_plugins/time_budget.py +61 -0
- boris/behav_coding_map_creator.py +1110 -0
- boris/behavior_binary_table.py +305 -0
- boris/behaviors_coding_map.py +239 -0
- boris/boris_cli.py +340 -0
- boris/cmd_arguments.py +49 -0
- boris/coding_pad.py +280 -0
- boris/config.py +785 -0
- boris/config_file.py +356 -0
- boris/connections.py +409 -0
- boris/converters.py +333 -0
- boris/converters_ui.py +225 -0
- boris/cooccurence.py +250 -0
- boris/core.py +5901 -0
- boris/core_qrc.py +15958 -0
- boris/core_ui.py +1107 -0
- boris/db_functions.py +324 -0
- boris/dev.py +134 -0
- boris/dialog.py +1108 -0
- boris/duration_widget.py +238 -0
- boris/edit_event.py +245 -0
- boris/edit_event_ui.py +233 -0
- boris/event_operations.py +1040 -0
- boris/events_cursor.py +61 -0
- boris/events_snapshots.py +596 -0
- boris/exclusion_matrix.py +141 -0
- boris/export_events.py +1006 -0
- boris/export_observation.py +1203 -0
- boris/external_processes.py +332 -0
- boris/geometric_measurement.py +941 -0
- boris/gui_utilities.py +135 -0
- boris/image_overlay.py +72 -0
- boris/import_observations.py +242 -0
- boris/ipc_mpv.py +325 -0
- boris/irr.py +634 -0
- boris/latency.py +244 -0
- boris/measurement_widget.py +161 -0
- boris/media_file.py +115 -0
- boris/menu_options.py +213 -0
- boris/modifier_coding_map_creator.py +1013 -0
- boris/modifiers_coding_map.py +157 -0
- boris/mpv.py +2016 -0
- boris/mpv2.py +2193 -0
- boris/observation.py +1453 -0
- boris/observation_operations.py +2538 -0
- boris/observation_ui.py +679 -0
- boris/observations_list.py +337 -0
- boris/otx_parser.py +442 -0
- boris/param_panel.py +201 -0
- boris/param_panel_ui.py +305 -0
- boris/player_dock_widget.py +198 -0
- boris/plot_data_module.py +536 -0
- boris/plot_events.py +634 -0
- boris/plot_events_rt.py +237 -0
- boris/plot_spectrogram_rt.py +316 -0
- boris/plot_waveform_rt.py +230 -0
- boris/plugins.py +431 -0
- boris/portion/__init__.py +31 -0
- boris/portion/const.py +95 -0
- boris/portion/dict.py +365 -0
- boris/portion/func.py +52 -0
- boris/portion/interval.py +581 -0
- boris/portion/io.py +181 -0
- boris/preferences.py +510 -0
- boris/preferences_ui.py +770 -0
- boris/project.py +2007 -0
- boris/project_functions.py +2041 -0
- boris/project_import_export.py +1096 -0
- boris/project_ui.py +794 -0
- boris/qrc_boris.py +10389 -0
- boris/qrc_boris5.py +2579 -0
- boris/select_modifiers.py +312 -0
- boris/select_observations.py +210 -0
- boris/select_subj_behav.py +286 -0
- boris/state_events.py +197 -0
- boris/subjects_pad.py +106 -0
- boris/synthetic_time_budget.py +290 -0
- boris/time_budget_functions.py +1136 -0
- boris/time_budget_widget.py +1039 -0
- boris/transitions.py +365 -0
- boris/utilities.py +1810 -0
- boris/version.py +24 -0
- boris/video_equalizer.py +159 -0
- boris/video_equalizer_ui.py +248 -0
- boris/video_operations.py +310 -0
- boris/view_df.py +104 -0
- boris/view_df_ui.py +75 -0
- boris/write_event.py +538 -0
- boris_behav_obs-9.7.7.dist-info/METADATA +139 -0
- boris_behav_obs-9.7.7.dist-info/RECORD +109 -0
- boris_behav_obs-9.7.7.dist-info/WHEEL +5 -0
- boris_behav_obs-9.7.7.dist-info/entry_points.txt +2 -0
- boris_behav_obs-9.7.7.dist-info/licenses/LICENSE.TXT +674 -0
- boris_behav_obs-9.7.7.dist-info/top_level.txt +1 -0
boris/irr.py
ADDED
|
@@ -0,0 +1,634 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BORIS
|
|
3
|
+
Behavioral Observation Research Interactive Software
|
|
4
|
+
Copyright 2012-2025 Olivier Friard
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
This program is free software; you can redistribute it and/or modify
|
|
8
|
+
it under the terms of the GNU General Public License as published by
|
|
9
|
+
the Free Software Foundation; either version 2 of the License, or
|
|
10
|
+
(at your option) any later version.
|
|
11
|
+
|
|
12
|
+
This program is distributed in the hope that it will be useful,
|
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
GNU General Public License for more details.
|
|
16
|
+
|
|
17
|
+
You should have received a copy of the GNU General Public License
|
|
18
|
+
along with this program; if not, write to the Free Software
|
|
19
|
+
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
20
|
+
MA 02110-1301, USA.
|
|
21
|
+
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
from decimal import Decimal as dec
|
|
26
|
+
|
|
27
|
+
import numpy as np
|
|
28
|
+
from PySide6.QtWidgets import QInputDialog, QMessageBox
|
|
29
|
+
|
|
30
|
+
from . import config as cfg
|
|
31
|
+
from . import db_functions, dialog, project_functions, select_subj_behav
|
|
32
|
+
from . import utilities as util
|
|
33
|
+
from . import select_observations
|
|
34
|
+
from . import observation_operations
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def subj_behav_modif(cursor, obsid: str, subject: str, time: dec, interval, include_modifiers: bool) -> list:
    """
    behaviors of a subject found at a given time of an observation

    State events are retained when the time lies between their start and stop.
    Point events are retained when their start is at most half an interval away from the time.

    Args:
        cursor (sqlite3.cursor): cursor to aggregated events db
        obsid (str): id of observation
        subject (str): name of subject
        time (Decimal): time
        interval: time unit, half of it is the tolerance applied to the point events
        include_modifiers (bool): True: include modifiers False: do not

    Returns:
        list: list of lists [subject, behavior, modifiers]
              ([subject, behavior] when the modifiers are not included)
    """

    state_query = (
        "SELECT behavior, modifiers FROM aggregated_events "
        "WHERE "
        "observation = ? "
        "AND subject = ? "
        "AND type = 'STATE' "
        "AND (? BETWEEN start AND STOP) "
    )
    point_query = (
        "SELECT behavior, modifiers FROM aggregated_events "
        "WHERE "
        "observation = ? "
        "AND subject = ? "
        "AND type = 'POINT' "
        "AND abs(start - ?) <= ? "
    )

    # the state events come first in the result, the point events follow
    found = list(cursor.execute(state_query, (obsid, subject, float(time))).fetchall())
    found.extend(cursor.execute(point_query, (obsid, subject, float(time), float(interval / 2))).fetchall())

    if include_modifiers:
        return [[subject, row[0], row[1]] for row in found]

    return [[subject, row[0]] for row in found]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def cohen_kappa(cursor, obsid1: str, obsid2: str, interval: dec, selected_subjects: list, include_modifiers: bool):
    """
    Inter-rater reliability Cohen's kappa coefficient (time-unit)
    see Sequential Analysis and Observational Methods for the Behavioral Sciences p. 77

    The time range going from the first start to the last stop of the events of both
    observations (selected subjects only) is sampled every `interval`. At each sampled
    time and for each subject, the behaviors found in both observations are compared.

    Args:
        cursor (sqlite3.cursor): cursor to aggregated events db
        obsid1 (str): id of observation #1
        obsid2 (str): id of observation #2
        interval (decimal.Decimal): time unit (s)
        selected_subjects (list): subjects selected for analysis
        include_modifiers (bool): True: include modifiers False: do not

    Return:
        float: K (-100 when the coefficient can not be computed)
        str: result of analysis (the reason of the failure when K is -100)
    """

    # a null or negative time unit would never let the sampling loop end
    if interval <= 0:
        return -100, f"The time unit must be greater than 0 (got {interval})"

    # check if obs have events (one row is enough to know)
    for obs_id in (obsid1, obsid2):
        if cursor.execute("SELECT * FROM aggregated_events WHERE observation = ? ", (obs_id,)).fetchone() is None:
            return -100, f"The observation {obs_id} has no recorded events"

    subjects = tuple(selected_subjects)
    subjects_placeholders = ",".join("?" * len(subjects))

    first_event, last_event = cursor.execute(
        (
            "SELECT min(start), max(stop) FROM aggregated_events "
            f"WHERE observation in (?, ?) AND subject in ({subjects_placeholders}) "
        ),
        (obsid1, obsid2) + subjects,
    ).fetchone()

    logging.debug(f"first_event: {first_event}")
    logging.debug(f"last_event: {last_event}")

    # min() / max() give NULL when no event matches the selected subjects:
    # there is no time range to sample in this case
    if first_event is None or last_event is None:
        return -100, f"No events recorded for the selected subjects in the observations {obsid1} and {obsid2}"

    count_query = f"SELECT COUNT(*) FROM aggregated_events WHERE observation = ? AND subject in ({subjects_placeholders}) "
    nb_events1 = cursor.execute(count_query, (obsid1,) + subjects).fetchone()[0]
    nb_events2 = cursor.execute(count_query, (obsid2,) + subjects).fetchone()[0]

    # sample both observations once: collect the pairs to compare
    # together with the distinct states met in any of the observations
    total_states = []
    compared = []
    currentTime = dec(str(first_event))
    while currentTime <= last_event:
        for subject in selected_subjects:
            s1 = subj_behav_modif(cursor, obsid1, subject, currentTime, interval, include_modifiers)
            s2 = subj_behav_modif(cursor, obsid2, subject, currentTime, interval, include_modifiers)

            logging.debug(f"currentTime: {currentTime} s1:{s1} s2:{s2}")

            for s in (s1, s2):
                if s not in total_states:
                    total_states.append(s)
            compared.append((s1, s2))

        currentTime += interval

    total_states = sorted(total_states)

    logging.debug(f"total_states: {total_states} len:{len(total_states)}")

    # rows: states of observation #1, columns: states of observation #2
    contingency_table = np.zeros((len(total_states), len(total_states)))
    for s1, s2 in compared:
        contingency_table[total_states.index(s1), total_states.index(s2)] += 1

    logging.debug(f"contingency_table:\n {contingency_table}")

    template = (
        "Observation: {obsid1}\nnumber of events: {nb_events1}\n\nObservation: {obsid2}\nnumber of events: {nb_events2:.0f}\n\nK = {K:.3f}"
    )

    cols_sums = contingency_table.sum(axis=0)
    rows_sums = contingency_table.sum(axis=1)
    overall_total = contingency_table.sum()

    logging.debug(f"overall_total: {overall_total}")

    agreements = sum(contingency_table.diagonal())

    logging.debug(f"agreements: {agreements}")

    # sum of the agreements expected by chance
    sum_ef = sum(row_sum * col_sum / overall_total for row_sum, col_sum in zip(rows_sums, cols_sums))

    logging.debug(f"sum_ef {sum_ef}")

    # when all the comparisons are expected to agree by chance the ratio is undefined: K is set to 1
    if not (overall_total - sum_ef):
        K = 1
    else:
        K = round((agreements - sum_ef) / (overall_total - sum_ef), 3)

    out = template.format(obsid1=obsid1, obsid2=obsid2, nb_events1=nb_events1, nb_events2=nb_events2, K=K)

    logging.debug(f"K: {K}")
    return K, out
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def irr_cohen_kappa(self):
    """
    calculate the Inter-Rater Reliability index - Cohen's Kappa of 2 or more observations
    https://en.wikipedia.org/wiki/Cohen%27s_kappa

    The user is asked for the observations, the subjects / behaviors and the time unit.
    Cohen's kappa is computed once for every pair of selected observations.
    The results dialog shows the detailed report when 2 observations are analyzed,
    otherwise the matrix of the coefficients.
    """

    # ask user observations to analyze
    _, selected_observations = select_observations.select_observations2(
        self, mode=cfg.MULTIPLE, windows_title="Select observations for IRR Cohen Kappa"
    )

    if not selected_observations:
        return
    if len(selected_observations) < 2:
        QMessageBox.information(self, cfg.programName, "Select at least 2 observations for IRR analysis")
        return

    # check if coded behaviors are defined in ethogram
    if project_functions.check_coded_behaviors_in_obs_list(self.pj, selected_observations):
        return

    # check if state events are paired
    not_ok, selected_observations = project_functions.check_state_events(self.pj, selected_observations)
    if not_ok or not selected_observations:
        return

    start_coding, _, _ = observation_operations.coding_time(self.pj[cfg.OBSERVATIONS], selected_observations)

    # exit with message if events do not have timestamp
    if start_coding.is_nan():
        QMessageBox.critical(
            None,
            cfg.programName,
            ("This function is not available for observations with events that do not have timestamp"),
            QMessageBox.Ok | QMessageBox.Default,
            QMessageBox.NoButton,
        )
        return

    parameters = select_subj_behav.choose_obs_subj_behav_category(
        self,
        selected_observations,
        start_coding=dec("NaN"),
        end_coding=dec("NaN"),
        show_include_modifiers=True,
        show_exclude_non_coded_behaviors=False,
        n_observations=len(selected_observations),
    )
    if parameters == {}:
        return
    if not parameters[cfg.SELECTED_SUBJECTS] or not parameters[cfg.SELECTED_BEHAVIORS]:
        QMessageBox.warning(None, cfg.programName, "Select subject(s) and behavior(s) to analyze")
        return

    # ask for time slice
    i, ok = QInputDialog.getDouble(self, "IRR - Cohen's Kappa (time-unit)", "Time unit (in seconds):", 1.0, 0.001, 86400, 3)
    if not ok:
        return
    interval = util.float2decimal(i)

    loaded, msg, db_connector = db_functions.load_aggregated_events_in_db(
        self.pj, parameters[cfg.SELECTED_SUBJECTS], selected_observations, parameters[cfg.SELECTED_BEHAVIORS]
    )
    # NOTE(review): the first returned value is presumed to flag a successful load and
    # the second one to describe the failure (both were ignored before) - confirm against db_functions
    if not loaded or db_connector is None:
        QMessageBox.critical(self, cfg.programName, str(msg) if msg else "The events can not be loaded")
        return

    cursor = db_connector.cursor()
    header = (
        "Index of Inter-rater Reliability - Cohen's Kappa\n\n"
        f"Interval time: {interval:.3f} s\n"
        f"Selected subjects: {', '.join(parameters[cfg.SELECTED_SUBJECTS])}\n\n"
    )

    # symmetric matrix of the coefficients (an observation fully agrees with itself)
    irr_results = np.ones((len(selected_observations), len(selected_observations)))

    # every pair of observations is analyzed once, the earlier selected observation first
    reports = []
    for idx1, obs_id1 in enumerate(selected_observations):
        for idx2, obs_id2 in enumerate(selected_observations[idx1 + 1 :], start=idx1 + 1):
            K, msg = cohen_kappa(
                cursor,
                obs_id1,
                obs_id2,
                interval,
                parameters[cfg.SELECTED_SUBJECTS],
                parameters[cfg.INCLUDE_MODIFIERS],
            )
            irr_results[idx1, idx2] = K
            irr_results[idx2, idx1] = K
            reports.append(msg + "\n=============\n")

    out = header + "".join(reports)

    # tab separated matrix with the observation ids as column and row headers
    matrix_rows = ["\t{}\n".format("\t".join(list(selected_observations)))]
    for obs_id, row in zip(selected_observations, irr_results):
        matrix_rows.append(f"{obs_id}\t" + "\t".join([f"{x:8.6f}" for x in row]) + "\n")
    out2 = "".join(matrix_rows)

    self.results = dialog.Results_dialog()
    self.results.setWindowTitle("BORIS - IRR - Cohen's Kappa (time-unit) analysis results")
    self.results.ptText.setReadOnly(True)
    if len(selected_observations) == 2:
        self.results.ptText.appendPlainText(out)
    else:
        self.results.ptText.appendPlainText(out2)
    self.results.show()
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def needleman_wunsch_identity(cursor, obsid1: str, obsid2: str, interval, selected_subjects: list, include_modifiers: bool):
    """
    Needleman - Wunsch identity between 2 observations

    see http://anhaidgroup.github.io/py_stringmatching/v0.4.1/NeedlemanWunsch.html#

    Both observations are sampled every `interval` from the first start to the last stop
    of their events (selected subjects only), then the 2 sequences of samples are globally aligned.

    Args:
        cursor (sqlite3.cursor): cursor to aggregated events db
        obsid1 (str): id of observation #1
        obsid2 (str): id of observation #2
        interval: time unit used for sampling the observations
        selected_subjects (list): subjects selected for analysis
        include_modifiers (bool): True: include modifiers False: do not

    Return:
        float: identity (percentage of identical positions in the alignment, -100 if no event was found)
        str: result of analysis
    """

    match_award, mismatch_penalty, gap_penalty = 1, -1, -1
    gap = "-"

    def pair_score(left, right):
        """score of 2 aligned items"""
        if left == right:
            return match_award
        if left == gap or right == gap:
            return gap_penalty
        return mismatch_penalty

    def summarize(reversed1, reversed2):
        """put the alignments in reading order and measure them"""
        aligned1, aligned2 = reversed1[::-1], reversed2[::-1]
        pairs = list(zip(aligned1, aligned2))

        # an identical pair is reported as such, any other pair (mismatch or gap) as a space
        symbols = [left if left == right else " " for left, right in pairs]
        total_score = sum(pair_score(left, right) for left, right in pairs)
        n_identical = sum(1 for left, right in pairs if left == right)

        return {
            "identity": float(n_identical) / len(aligned1) * 100,
            "score": total_score,
            "align1": aligned1,
            "align2": aligned2,
            "symbol": symbols,
        }

    def align(items1, items2):
        """global alignment of 2 sequences"""
        n_rows, n_cols = len(items1), len(items2)

        # first row and first column hold the cumulated gap penalties
        grid = [[gap_penalty * col for col in range(n_cols + 1)]]
        grid.extend([gap_penalty * row] + [0] * n_cols for row in range(1, n_rows + 1))

        for row in range(1, n_rows + 1):
            for col in range(1, n_cols + 1):
                grid[row][col] = max(
                    grid[row - 1][col - 1] + pair_score(items1[row - 1], items2[col - 1]),
                    grid[row - 1][col] + gap_penalty,
                    grid[row][col - 1] + gap_penalty,
                )

        # trace back from the bottom right cell (the alignments are built reversed)
        path1, path2 = [], []
        row, col = n_rows, n_cols
        while row > 0 and col > 0:
            here = grid[row][col]

            if here == grid[row - 1][col - 1] + pair_score(items1[row - 1], items2[col - 1]):
                path1.append(items1[row - 1])
                path2.append(items2[col - 1])
                row -= 1
                col -= 1
            elif here == grid[row - 1][col] + gap_penalty:
                path1.append(items1[row - 1])
                path2.append(gap)
                row -= 1
            elif here == grid[row][col - 1] + gap_penalty:
                path1.append(gap)
                path2.append(items2[col - 1])
                col -= 1

        # finish tracing up to the top left cell
        while row > 0:
            path1.append(items1[row - 1])
            path2.append(gap)
            row -= 1
        while col > 0:
            path1.append(gap)
            path2.append(items2[col - 1])
            col -= 1

        return summarize(path1, path2)

    subjects = tuple(selected_subjects)
    placeholders = ",".join("?" * len(selected_subjects))
    both_observations = (obsid1, obsid2) + subjects

    first_event = cursor.execute(
        f"SELECT min(start) FROM aggregated_events WHERE observation in (?, ?) AND subject in ({placeholders}) ",
        both_observations,
    ).fetchone()[0]

    if first_event is None:
        logging.debug(f"An observation has no recorded events: {obsid1} or {obsid2}")

        return -100, f"An observation has no recorded events: {obsid1} {obsid2}"

    logging.debug(f"first_event: {first_event}")

    last_event = cursor.execute(
        f"SELECT max(stop) FROM aggregated_events WHERE observation in (?, ?) AND subject in ({placeholders}) ",
        both_observations,
    ).fetchone()[0]

    logging.debug(f"last_event: {last_event}")

    count_query = f"SELECT COUNT(*) FROM aggregated_events WHERE observation = ? AND subject in ({placeholders}) "
    nb_events1 = cursor.execute(count_query, (obsid1,) + subjects).fetchone()[0]
    nb_events2 = cursor.execute(count_query, (obsid2,) + subjects).fetchone()[0]

    # one item per sampled time: the behaviors of every selected subject
    samples1, samples2 = [], []

    currentTime = dec(str(first_event))
    while currentTime <= last_event:
        at_time1, at_time2 = [], []
        samples1.append(at_time1)
        samples2.append(at_time2)

        for subject in selected_subjects:
            s1 = subj_behav_modif(cursor, obsid1, subject, currentTime, interval, include_modifiers)
            s2 = subj_behav_modif(cursor, obsid2, subject, currentTime, interval, include_modifiers)

            at_time1.append(s1)
            at_time2.append(s2)

            logging.debug(f"currentTime: {currentTime} s1:{s1} s2:{s2}")

        currentTime += interval

    logging.debug(f"seq1:\n {samples1}")
    logging.debug(f"seq2:\n {samples2}")

    identity = align(samples1, samples2)["identity"]

    out = (
        f"Observation: {obsid1}\n"
        f"number of events: {nb_events1}\n\n"
        f"Observation: {obsid2}\n"
        f"number of events: {nb_events2:.0f}\n\n"
        f"identity = {identity:.3f} %"
    )

    logging.debug(f"identity: {identity}")

    return identity, out
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def needleman_wunch(self):
    """
    calculate the Needleman-Wunsch similarity for 2 or more observations

    The user is asked for the observations, the subjects / behaviors and the time unit.
    The identity is computed once for every pair of selected observations.
    The results dialog shows the detailed report when 2 observations are analyzed,
    otherwise the matrix of the identities (in percent).
    """

    # ask user observations to analyze
    _, selected_observations = select_observations.select_observations2(
        self, mode=cfg.MULTIPLE, windows_title="Select observations for Needleman-Wunsch identity"
    )

    if not selected_observations:
        return
    if len(selected_observations) < 2:
        QMessageBox.information(self, cfg.programName, "You have to select at least 2 observations for Needleman-Wunsch similarity")
        return

    # check if coded behaviors are defined in ethogram
    if project_functions.check_coded_behaviors_in_obs_list(self.pj, selected_observations):
        return

    # check if state events are paired
    not_ok, selected_observations = project_functions.check_state_events(self.pj, selected_observations)
    if not_ok or not selected_observations:
        return

    start_coding, _, _ = observation_operations.coding_time(self.pj[cfg.OBSERVATIONS], selected_observations)

    # exit with message if events do not have timestamp
    if start_coding.is_nan():
        QMessageBox.critical(
            None,
            cfg.programName,
            ("This function is not available for observations with events that do not have timestamp"),
            QMessageBox.Ok | QMessageBox.Default,
            QMessageBox.NoButton,
        )
        return

    parameters = select_subj_behav.choose_obs_subj_behav_category(
        self,
        selected_observations,
        start_coding=dec("NaN"),
        end_coding=dec("NaN"),
        show_include_modifiers=True,
        show_exclude_non_coded_behaviors=False,
        n_observations=len(selected_observations),
    )

    if parameters == {}:
        return

    if not parameters[cfg.SELECTED_SUBJECTS] or not parameters[cfg.SELECTED_BEHAVIORS]:
        QMessageBox.warning(None, cfg.programName, "Select subject(s) and behavior(s) to analyze")
        return

    # ask for time slice
    i, ok = QInputDialog.getDouble(self, "Needleman-Wunsch similarity", "Time unit (in seconds):", 1.0, 0.001, 86400, 3)
    if not ok:
        return
    interval = util.float2decimal(i)

    loaded, msg, db_connector = db_functions.load_aggregated_events_in_db(
        self.pj, parameters[cfg.SELECTED_SUBJECTS], selected_observations, parameters[cfg.SELECTED_BEHAVIORS]
    )
    # NOTE(review): the first returned value is presumed to flag a successful load and
    # the second one to describe the failure (both were ignored before) - confirm against db_functions
    if not loaded or db_connector is None:
        QMessageBox.critical(self, cfg.programName, str(msg) if msg else "The events can not be loaded")
        return

    cursor = db_connector.cursor()
    header = (
        f"Needleman-Wunsch similarity\n\nTime unit: {interval:.3f} s\nSelected subjects: {', '.join(parameters[cfg.SELECTED_SUBJECTS])}\n\n"
    )

    # symmetric matrix of the identities: they are percentages,
    # so an observation compared with itself is 100 % identical
    nws_results = np.full((len(selected_observations), len(selected_observations)), 100.0)

    # every pair of observations is analyzed once, the earlier selected observation first
    reports = []
    for idx1, obs_id1 in enumerate(selected_observations):
        for idx2, obs_id2 in enumerate(selected_observations[idx1 + 1 :], start=idx1 + 1):
            similarity, msg = needleman_wunsch_identity(
                cursor,
                obs_id1,
                obs_id2,
                interval,
                parameters[cfg.SELECTED_SUBJECTS],
                parameters[cfg.INCLUDE_MODIFIERS],
            )
            nws_results[idx1, idx2] = similarity
            nws_results[idx2, idx1] = similarity
            reports.append(msg + "\n=============\n")

    out = header + "".join(reports)

    # tab separated matrix with the observation ids as column and row headers
    matrix_rows = ["\t{}\n".format("\t".join(list(selected_observations)))]
    for obs_id, row in zip(selected_observations, nws_results):
        matrix_rows.append(f"{obs_id}\t" + "\t".join([f"{x:8.6f}" for x in row]) + "\n")
    out2 = "".join(matrix_rows)

    self.results = dialog.Results_dialog()
    self.results.setWindowTitle(f"{cfg.programName} - Needleman-Wunsch similarity")
    self.results.ptText.setReadOnly(True)
    if len(selected_observations) == 2:
        self.results.ptText.appendPlainText(out)
    else:
        self.results.ptText.appendPlainText(out2)
    self.results.show()
|