circaPy 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ <component name="TestRunnerService">
9
+ <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10
+ </component>
11
+ </module>
circaPy/.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (actigraphy_analysis_environment)" project-jdk-type="Python SDK" />
4
+ </project>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/actigraphy_analysis.iml" filepath="$PROJECT_DIR$/.idea/actigraphy_analysis.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
circaPy/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5
+ </component>
6
+ </project>
@@ -0,0 +1,95 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ChangeListManager">
4
+ <list default="true" id="b5541f44-a378-4089-97fd-52dc0173e1fa" name="Default Changelist" comment="" />
5
+ <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
6
+ <option name="SHOW_DIALOG" value="false" />
7
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
8
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
9
+ <option name="LAST_RESOLUTION" value="IGNORE" />
10
+ </component>
11
+ <component name="FUSProjectUsageTrigger">
12
+ <session id="258723820">
13
+ <usages-collector id="statistics.lifecycle.project">
14
+ <counts>
15
+ <entry key="project.closed" value="1" />
16
+ <entry key="project.open.time.0" value="1" />
17
+ <entry key="project.opened" value="1" />
18
+ </counts>
19
+ </usages-collector>
20
+ </session>
21
+ </component>
22
+ <component name="Git.Settings">
23
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
24
+ </component>
25
+ <component name="ProjectFrameBounds" extendedState="6" fullScreen="true">
26
+ <option name="x" value="74" />
27
+ <option name="y" value="97" />
28
+ <option name="width" value="1280" />
29
+ <option name="height" value="777" />
30
+ </component>
31
+ <component name="ProjectLevelVcsManager" settingsEditedManually="true" />
32
+ <component name="ProjectView">
33
+ <navigator proportions="" version="1">
34
+ <foldersAlwaysOnTop value="true" />
35
+ </navigator>
36
+ <panes>
37
+ <pane id="ProjectPane" />
38
+ <pane id="Scope" />
39
+ </panes>
40
+ </component>
41
+ <component name="PropertiesComponent">
42
+ <property name="last_opened_file_path" value="$PROJECT_DIR$" />
43
+ <property name="settings.editor.selected.configurable" value="reference.settingsdialog.IDE.editor.postfix.templates" />
44
+ </component>
45
+ <component name="RunDashboard">
46
+ <option name="ruleStates">
47
+ <list>
48
+ <RuleState>
49
+ <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
50
+ </RuleState>
51
+ <RuleState>
52
+ <option name="name" value="StatusDashboardGroupingRule" />
53
+ </RuleState>
54
+ </list>
55
+ </option>
56
+ </component>
57
+ <component name="SvnConfiguration">
58
+ <configuration />
59
+ </component>
60
+ <component name="TaskManager">
61
+ <task active="true" id="Default" summary="Default task">
62
+ <changelist id="b5541f44-a378-4089-97fd-52dc0173e1fa" name="Default Changelist" comment="" />
63
+ <created>1540890610664</created>
64
+ <option name="number" value="Default" />
65
+ <option name="presentableId" value="Default" />
66
+ <updated>1540890610664</updated>
67
+ </task>
68
+ <servers />
69
+ </component>
70
+ <component name="ToolWindowManager">
71
+ <frame x="0" y="0" width="1440" height="900" extended-state="6" />
72
+ <layout>
73
+ <window_info id="Favorites" side_tool="true" />
74
+ <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.24947146" />
75
+ <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
76
+ <window_info anchor="bottom" id="Version Control" show_stripe_button="false" />
77
+ <window_info anchor="bottom" id="Python Console" />
78
+ <window_info anchor="bottom" id="Terminal" />
79
+ <window_info anchor="bottom" id="Event Log" side_tool="true" />
80
+ <window_info anchor="bottom" id="Message" order="0" />
81
+ <window_info anchor="bottom" id="Find" order="1" />
82
+ <window_info anchor="bottom" id="Run" order="2" />
83
+ <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
84
+ <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
85
+ <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
86
+ <window_info anchor="bottom" id="TODO" order="6" />
87
+ <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
88
+ <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
89
+ <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
90
+ </layout>
91
+ </component>
92
+ <component name="VcsContentAnnotationSettings">
93
+ <option name="myLimit" value="2678400000" />
94
+ </component>
95
+ </project>
circaPy/__init__.py ADDED
File without changes
circaPy/activity.py ADDED
@@ -0,0 +1,391 @@
1
+ import pdb
2
+ import numpy as np
3
+ import pandas as pd
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ import circaPy.preprocessing as prep
7
+
8
+
9
@prep.validate_input
def calculate_IV(data):
    """
    Intradaily variability (IV) calculation.

    Computes IV following van Someren et al. 1996: the ratio of the
    variance of the first derivative of the series to the overall
    variance of the series.

    IV = n * sum{i=2 -> n}(x{i} - x{i-1})**2
         /
         (n-1) * sum{i=1 -> n}(x{i} - x{bar})**2

    Parameters
    ----------
    data : array or dataframe
        Timeseries data to calculate.

    Returns
    -------
    float
        The intradaily variability value (0 for a perfectly flat
        series).

    Raises
    ------
    ValueError
        If fewer than two data points are supplied.
    """
    values = np.array(data)
    n_points = len(values)

    # Need at least one first-difference to form the numerator.
    if n_points < 2:
        raise ValueError(
            "At least two data points are required to compute IV.")

    overall_mean = np.mean(values)

    # Numerator: n times the sum of squared successive differences.
    successive_diffs = np.diff(values, axis=0)
    num = n_points * np.sum(successive_diffs ** 2)

    # Denominator: (n - 1) times the sum of squared deviations from
    # the overall mean.
    den = (n_points - 1) * np.sum((values - overall_mean) ** 2)

    # A constant series yields 0/0; report 0 rather than NaN.
    if num == 0 and den == 0:
        return 0

    return num / den
53
+
54
+
55
@prep.validate_input
def calculate_mean_activity(data, sem=False):
    """
    Mean activity calculation.

    Averages the activity recorded at each clock time across every day
    present in the index.

    Parameters
    ----------
    data : pd.DataFrame
        A DataFrame with a datetime index and activity values for each
        time point.
    sem : Boolean
        Whether to return standard error of the mean as well, defaults
        to False.

    Returns
    -------
    pd.DataFrame
        Mean activity at each time point across all days.  When ``sem``
        is True, a ``(mean, sem)`` tuple is returned instead.
    """
    # Collapse the dates away: group rows that share the same clock
    # time, regardless of which day they belong to.
    grouped_by_time = data.groupby(data.index.time)

    daily_profile = grouped_by_time.mean()
    # Normalise the index back to time objects for clarity.
    daily_profile.index = pd.to_datetime(
        daily_profile.index, format="%H:%M:%S").time

    if not sem:
        return daily_profile

    sem_profile = grouped_by_time.sem()
    sem_profile.index = pd.to_datetime(
        sem_profile.index, format="%H:%M:%S").time
    return daily_profile, sem_profile
93
+
94
+
95
@prep.validate_input
def normalise_to_baseline(data, baseline_data):
    """
    normalise_to_baseline

    Expresses ``data`` as a percentage of the mean daily profile of
    ``baseline_data``: each sample is divided by the baseline mean at
    the same clock time and scaled by 100.

    Parameters
    ----------
    data : pd.Series
        Timeindexed data to be normalised.
    baseline_data : pd.Series
        Timeindexed data to be normalised against.

    Returns
    -------
    pd.Series
        Timeindexed series with the original data as a percentage of
        the baseline mean.
    """
    # Mean baseline activity at each clock time.
    baseline_profile = calculate_mean_activity(baseline_data)

    # For every sample, look up the baseline mean at its clock time.
    clock_times = data.index.time
    per_sample_baseline = baseline_profile.loc[clock_times].values

    # Scale to a percentage of baseline and rewrap with the original
    # index and name.
    as_percentage = (data.values / per_sample_baseline) * 100
    return pd.Series(as_percentage, index=data.index, name=data.name)
127
+
128
+
129
@prep.validate_input
def light_phase_activity(data,
                         light_col=-1,
                         light_val=150):
    """
    Light_phase_activity

    Calculates the percentage of activity occurring during the light
    phase relative to the total activity in the dataset.

    Parameters
    ----------
    data : pd.DataFrame
        A time-indexed DataFrame containing activity and light data.
    light_col : int, optional
        Index of the column that contains light data.
        Default is -1 (the last column).
    light_val : int, optional
        The threshold above which the light is considered "on". Default is 150.

    Returns
    -------
    pd.Series
        A Series where each element represents the percentage of activity
        occurring during the light phase for each column in the DataFrame.

    Notes
    -----
    - The function assumes the `data` DataFrame contains numeric data.
    - Activity columns should be numeric and summable.
    - If no light values exceed the `light_val` threshold, the returned
      percentage will be 0 for all activity columns.
    - Ensure `data` is not empty and contains the specified `light_col` index.
    """
    # Rows recorded while the lights were on (light reading at or
    # above the threshold).
    lights_on = data.iloc[:, light_col] >= light_val
    during_light = data[lights_on]

    # Column-wise totals for the light phase and the whole recording.
    light_totals = during_light.sum()
    overall_totals = data.sum()

    # Express the light-phase activity as a percentage of the total.
    return (light_totals / overall_totals) * 100
174
+
175
+
176
@prep.validate_input
def relative_amplitude(data,
                       time_unit="h",
                       active_time=1,
                       inactive_time=1):
    """
    Relative Amplitude

    Calculates the relative amplitude for each column as the difference
    between the mean activity during the most active periods and the
    mean activity during the least active periods, divided by their sum,
    after resampling the data to the given frequency.

    Parameters
    ----------
    data : pd.DataFrame
        A DataFrame with a time index and activity columns.
    time_unit : str, optional
        Resampling frequency passed to ``DataFrame.resample``.
        Default is "h" (hourly).
    active_time : int, optional
        The number of most active periods to consider. Default is 1.
        (The classical M10 measure uses 10 hours.)
    inactive_time : int, optional
        The number of least active periods to consider. Default is 1.
        (The classical L5 measure uses 5 hours.)

    Returns
    -------
    pd.Series
        A Series where the index corresponds to the column names from the
        input data, and the values are the relative amplitude for each
        column.

    Raises
    ------
    ValueError
        If `active_time` + `inactive_time` exceeds the length of the
        resampled data.
    """
    # Resample data to the given frequency.
    resampled = data.resample(time_unit).mean()

    # Check if active_time + inactive_time exceeds the data length.
    # (Message previously rendered "inactive_time(n)" with a missing
    # space between the fragments; fixed here.)
    if active_time + inactive_time > len(resampled):
        raise ValueError(
            f"The sum of active_time ({active_time}) and inactive_time "
            f"({inactive_time}) exceeds the length of the resampled "
            f"data ({len(resampled)})."
        )

    # Relative amplitude per column: RA = (M - L) / (M + L), where M is
    # the mean of the most active periods and L the mean of the least
    # active periods.
    relative_amplitudes = {}
    for column in resampled.columns:
        most_active_mean = resampled[column].nlargest(active_time).mean()
        least_active_mean = resampled[column].nsmallest(inactive_time).mean()

        amplitude_diff = most_active_mean - least_active_mean
        amplitude_sum = most_active_mean + least_active_mean
        relative_amplitudes[column] = amplitude_diff / amplitude_sum

    return pd.Series(relative_amplitudes, name="Relative Amplitude")
243
+
244
+
245
@prep.validate_input
def calculate_IS(data, subject_no=0):
    r"""
    Calculates the Interdaily Stability (IS) for a given time series of
    activity data.

    The Interdaily Stability (IS) is a measure of the consistency of an
    activity pattern across different periods of time (e.g., days). It
    is the ratio of the variance explained by the average daily profile
    to the total variance of the data; higher IS values indicate a more
    stable activity pattern.

    The formula for IS is:

    .. math::

        IS = \frac{N \sum_{h=1}^p (M_h - M )^2}{p \sum_{i=1}^N (x_i - M)^2}

    where:
    - :math:`N` is the total number of observations.
    - :math:`p` is the number of time points in the period.
    - :math:`M_h` is the mean value at time point :math:`h`.
    - :math:`M` is the overall mean.
    - :math:`x_i` is the value of the observation :math:`i`.

    Parameters
    ----------
    data : pd.DataFrame
        The DataFrame containing the activity data for multiple subjects,
        where each column represents a subject's data over time.
    subject_no : int, optional
        The column index of the subject for whom the IS is being
        calculated. Default is 0.

    Returns
    -------
    float
        The Interdaily Stability value (IS) for the specified subject's
        data, or NaN when the series is perfectly flat (0/0).

    Notes
    -----
    The function assumes that the data is organized in time series
    format, where each row corresponds to a time point and each column
    corresponds to a subject's activity data.
    """
    # Pick out the requested subject's series.
    subject_series = data.iloc[:, subject_no]

    # Average daily profile: M_h for every clock time h.
    daily_profile = calculate_mean_activity(subject_series)

    # Variance of the daily profile around the grand mean.
    grand_mean = subject_series.mean()
    squared_dev = (daily_profile - grand_mean) ** 2
    profile_variance = squared_dev.sum() / len(squared_dev)

    # Total variance of the raw series.
    # NOTE(review): pandas .var() uses N-1 (sample variance) while the
    # published formula divides by N — confirm this is intended.
    series_variance = subject_series.var()

    # A flat series gives 0/0; the ratio is undefined.
    if profile_variance == 0 and series_variance == 0:
        return np.nan

    return profile_variance / series_variance
318
+
319
+
320
@prep.validate_input
def calculate_TV(data, subject_no=0):
    r"""
    Calculates Timepoint Variability

    The Timepoint Variability is the ratio of variance around each timepoint
    to the total variance. It is defined as follows

    .. math::

        \begin{equation*}
        TV=
        \frac{\sum_{h=1}^P}{P} \frac{S^2_h}{S^2}
        \end{equation*}


        \begin{equation*}
        TV=
        \frac{\sum_{h=1}^P \frac{\sum_{x=1}^N (x_i-x_h)^2}{N}}{P \frac{\sum_{i=1}^N (x_i - \bar x)^2}{N}}
        \end{equation*}

        \begin{equation*}
        TV=
        \frac{\sum_{h=1}^P \sum_{x=1}^N (x_i-x_h)^2}{P \sum_{i=1}^N (x_i - \bar x)^2}
        \end{equation*}

    where:
    - :math:`N` is the total number of observations.
    - :math:`p` is the number of time points in the period.
    - :math:`M_h` is the mean value at time point :math:`h`.
    - :math:`M` is the overall mean.
    - :math:`x_i` is the value of the observation :math:`i`.

    The TV value ranges from 0 to 1, lower is more stable.

    Parameters
    ----------
    data : pd.DataFrame
        Time-indexed DataFrame of activity data, one column per subject.
    subject_no : int, optional
        Column index of the subject to calculate. Default is 0.

    Returns
    -------
    float
        The Timepoint Variability value, or NaN when both the timepoint
        variance and the total variance are 0 (flat data).
    """
    # select the subject's column as a Series
    curr_data = data.iloc[:, subject_no]

    # mean activity at each clock time (the timepoint means x_h)
    mean_data = calculate_mean_activity(curr_data)

    # how many (possibly fractional) days the recording spans
    # NOTE(review): assumes the recording covers whole days so that the
    # tiled profile lines up with the data — confirm for partial days
    multiple_length = len(curr_data) / len(mean_data)
    # tile the daily profile to the length of the raw series (one extra
    # repetition, then truncated) and align it on the raw index
    repeated_mean_data = pd.Series(
        np.tile(
            mean_data.values,
            (int(multiple_length) + 1)
        )[:len(curr_data)], index=curr_data.index)

    # squared deviation of every sample from its own timepoint mean
    deviation = repeated_mean_data - curr_data
    square_dev = deviation ** 2

    # group the squared deviations by time of day
    # (index is rewritten to "HH:MM:SS" strings for the groupby)
    square_dev.index = square_dev.index.strftime("%H:%M:%S")
    sum_of_squares = square_dev.groupby(square_dev.index).sum()

    # per-timepoint variance: divide by the number of measurements at
    # each timepoint, then average across timepoints
    timepoint_variance = sum_of_squares / multiple_length
    time_variance = timepoint_variance.mean()

    # total (sample) variance of the raw series
    total_variance = curr_data.var()

    # flat data gives 0/0 -> undefined, return NaN
    if time_variance == 0 and total_variance == 0:
        return np.nan

    timepoint_variability = time_variance / total_variance

    return timepoint_variability