tasci_merger 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +22 -22
- data/README.md +77 -77
- data/bin/merge_tasci +10 -8
- data/lib/labtime.rb +110 -110
- data/lib/man_merger.rb +344 -344
- data/lib/tasci_merger.rb +333 -324
- data/tasci_merger.gemspec +21 -21
- metadata +3 -3
data/lib/man_merger.rb
CHANGED
@@ -1,344 +1,344 @@
|
|
1
|
-
require 'csv'
|
2
|
-
|
3
|
-
## CHANGELOG
|
4
|
-
# 2173 Master file: change sp16,17,18 to *_rev
|
5
|
-
|
6
|
-
module ETL
|
7
|
-
class ManMerger
|
8
|
-
LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
|
9
|
-
T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
|
10
|
-
#T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
|
11
|
-
EPOCH_LENGTH = 30
|
12
|
-
|
13
|
-
def merge_files
|
14
|
-
subject_list = load_subject_list
|
15
|
-
subject_list.each do |subject_code, file_list|
|
16
|
-
merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
|
17
|
-
merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
|
18
|
-
MY_LOG.info "---- #{subject_code}"
|
19
|
-
|
20
|
-
previous_first_labtime = nil
|
21
|
-
previous_last_labtime = nil
|
22
|
-
subject_year = get_subject_year(file_list)
|
23
|
-
|
24
|
-
file_list.each do |file_hash|
|
25
|
-
matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
|
26
|
-
matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
|
27
|
-
|
28
|
-
## Validate File List
|
29
|
-
if matched_files.length != 1
|
30
|
-
raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
|
31
|
-
else
|
32
|
-
man_file_path = matched_files[0]
|
33
|
-
end
|
34
|
-
|
35
|
-
man_file = File.open(man_file_path)
|
36
|
-
LOADER_LOGGER.info "--- Loading #{man_file_path}"
|
37
|
-
file_info = {}
|
38
|
-
|
39
|
-
|
40
|
-
## Ignore Corrupted Files
|
41
|
-
#next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
|
42
|
-
|
43
|
-
# Date from file name
|
44
|
-
matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
|
45
|
-
file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
|
46
|
-
|
47
|
-
# read file
|
48
|
-
lines = man_file.readlines("\r")
|
49
|
-
# delete possible empty last line
|
50
|
-
lines.pop if lines.last.blank?
|
51
|
-
|
52
|
-
# get file first and last times
|
53
|
-
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
|
54
|
-
file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
55
|
-
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
|
56
|
-
file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
57
|
-
|
58
|
-
# validate first/last times
|
59
|
-
if file_hash[:start_time] != file_info[:first_time]
|
60
|
-
MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
|
61
|
-
end
|
62
|
-
if file_hash[:last_line_time] != file_info[:last_time]
|
63
|
-
MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
|
64
|
-
end
|
65
|
-
if file_hash[:last_line_number] != lines.length
|
66
|
-
MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
|
67
|
-
end
|
68
|
-
|
69
|
-
##
|
70
|
-
# VALIDATION
|
71
|
-
file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
|
72
|
-
file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
|
73
|
-
|
74
|
-
start_realtime = file_hash[:start_labtime].to_time
|
75
|
-
last_line_realtime = file_hash[:last_line_labtime].to_time
|
76
|
-
|
77
|
-
first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
|
78
|
-
last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
|
79
|
-
|
80
|
-
file_info[:first_labtime] = Labtime.parse(first_realtime)
|
81
|
-
file_info[:last_labtime] = Labtime.parse(last_realtime)
|
82
|
-
predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
|
83
|
-
|
84
|
-
sep = false
|
85
|
-
if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
|
86
|
-
MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
|
87
|
-
sep = true
|
88
|
-
end
|
89
|
-
|
90
|
-
# These checks fail if DST TRANSITION HAPPENS
|
91
|
-
if last_line_realtime.dst? == start_realtime.dst?
|
92
|
-
if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
|
93
|
-
MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
|
94
|
-
sep = true
|
95
|
-
end
|
96
|
-
if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
|
97
|
-
MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
|
98
|
-
sep = true
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
|
103
|
-
MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
|
104
|
-
sep = true
|
105
|
-
end
|
106
|
-
|
107
|
-
unless previous_first_labtime.nil? or previous_last_labtime.nil?
|
108
|
-
MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
|
109
|
-
end
|
110
|
-
|
111
|
-
raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
|
112
|
-
raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
|
113
|
-
|
114
|
-
MY_LOG.info "-----------------------------------\n\n" if sep
|
115
|
-
|
116
|
-
last_labtime = nil
|
117
|
-
ibob_flag = 0
|
118
|
-
|
119
|
-
lines.each_with_index do |line, line_number|
|
120
|
-
#merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
|
121
|
-
=begin
|
122
|
-
sleep man file:
|
123
|
-
0 undef/unscored
|
124
|
-
1 stage 1
|
125
|
-
2 stage 2
|
126
|
-
3 stage 3
|
127
|
-
4 stage 4
|
128
|
-
5 wake
|
129
|
-
6 REM
|
130
|
-
7 MVT
|
131
|
-
8 LOff and LOn
|
132
|
-
|
133
|
-
wake man file:
|
134
|
-
0 undef/un
|
135
|
-
cored
|
136
|
-
1 stage 1
|
137
|
-
2 stage 2
|
138
|
-
3 stage 3
|
139
|
-
4 stage 4
|
140
|
-
5 wake
|
141
|
-
6 REM
|
142
|
-
7 MVT
|
143
|
-
8 SEM
|
144
|
-
=end
|
145
|
-
|
146
|
-
|
147
|
-
line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
|
148
|
-
line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
|
149
|
-
|
150
|
-
# Sleep Period Coding:
|
151
|
-
# 1 Sleep Onset (Lights Off) (IN BED)
|
152
|
-
# 2 Sleep Offset (Lights On) (OUT OF BED)
|
153
|
-
if file_hash[:type] == :sleep and line_code == 8
|
154
|
-
if ibob_flag == 0
|
155
|
-
sleep_period = 1
|
156
|
-
ibob_flag = 1
|
157
|
-
else
|
158
|
-
sleep_period = 2
|
159
|
-
ibob_flag = 0
|
160
|
-
end
|
161
|
-
else
|
162
|
-
sleep_period = nil
|
163
|
-
end
|
164
|
-
|
165
|
-
# Sleep Stage Coding:
|
166
|
-
# 1 stage 1
|
167
|
-
# 2 stage 2
|
168
|
-
# 3 stage 3
|
169
|
-
# 4 stage 4
|
170
|
-
# 6 MT
|
171
|
-
# 7 Undef
|
172
|
-
# 5 REM
|
173
|
-
# 9 Wake
|
174
|
-
if line_code >= 1 and line_code <= 4
|
175
|
-
line_event = line_code
|
176
|
-
elsif line_code == 0
|
177
|
-
line_event = 7
|
178
|
-
elsif line_code == 5 or line_code == 8
|
179
|
-
line_event = 9
|
180
|
-
elsif line_code == 6
|
181
|
-
line_event = 5
|
182
|
-
elsif line_code == 7
|
183
|
-
line_event = 6
|
184
|
-
else
|
185
|
-
raise StandardError, "Cannot map the following event: #{line_code}"
|
186
|
-
end
|
187
|
-
|
188
|
-
# SEM Event Coding:
|
189
|
-
# 1 Slow Eye Movement
|
190
|
-
# 0 No Slow Eye Movement
|
191
|
-
if file_hash[:type] == :wake and line_code == 8
|
192
|
-
sem_event = 1
|
193
|
-
else
|
194
|
-
sem_event = 0
|
195
|
-
end
|
196
|
-
|
197
|
-
last_labtime = line_labtime
|
198
|
-
|
199
|
-
output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
|
200
|
-
merged_file << output_line
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
previous_first_labtime = file_info[:first_labtime]
|
205
|
-
previous_last_labtime = last_labtime
|
206
|
-
|
207
|
-
end
|
208
|
-
merged_file.close
|
209
|
-
MY_LOG.info "---- end #{subject_code}\n\n"
|
210
|
-
|
211
|
-
end
|
212
|
-
end
|
213
|
-
|
214
|
-
def load_subject_list
|
215
|
-
subject_info = {}
|
216
|
-
Dir.foreach(LIST_DIR) do |file|
|
217
|
-
next if file == '.' or file == '..'
|
218
|
-
#MY_LOG.info "#{file}"
|
219
|
-
csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
|
220
|
-
|
221
|
-
# Match and Validate File Name
|
222
|
-
matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
|
223
|
-
if matched_sc
|
224
|
-
subject_code = matched_sc[1].upcase
|
225
|
-
else
|
226
|
-
next
|
227
|
-
end
|
228
|
-
|
229
|
-
subject_info[subject_code] = []
|
230
|
-
csv_file.each do |row|
|
231
|
-
file_info = {}
|
232
|
-
pattern = /(.*)\.man/i.match(row[0])
|
233
|
-
|
234
|
-
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
|
235
|
-
if matched_time
|
236
|
-
file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
237
|
-
else
|
238
|
-
MY_LOG.error "No Valid Start Time Found: #{row}"
|
239
|
-
next
|
240
|
-
end
|
241
|
-
|
242
|
-
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
|
243
|
-
if matched_time
|
244
|
-
file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
245
|
-
else
|
246
|
-
MY_LOG.error "No Valid End Time Found: #{row}"
|
247
|
-
next
|
248
|
-
end
|
249
|
-
|
250
|
-
file_info[:start_labtime] = row[2].to_f
|
251
|
-
file_info[:last_line_number] = row[3].to_i
|
252
|
-
file_info[:last_line_labtime] = row[5].to_f
|
253
|
-
|
254
|
-
if pattern
|
255
|
-
file_info[:pattern] = pattern[1]
|
256
|
-
subject_info[subject_code] << file_info
|
257
|
-
|
258
|
-
# Determine if sleep or wake file
|
259
|
-
raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
|
260
|
-
raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
|
261
|
-
|
262
|
-
if /_sp?\d/i.match(pattern[1])
|
263
|
-
file_info[:type] = :sleep
|
264
|
-
elsif /_wp?\d/i.match(pattern[1])
|
265
|
-
file_info[:type] = :wake
|
266
|
-
else
|
267
|
-
raise StandardError, "Didn't match any SP/WP..."
|
268
|
-
end
|
269
|
-
else
|
270
|
-
MY_LOG.info "No Valid File Name Found: #{row}"
|
271
|
-
next
|
272
|
-
end
|
273
|
-
end
|
274
|
-
#MY_LOG.info subject_info[subject_code]
|
275
|
-
end
|
276
|
-
#MY_LOG.info subject_info.inspect
|
277
|
-
subject_info
|
278
|
-
end
|
279
|
-
|
280
|
-
def get_subject_year(file_list)
|
281
|
-
years = file_list.map do |h|
|
282
|
-
matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
|
283
|
-
matched_date ? matched_date[3] : nil
|
284
|
-
end
|
285
|
-
years.delete_if {|x| x.nil? }
|
286
|
-
years = years.uniq
|
287
|
-
|
288
|
-
raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
|
289
|
-
year = years.first.to_i
|
290
|
-
year > 30 ? year + 1900 : year + 2000
|
291
|
-
end
|
292
|
-
end
|
293
|
-
|
294
|
-
|
295
|
-
end
|
296
|
-
|
297
|
-
=begin
|
298
|
-
path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
|
299
|
-
path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
|
300
|
-
|
301
|
-
file list:
|
302
|
-
subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
|
303
|
-
|
304
|
-
sleep man file:
|
305
|
-
0 undef/unscored
|
306
|
-
1 stage 1
|
307
|
-
2 stage 2
|
308
|
-
3 stage 3
|
309
|
-
4 stage 4
|
310
|
-
5 wake
|
311
|
-
6 REM
|
312
|
-
7 MVT
|
313
|
-
8 LOff and LOn
|
314
|
-
|
315
|
-
wake man file:
|
316
|
-
0 undef/unscored
|
317
|
-
1 stage 1
|
318
|
-
2 stage 2
|
319
|
-
3 stage 3
|
320
|
-
4 stage 4
|
321
|
-
5 wake
|
322
|
-
6 REM
|
323
|
-
7 MVT
|
324
|
-
8 SEM
|
325
|
-
|
326
|
-
|
327
|
-
sleep stage 8 should be coded as Wake with a SEM
|
328
|
-
5 is Wake
|
329
|
-
1-4 is Sleep stage 1-4
|
330
|
-
7 is REM
|
331
|
-
8 is Wake with SEM plus LOff and Lon
|
332
|
-
|
333
|
-
|
334
|
-
mapping:
|
335
|
-
1 stage 1
|
336
|
-
2 stage 2
|
337
|
-
3 stage 3
|
338
|
-
4 stage 4
|
339
|
-
6 MT
|
340
|
-
7 Undef
|
341
|
-
5 REM
|
342
|
-
9 Wake
|
343
|
-
=end
|
344
|
-
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
## CHANGELOG
|
4
|
+
# 2173 Master file: change sp16,17,18 to *_rev
|
5
|
+
|
6
|
+
module ETL
|
7
|
+
class ManMerger
|
8
|
+
LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
|
9
|
+
T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
|
10
|
+
#T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
|
11
|
+
EPOCH_LENGTH = 30
|
12
|
+
|
13
|
+
def merge_files
|
14
|
+
subject_list = load_subject_list
|
15
|
+
subject_list.each do |subject_code, file_list|
|
16
|
+
merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
|
17
|
+
merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
|
18
|
+
MY_LOG.info "---- #{subject_code}"
|
19
|
+
|
20
|
+
previous_first_labtime = nil
|
21
|
+
previous_last_labtime = nil
|
22
|
+
subject_year = get_subject_year(file_list)
|
23
|
+
|
24
|
+
file_list.each do |file_hash|
|
25
|
+
matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
|
26
|
+
matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
|
27
|
+
|
28
|
+
## Validate File List
|
29
|
+
if matched_files.length != 1
|
30
|
+
raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
|
31
|
+
else
|
32
|
+
man_file_path = matched_files[0]
|
33
|
+
end
|
34
|
+
|
35
|
+
man_file = File.open(man_file_path)
|
36
|
+
LOADER_LOGGER.info "--- Loading #{man_file_path}"
|
37
|
+
file_info = {}
|
38
|
+
|
39
|
+
|
40
|
+
## Ignore Corrupted Files
|
41
|
+
#next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
|
42
|
+
|
43
|
+
# Date from file name
|
44
|
+
matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
|
45
|
+
file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
|
46
|
+
|
47
|
+
# read file
|
48
|
+
lines = man_file.readlines("\r")
|
49
|
+
# delete possible empty last line
|
50
|
+
lines.pop if lines.last.blank?
|
51
|
+
|
52
|
+
# get file first and last times
|
53
|
+
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
|
54
|
+
file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
55
|
+
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
|
56
|
+
file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
57
|
+
|
58
|
+
# validate first/last times
|
59
|
+
if file_hash[:start_time] != file_info[:first_time]
|
60
|
+
MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
|
61
|
+
end
|
62
|
+
if file_hash[:last_line_time] != file_info[:last_time]
|
63
|
+
MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
|
64
|
+
end
|
65
|
+
if file_hash[:last_line_number] != lines.length
|
66
|
+
MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
|
67
|
+
end
|
68
|
+
|
69
|
+
##
|
70
|
+
# VALIDATION
|
71
|
+
file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
|
72
|
+
file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
|
73
|
+
|
74
|
+
start_realtime = file_hash[:start_labtime].to_time
|
75
|
+
last_line_realtime = file_hash[:last_line_labtime].to_time
|
76
|
+
|
77
|
+
first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
|
78
|
+
last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
|
79
|
+
|
80
|
+
file_info[:first_labtime] = Labtime.parse(first_realtime)
|
81
|
+
file_info[:last_labtime] = Labtime.parse(last_realtime)
|
82
|
+
predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
|
83
|
+
|
84
|
+
sep = false
|
85
|
+
if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
|
86
|
+
MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
|
87
|
+
sep = true
|
88
|
+
end
|
89
|
+
|
90
|
+
# These checks fail if DST TRANSITION HAPPENS
|
91
|
+
if last_line_realtime.dst? == start_realtime.dst?
|
92
|
+
if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
|
93
|
+
MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
|
94
|
+
sep = true
|
95
|
+
end
|
96
|
+
if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
|
97
|
+
MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
|
98
|
+
sep = true
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
|
103
|
+
MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
|
104
|
+
sep = true
|
105
|
+
end
|
106
|
+
|
107
|
+
unless previous_first_labtime.nil? or previous_last_labtime.nil?
|
108
|
+
MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
|
109
|
+
end
|
110
|
+
|
111
|
+
raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
|
112
|
+
raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
|
113
|
+
|
114
|
+
MY_LOG.info "-----------------------------------\n\n" if sep
|
115
|
+
|
116
|
+
last_labtime = nil
|
117
|
+
ibob_flag = 0
|
118
|
+
|
119
|
+
lines.each_with_index do |line, line_number|
|
120
|
+
#merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
|
121
|
+
=begin
|
122
|
+
sleep man file:
|
123
|
+
0 undef/unscored
|
124
|
+
1 stage 1
|
125
|
+
2 stage 2
|
126
|
+
3 stage 3
|
127
|
+
4 stage 4
|
128
|
+
5 wake
|
129
|
+
6 REM
|
130
|
+
7 MVT
|
131
|
+
8 LOff and LOn
|
132
|
+
|
133
|
+
wake man file:
|
134
|
+
0 undef/un
|
135
|
+
cored
|
136
|
+
1 stage 1
|
137
|
+
2 stage 2
|
138
|
+
3 stage 3
|
139
|
+
4 stage 4
|
140
|
+
5 wake
|
141
|
+
6 REM
|
142
|
+
7 MVT
|
143
|
+
8 SEM
|
144
|
+
=end
|
145
|
+
|
146
|
+
|
147
|
+
line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
|
148
|
+
line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
|
149
|
+
|
150
|
+
# Sleep Period Coding:
|
151
|
+
# 1 Sleep Onset (Lights Off) (IN BED)
|
152
|
+
# 2 Sleep Offset (Lights On) (OUT OF BED)
|
153
|
+
if file_hash[:type] == :sleep and line_code == 8
|
154
|
+
if ibob_flag == 0
|
155
|
+
sleep_period = 1
|
156
|
+
ibob_flag = 1
|
157
|
+
else
|
158
|
+
sleep_period = 2
|
159
|
+
ibob_flag = 0
|
160
|
+
end
|
161
|
+
else
|
162
|
+
sleep_period = nil
|
163
|
+
end
|
164
|
+
|
165
|
+
# Sleep Stage Coding:
|
166
|
+
# 1 stage 1
|
167
|
+
# 2 stage 2
|
168
|
+
# 3 stage 3
|
169
|
+
# 4 stage 4
|
170
|
+
# 6 MT
|
171
|
+
# 7 Undef
|
172
|
+
# 5 REM
|
173
|
+
# 9 Wake
|
174
|
+
if line_code >= 1 and line_code <= 4
|
175
|
+
line_event = line_code
|
176
|
+
elsif line_code == 0
|
177
|
+
line_event = 7
|
178
|
+
elsif line_code == 5 or line_code == 8
|
179
|
+
line_event = 9
|
180
|
+
elsif line_code == 6
|
181
|
+
line_event = 5
|
182
|
+
elsif line_code == 7
|
183
|
+
line_event = 6
|
184
|
+
else
|
185
|
+
raise StandardError, "Cannot map the following event: #{line_code}"
|
186
|
+
end
|
187
|
+
|
188
|
+
# SEM Event Coding:
|
189
|
+
# 1 Slow Eye Movement
|
190
|
+
# 0 No Slow Eye Movement
|
191
|
+
if file_hash[:type] == :wake and line_code == 8
|
192
|
+
sem_event = 1
|
193
|
+
else
|
194
|
+
sem_event = 0
|
195
|
+
end
|
196
|
+
|
197
|
+
last_labtime = line_labtime
|
198
|
+
|
199
|
+
output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
|
200
|
+
merged_file << output_line
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
previous_first_labtime = file_info[:first_labtime]
|
205
|
+
previous_last_labtime = last_labtime
|
206
|
+
|
207
|
+
end
|
208
|
+
merged_file.close
|
209
|
+
MY_LOG.info "---- end #{subject_code}\n\n"
|
210
|
+
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
def load_subject_list
|
215
|
+
subject_info = {}
|
216
|
+
Dir.foreach(LIST_DIR) do |file|
|
217
|
+
next if file == '.' or file == '..'
|
218
|
+
#MY_LOG.info "#{file}"
|
219
|
+
csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
|
220
|
+
|
221
|
+
# Match and Validate File Name
|
222
|
+
matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
|
223
|
+
if matched_sc
|
224
|
+
subject_code = matched_sc[1].upcase
|
225
|
+
else
|
226
|
+
next
|
227
|
+
end
|
228
|
+
|
229
|
+
subject_info[subject_code] = []
|
230
|
+
csv_file.each do |row|
|
231
|
+
file_info = {}
|
232
|
+
pattern = /(.*)\.man/i.match(row[0])
|
233
|
+
|
234
|
+
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
|
235
|
+
if matched_time
|
236
|
+
file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
237
|
+
else
|
238
|
+
MY_LOG.error "No Valid Start Time Found: #{row}"
|
239
|
+
next
|
240
|
+
end
|
241
|
+
|
242
|
+
matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
|
243
|
+
if matched_time
|
244
|
+
file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
|
245
|
+
else
|
246
|
+
MY_LOG.error "No Valid End Time Found: #{row}"
|
247
|
+
next
|
248
|
+
end
|
249
|
+
|
250
|
+
file_info[:start_labtime] = row[2].to_f
|
251
|
+
file_info[:last_line_number] = row[3].to_i
|
252
|
+
file_info[:last_line_labtime] = row[5].to_f
|
253
|
+
|
254
|
+
if pattern
|
255
|
+
file_info[:pattern] = pattern[1]
|
256
|
+
subject_info[subject_code] << file_info
|
257
|
+
|
258
|
+
# Determine if sleep or wake file
|
259
|
+
raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
|
260
|
+
raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
|
261
|
+
|
262
|
+
if /_sp?\d/i.match(pattern[1])
|
263
|
+
file_info[:type] = :sleep
|
264
|
+
elsif /_wp?\d/i.match(pattern[1])
|
265
|
+
file_info[:type] = :wake
|
266
|
+
else
|
267
|
+
raise StandardError, "Didn't match any SP/WP..."
|
268
|
+
end
|
269
|
+
else
|
270
|
+
MY_LOG.info "No Valid File Name Found: #{row}"
|
271
|
+
next
|
272
|
+
end
|
273
|
+
end
|
274
|
+
#MY_LOG.info subject_info[subject_code]
|
275
|
+
end
|
276
|
+
#MY_LOG.info subject_info.inspect
|
277
|
+
subject_info
|
278
|
+
end
|
279
|
+
|
280
|
+
def get_subject_year(file_list)
|
281
|
+
years = file_list.map do |h|
|
282
|
+
matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
|
283
|
+
matched_date ? matched_date[3] : nil
|
284
|
+
end
|
285
|
+
years.delete_if {|x| x.nil? }
|
286
|
+
years = years.uniq
|
287
|
+
|
288
|
+
raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
|
289
|
+
year = years.first.to_i
|
290
|
+
year > 30 ? year + 1900 : year + 2000
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
|
295
|
+
end
|
296
|
+
|
297
|
+
=begin
|
298
|
+
path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
|
299
|
+
path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
|
300
|
+
|
301
|
+
file list:
|
302
|
+
subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
|
303
|
+
|
304
|
+
sleep man file:
|
305
|
+
0 undef/unscored
|
306
|
+
1 stage 1
|
307
|
+
2 stage 2
|
308
|
+
3 stage 3
|
309
|
+
4 stage 4
|
310
|
+
5 wake
|
311
|
+
6 REM
|
312
|
+
7 MVT
|
313
|
+
8 LOff and LOn
|
314
|
+
|
315
|
+
wake man file:
|
316
|
+
0 undef/unscored
|
317
|
+
1 stage 1
|
318
|
+
2 stage 2
|
319
|
+
3 stage 3
|
320
|
+
4 stage 4
|
321
|
+
5 wake
|
322
|
+
6 REM
|
323
|
+
7 MVT
|
324
|
+
8 SEM
|
325
|
+
|
326
|
+
|
327
|
+
sleep stage 8 should be coded as Wake with a SEM
|
328
|
+
5 is Wake
|
329
|
+
1-4 is Sleep stage 1-4
|
330
|
+
7 is REM
|
331
|
+
8 is Wake with SEM plus LOff and Lon
|
332
|
+
|
333
|
+
|
334
|
+
mapping:
|
335
|
+
1 stage 1
|
336
|
+
2 stage 2
|
337
|
+
3 stage 3
|
338
|
+
4 stage 4
|
339
|
+
6 MT
|
340
|
+
7 Undef
|
341
|
+
5 REM
|
342
|
+
9 Wake
|
343
|
+
=end
|
344
|
+
|