tasci_merger 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/man_merger.rb CHANGED
@@ -1,344 +1,344 @@
1
- require 'csv'
2
-
3
- ## CHANGELOG
4
- # 2173 Master file: change sp16,17,18 to *_rev
5
-
6
- module ETL
7
- class ManMerger
8
- LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
- T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
- #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
- EPOCH_LENGTH = 30
12
-
13
- def merge_files
14
- subject_list = load_subject_list
15
- subject_list.each do |subject_code, file_list|
16
- merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
- merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
- MY_LOG.info "---- #{subject_code}"
19
-
20
- previous_first_labtime = nil
21
- previous_last_labtime = nil
22
- subject_year = get_subject_year(file_list)
23
-
24
- file_list.each do |file_hash|
25
- matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
- matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
-
28
- ## Validate File List
29
- if matched_files.length != 1
30
- raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
- else
32
- man_file_path = matched_files[0]
33
- end
34
-
35
- man_file = File.open(man_file_path)
36
- LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
- file_info = {}
38
-
39
-
40
- ## Ignore Corrupted Files
41
- #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
-
43
- # Date from file name
44
- matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
- file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
-
47
- # read file
48
- lines = man_file.readlines("\r")
49
- # delete possible empty last line
50
- lines.pop if lines.last.blank?
51
-
52
- # get file first and last times
53
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
- file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
- file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
-
58
- # validate first/last times
59
- if file_hash[:start_time] != file_info[:first_time]
60
- MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
- end
62
- if file_hash[:last_line_time] != file_info[:last_time]
63
- MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
- end
65
- if file_hash[:last_line_number] != lines.length
66
- MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
- end
68
-
69
- ##
70
- # VALIDATION
71
- file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
- file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
-
74
- start_realtime = file_hash[:start_labtime].to_time
75
- last_line_realtime = file_hash[:last_line_labtime].to_time
76
-
77
- first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
- last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
-
80
- file_info[:first_labtime] = Labtime.parse(first_realtime)
81
- file_info[:last_labtime] = Labtime.parse(last_realtime)
82
- predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
83
-
84
- sep = false
85
- if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
- MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
- sep = true
88
- end
89
-
90
- # These checks fail if DST TRANSITION HAPPENS
91
- if last_line_realtime.dst? == start_realtime.dst?
92
- if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
- MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
- sep = true
95
- end
96
- if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
- MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
- sep = true
99
- end
100
- end
101
-
102
- if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
- MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
- sep = true
105
- end
106
-
107
- unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
- MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
- end
110
-
111
- raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
- raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
-
114
- MY_LOG.info "-----------------------------------\n\n" if sep
115
-
116
- last_labtime = nil
117
- ibob_flag = 0
118
-
119
- lines.each_with_index do |line, line_number|
120
- #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
- =begin
122
- sleep man file:
123
- 0 undef/unscored
124
- 1 stage 1
125
- 2 stage 2
126
- 3 stage 3
127
- 4 stage 4
128
- 5 wake
129
- 6 REM
130
- 7 MVT
131
- 8 LOff and LOn
132
-
133
- wake man file:
134
- 0 undef/unscored
135
-
136
- 1 stage 1
137
- 2 stage 2
138
- 3 stage 3
139
- 4 stage 4
140
- 5 wake
141
- 6 REM
142
- 7 MVT
143
- 8 SEM
144
- =end
145
-
146
-
147
- line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
- line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
-
150
- # Sleep Period Coding:
151
- # 1 Sleep Onset (Lights Off) (IN BED)
152
- # 2 Sleep Offset (Lights On) (OUT OF BED)
153
- if file_hash[:type] == :sleep and line_code == 8
154
- if ibob_flag == 0
155
- sleep_period = 1
156
- ibob_flag = 1
157
- else
158
- sleep_period = 2
159
- ibob_flag = 0
160
- end
161
- else
162
- sleep_period = nil
163
- end
164
-
165
- # Sleep Stage Coding:
166
- # 1 stage 1
167
- # 2 stage 2
168
- # 3 stage 3
169
- # 4 stage 4
170
- # 6 MT
171
- # 7 Undef
172
- # 5 REM
173
- # 9 Wake
174
- if line_code >= 1 and line_code <= 4
175
- line_event = line_code
176
- elsif line_code == 0
177
- line_event = 7
178
- elsif line_code == 5 or line_code == 8
179
- line_event = 9
180
- elsif line_code == 6
181
- line_event = 5
182
- elsif line_code == 7
183
- line_event = 6
184
- else
185
- raise StandardError, "Cannot map the following event: #{line_code}"
186
- end
187
-
188
- # SEM Event Coding:
189
- # 1 Slow Eye Movement
190
- # 0 No Slow Eye Movement
191
- if file_hash[:type] == :wake and line_code == 8
192
- sem_event = 1
193
- else
194
- sem_event = 0
195
- end
196
-
197
- last_labtime = line_labtime
198
-
199
- output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
- merged_file << output_line
201
- end
202
-
203
-
204
- previous_first_labtime = file_info[:first_labtime]
205
- previous_last_labtime = last_labtime
206
-
207
- end
208
- merged_file.close
209
- MY_LOG.info "---- end #{subject_code}\n\n"
210
-
211
- end
212
- end
213
-
214
- def load_subject_list
215
- subject_info = {}
216
- Dir.foreach(LIST_DIR) do |file|
217
- next if file == '.' or file == '..'
218
- #MY_LOG.info "#{file}"
219
- csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
220
-
221
- # Match and Validate File Name
222
- matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
- if matched_sc
224
- subject_code = matched_sc[1].upcase
225
- else
226
- next
227
- end
228
-
229
- subject_info[subject_code] = []
230
- csv_file.each do |row|
231
- file_info = {}
232
- pattern = /(.*)\.man/i.match(row[0])
233
-
234
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
- if matched_time
236
- file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
- else
238
- MY_LOG.error "No Valid Start Time Found: #{row}"
239
- next
240
- end
241
-
242
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
- if matched_time
244
- file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
- else
246
- MY_LOG.error "No Valid End Time Found: #{row}"
247
- next
248
- end
249
-
250
- file_info[:start_labtime] = row[2].to_f
251
- file_info[:last_line_number] = row[3].to_i
252
- file_info[:last_line_labtime] = row[5].to_f
253
-
254
- if pattern
255
- file_info[:pattern] = pattern[1]
256
- subject_info[subject_code] << file_info
257
-
258
- # Determine if sleep or wake file
259
- raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
- raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
-
262
- if /_sp?\d/i.match(pattern[1])
263
- file_info[:type] = :sleep
264
- elsif /_wp?\d/i.match(pattern[1])
265
- file_info[:type] = :wake
266
- else
267
- raise StandardError, "Didn't match any SP/WP..."
268
- end
269
- else
270
- MY_LOG.info "No Valid File Name Found: #{row}"
271
- next
272
- end
273
- end
274
- #MY_LOG.info subject_info[subject_code]
275
- end
276
- #MY_LOG.info subject_info.inspect
277
- subject_info
278
- end
279
-
280
- def get_subject_year(file_list)
281
- years = file_list.map do |h|
282
- matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
- matched_date ? matched_date[3] : nil
284
- end
285
- years.delete_if {|x| x.nil? }
286
- years = years.uniq
287
-
288
- raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
- year = years.first.to_i
290
- year > 30 ? year + 1900 : year + 2000
291
- end
292
- end
293
-
294
-
295
- end
296
-
297
- =begin
298
- path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
- path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
-
301
- file list:
302
- subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
-
304
- sleep man file:
305
- 0 undef/unscored
306
- 1 stage 1
307
- 2 stage 2
308
- 3 stage 3
309
- 4 stage 4
310
- 5 wake
311
- 6 REM
312
- 7 MVT
313
- 8 LOff and LOn
314
-
315
- wake man file:
316
- 0 undef/unscored
317
- 1 stage 1
318
- 2 stage 2
319
- 3 stage 3
320
- 4 stage 4
321
- 5 wake
322
- 6 REM
323
- 7 MVT
324
- 8 SEM
325
-
326
-
327
- sleep stage 8 should be coded as Wake with a SEM
328
- 5 is Wake
329
- 1-4 is Sleep stage 1-4
330
- 7 is REM
331
- 8 is Wake with SEM plus LOff and Lon
332
-
333
-
334
- mapping:
335
- 1 stage 1
336
- 2 stage 2
337
- 3 stage 3
338
- 4 stage 4
339
- 6 MT
340
- 7 Undef
341
- 5 REM
342
- 9 Wake
343
- =end
344
-
1
+ require 'csv'
2
+
3
+ ## CHANGELOG
4
+ # 2173 Master file: change sp16,17,18 to *_rev
5
+
6
+ module ETL
7
+ class ManMerger
8
+ LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
+ T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
+ #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
+ EPOCH_LENGTH = 30
12
+
13
+ def merge_files
14
+ subject_list = load_subject_list
15
+ subject_list.each do |subject_code, file_list|
16
+ merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
+ merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
+ MY_LOG.info "---- #{subject_code}"
19
+
20
+ previous_first_labtime = nil
21
+ previous_last_labtime = nil
22
+ subject_year = get_subject_year(file_list)
23
+
24
+ file_list.each do |file_hash|
25
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
+
28
+ ## Validate File List
29
+ if matched_files.length != 1
30
+ raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
+ else
32
+ man_file_path = matched_files[0]
33
+ end
34
+
35
+ man_file = File.open(man_file_path)
36
+ LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
+ file_info = {}
38
+
39
+
40
+ ## Ignore Corrupted Files
41
+ #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
+
43
+ # Date from file name
44
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
+ file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
+
47
+ # read file
48
+ lines = man_file.readlines("\r")
49
+ # delete possible empty last line
50
+ lines.pop if lines.last.blank?
51
+
52
+ # get file first and last times
53
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
+ file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
+ file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
+
58
+ # validate first/last times
59
+ if file_hash[:start_time] != file_info[:first_time]
60
+ MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
+ end
62
+ if file_hash[:last_line_time] != file_info[:last_time]
63
+ MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
+ end
65
+ if file_hash[:last_line_number] != lines.length
66
+ MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
+ end
68
+
69
+ ##
70
+ # VALIDATION
71
+ file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
+ file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
+
74
+ start_realtime = file_hash[:start_labtime].to_time
75
+ last_line_realtime = file_hash[:last_line_labtime].to_time
76
+
77
+ first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
+ last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
+
80
+ file_info[:first_labtime] = Labtime.parse(first_realtime)
81
+ file_info[:last_labtime] = Labtime.parse(last_realtime)
82
+ predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
83
+
84
+ sep = false
85
+ if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
+ MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
+ sep = true
88
+ end
89
+
90
+ # These checks fail if DST TRANSITION HAPPENS
91
+ if last_line_realtime.dst? == start_realtime.dst?
92
+ if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
+ MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
+ sep = true
95
+ end
96
+ if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
+ MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
+ sep = true
99
+ end
100
+ end
101
+
102
+ if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
+ MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
+ sep = true
105
+ end
106
+
107
+ unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
+ MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
+ end
110
+
111
+ raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
+ raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
+
114
+ MY_LOG.info "-----------------------------------\n\n" if sep
115
+
116
+ last_labtime = nil
117
+ ibob_flag = 0
118
+
119
+ lines.each_with_index do |line, line_number|
120
+ #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
+ =begin
122
+ sleep man file:
123
+ 0 undef/unscored
124
+ 1 stage 1
125
+ 2 stage 2
126
+ 3 stage 3
127
+ 4 stage 4
128
+ 5 wake
129
+ 6 REM
130
+ 7 MVT
131
+ 8 LOff and LOn
132
+
133
+ wake man file:
134
+ 0 undef/unscored
135
+
136
+ 1 stage 1
137
+ 2 stage 2
138
+ 3 stage 3
139
+ 4 stage 4
140
+ 5 wake
141
+ 6 REM
142
+ 7 MVT
143
+ 8 SEM
144
+ =end
145
+
146
+
147
+ line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
+ line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
+
150
+ # Sleep Period Coding:
151
+ # 1 Sleep Onset (Lights Off) (IN BED)
152
+ # 2 Sleep Offset (Lights On) (OUT OF BED)
153
+ if file_hash[:type] == :sleep and line_code == 8
154
+ if ibob_flag == 0
155
+ sleep_period = 1
156
+ ibob_flag = 1
157
+ else
158
+ sleep_period = 2
159
+ ibob_flag = 0
160
+ end
161
+ else
162
+ sleep_period = nil
163
+ end
164
+
165
+ # Sleep Stage Coding:
166
+ # 1 stage 1
167
+ # 2 stage 2
168
+ # 3 stage 3
169
+ # 4 stage 4
170
+ # 6 MT
171
+ # 7 Undef
172
+ # 5 REM
173
+ # 9 Wake
174
+ if line_code >= 1 and line_code <= 4
175
+ line_event = line_code
176
+ elsif line_code == 0
177
+ line_event = 7
178
+ elsif line_code == 5 or line_code == 8
179
+ line_event = 9
180
+ elsif line_code == 6
181
+ line_event = 5
182
+ elsif line_code == 7
183
+ line_event = 6
184
+ else
185
+ raise StandardError, "Cannot map the following event: #{line_code}"
186
+ end
187
+
188
+ # SEM Event Coding:
189
+ # 1 Slow Eye Movement
190
+ # 0 No Slow Eye Movement
191
+ if file_hash[:type] == :wake and line_code == 8
192
+ sem_event = 1
193
+ else
194
+ sem_event = 0
195
+ end
196
+
197
+ last_labtime = line_labtime
198
+
199
+ output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
+ merged_file << output_line
201
+ end
202
+
203
+
204
+ previous_first_labtime = file_info[:first_labtime]
205
+ previous_last_labtime = last_labtime
206
+
207
+ end
208
+ merged_file.close
209
+ MY_LOG.info "---- end #{subject_code}\n\n"
210
+
211
+ end
212
+ end
213
+
214
+ def load_subject_list
215
+ subject_info = {}
216
+ Dir.foreach(LIST_DIR) do |file|
217
+ next if file == '.' or file == '..'
218
+ #MY_LOG.info "#{file}"
219
+ csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
220
+
221
+ # Match and Validate File Name
222
+ matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
+ if matched_sc
224
+ subject_code = matched_sc[1].upcase
225
+ else
226
+ next
227
+ end
228
+
229
+ subject_info[subject_code] = []
230
+ csv_file.each do |row|
231
+ file_info = {}
232
+ pattern = /(.*)\.man/i.match(row[0])
233
+
234
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
+ if matched_time
236
+ file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
+ else
238
+ MY_LOG.error "No Valid Start Time Found: #{row}"
239
+ next
240
+ end
241
+
242
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
+ if matched_time
244
+ file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
+ else
246
+ MY_LOG.error "No Valid End Time Found: #{row}"
247
+ next
248
+ end
249
+
250
+ file_info[:start_labtime] = row[2].to_f
251
+ file_info[:last_line_number] = row[3].to_i
252
+ file_info[:last_line_labtime] = row[5].to_f
253
+
254
+ if pattern
255
+ file_info[:pattern] = pattern[1]
256
+ subject_info[subject_code] << file_info
257
+
258
+ # Determine if sleep or wake file
259
+ raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
+ raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
+
262
+ if /_sp?\d/i.match(pattern[1])
263
+ file_info[:type] = :sleep
264
+ elsif /_wp?\d/i.match(pattern[1])
265
+ file_info[:type] = :wake
266
+ else
267
+ raise StandardError, "Didn't match any SP/WP..."
268
+ end
269
+ else
270
+ MY_LOG.info "No Valid File Name Found: #{row}"
271
+ next
272
+ end
273
+ end
274
+ #MY_LOG.info subject_info[subject_code]
275
+ end
276
+ #MY_LOG.info subject_info.inspect
277
+ subject_info
278
+ end
279
+
280
+ def get_subject_year(file_list)
281
+ years = file_list.map do |h|
282
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
+ matched_date ? matched_date[3] : nil
284
+ end
285
+ years.delete_if {|x| x.nil? }
286
+ years = years.uniq
287
+
288
+ raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
+ year = years.first.to_i
290
+ year > 30 ? year + 1900 : year + 2000
291
+ end
292
+ end
293
+
294
+
295
+ end
296
+
297
+ =begin
298
+ path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
+ path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
+
301
+ file list:
302
+ subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
+
304
+ sleep man file:
305
+ 0 undef/unscored
306
+ 1 stage 1
307
+ 2 stage 2
308
+ 3 stage 3
309
+ 4 stage 4
310
+ 5 wake
311
+ 6 REM
312
+ 7 MVT
313
+ 8 LOff and LOn
314
+
315
+ wake man file:
316
+ 0 undef/unscored
317
+ 1 stage 1
318
+ 2 stage 2
319
+ 3 stage 3
320
+ 4 stage 4
321
+ 5 wake
322
+ 6 REM
323
+ 7 MVT
324
+ 8 SEM
325
+
326
+
327
+ sleep stage 8 should be coded as Wake with a SEM
328
+ 5 is Wake
329
+ 1-4 is Sleep stage 1-4
330
+ 7 is REM
331
+ 8 is Wake with SEM plus LOff and Lon
332
+
333
+
334
+ mapping:
335
+ 1 stage 1
336
+ 2 stage 2
337
+ 3 stage 3
338
+ 4 stage 4
339
+ 6 MT
340
+ 7 Undef
341
+ 5 REM
342
+ 9 Wake
343
+ =end
344
+