tasci_merger 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/man_merger.rb CHANGED
@@ -1,344 +1,344 @@
1
- require 'csv'
2
-
3
- ## CHANGELOG
4
- # 2173 Master file: change sp16,17,18 to *_rev
5
-
6
- module ETL
7
- class ManMerger
8
- LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
- T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
- #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
- EPOCH_LENGTH = 30
12
-
13
- def merge_files
14
- subject_list = load_subject_list
15
- subject_list.each do |subject_code, file_list|
16
- merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
- merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
- MY_LOG.info "---- #{subject_code}"
19
-
20
- previous_first_labtime = nil
21
- previous_last_labtime = nil
22
- subject_year = get_subject_year(file_list)
23
-
24
- file_list.each do |file_hash|
25
- matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
- matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
-
28
- ## Validate File List
29
- if matched_files.length != 1
30
- raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
- else
32
- man_file_path = matched_files[0]
33
- end
34
-
35
- man_file = File.open(man_file_path)
36
- LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
- file_info = {}
38
-
39
-
40
- ## Ignore Corrupted Files
41
- #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
-
43
- # Date from file name
44
- matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
- file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
-
47
- # read file
48
- lines = man_file.readlines("\r")
49
- # delete possible empty last line
50
- lines.pop if lines.last.blank?
51
-
52
- # get file first and last times
53
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
- file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
- file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
-
58
- # validate first/last times
59
- if file_hash[:start_time] != file_info[:first_time]
60
- MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
- end
62
- if file_hash[:last_line_time] != file_info[:last_time]
63
- MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
- end
65
- if file_hash[:last_line_number] != lines.length
66
- MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
- end
68
-
69
- ##
70
- # VALIDATION
71
- file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
- file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
-
74
- start_realtime = file_hash[:start_labtime].to_time
75
- last_line_realtime = file_hash[:last_line_labtime].to_time
76
-
77
- first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
- last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
-
80
- file_info[:first_labtime] = Labtime.parse(first_realtime)
81
- file_info[:last_labtime] = Labtime.parse(last_realtime)
82
- predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
83
-
84
- sep = false
85
- if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
- MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
- sep = true
88
- end
89
-
90
- # These checks fail if DST TRANSITION HAPPENS
91
- if last_line_realtime.dst? == start_realtime.dst?
92
- if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
- MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
- sep = true
95
- end
96
- if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
- MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
- sep = true
99
- end
100
- end
101
-
102
- if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
- MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
- sep = true
105
- end
106
-
107
- unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
- MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
- end
110
-
111
- raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
- raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
-
114
- MY_LOG.info "-----------------------------------\n\n" if sep
115
-
116
- last_labtime = nil
117
- ibob_flag = 0
118
-
119
- lines.each_with_index do |line, line_number|
120
- #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
- =begin
122
- sleep man file:
123
- 0 undef/unscored
124
- 1 stage 1
125
- 2 stage 2
126
- 3 stage 3
127
- 4 stage 4
128
- 5 wake
129
- 6 REM
130
- 7 MVT
131
- 8 LOff and LOn
132
-
133
- wake man file:
134
- 0 undef/unscored
135
-
136
- 1 stage 1
137
- 2 stage 2
138
- 3 stage 3
139
- 4 stage 4
140
- 5 wake
141
- 6 REM
142
- 7 MVT
143
- 8 SEM
144
- =end
145
-
146
-
147
- line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
- line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
-
150
- # Sleep Period Coding:
151
- # 1 Sleep Onset (Lights Off) (IN BED)
152
- # 2 Sleep Offset (Lights On) (OUT OF BED)
153
- if file_hash[:type] == :sleep and line_code == 8
154
- if ibob_flag == 0
155
- sleep_period = 1
156
- ibob_flag = 1
157
- else
158
- sleep_period = 2
159
- ibob_flag = 0
160
- end
161
- else
162
- sleep_period = nil
163
- end
164
-
165
- # Sleep Stage Coding:
166
- # 1 stage 1
167
- # 2 stage 2
168
- # 3 stage 3
169
- # 4 stage 4
170
- # 6 MT
171
- # 7 Undef
172
- # 5 REM
173
- # 9 Wake
174
- if line_code >= 1 and line_code <= 4
175
- line_event = line_code
176
- elsif line_code == 0
177
- line_event = 7
178
- elsif line_code == 5 or line_code == 8
179
- line_event = 9
180
- elsif line_code == 6
181
- line_event = 5
182
- elsif line_code == 7
183
- line_event = 6
184
- else
185
- raise StandardError, "Cannot map the following event: #{line_code}"
186
- end
187
-
188
- # SEM Event Coding:
189
- # 1 Slow Eye Movement
190
- # 0 No Slow Eye Movement
191
- if file_hash[:type] == :wake and line_code == 8
192
- sem_event = 1
193
- else
194
- sem_event = 0
195
- end
196
-
197
- last_labtime = line_labtime
198
-
199
- output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
- merged_file << output_line
201
- end
202
-
203
-
204
- previous_first_labtime = file_info[:first_labtime]
205
- previous_last_labtime = last_labtime
206
-
207
- end
208
- merged_file.close
209
- MY_LOG.info "---- end #{subject_code}\n\n"
210
-
211
- end
212
- end
213
-
214
- def load_subject_list
215
- subject_info = {}
216
- Dir.foreach(LIST_DIR) do |file|
217
- next if file == '.' or file == '..'
218
- #MY_LOG.info "#{file}"
219
- csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
220
-
221
- # Match and Validate File Name
222
- matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
- if matched_sc
224
- subject_code = matched_sc[1].upcase
225
- else
226
- next
227
- end
228
-
229
- subject_info[subject_code] = []
230
- csv_file.each do |row|
231
- file_info = {}
232
- pattern = /(.*)\.man/i.match(row[0])
233
-
234
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
- if matched_time
236
- file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
- else
238
- MY_LOG.error "No Valid Start Time Found: #{row}"
239
- next
240
- end
241
-
242
- matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
- if matched_time
244
- file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
- else
246
- MY_LOG.error "No Valid End Time Found: #{row}"
247
- next
248
- end
249
-
250
- file_info[:start_labtime] = row[2].to_f
251
- file_info[:last_line_number] = row[3].to_i
252
- file_info[:last_line_labtime] = row[5].to_f
253
-
254
- if pattern
255
- file_info[:pattern] = pattern[1]
256
- subject_info[subject_code] << file_info
257
-
258
- # Determine if sleep or wake file
259
- raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
- raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
-
262
- if /_sp?\d/i.match(pattern[1])
263
- file_info[:type] = :sleep
264
- elsif /_wp?\d/i.match(pattern[1])
265
- file_info[:type] = :wake
266
- else
267
- raise StandardError, "Didn't match any SP/WP..."
268
- end
269
- else
270
- MY_LOG.info "No Valid File Name Found: #{row}"
271
- next
272
- end
273
- end
274
- #MY_LOG.info subject_info[subject_code]
275
- end
276
- #MY_LOG.info subject_info.inspect
277
- subject_info
278
- end
279
-
280
- def get_subject_year(file_list)
281
- years = file_list.map do |h|
282
- matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
- matched_date ? matched_date[3] : nil
284
- end
285
- years.delete_if {|x| x.nil? }
286
- years = years.uniq
287
-
288
- raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
- year = years.first.to_i
290
- year > 30 ? year + 1900 : year + 2000
291
- end
292
- end
293
-
294
-
295
- end
296
-
297
- =begin
298
- path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
- path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
-
301
- file list:
302
- subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
-
304
- sleep man file:
305
- 0 undef/unscored
306
- 1 stage 1
307
- 2 stage 2
308
- 3 stage 3
309
- 4 stage 4
310
- 5 wake
311
- 6 REM
312
- 7 MVT
313
- 8 LOff and LOn
314
-
315
- wake man file:
316
- 0 undef/unscored
317
- 1 stage 1
318
- 2 stage 2
319
- 3 stage 3
320
- 4 stage 4
321
- 5 wake
322
- 6 REM
323
- 7 MVT
324
- 8 SEM
325
-
326
-
327
- sleep stage 8 should be coded as Wake with a SEM
328
- 5 is Wake
329
- 1-4 is Sleep stage 1-4
330
- 7 is REM
331
- 8 is Wake with SEM plus LOff and Lon
332
-
333
-
334
- mapping:
335
- 1 stage 1
336
- 2 stage 2
337
- 3 stage 3
338
- 4 stage 4
339
- 6 MT
340
- 7 Undef
341
- 5 REM
342
- 9 Wake
343
- =end
344
-
1
+ require 'csv'
2
+
3
+ ## CHANGELOG
4
+ # 2173 Master file: change sp16,17,18 to *_rev
5
+
6
+ module ETL
7
+ class ManMerger
8
+ LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
+ T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
+ #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
+ EPOCH_LENGTH = 30
12
+
13
+ def merge_files
14
+ subject_list = load_subject_list
15
+ subject_list.each do |subject_code, file_list|
16
+ merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
+ merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
+ MY_LOG.info "---- #{subject_code}"
19
+
20
+ previous_first_labtime = nil
21
+ previous_last_labtime = nil
22
+ subject_year = get_subject_year(file_list)
23
+
24
+ file_list.each do |file_hash|
25
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
+
28
+ ## Validate File List
29
+ if matched_files.length != 1
30
+ raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
+ else
32
+ man_file_path = matched_files[0]
33
+ end
34
+
35
+ man_file = File.open(man_file_path)
36
+ LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
+ file_info = {}
38
+
39
+
40
+ ## Ignore Corrupted Files
41
+ #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
+
43
+ # Date from file name
44
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
+ file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
+
47
+ # read file
48
+ lines = man_file.readlines("\r")
49
+ # delete possible empty last line
50
+ lines.pop if lines.last.blank?
51
+
52
+ # get file first and last times
53
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
+ file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
+ file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
+
58
+ # validate first/last times
59
+ if file_hash[:start_time] != file_info[:first_time]
60
+ MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
+ end
62
+ if file_hash[:last_line_time] != file_info[:last_time]
63
+ MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
+ end
65
+ if file_hash[:last_line_number] != lines.length
66
+ MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
+ end
68
+
69
+ ##
70
+ # VALIDATION
71
+ file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
+ file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
+
74
+ start_realtime = file_hash[:start_labtime].to_time
75
+ last_line_realtime = file_hash[:last_line_labtime].to_time
76
+
77
+ first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
+ last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
+
80
+ file_info[:first_labtime] = Labtime.parse(first_realtime)
81
+ file_info[:last_labtime] = Labtime.parse(last_realtime)
82
+ predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
83
+
84
+ sep = false
85
+ if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
+ MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
+ sep = true
88
+ end
89
+
90
+ # These checks fail if DST TRANSITION HAPPENS
91
+ if last_line_realtime.dst? == start_realtime.dst?
92
+ if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
+ MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
+ sep = true
95
+ end
96
+ if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
+ MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
+ sep = true
99
+ end
100
+ end
101
+
102
+ if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
+ MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
+ sep = true
105
+ end
106
+
107
+ unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
+ MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
+ end
110
+
111
+ raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
+ raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
+
114
+ MY_LOG.info "-----------------------------------\n\n" if sep
115
+
116
+ last_labtime = nil
117
+ ibob_flag = 0
118
+
119
+ lines.each_with_index do |line, line_number|
120
+ #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
+ =begin
122
+ sleep man file:
123
+ 0 undef/unscored
124
+ 1 stage 1
125
+ 2 stage 2
126
+ 3 stage 3
127
+ 4 stage 4
128
+ 5 wake
129
+ 6 REM
130
+ 7 MVT
131
+ 8 LOff and LOn
132
+
133
+ wake man file:
134
+ 0 undef/unscored
135
+
136
+ 1 stage 1
137
+ 2 stage 2
138
+ 3 stage 3
139
+ 4 stage 4
140
+ 5 wake
141
+ 6 REM
142
+ 7 MVT
143
+ 8 SEM
144
+ =end
145
+
146
+
147
+ line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
+ line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
+
150
+ # Sleep Period Coding:
151
+ # 1 Sleep Onset (Lights Off) (IN BED)
152
+ # 2 Sleep Offset (Lights On) (OUT OF BED)
153
+ if file_hash[:type] == :sleep and line_code == 8
154
+ if ibob_flag == 0
155
+ sleep_period = 1
156
+ ibob_flag = 1
157
+ else
158
+ sleep_period = 2
159
+ ibob_flag = 0
160
+ end
161
+ else
162
+ sleep_period = nil
163
+ end
164
+
165
+ # Sleep Stage Coding:
166
+ # 1 stage 1
167
+ # 2 stage 2
168
+ # 3 stage 3
169
+ # 4 stage 4
170
+ # 6 MT
171
+ # 7 Undef
172
+ # 5 REM
173
+ # 9 Wake
174
+ if line_code >= 1 and line_code <= 4
175
+ line_event = line_code
176
+ elsif line_code == 0
177
+ line_event = 7
178
+ elsif line_code == 5 or line_code == 8
179
+ line_event = 9
180
+ elsif line_code == 6
181
+ line_event = 5
182
+ elsif line_code == 7
183
+ line_event = 6
184
+ else
185
+ raise StandardError, "Cannot map the following event: #{line_code}"
186
+ end
187
+
188
+ # SEM Event Coding:
189
+ # 1 Slow Eye Movement
190
+ # 0 No Slow Eye Movement
191
+ if file_hash[:type] == :wake and line_code == 8
192
+ sem_event = 1
193
+ else
194
+ sem_event = 0
195
+ end
196
+
197
+ last_labtime = line_labtime
198
+
199
+ output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
+ merged_file << output_line
201
+ end
202
+
203
+
204
+ previous_first_labtime = file_info[:first_labtime]
205
+ previous_last_labtime = last_labtime
206
+
207
+ end
208
+ merged_file.close
209
+ MY_LOG.info "---- end #{subject_code}\n\n"
210
+
211
+ end
212
+ end
213
+
214
+ def load_subject_list
215
+ subject_info = {}
216
+ Dir.foreach(LIST_DIR) do |file|
217
+ next if file == '.' or file == '..'
218
+ #MY_LOG.info "#{file}"
219
+ csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
220
+
221
+ # Match and Validate File Name
222
+ matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
+ if matched_sc
224
+ subject_code = matched_sc[1].upcase
225
+ else
226
+ next
227
+ end
228
+
229
+ subject_info[subject_code] = []
230
+ csv_file.each do |row|
231
+ file_info = {}
232
+ pattern = /(.*)\.man/i.match(row[0])
233
+
234
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
+ if matched_time
236
+ file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
+ else
238
+ MY_LOG.error "No Valid Start Time Found: #{row}"
239
+ next
240
+ end
241
+
242
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
+ if matched_time
244
+ file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
+ else
246
+ MY_LOG.error "No Valid End Time Found: #{row}"
247
+ next
248
+ end
249
+
250
+ file_info[:start_labtime] = row[2].to_f
251
+ file_info[:last_line_number] = row[3].to_i
252
+ file_info[:last_line_labtime] = row[5].to_f
253
+
254
+ if pattern
255
+ file_info[:pattern] = pattern[1]
256
+ subject_info[subject_code] << file_info
257
+
258
+ # Determine if sleep or wake file
259
+ raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
+ raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
+
262
+ if /_sp?\d/i.match(pattern[1])
263
+ file_info[:type] = :sleep
264
+ elsif /_wp?\d/i.match(pattern[1])
265
+ file_info[:type] = :wake
266
+ else
267
+ raise StandardError, "Didn't match any SP/WP..."
268
+ end
269
+ else
270
+ MY_LOG.info "No Valid File Name Found: #{row}"
271
+ next
272
+ end
273
+ end
274
+ #MY_LOG.info subject_info[subject_code]
275
+ end
276
+ #MY_LOG.info subject_info.inspect
277
+ subject_info
278
+ end
279
+
280
+ def get_subject_year(file_list)
281
+ years = file_list.map do |h|
282
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
+ matched_date ? matched_date[3] : nil
284
+ end
285
+ years.delete_if {|x| x.nil? }
286
+ years = years.uniq
287
+
288
+ raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
+ year = years.first.to_i
290
+ year > 30 ? year + 1900 : year + 2000
291
+ end
292
+ end
293
+
294
+
295
+ end
296
+
297
+ =begin
298
+ path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
+ path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
+
301
+ file list:
302
+ subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
+
304
+ sleep man file:
305
+ 0 undef/unscored
306
+ 1 stage 1
307
+ 2 stage 2
308
+ 3 stage 3
309
+ 4 stage 4
310
+ 5 wake
311
+ 6 REM
312
+ 7 MVT
313
+ 8 LOff and LOn
314
+
315
+ wake man file:
316
+ 0 undef/unscored
317
+ 1 stage 1
318
+ 2 stage 2
319
+ 3 stage 3
320
+ 4 stage 4
321
+ 5 wake
322
+ 6 REM
323
+ 7 MVT
324
+ 8 SEM
325
+
326
+
327
+ sleep stage 8 should be coded as Wake with a SEM
328
+ 5 is Wake
329
+ 1-4 is Sleep stage 1-4
330
+ 7 is REM
331
+ 8 is Wake with SEM plus LOff and Lon
332
+
333
+
334
+ mapping:
335
+ 1 stage 1
336
+ 2 stage 2
337
+ 3 stage 3
338
+ 4 stage 4
339
+ 6 MT
340
+ 7 Undef
341
+ 5 REM
342
+ 9 Wake
343
+ =end
344
+