tasci_merger 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 97e75686a322ea2d9d5550c697424ae7dd74ddbf
4
+ data.tar.gz: f36602af907f096088e45bc49acc92af4a160bd2
5
+ SHA512:
6
+ metadata.gz: 23b86daf7c2711403d6a9966c2a3a93514cbda5fa5723f7df96e715973e4dc790ed26e9f091897c475cacfd5ad269200e61edefe4b3bc7bb7bf4c3de0a5ef709
7
+ data.tar.gz: c737f3f05aaccc74d094908f0d82f67395a5b071b86983d1b2ed01d32b43298e236b4fa800181f3236066bf75ef07dab814c0fe4bcbc4a823c44f90c6cb48a77
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Piotr Mankowski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,63 @@
1
+ # Instructions for Tasci File Merger
2
+
3
+ ## Installation
4
+ 1. Install Ruby.
5
+ - [http://rubyinstaller.org/](http://rubyinstaller.org/)
6
+ - Download Ruby 2.1.5 (x64) installer.
7
+ - Install to desired *RUBY_DIRECTORY*
8
+ - Choose option to add ruby to *PATH*
9
+ - Choose option to associate *.rb files with Ruby
10
+
11
+ 2. Verify ruby installation.
12
+ - Go into command line.
13
+ - Enter `ruby -v`
14
+ - Output should look like `ruby 2.1.5p273...`
15
+ - Enter `gem -v`
16
+ - Output should look like `2.2.2`
17
+
18
+ 3. Fix potential RubyGems certificate bug, as documented on [this page](https://gist.github.com/luislavena/f064211759ee0f806c88)
19
+ - Try running `gem install activesupport --no-ri --no-rdoc`
20
+ - Click `Allow access` if prompted by Windows Firewall message, choosing option for *Private networks*
21
+ - If installation fails with an `SSL_error`, follow the following steps:
22
+ - Save certificate file from [this website](https://raw.githubusercontent.com/rubygems/rubygems/master/lib/rubygems/ssl_certs/AddTrustExternalCARoot-2048.pem) to the Downloads directory.
23
+ - **Make sure file is saved with the *.pem extension**
24
+ - Find rubygems folder location by typing `gem which rubygems` into the console. You should get output like `C:/Ruby21/lib/ruby/2.1.0/rubygems.rb`
25
+ - Locate the directory and open it in an explorer window. For the above path, the directory would be `C:\Ruby21\lib\ruby\2.1.0\rubygems`
26
+ - Open the `ssl_certs` directory, and copy the previously-downloaded `*.pem` file into this directory.
27
+ - Close and re-open a console window.
28
+
29
+ 4. Install required gems.
30
+ - Run:
31
+
32
+ ```
33
+ gem install activesupport --no-ri --no-rdoc
34
+ gem install 'tzinfo-data' --no-ri --no-rdoc
35
+ ```
36
+ 5. Download TASCI merger package zipfile from [Github](https://github.com/pmanko/tasci_merger) using the *Download ZIP* button.
37
+
38
+ 10. Merge TASCI files.
39
+ 6. Unpack to *package_directory*.
40
+
41
+ 7. Run **IRB** in *package_directory*.
42
+ - Open console
43
+ - Run `cd unpacked_package_directory` to navigate to package directory
44
+ - Run `irb` to open interactive ruby console
45
+
46
+ 8. Load package.
47
+
48
+ ```ruby
49
+ load('./tasci_merger.rb')
50
+ ```
51
+ 9. Generate master file list.
52
+
53
+ ```ruby
54
+ tasci_merger = ETL::TasciMerger.new
55
+ tasci_merger.create_master_list("TASCI_FILE_DIRECTORY", "OUTPUT_DIRECTORY")
56
+ ```
57
+
58
+ ```ruby
59
+ tasci_merger.merge_files(['SUBJECT_CODE'], "MASTER_FILE_PATH", "OUTPUT_DIRECTORY", "TASCI_FILE_DIRECTORY")
60
+ ```
61
+
62
+
63
+
data/bin/merge_tasci ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point: builds the master list for a directory of TASCI
# files and then merges them into a single CSV for one subject.
#
# Usage: merge_tasci SUBJECT_CODE TASCI_DIR OUTPUT_DIR

require 'tasci_merger'

# All three arguments are consumed by TasciMerger.new; fail with a usage
# message instead of a NoMethodError on nil when one is missing.
abort "Usage: merge_tasci SUBJECT_CODE TASCI_DIR OUTPUT_DIR" if ARGV.length < 3

subject_code, tasci_directory, output_directory = ARGV

tm = TasciMerger.new(subject_code, tasci_directory, output_directory)
tm.create_master_list
tm.merge_files
data/lib/labtime.rb ADDED
@@ -0,0 +1,111 @@
1
+ require('active_support/values/time_zone')
2
+ require('active_support/time_with_zone')
3
+ require 'active_support/core_ext/time/zones'
4
+ require 'active_support/core_ext/time'
5
+ require 'active_support/core_ext/numeric/time'
6
+
7
+ class Labtime
8
+ include Comparable
9
+
10
+ attr_accessor :year, :hour, :min, :sec, :time_zone
11
+ DEFAULT_TIME_ZONE = ActiveSupport::TimeZone.new("Eastern Time (US & Canada)")
12
+
13
+ def self.parse(realtime)
14
+ # Return nil if nil parameter
15
+ return nil if realtime.nil?
16
+
17
+ # Make sure datetime is an ActiveSupport:TimeWithZone object
18
+ raise ArgumentError, "realtime class #{realtime.class} is not ActiveSupport::TimeWithZone" unless realtime.is_a?(ActiveSupport::TimeWithZone)
19
+
20
+ # year is easy
21
+ year = realtime.year
22
+
23
+ # Reference fo labtime is start of year
24
+ Time.zone = realtime.time_zone
25
+ reference_time = Time.zone.local(year)
26
+
27
+ # find difference between reference and
28
+ second_difference = realtime.to_i - reference_time.to_i
29
+
30
+ # convert second difference to labtime
31
+ hour = second_difference / 3600
32
+ min = (second_difference - (hour * 3600)) / 60
33
+ sec = (second_difference - (hour * 3600) - (min * 60))
34
+
35
+ self.new(year, hour, min, sec, realtime.time_zone)
36
+ end
37
+
38
+ def self.from_decimal(decimal_labtime, year, time_zone = ActiveSupport::TimeZone.new("Eastern Time (US & Canada)"))
39
+ raise ArguementError, "No year supplied!" if year.blank?
40
+
41
+ hour = decimal_labtime.to_i
42
+ remainder = decimal_labtime - hour.to_f
43
+ min_labtime = 60.0 * remainder
44
+ min = min_labtime.to_i
45
+ remainder = min_labtime - min.to_f
46
+ sec = (60 * remainder).round.to_i
47
+
48
+ self.new(year, hour, min, sec, time_zone)
49
+ end
50
+
51
+ def self.from_seconds(sec_time, year, time_zone = DEFAULT_TIME_ZONE)
52
+ hour = (sec_time / 3600.0).to_i
53
+ sec_time = sec_time - (hour * 3600)
54
+ min = (sec_time / 60.0).to_i
55
+ sec_time = sec_time - (min * 60)
56
+ sec = sec_time
57
+
58
+ self.new(year, hour, min, sec, time_zone)
59
+ end
60
+
61
+ def self.from_s(str, time_params = {}, time_zone = DEFAULT_TIME_ZONE)
62
+ time_captures = /(\d+)\:(\d{1,2})(\:(\d{1,2}))?(\s(\d\d\d\d))?\z/.match(str).captures
63
+
64
+ time_params[:hour] ||= time_captures[0]
65
+ time_params[:min] ||= time_captures[1]
66
+ time_params[:sec] ||= time_captures[3]
67
+ time_params[:year] ||= time_captures[5]
68
+
69
+ self.new(time_params[:year], time_params[:hour], time_params[:min], time_params[:sec], time_zone)
70
+ end
71
+
72
+ def initialize(year, hour, min, sec, time_zone = nil)
73
+ @year = year.to_i
74
+ @hour = hour.to_i
75
+ @min = min.to_i
76
+ @sec = sec.to_i
77
+ @time_zone = time_zone || DEFAULT_TIME_ZONE
78
+ end
79
+
80
+ def to_time
81
+ reference_time = time_zone.local(year)
82
+ reference_time + time_in_seconds
83
+ end
84
+
85
+ def <=>(other)
86
+ to_time <=> other.to_time
87
+ end
88
+
89
+ def to_s
90
+ "#{year} #{hour}:#{min}:#{sec} #{time_zone.to_s}"
91
+ end
92
+
93
+ def to_short_s
94
+ "#{hour}:#{min}:#{sec}"
95
+ end
96
+
97
+ def time_in_seconds
98
+ hour * 3600 + min * 60 + sec
99
+ end
100
+
101
+ def add_seconds(sec)
102
+ self.class.from_seconds(self.time_in_seconds + sec, self.year, self.time_zone)
103
+ end
104
+
105
+ def to_decimal
106
+ hour.to_f + (min.to_f/60.0) + (sec.to_f/3600.0)
107
+ end
108
+
109
+ private
110
+
111
+ end
data/lib/man_merger.rb ADDED
@@ -0,0 +1,344 @@
1
+ require 'csv'
2
+
3
+ ## CHANGELOG
4
+ # 2173 Master file: change sp16,17,18 to *_rev
5
+
6
+ module ETL
7
+ class ManMerger
8
+ LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
+ T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
+ #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
+ EPOCH_LENGTH = 30
12
+
13
+ def merge_files
14
+ subject_list = load_subject_list
15
+ subject_list.each do |subject_code, file_list|
16
+ merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
+ merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
+ MY_LOG.info "---- #{subject_code}"
19
+
20
+ previous_first_labtime = nil
21
+ previous_last_labtime = nil
22
+ subject_year = get_subject_year(file_list)
23
+
24
+ file_list.each do |file_hash|
25
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
+
28
+ ## Validate File List
29
+ if matched_files.length != 1
30
+ raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
+ else
32
+ man_file_path = matched_files[0]
33
+ end
34
+
35
+ man_file = File.open(man_file_path)
36
+ LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
+ file_info = {}
38
+
39
+
40
+ ## Ignore Corrupted Files
41
+ #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
+
43
+ # Date from file name
44
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
+ file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
+
47
+ # read file
48
+ lines = man_file.readlines("\r")
49
+ # delete possible empty last line
50
+ lines.pop if lines.last.blank?
51
+
52
+ # get file first and last times
53
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
+ file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
+ file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
+
58
+ # validate first/last times
59
+ if file_hash[:start_time] != file_info[:first_time]
60
+ MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
+ end
62
+ if file_hash[:last_line_time] != file_info[:last_time]
63
+ MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
+ end
65
+ if file_hash[:last_line_number] != lines.length
66
+ MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
+ end
68
+
69
+ ##
70
+ # VALIDATION
71
+ file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
+ file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
+
74
+ start_realtime = file_hash[:start_labtime].to_time
75
+ last_line_realtime = file_hash[:last_line_labtime].to_time
76
+
77
+ first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
+ last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
+
80
+ file_info[:first_labtime] = Labtime.parse(first_realtime)
81
+ file_info[:last_labtime] = Labtime.parse(last_realtime)
82
+ predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)
83
+
84
+ sep = false
85
+ if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
+ MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
+ sep = true
88
+ end
89
+
90
+ # These checks fail if DST TRANSITION HAPPENS
91
+ if last_line_realtime.dst? == start_realtime.dst?
92
+ if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
+ MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
+ sep = true
95
+ end
96
+ if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
+ MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
+ sep = true
99
+ end
100
+ end
101
+
102
+ if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
+ MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
+ sep = true
105
+ end
106
+
107
+ unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
+ MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
+ end
110
+
111
+ raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
+ raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
+
114
+ MY_LOG.info "-----------------------------------\n\n" if sep
115
+
116
+ last_labtime = nil
117
+ ibob_flag = 0
118
+
119
+ lines.each_with_index do |line, line_number|
120
+ #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
+ =begin
122
+ sleep man file:
123
+ 0 undef/unscored
124
+ 1 stage 1
125
+ 2 stage 2
126
+ 3 stage 3
127
+ 4 stage 4
128
+ 5 wake
129
+ 6 REM
130
+ 7 MVT
131
+ 8 LOff and LOn
132
+
133
+ wake man file:
134
+ 0 undef/un
135
+ cored
136
+ 1 stage 1
137
+ 2 stage 2
138
+ 3 stage 3
139
+ 4 stage 4
140
+ 5 wake
141
+ 6 REM
142
+ 7 MVT
143
+ 8 SEM
144
+ =end
145
+
146
+
147
+ line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
+ line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
+
150
+ # Sleep Period Coding:
151
+ # 1 Sleep Onset (Lights Off) (IN BED)
152
+ # 2 Sleep Offset (Lights On) (OUT OF BED)
153
+ if file_hash[:type] == :sleep and line_code == 8
154
+ if ibob_flag == 0
155
+ sleep_period = 1
156
+ ibob_flag = 1
157
+ else
158
+ sleep_period = 2
159
+ ibob_flag = 0
160
+ end
161
+ else
162
+ sleep_period = nil
163
+ end
164
+
165
+ # Sleep Stage Coding:
166
+ # 1 stage 1
167
+ # 2 stage 2
168
+ # 3 stage 3
169
+ # 4 stage 4
170
+ # 6 MT
171
+ # 7 Undef
172
+ # 5 REM
173
+ # 9 Wake
174
+ if line_code >= 1 and line_code <= 4
175
+ line_event = line_code
176
+ elsif line_code == 0
177
+ line_event = 7
178
+ elsif line_code == 5 or line_code == 8
179
+ line_event = 9
180
+ elsif line_code == 6
181
+ line_event = 5
182
+ elsif line_code == 7
183
+ line_event = 6
184
+ else
185
+ raise StandardError, "Cannot map the following event: #{line_code}"
186
+ end
187
+
188
+ # SEM Event Coding:
189
+ # 1 Slow Eye Movement
190
+ # 0 No Slow Eye Movement
191
+ if file_hash[:type] == :wake and line_code == 8
192
+ sem_event = 1
193
+ else
194
+ sem_event = 0
195
+ end
196
+
197
+ last_labtime = line_labtime
198
+
199
+ output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
+ merged_file << output_line
201
+ end
202
+
203
+
204
+ previous_first_labtime = file_info[:first_labtime]
205
+ previous_last_labtime = last_labtime
206
+
207
+ end
208
+ merged_file.close
209
+ MY_LOG.info "---- end #{subject_code}\n\n"
210
+
211
+ end
212
+ end
213
+
214
+ def load_subject_list
215
+ subject_info = {}
216
+ Dir.foreach(LIST_DIR) do |file|
217
+ next if file == '.' or file == '..'
218
+ #MY_LOG.info "#{file}"
219
+ csv_file = CSV.open("#{LIST_DIR}#{file}", {headers: true})
220
+
221
+ # Match and Validate File Name
222
+ matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
+ if matched_sc
224
+ subject_code = matched_sc[1].upcase
225
+ else
226
+ next
227
+ end
228
+
229
+ subject_info[subject_code] = []
230
+ csv_file.each do |row|
231
+ file_info = {}
232
+ pattern = /(.*)\.man/i.match(row[0])
233
+
234
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
+ if matched_time
236
+ file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
+ else
238
+ MY_LOG.error "No Valid Start Time Found: #{row}"
239
+ next
240
+ end
241
+
242
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
+ if matched_time
244
+ file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
+ else
246
+ MY_LOG.error "No Valid End Time Found: #{row}"
247
+ next
248
+ end
249
+
250
+ file_info[:start_labtime] = row[2].to_f
251
+ file_info[:last_line_number] = row[3].to_i
252
+ file_info[:last_line_labtime] = row[5].to_f
253
+
254
+ if pattern
255
+ file_info[:pattern] = pattern[1]
256
+ subject_info[subject_code] << file_info
257
+
258
+ # Determine if sleep or wake file
259
+ raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
+ raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
+
262
+ if /_sp?\d/i.match(pattern[1])
263
+ file_info[:type] = :sleep
264
+ elsif /_wp?\d/i.match(pattern[1])
265
+ file_info[:type] = :wake
266
+ else
267
+ raise StandardError, "Didn't match any SP/WP..."
268
+ end
269
+ else
270
+ MY_LOG.info "No Valid File Name Found: #{row}"
271
+ next
272
+ end
273
+ end
274
+ #MY_LOG.info subject_info[subject_code]
275
+ end
276
+ #MY_LOG.info subject_info.inspect
277
+ subject_info
278
+ end
279
+
280
+ def get_subject_year(file_list)
281
+ years = file_list.map do |h|
282
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
+ matched_date ? matched_date[3] : nil
284
+ end
285
+ years.delete_if {|x| x.nil? }
286
+ years = years.uniq
287
+
288
+ raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
+ year = years.first.to_i
290
+ year > 30 ? year + 1900 : year + 2000
291
+ end
292
+ end
293
+
294
+
295
+ end
296
+
297
+ =begin
298
+ path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
+ path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
+
301
+ file list:
302
+ subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
+
304
+ sleep man file:
305
+ 0 undef/unscored
306
+ 1 stage 1
307
+ 2 stage 2
308
+ 3 stage 3
309
+ 4 stage 4
310
+ 5 wake
311
+ 6 REM
312
+ 7 MVT
313
+ 8 LOff and LOn
314
+
315
+ wake man file:
316
+ 0 undef/unscored
317
+ 1 stage 1
318
+ 2 stage 2
319
+ 3 stage 3
320
+ 4 stage 4
321
+ 5 wake
322
+ 6 REM
323
+ 7 MVT
324
+ 8 SEM
325
+
326
+
327
+ sleep stage 8 should be coded as Wake with a SEM
328
+ 5 is Wake
329
+ 1-4 is Sleep stage 1-4
330
+ 7 is REM
331
+ 8 is Wake with SEM plus LOff and Lon
332
+
333
+
334
+ mapping:
335
+ 1 stage 1
336
+ 2 stage 2
337
+ 3 stage 3
338
+ 4 stage 4
339
+ 6 MT
340
+ 7 Undef
341
+ 5 REM
342
+ 9 Wake
343
+ =end
344
+
@@ -0,0 +1,276 @@
1
+ require 'csv'
2
+ require 'man_merger'
3
+ require 'labtime'
4
+
5
+ Time.zone = 'Eastern Time (US & Canada)'
6
+
7
# Builds a chronologically ordered master list of the TASCI files found in a
# directory and merges their epochs into a single CSV for one subject.
#
# Typical use:
#   tm = TasciMerger.new(subject_code, tasci_directory, output_directory)
#   tm.create_master_list
#   tm.merge_files
class TasciMerger
  FILE_NAME_PATTERN = /\W*File name \|\W*(.*\.vpd)/i
  RECORD_DATE_PATTERN = /RecordDate\W*\|\W*(..)\/(..)\/(....)\W*\|.*/
  # RecordTime line; the second form also captures the last underscore-separated
  # part of the Patient ID, which is written out as the sleep/wake episode.
  RECORD_TIME_PATTERN = /RecordTime\W*\|\W*(..):(..):(..)\W*\|\W*Patient ID\W*\|\W*.*\W*\|/
  RECORD_TIME_EPISODE_PATTERN = /RecordTime\W*\|\W*(..):(..):(..)\W*\|\W*Patient ID\W*\|\W*.*_.*_(\w*)\W*\|/
  EPOCHS_PATTERN = /\W*# Epochs\W*\|\W*(\d+)\W*\|\W*Duration\(S\)\W*\|\W*(\d+)\|/
  # One epoch row; fields 2-4 are the elapsed hours/minutes/seconds since the
  # record start, fields 5-7 the clock time, field 8 the event text and
  # field 9 the stage name (as used by the code below).
  EPOCH_LINE_PATTERN = /(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d\d):(\d\d):(\d\d)\|\W*(.+)\|\W*(.+)\|/

  # TASCI stage name => merged-file sleep stage code
  # (1-4 stage 1-4, 5 REM, 6 MT, 7 Undef, 9 Wake).
  SLEEP_STAGE_CODES = {
    "Wake" => 9,
    "Undefined" => 7,
    "N1" => 1,
    "N2" => 2,
    "N3" => 3,
    "4" => 4,
    "REM" => 5,
    "MVT" => 6
  }.freeze

  # @param subject_code [String] written (upper-cased) into every merged row
  # @param tasci_directory [String] directory holding the TASCI files
  # @param output_directory [String] directory receiving the generated CSVs
  def initialize(subject_code, tasci_directory, output_directory)
    @subject_code = subject_code.chomp
    @tasci_directory = tasci_directory.chomp
    @output_directory = output_directory.chomp
    @master_file_path = nil
  end

  # Scans every file in the TASCI directory and writes
  # "tasci_master_YYYYMMDD.csv" to the output directory, one row per file,
  # sorted by the labtime of the file's first epoch.
  #
  # @return [String] path of the master file (also remembered for merge_files)
  # @raise [StandardError] when a file header or epoch line cannot be parsed,
  #   or an epoch's clock time disagrees with its elapsed time
  def create_master_list
    master_file_name = File.join(@output_directory, "tasci_master_#{Time.zone.now.strftime("%Y%m%d")}.csv")
    master_file_contents = []

    puts @tasci_directory
    puts File.exist?(@tasci_directory)

    Dir.foreach(@tasci_directory) do |file|
      next if file == '.' || file == '..'

      tasci_file_path = File.join(@tasci_directory, file)
      # Sub-directories cannot be read as TASCI files.
      next if File.directory?(tasci_file_path)

      puts file

      File.open(tasci_file_path) do |tasci_file|
        file_info = read_header(tasci_file, RECORD_TIME_PATTERN)

        first_labtime = nil
        last_labtime = nil
        each_epoch(tasci_file, file_info[:record_full_time], file) do |_fields, line_labtime|
          first_labtime ||= line_labtime
          last_labtime = line_labtime
        end
        raise StandardError, "No epoch lines found in TASCI file #{file}" if first_labtime.nil?

        # The master list records one epoch fewer than the header states.
        master_file_contents << [file, file_info[:record_labtime].to_short_s, file_info[:record_full_time], file_info[:epochs] - 1, first_labtime.to_decimal, last_labtime.to_decimal]
      end
    end

    # The block form flushes and closes the CSV before merge_files reads it.
    CSV.open(master_file_name, "wb") do |master_file|
      master_file << %w(file_name file_labtime file_full_time total_epochs start_labtime end_labtime)
      master_file_contents.sort_by { |row| row[4] }.each { |row| master_file << row }
    end

    puts "Created master file: #{master_file_name}"
    @master_file_path = master_file_name
    master_file_name
  end

  # Merges the epochs of every file named in the master list, in master-list
  # order, into "<SUBJECT>_merged_YYYYMMDD.csv" in the output directory.
  #
  # @return [nil]
  # @raise [StandardError] when create_master_list has not been run, a file
  #   cannot be parsed, or a stage name has no mapping
  def merge_files
    raise StandardError, "No master file path set! You must run create_master_list before running this function." unless @master_file_path

    merged_path = File.join(@output_directory, "#{@subject_code}_merged_#{Time.zone.now.strftime("%Y%m%d")}.csv")

    CSV.open(merged_path, "wb") do |merged_file|
      merged_file << %w(SUBJECT_CODE FILE_NAME_SLEEP_WAKE_EPISODE LABTIME SLEEP_STAGE LIGHTS_OFF_ON_FLAG SEM_FLAG)

      previous_last_labtime = nil

      CSV.foreach(@master_file_path, headers: true) do |row|
        puts row
        tasci_file_path = File.join(@tasci_directory, row[0])

        first_labtime = nil
        last_labtime = nil

        File.open(tasci_file_path) do |tasci_file|
          file_info = read_header(tasci_file, RECORD_TIME_EPISODE_PATTERN)

          each_epoch(tasci_file, file_info[:record_full_time], File.basename(tasci_file_path)) do |fields, line_labtime|
            # Sleep Period Coding:
            # 1      Sleep Onset (Lights Off)
            # 2      Sleep Offset (Lights On)
            sleep_period =
              if /Lights Off/i.match(fields[7])
                1
              elsif /Lights On/i.match(fields[7])
                2
              end

            # Sleep Stage Coding: see SLEEP_STAGE_CODES
            line_event = SLEEP_STAGE_CODES.fetch(fields[8]) do
              raise StandardError, "Cannot map the following event: #{fields[8]}"
            end

            # SEM Event Coding:
            # 1      Slow Eye Movement
            # 0      No Slow Eye Movement
            sem_event = (fields[7] =~ /SEM/ ? 1 : 0)

            first_labtime ||= line_labtime
            last_labtime = line_labtime

            merged_file << [@subject_code.upcase, file_info[:sleep_wake_episode], line_labtime.to_decimal, line_event, sleep_period, sem_event]
          end
        end

        if previous_last_labtime && first_labtime && first_labtime < previous_last_labtime
          puts "Start time is before previous end labtime: #{previous_last_labtime.to_short_s} #{first_labtime.to_short_s}"
        end

        previous_last_labtime = last_labtime
      end
    end

    nil
  end

  private

  # Reads the header of an open TASCI file and leaves the stream positioned
  # at the first epoch line.
  #
  # @param record_time_pattern [Regexp] RECORD_TIME_PATTERN or
  #   RECORD_TIME_EPISODE_PATTERN
  # @return [Hash] :source_file_name, :record_date, :record_full_time,
  #   :record_labtime, :sleep_wake_episode (nil unless the pattern captures
  #   it), :epochs and :epoch_duration
  # @raise [StandardError] when an expected header line does not match
  def read_header(tasci_file, record_time_pattern)
    file_info = {}

    # Header Line
    tasci_file.readline

    # File Name
    matched_name = match_header_line(FILE_NAME_PATTERN, tasci_file.readline)
    file_info[:source_file_name] = matched_name[1]

    # Record Date (captures are passed as year = 3rd, month = 2nd, day = 1st)
    matched_date = match_header_line(RECORD_DATE_PATTERN, tasci_file.readline)
    file_info[:record_date] = Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i)

    # Record Time
    matched_time = match_header_line(record_time_pattern, tasci_file.readline)
    file_info[:record_full_time] = Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i, matched_time[1].to_i, matched_time[2].to_i, matched_time[3].to_i)
    file_info[:record_labtime] = Labtime.parse(file_info[:record_full_time])
    file_info[:sleep_wake_episode] = matched_time[4]

    6.times { tasci_file.readline }

    # Epochs and duration
    matched_epochs = match_header_line(EPOCHS_PATTERN, tasci_file.readline)
    file_info[:epochs] = matched_epochs[1].to_i
    file_info[:epoch_duration] = matched_epochs[2].to_i

    5.times { tasci_file.readline }

    file_info
  end

  # Matches one header line, reporting and raising when it does not match.
  def match_header_line(pattern, read_line)
    matched = pattern.match(read_line)
    return matched if matched

    puts "ERROR: #{read_line}"
    raise StandardError, "Unexpected TASCI header line: #{read_line}"
  end

  # Yields [fields, labtime] for every remaining line of the TASCI file.
  #
  # @raise [StandardError] when a line does not yield exactly nine fields
  def each_epoch(tasci_file, record_full_time, file_name)
    until tasci_file.eof?
      line = tasci_file.readline

      fields = EPOCH_LINE_PATTERN.match(line).to_a.drop(1)
      raise StandardError, "fields should have 9 fields: #{fields.length} #{fields} #{line}" unless fields.length == 9

      yield fields, epoch_labtime(fields, record_full_time, file_name)
    end
  end

  # Converts an epoch's elapsed time into a labtime after checking it against
  # the clock time on the same line.
  #
  # Calculating labtime is tricky - file may span two days, so the elapsed
  # time is added to the record start rather than trusting the clock time.
  # A one-hour difference is tolerated only when a DST change lies between
  # the record start and the epoch.
  def epoch_labtime(fields, record_full_time, file_name)
    calculated_line_time = record_full_time + fields[1].to_i.hours + fields[2].to_i.minutes + fields[3].to_i.seconds

    same_min_sec = calculated_line_time.min == fields[5].to_i && calculated_line_time.sec == fields[6].to_i
    hour_gap = (calculated_line_time.hour - fields[4].to_i).abs
    crossed_dst = record_full_time.dst? != calculated_line_time.dst?

    unless same_min_sec && (hour_gap == 0 || (crossed_dst && hour_gap == 1))
      raise StandardError, "Times DO NOT MATCH IN TASCI FILE #{file_name}!!! #{calculated_line_time.to_s} #{fields[4]} #{fields[5]} #{fields[6]}"
    end

    Labtime.parse(calculated_line_time)
  end
end
276
+
@@ -0,0 +1,21 @@
1
# Packaging metadata for the tasci_merger gem.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = 'tasci_merger'
  spec.version = '0.0.0'
  spec.date    = '2015-02-06'

  # Description and ownership
  spec.summary     = "Merger utility for TASCI scored sleep files."
  spec.description = "Merger utility for TASCI scored sleep files, built for the Division of Sleep and Circadian Disorders at BWH."
  spec.authors     = ["Piotr Mankowski"]
  spec.email       = 'pmankowski@partners.org'
  spec.homepage    = 'https://github.com/pmanko/tasci_merger'
  spec.license     = 'MIT'

  # Packaged files and load path
  spec.files = [
    'LICENSE',
    'README.md',
    'tasci_merger.gemspec',
    'lib/tasci_merger.rb',
    'lib/man_merger.rb',
    'lib/labtime.rb'
  ]
  spec.require_path = 'lib'
  spec.executables << 'merge_tasci'

  # Runtime requirements
  spec.required_ruby_version = '>= 2.1.0'
  spec.add_dependency "activesupport", '~> 4.2', '>= 4.2.0'
end
21
+
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tasci_merger
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Piotr Mankowski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 4.2.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '4.2'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 4.2.0
33
+ description: Merger utility for TASCI scored sleep files, built for the Division of
34
+ Sleep and Circadian Disorders at BWH.
35
+ email: pmankowski@partners.org
36
+ executables:
37
+ - merge_tasci
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - LICENSE
42
+ - README.md
43
+ - bin/merge_tasci
44
+ - lib/labtime.rb
45
+ - lib/man_merger.rb
46
+ - lib/tasci_merger.rb
47
+ - tasci_merger.gemspec
48
+ homepage: https://github.com/pmanko/tasci_merger
49
+ licenses:
50
+ - MIT
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 2.1.0
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.2.2
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Merger utility for TASCI scored sleep files.
72
+ test_files: []