tasci_merger 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 97e75686a322ea2d9d5550c697424ae7dd74ddbf
4
+ data.tar.gz: f36602af907f096088e45bc49acc92af4a160bd2
5
+ SHA512:
6
+ metadata.gz: 23b86daf7c2711403d6a9966c2a3a93514cbda5fa5723f7df96e715973e4dc790ed26e9f091897c475cacfd5ad269200e61edefe4b3bc7bb7bf4c3de0a5ef709
7
+ data.tar.gz: c737f3f05aaccc74d094908f0d82f67395a5b071b86983d1b2ed01d32b43298e236b4fa800181f3236066bf75ef07dab814c0fe4bcbc4a823c44f90c6cb48a77
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Piotr Mankowski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
data/README.md ADDED
@@ -0,0 +1,63 @@
1
+ # Instructions for Tasci File Merger
2
+
3
+ ## Installation
4
+ 1. Install Ruby.
5
+ - [http://rubyinstaller.org/](http://rubyinstaller.org/)
6
+ - Download Ruby 2.1.5 (x64) installer.
7
+ - Install to desired *RUBY_DIRECTORY*
8
+ - Choose option to add ruby to *PATH*
9
+ - Choose option to associate *.rb files with Ruby
10
+
11
+ 2. Verify ruby installation.
12
+ - Go into command line.
13
+ - Enter `ruby -v`
14
+ - Output should look like `ruby 2.1.5p273...`
15
+ - Enter `gem -v`
16
+ - Output should look like `2.2.2`
17
+
18
+ 3. Fix potential RubyGems certificate bug, as documented on [this page](https://gist.github.com/luislavena/f064211759ee0f806c88)
19
+ - Try running `gem install activesupport --no-ri --no-rdoc`
20
+ - Click `Allow access` if prompted by Windows Firewall message, choosing option for *Private networks*
21
+ - If installation fails with an `SSL_error`, follow these steps:
22
+ - Save certificate file from [this website](https://raw.githubusercontent.com/rubygems/rubygems/master/lib/rubygems/ssl_certs/AddTrustExternalCARoot-2048.pem) to the Downloads directory.
23
+ - **Make sure the file is saved with the `.pem` extension**
24
+ - Find rubygems folder location by typing `gem which rubygems` into the console. You should get output like `C:/Ruby21/lib/ruby/2.1.0/rubygems.rb`
25
+ - Locate the directory and open it in an explorer window. For the above path, the directory would be `C:\Ruby21\lib\ruby\2.1.0\rubygems`
26
+ - Open the `ssl_certs` directory, and copy the previously-downloaded `*.pem` file into this directory.
27
+ - Close and re-open a console window.
28
+
29
+ 4. Install required gems.
30
+ - Run:
31
+
32
+ ```
33
+ gem install activesupport --no-ri --no-rdoc
34
+ gem install 'tzinfo-data' --no-ri --no-rdoc
35
+ ```
36
+ 5. Download TASCI merger package zipfile from [Github](https://github.com/pmanko/tasci_merger) using the *Download ZIP* button.
37
+
38
+ 10. Merge TASCI files.
39
+ 6. Unpack to *package_directory*.
40
+
41
+ 7. Run **IRB** in *package_directory*.
42
+ - Open console
43
+ - Run `cd unpacked_package_directory` to navigate to package directory
44
+ - Run `irb` to open interactive ruby console
45
+
46
+ 8. Load package.
47
+
48
+ ```ruby
49
+ load('./tasci_merger.rb')
50
+ ```
51
+ 9. Generate master file list.
52
+
53
+ ```ruby
54
+ tasci_merger = ETL::TasciMerger.new
55
+ tasci_merger.create_master_list("TASCI_FILE_DIRECTORY", "OUTPUT_DIRECTORY")
56
+ ```
57
+
58
+ ```ruby
59
+ tasci_merger.merge_files(['SUBJECT_CODE'], "MASTER_FILE_PATH", "OUTPUT_DIRECTORY", "TASCI_FILE_DIRECTORY")
60
+ ```
61
+
62
+
63
+
data/bin/merge_tasci ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tasci_merger'
4
+
5
+ # Usage: merge_tasci SUBJECT_CODE TASCI_DIR OUTPUT_DIR (positional arguments, forwarded in this order)
6
+ tm = TasciMerger.new(ARGV[0], ARGV[1], ARGV[2])
7
+ tm.create_master_list # writes the master CSV of TASCI files and remembers its path on tm
8
+ tm.merge_files # raises unless create_master_list has set the master file path
data/lib/labtime.rb ADDED
@@ -0,0 +1,111 @@
1
+ require('active_support/values/time_zone')
2
+ require('active_support/time_with_zone')
3
+ require 'active_support/core_ext/time/zones'
4
+ require 'active_support/core_ext/time'
5
+ require 'active_support/core_ext/numeric/time'
6
+
7
+ class Labtime
8
+ include Comparable
9
+
10
+ attr_accessor :year, :hour, :min, :sec, :time_zone
11
+ DEFAULT_TIME_ZONE = ActiveSupport::TimeZone.new("Eastern Time (US & Canada)")
12
+
13
+ def self.parse(realtime) # Builds a Labtime (year plus h:m:s elapsed since the start of that year) from a TimeWithZone.
14
+ # Return nil if nil parameter
15
+ return nil if realtime.nil?
16
+
17
+ # Make sure realtime is an ActiveSupport::TimeWithZone object; anything else raises ArgumentError
18
+ raise ArgumentError, "realtime class #{realtime.class} is not ActiveSupport::TimeWithZone" unless realtime.is_a?(ActiveSupport::TimeWithZone)
19
+
20
+ # year is easy
21
+ year = realtime.year
22
+
23
+ # Reference for labtime is the start of the year (NOTE: this also reassigns Time.zone as a side effect)
24
+ Time.zone = realtime.time_zone
25
+ reference_time = Time.zone.local(year)
26
+
27
+ # find the difference in whole seconds between realtime and the reference
28
+ second_difference = realtime.to_i - reference_time.to_i
29
+
30
+ # convert second difference to labtime (hours are not wrapped at 24; they keep counting from the reference)
31
+ hour = second_difference / 3600
32
+ min = (second_difference - (hour * 3600)) / 60
33
+ sec = (second_difference - (hour * 3600) - (min * 60))
34
+
35
+ self.new(year, hour, min, sec, realtime.time_zone)
36
+ end
37
+
38
+ def self.from_decimal(decimal_labtime, year, time_zone = ActiveSupport::TimeZone.new("Eastern Time (US & Canada)"))
39
+ raise ArguementError, "No year supplied!" if year.blank?
40
+
41
+ hour = decimal_labtime.to_i
42
+ remainder = decimal_labtime - hour.to_f
43
+ min_labtime = 60.0 * remainder
44
+ min = min_labtime.to_i
45
+ remainder = min_labtime - min.to_f
46
+ sec = (60 * remainder).round.to_i
47
+
48
+ self.new(year, hour, min, sec, time_zone)
49
+ end
50
+
51
+ def self.from_seconds(sec_time, year, time_zone = DEFAULT_TIME_ZONE) # Builds a Labtime from a number of seconds counted from the start of year.
52
+ hour = (sec_time / 3600.0).to_i # whole hours (to_i truncates)
53
+ sec_time = sec_time - (hour * 3600)
54
+ min = (sec_time / 60.0).to_i # whole minutes left after removing the hours
55
+ sec_time = sec_time - (min * 60)
56
+ sec = sec_time # remaining seconds; initialize converts them with to_i
57
+
58
+ self.new(year, hour, min, sec, time_zone)
59
+ end
60
+
61
+ def self.from_s(str, time_params = {}, time_zone = DEFAULT_TIME_ZONE) # Parses "H:MM", optionally followed by ":SS" and by " YYYY", anchored at the end of str.
62
+ time_captures = /(\d+)\:(\d{1,2})(\:(\d{1,2}))?(\s(\d\d\d\d))?\z/.match(str).captures # a non-matching str makes match return nil, so this raises NoMethodError
63
+ # Values already present in time_params win over the parsed ones (||=); the hash that was passed in is modified.
64
+ time_params[:hour] ||= time_captures[0]
65
+ time_params[:min] ||= time_captures[1]
66
+ time_params[:sec] ||= time_captures[3]
67
+ time_params[:year] ||= time_captures[5]
68
+
69
+ self.new(time_params[:year], time_params[:hour], time_params[:min], time_params[:sec], time_zone)
70
+ end
71
+
72
+ def initialize(year, hour, min, sec, time_zone = nil) # Stores every component through to_i, so strings and nil are accepted (nil becomes 0).
73
+ @year = year.to_i
74
+ @hour = hour.to_i
75
+ @min = min.to_i
76
+ @sec = sec.to_i
77
+ @time_zone = time_zone || DEFAULT_TIME_ZONE # fall back to the class default when no zone is given
78
+ end
79
+
80
+ def to_time # Converts back to a real time: the reference time for year in time_zone plus the labtime offset in seconds.
81
+ reference_time = time_zone.local(year)
82
+ reference_time + time_in_seconds
83
+ end
84
+
85
+ def <=>(other) # Comparable hook: orders by the corresponding real time; other must respond to to_time.
86
+ to_time <=> other.to_time
87
+ end
88
+
89
+ def to_s # "year hour:min:sec zone"; the numeric components are interpolated as-is (no zero padding).
90
+ "#{year} #{hour}:#{min}:#{sec} #{time_zone.to_s}"
91
+ end
92
+
93
+ def to_short_s # "hour:min:sec" without year or zone (no zero padding).
94
+ "#{hour}:#{min}:#{sec}"
95
+ end
96
+
97
+ def time_in_seconds # The labtime expressed as a single number of seconds.
98
+ hour * 3600 + min * 60 + sec
99
+ end
100
+
101
+ def add_seconds(sec) # Returns a new Labtime shifted by sec seconds (same year and zone); self is left untouched.
102
+ self.class.from_seconds(self.time_in_seconds + sec, self.year, self.time_zone)
103
+ end
104
+
105
+ def to_decimal # The labtime as fractional hours (Float).
106
+ hour.to_f + (min.to_f/60.0) + (sec.to_f/3600.0)
107
+ end
108
+
109
+ private
110
+
111
+ end
data/lib/man_merger.rb ADDED
@@ -0,0 +1,344 @@
1
+ require 'csv'
2
+
3
+ ## CHANGELOG
4
+ # 2173 Master file: change sp16,17,18 to *_rev
5
+
6
+ module ETL
7
+ class ManMerger
8
+ LIST_DIR = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_lists/"
9
+ T_DRIVE_DIRS = ["/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/", "/home/pwm4/Windows/tdrive/IPM/NSBRI_65d_Entrainment/"]
10
+ #T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"
11
+ EPOCH_LENGTH = 30
12
+
13
+ def merge_files # For every subject in the file lists: validates each .man file against its list entry and writes one merged CSV row per line (epoch).
14
+ subject_list = load_subject_list
15
+ subject_list.each do |subject_code, file_list|
16
+ merged_file = CSV.open("/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv", "wb")
17
+ merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
18
+ MY_LOG.info "---- #{subject_code}"
19
+
20
+ previous_first_labtime = nil
21
+ previous_last_labtime = nil
22
+ subject_year = get_subject_year(file_list)
23
+
24
+ file_list.each do |file_hash|
25
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
26
+ matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1
27
+
28
+ ## Validate File List
29
+ if matched_files.length != 1
30
+ raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
31
+ else
32
+ man_file_path = matched_files[0]
33
+ end
34
+
35
+ # The file is read further down with File.readlines, which opens and closes it (no handle is left open).
36
+ LOADER_LOGGER.info "--- Loading #{man_file_path}"
37
+ file_info = {}
38
+
39
+
40
+ ## Ignore Corrupted Files
41
+ #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"
42
+
43
+ # Date from file name (_MMDDYY_; two-digit years above 30 are read as 19YY, the rest as 20YY)
44
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
45
+ file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)
46
+
47
+ # read file (records are separated by "\r")
48
+ lines = File.readlines(man_file_path, "\r")
49
+ # delete possible empty last line
50
+ lines.pop if lines.last.blank?
51
+
52
+ # get file first and last times
53
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.first)
54
+ file_info[:first_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
55
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(lines.last)
56
+ file_info[:last_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
57
+
58
+ # validate first/last times (mismatches are only logged, processing continues)
59
+ if file_hash[:start_time] != file_info[:first_time]
60
+ MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
61
+ end
62
+ if file_hash[:last_line_time] != file_info[:last_time]
63
+ MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
64
+ end
65
+ if file_hash[:last_line_number] != lines.length
66
+ MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
67
+ end
68
+
69
+ ##
70
+ # VALIDATION (the decimal labtimes from the list are replaced in file_hash by Labtime objects)
71
+ file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
72
+ file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)
73
+
74
+ start_realtime = file_hash[:start_labtime].to_time
75
+ last_line_realtime = file_hash[:last_line_labtime].to_time
76
+
77
+ first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
78
+ last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])
79
+
80
+ file_info[:first_labtime] = Labtime.parse(first_realtime)
81
+ file_info[:last_labtime] = Labtime.parse(last_realtime)
82
+ predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * EPOCH_LENGTH).seconds)
83
+
84
+ sep = false
85
+ if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
86
+ MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
87
+ sep = true
88
+ end
89
+
90
+ # These checks fail if DST TRANSITION HAPPENS
91
+ if last_line_realtime.dst? == start_realtime.dst?
92
+ if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
93
+ MY_LOG.error "---- LAST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
94
+ sep = true
95
+ end
96
+ if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
97
+ MY_LOG.error "---- PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
98
+ sep = true
99
+ end
100
+ end
101
+
102
+ if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
103
+ MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_hash[:last_line_labtime]} | #{predicted_last_labtime}\n"
104
+ sep = true
105
+ end
106
+
107
+ unless previous_first_labtime.nil? or previous_last_labtime.nil?
108
+ MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
109
+ end
110
+
111
+ raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
112
+ raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec
113
+
114
+ MY_LOG.info "-----------------------------------\n\n" if sep
115
+
116
+ last_labtime = nil
117
+ ibob_flag = 0
118
+
119
+ lines.each_with_index do |line, line_number|
120
+ #merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
121
+ =begin
122
+ sleep man file:
123
+ 0 undef/unscored
124
+ 1 stage 1
125
+ 2 stage 2
126
+ 3 stage 3
127
+ 4 stage 4
128
+ 5 wake
129
+ 6 REM
130
+ 7 MVT
131
+ 8 LOff and LOn
132
+
133
+ wake man file:
134
+ 0 undef/unscored
135
+
136
+ 1 stage 1
137
+ 2 stage 2
138
+ 3 stage 3
139
+ 4 stage 4
140
+ 5 wake
141
+ 6 REM
142
+ 7 MVT
143
+ 8 SEM
144
+ =end
145
+
146
+ # Each line is one epoch: its labtime is the first labtime plus EPOCH_LENGTH seconds per preceding line.
147
+ line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)
148
+ line_code = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)[1].to_i
149
+
150
+ # Sleep Period Coding (code 8 in a sleep file alternates between the two values via ibob_flag):
151
+ # 1 Sleep Onset (Lights Off) (IN BED)
152
+ # 2 Sleep Offset (Lights On) (OUT OF BED)
153
+ if file_hash[:type] == :sleep and line_code == 8
154
+ if ibob_flag == 0
155
+ sleep_period = 1
156
+ ibob_flag = 1
157
+ else
158
+ sleep_period = 2
159
+ ibob_flag = 0
160
+ end
161
+ else
162
+ sleep_period = nil
163
+ end
164
+
165
+ # Sleep Stage Coding:
166
+ # 1 stage 1
167
+ # 2 stage 2
168
+ # 3 stage 3
169
+ # 4 stage 4
170
+ # 6 MT
171
+ # 7 Undef
172
+ # 5 REM
173
+ # 9 Wake
174
+ if line_code >= 1 and line_code <= 4
175
+ line_event = line_code
176
+ elsif line_code == 0
177
+ line_event = 7
178
+ elsif line_code == 5 or line_code == 8
179
+ line_event = 9
180
+ elsif line_code == 6
181
+ line_event = 5
182
+ elsif line_code == 7
183
+ line_event = 6
184
+ else
185
+ raise StandardError, "Cannot map the following event: #{line_code}"
186
+ end
187
+
188
+ # SEM Event Coding:
189
+ # 1 Slow Eye Movement
190
+ # 0 No Slow Eye Movement
191
+ if file_hash[:type] == :wake and line_code == 8
192
+ sem_event = 1
193
+ else
194
+ sem_event = 0
195
+ end
196
+
197
+ last_labtime = line_labtime
198
+
199
+ output_line = [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
200
+ merged_file << output_line
201
+ end
202
+
203
+
204
+ previous_first_labtime = file_info[:first_labtime]
205
+ previous_last_labtime = last_labtime
206
+
207
+ end
208
+ merged_file.close
209
+ MY_LOG.info "---- end #{subject_code}\n\n"
210
+
211
+ end
212
+ end
213
+
214
+ def load_subject_list # Reads every "*SLEEP.csv" list in LIST_DIR; returns {SUBJECT_CODE => [file_info hash per listed .man file]}.
215
+ subject_info = {}
216
+ Dir.foreach(LIST_DIR) do |file|
217
+ next if file == '.' or file == '..'
218
+ #MY_LOG.info "#{file}"
219
+ csv_file = CSV.open("#{LIST_DIR}#{file}", headers: true)
220
+
221
+ # Match and Validate File Name (entries that are not "<subject>SLEEP.csv" are skipped, closing their handle first)
222
+ matched_sc = /(.*)SLEEP\.csv/i.match(File.basename(csv_file.path))
223
+ unless matched_sc
224
+ csv_file.close
225
+ next
226
+ end
227
+ subject_code = matched_sc[1].upcase
228
+
229
+ subject_info[subject_code] = []
230
+ csv_file.each do |row|
231
+ file_info = {}
232
+ pattern = /(.*)\.man/i.match(row[0])
233
+
234
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[1])
235
+ if matched_time
236
+ file_info[:start_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
237
+ else
238
+ MY_LOG.error "No Valid Start Time Found: #{row}"
239
+ next
240
+ end
241
+
242
+ matched_time = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(row[4])
243
+ if matched_time
244
+ file_info[:last_line_time] = {hour: matched_time[1].to_i, min: matched_time[2].to_i, sec: matched_time[3].to_i}
245
+ else
246
+ MY_LOG.error "No Valid End Time Found: #{row}"
247
+ next
248
+ end
249
+
250
+ file_info[:start_labtime] = row[2].to_f
251
+ file_info[:last_line_number] = row[3].to_i
252
+ file_info[:last_line_labtime] = row[5].to_f
253
+
254
+ if pattern
255
+ file_info[:pattern] = pattern[1]
256
+ subject_info[subject_code] << file_info
257
+
258
+ # Determine if sleep or wake file (exactly one of the _sp/_wp name markers must match)
259
+ raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{pattern[1]}" unless (/_sp?\d/i.match(pattern[1]) or /_wp?\d/i.match(pattern[1]))
260
+ raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{pattern[1]}" if (/_sp?\d/i.match(pattern[1]) and /_wp?\d/i.match(pattern[1]))
261
+
262
+ if /_sp?\d/i.match(pattern[1])
263
+ file_info[:type] = :sleep
264
+ elsif /_wp?\d/i.match(pattern[1])
265
+ file_info[:type] = :wake
266
+ else
267
+ raise StandardError, "Didn't match any SP/WP..."
268
+ end
269
+ else
270
+ MY_LOG.info "No Valid File Name Found: #{row}"
271
+ next
272
+ end
273
+ end
274
+ csv_file.close
275
+ end
276
+ #MY_LOG.info subject_info.inspect
277
+ subject_info
278
+ end
279
+
280
+ def get_subject_year(file_list) # Derives the subject's four-digit year from the _MMDDYY_ part of the file patterns; raises if the files disagree.
281
+ years = file_list.map do |h|
282
+ matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(h[:pattern])
283
+ matched_date ? matched_date[3] : nil
284
+ end
285
+ years.delete_if {|x| x.nil? }
286
+ years = years.uniq
287
+ # NOTE(review): when no pattern carries a date, years is empty and nil.to_i below yields 0, i.e. the year 2000.
288
+ raise StandardError, "More than one unique year found in files: #{years}" if years.length > 1
289
+ year = years.first.to_i
290
+ year > 30 ? year + 1900 : year + 2000 # two-digit pivot: 31..99 => 19YY, 00..30 => 20YY
291
+ end
292
+ end
293
+
294
+
295
+ end
296
+
297
+ =begin
298
+ path: /home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/
299
+ path: /usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/file_list
300
+
301
+ file list:
302
+ subject_code ,start time, labtime, last line,last line time,labtime,,,,check,gap
303
+
304
+ sleep man file:
305
+ 0 undef/unscored
306
+ 1 stage 1
307
+ 2 stage 2
308
+ 3 stage 3
309
+ 4 stage 4
310
+ 5 wake
311
+ 6 REM
312
+ 7 MVT
313
+ 8 LOff and LOn
314
+
315
+ wake man file:
316
+ 0 undef/unscored
317
+ 1 stage 1
318
+ 2 stage 2
319
+ 3 stage 3
320
+ 4 stage 4
321
+ 5 wake
322
+ 6 REM
323
+ 7 MVT
324
+ 8 SEM
325
+
326
+
327
+ sleep stage 8 should be coded as Wake with a SEM
328
+ 5 is Wake
329
+ 1-4 is Sleep stage 1-4
330
+ 7 is REM
331
+ 8 is Wake with SEM plus LOff and Lon
332
+
333
+
334
+ mapping:
335
+ 1 stage 1
336
+ 2 stage 2
337
+ 3 stage 3
338
+ 4 stage 4
339
+ 6 MT
340
+ 7 Undef
341
+ 5 REM
342
+ 9 Wake
343
+ =end
344
+
@@ -0,0 +1,276 @@
1
+ require 'csv'
2
+ require 'man_merger'
3
+ require 'labtime'
4
+
5
+ Time.zone = 'Eastern Time (US & Canada)'
6
+
7
+ class TasciMerger
8
+
9
+ def initialize(subject_code, tasci_directory, output_directory)
10
+ @subject_code = subject_code.chomp
11
+ @tasci_directory = tasci_directory.chomp
12
+ @output_directory = output_directory.chomp
13
+ @master_file_path
14
+ end
15
+
16
+ def create_master_list # Scans every entry of @tasci_directory and writes a master CSV (sorted by first labtime) into @output_directory; returns its path.
17
+ master_file_name = File.join(@output_directory, "tasci_master_#{Time.zone.now.strftime("%Y%m%d")}.csv")
18
+ master_file = CSV.open(master_file_name, "wb")
19
+
20
+ master_file << %w(file_name file_labtime file_full_time total_epochs start_labtime end_labtime)
21
+ master_file_contents = []
22
+
23
+ puts @tasci_directory
24
+ puts File.exist?(@tasci_directory)
25
+
26
+ Dir.foreach(@tasci_directory) do |file|
27
+ next if file == '.' or file == '..'
28
+
29
+ puts file
30
+
31
+ tasci_file = File.open(File.join(@tasci_directory, file))
32
+ file_info = {}
33
+
34
+ ## HEADER INFO
35
+ # Header Line
36
+ tasci_file.readline
37
+
38
+ # File Name
39
+ read_line = tasci_file.readline
40
+ matched_name = /\W*File name \|\W*(.*\.vpd)/i.match(read_line)
41
+ puts "ERROR: #{read_line}" unless matched_name
42
+ file_info[:source_file_name] = matched_name[1]
43
+
44
+ # Record Date (captures are read as day/month/year)
45
+ read_line = tasci_file.readline
46
+ matched_date = /RecordDate\W*\|\W*(..)\/(..)\/(....)\W*\|.*/.match(read_line)
47
+ puts "ERROR: #{read_line}" unless matched_date
48
+ #MY_LOG.info "matched_date: #{matched_date[3]} #{matched_date[1]} #{matched_date[2]}"
49
+ file_info[:record_date] = (matched_date ? Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i) : nil)
50
+
51
+ # Record Time
52
+ read_line = tasci_file.readline
53
+ matched_time = /RecordTime\W*\|\W*(..):(..):(..)\W*\|\W*Patient ID\W*\|\W*.*\W*\|/.match(read_line)
54
+ puts "ERROR: #{read_line}" unless matched_time
55
+ file_info[:record_full_time] = ((matched_time and matched_date) ? Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i, matched_time[1].to_i, matched_time[2].to_i, matched_time[3].to_i) : nil)
56
+ file_info[:record_labtime] = Labtime.parse(file_info[:record_full_time])
57
+
58
+ 6.times do
59
+ tasci_file.readline
60
+ end
61
+
62
+ # Epochs and duration
63
+ read_line = tasci_file.readline
64
+ matched_line = /\W*# Epochs\W*\|\W*(\d+)\W*\|\W*Duration\(S\)\W*\|\W*(\d+)\|/.match(read_line)
65
+ puts "ERROR: #{read_line}" unless matched_line
66
+ file_info[:epochs] = matched_line[1].to_i - 1
67
+ file_info[:epoch_duration] = matched_line[2].to_i
68
+
69
+ 5.times do
70
+ tasci_file.readline
71
+ end
72
+
73
+
74
+ first_labtime = nil
75
+ last_labtime = nil
76
+
77
+ until tasci_file.eof?
78
+ line = tasci_file.readline
79
+
80
+ matched_line = /(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d\d):(\d\d):(\d\d)\|\W*(.+)\|\W*(.+)\|/.match(line)
81
+ fields = matched_line.to_a
82
+ fields.delete_at(0)
83
+
84
+ raise StandardError, "fields should have 9 fields: #{fields.length} #{fields} #{line}" unless fields.length == 9
85
+
86
+ # Calculating labtime is tricky - file may span two days (fields 1-3 are an elapsed h/m/s offset, fields 4-6 the clock time)
87
+ calculated_line_time = file_info[:record_full_time] + fields[1].to_i.hours + fields[2].to_i.minutes + fields[3].to_i.seconds
88
+ if calculated_line_time.hour == fields[4].to_i and calculated_line_time.min == fields[5].to_i and calculated_line_time.sec == fields[6].to_i
89
+ line_time = calculated_line_time
90
+ line_labtime = Labtime.parse(line_time)
91
+ elsif file_info[:record_full_time].dst? != calculated_line_time.dst?
92
+ if (calculated_line_time.hour - fields[4].to_i).abs == 1 and calculated_line_time.min == fields[5].to_i and calculated_line_time.sec == fields[6].to_i
93
+ line_time = calculated_line_time
94
+ line_labtime = Labtime.parse(line_time)
95
+ else
96
+ raise StandardError, "Times DO NOT MATCH IN TASCI FILE #{file}!!! #{calculated_line_time.to_s} #{fields[4]} #{fields[5]} #{fields[6]}"
97
+ end
98
+ else
99
+ raise StandardError, "Times DO NOT MATCH IN TASCI FILE #{file}!!! #{calculated_line_time.to_s} #{fields[4]} #{fields[5]} #{fields[6]}"
100
+ end
101
+
102
+ first_labtime = line_labtime if first_labtime.nil?
103
+ last_labtime = line_labtime
104
+
105
+ #MY_LOG.info fields
106
+ end
107
+ tasci_file.close # done with this TASCI file; do not leave one handle open per directory entry
108
+ master_file_contents << [file, file_info[:record_labtime].to_short_s, file_info[:record_full_time], file_info[:epochs], first_labtime.to_decimal, last_labtime.to_decimal]
109
+ end
110
+
111
+ master_file_contents.sort! {|x, y| x[4] <=> y[4] }
112
+ master_file_contents.each { |row| master_file << row }
113
+ master_file.close # flush the rows to disk; merge_files re-reads this file by path
114
+ puts "Created master file: #{master_file.path}"
115
+ @master_file_path = master_file.path
116
+ master_file_name
117
+ end
118
+
119
+ def merge_files # Reads the master list and writes "<SUBJECT>_merged_<date>.csv" with one row per TASCI epoch line, in master-list order.
120
+ raise StandardError, "No master file path set! You must run create_master_list before running this function." unless @master_file_path
121
+
122
+ merged_file = CSV.open(File.join(@output_directory, "#{@subject_code}_merged_#{Time.zone.now.strftime("%Y%m%d")}.csv"), "wb")
123
+ merged_file << %w(SUBJECT_CODE FILE_NAME_SLEEP_WAKE_EPISODE LABTIME SLEEP_STAGE LIGHTS_OFF_ON_FLAG SEM_FLAG)
124
+
125
+ previous_first_labtime = nil
126
+ previous_last_labtime = nil
127
+
128
+ CSV.foreach(@master_file_path, headers: true) do |row|
129
+ puts row
130
+ tasci_file_path = File.join(@tasci_directory, row[0])
131
+
132
+ tasci_file = File.open(tasci_file_path)
133
+ file_info = {}
134
+
135
+ ## HEADER INFO
136
+ # Header Line
137
+ tasci_file.readline
138
+
139
+ # File Name
140
+ read_line = tasci_file.readline
141
+ matched_name = /\W*File name \|\W*(.*\.vpd)/i.match(read_line)
142
+ puts "ERROR: #{read_line}" unless matched_name
143
+ file_info[:source_file_name] = matched_name[1]
144
+
145
+ # Record Date (captures are read as day/month/year)
146
+ read_line = tasci_file.readline
147
+ matched_date = /RecordDate\W*\|\W*(..)\/(..)\/(....)\W*\|.*/.match(read_line)
148
+ puts "ERROR: #{read_line}" unless matched_date
149
+ #MY_LOG.info "matched_date: #{matched_date[3]} #{matched_date[1]} #{matched_date[2]}"
150
+ file_info[:record_date] = (matched_date ? Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i) : nil)
151
+
152
+ # Record Time (the fourth capture, the last "_"-separated part of the Patient ID, is used as the sleep/wake episode)
153
+ read_line = tasci_file.readline
154
+ matched_time = /RecordTime\W*\|\W*(..):(..):(..)\W*\|\W*Patient ID\W*\|\W*.*_.*_(\w*)\W*\|/.match(read_line)
155
+ puts "ERROR: #{read_line}" unless matched_time
156
+ file_info[:record_full_time] = ((matched_time and matched_date) ? Time.zone.local(matched_date[3].to_i, matched_date[2].to_i, matched_date[1].to_i, matched_time[1].to_i, matched_time[2].to_i, matched_time[3].to_i) : nil)
157
+ file_info[:record_labtime] = Labtime.parse(file_info[:record_full_time])
158
+ file_info[:sleep_wake_episode] = matched_time[4]
159
+
160
+ 6.times do
161
+ tasci_file.readline
162
+ end
163
+
164
+ # Epochs and duration
165
+ read_line = tasci_file.readline
166
+ matched_line = /\W*# Epochs\W*\|\W*(\d+)\W*\|\W*Duration\(S\)\W*\|\W*(\d+)\|/.match(read_line)
167
+ puts "ERROR: #{read_line}" unless matched_line
168
+ file_info[:epochs] = matched_line[1].to_i
169
+ file_info[:epoch_duration] = matched_line[2].to_i
170
+
171
+ 5.times do
172
+ tasci_file.readline
173
+ end
174
+
175
+ first_labtime = nil
176
+ last_labtime = nil
177
+
178
+ until tasci_file.eof?
179
+ line = tasci_file.readline
180
+
181
+ matched_line = /(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d+)\|\W*(\d\d):(\d\d):(\d\d)\|\W*(.+)\|\W*(.+)\|/.match(line)
182
+ fields = matched_line.to_a
183
+ fields.delete_at(0)
184
+
185
+ raise StandardError, "fields should have 9 fields: #{fields.length} #{fields} #{line}" unless fields.length == 9
186
+
187
+ # Calculating labtime is tricky - file may span two days (fields 1-3 are an elapsed h/m/s offset, fields 4-6 the clock time)
188
+ calculated_line_time = file_info[:record_full_time] + fields[1].to_i.hours + fields[2].to_i.minutes + fields[3].to_i.seconds
189
+ if calculated_line_time.hour == fields[4].to_i and calculated_line_time.min == fields[5].to_i and calculated_line_time.sec == fields[6].to_i
190
+ line_time = calculated_line_time
191
+ line_labtime = Labtime.parse(line_time)
192
+ elsif file_info[:record_full_time].dst? != calculated_line_time.dst?
193
+ if (calculated_line_time.hour - fields[4].to_i).abs == 1 and calculated_line_time.min == fields[5].to_i and calculated_line_time.sec == fields[6].to_i
194
+ line_time = calculated_line_time
195
+ line_labtime = Labtime.parse(line_time)
196
+ else
197
+ raise StandardError, "Times DO NOT MATCH IN TASCI FILE #{File.basename(tasci_file_path)}!!! #{calculated_line_time.to_s} #{fields[4]} #{fields[5]} #{fields[6]}"
198
+ end
199
+ else
200
+ raise StandardError, "Times DO NOT MATCH IN TASCI FILE #{File.basename(tasci_file_path)}!!! #{calculated_line_time.to_s} #{fields[4]} #{fields[5]} #{fields[6]}"
201
+ end
202
+
203
+ # Sleep Period Coding:
204
+ # 1 Sleep Onset (Lights Off)
205
+ # 2 Sleep Offset (Lights On)
206
+ if /Lights Off/i.match(fields[7]) # Sleep Onset
207
+ sleep_period = 1
208
+ elsif /Lights On/i.match(fields[7]) # Sleep Offset
209
+ sleep_period = 2
210
+ else
211
+ sleep_period = nil
212
+ end
213
+
214
+ # Sleep Stage Coding:
215
+ # 1 stage 1
216
+ # 2 stage 2
217
+ # 3 stage 3
218
+ # 4 stage 4
219
+ # 6 MT
220
+ # 7 Undef
221
+ # 5 REM
222
+ # 9 Wake
223
+ line_event = nil
224
+ if fields[8] == "Wake"
225
+ line_event = 9
226
+ elsif fields[8] == "Undefined"
227
+ line_event = 7
228
+ elsif fields[8] == "N1"
229
+ line_event = 1
230
+ elsif fields[8] == "N2"
231
+ line_event = 2
232
+ elsif fields[8] == "N3"
233
+ line_event = 3
234
+ elsif fields[8] == "4" # NOTE(review): the other stages are matched as "N1".."N3"; confirm the label TASCI uses for stage 4
235
+ line_event = 4
236
+ elsif fields[8] == "REM"
237
+ line_event = 5
238
+ elsif fields[8] == "MVT"
239
+ line_event = 6
240
+ else
241
+ raise StandardError, "Cannot map the following event: #{fields[8]}"
242
+ end
243
+
244
+ # SEM Event Coding:
245
+ # 1 Slow Eye Movement
246
+ # 0 No Slow Eye Movement
247
+ sem_event = (fields[7] =~ /SEM/ ? 1 : 0)
248
+
249
+ # Previous Effort:
250
+ #line_time = Time.zone.local(file_info[:record_full_time].year, file_info[:record_full_time].month, file_info[:record_full_time].day, fields[4].to_i, fields[5].to_i, fields[6].to_i)
251
+ #line_labtime = Labtime.parse(line_time)
252
+
253
+ first_labtime = line_labtime if first_labtime.nil?
254
+ last_labtime = line_labtime
255
+
256
+ output_line = [@subject_code.upcase, file_info[:sleep_wake_episode], line_labtime.to_decimal, line_event, sleep_period, sem_event]
257
+ merged_file << output_line
258
+
259
+
260
+ #MY_LOG.info fields
261
+ end
262
+ tasci_file.close # every epoch line has been consumed; release the handle before the next master-list row
263
+
264
+ unless previous_first_labtime.nil? or previous_last_labtime.nil?
265
+ puts "Start time is before previous end labtime: #{previous_last_labtime.to_short_s} #{first_labtime.to_short_s}" if first_labtime < previous_last_labtime
266
+ end
267
+
268
+
269
+ previous_first_labtime = first_labtime
270
+ previous_last_labtime = last_labtime
271
+ end
272
+ merged_file.close
273
+ end
274
+
275
+ end
276
+
@@ -0,0 +1,21 @@
1
+ Gem::Specification.new do |s| # Gem packaging metadata for tasci_merger.
2
+ s.name = 'tasci_merger'
3
+ s.version = '0.0.0'
4
+ s.date = '2015-02-06'
5
+ s.summary = "Merger utility for TASCI scored sleep files."
6
+ s.description = "Merger utility for TASCI scored sleep files, built for the Division of Sleep and Circadian Disorders at BWH."
7
+ s.authors = ["Piotr Mankowski"]
8
+ s.email = 'pmankowski@partners.org'
9
+ s.files = %w(LICENSE README.md tasci_merger.gemspec lib/tasci_merger.rb lib/man_merger.rb lib/labtime.rb)
10
+ s.require_path = 'lib'
11
+ s.homepage =
12
+ 'https://github.com/pmanko/tasci_merger'
13
+ s.license = 'MIT'
14
+ s.executables << 'merge_tasci' # the command-line entry point
15
+
16
+ s.required_ruby_version = '>= 2.1.0'
17
+
18
+ s.add_dependency "activesupport", '~> 4.2', '>= 4.2.0' # the only declared runtime dependency
19
+
20
+ end
21
+
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tasci_merger
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Piotr Mankowski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '4.2'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 4.2.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '4.2'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 4.2.0
33
+ description: Merger utility for TASCI scored sleep files, built for the Division of
34
+ Sleep and Circadian Disorders at BWH.
35
+ email: pmankowski@partners.org
36
+ executables:
37
+ - merge_tasci
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - LICENSE
42
+ - README.md
43
+ - bin/merge_tasci
44
+ - lib/labtime.rb
45
+ - lib/man_merger.rb
46
+ - lib/tasci_merger.rb
47
+ - tasci_merger.gemspec
48
+ homepage: https://github.com/pmanko/tasci_merger
49
+ licenses:
50
+ - MIT
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 2.1.0
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.2.2
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Merger utility for TASCI scored sleep files.
72
+ test_files: []