brainmap-ImageData 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,356 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ require 'tempfile'
4
+ require 'yaml'
5
+ require 'tmpdir'
6
+ require 'fileutils'
7
+ require 'raw_image_file'
8
+ require 'raw_image_dataset'
9
+ require 'sqlite3'
10
+
11
+
12
# Prints a banner to standard output: a blank line, a rule of 120 "+" characters,
# the message and the current time (each on its own tab-indented line), the rule
# again, and a trailing blank line.
#
# msg:: the object to display; it is formatted with "%s".
def flash(msg)
  rule = "+" * 120
  body = format("\t%s\n", msg) + format("\t%s\n", Time.now)
  $stdout << "\n" << rule << "\n" << body << rule << "\n" << "\n"
end
21
+
22
+
23
+
24
# Encapsulates a directory of data acquired during one participant visit. These
# are the raw data directories that are transferred directly from the scanners and
# archived in the raw data section of the vtrak filesystem. After initializing, the
# visit can be scanned to extract metadata for all of the images acquired during the
# visit. The scanning is done in a fairly naive manner: the visit directory is recursively
# walked and in each subdirectory any and all pfiles will be imported in addition to one single
# dicom if any exist. Thus, only a single dicom file among many in a scan session is used to
# retrieve information. Checking the individual files for data integrity must be handled
# elsewhere if at all.
class VisitRawDataDirectory
  # Ordered [pattern, scan procedure name] pairs used to derive a scan procedure name
  # from the visit directory path. The first matching pattern wins, so the more specific
  # patterns of a study must stay ahead of its catch-all pattern.
  SCAN_PROCEDURE_PATTERNS = [
    [/alz_2000.*_2$/,        'johnson.alz.visit2'],
    [/alz_2000.*_3$/,        'johnson.alz.visit3'],
    [/alz_2000.alz...$/,     'johnson.alz.visit1'],
    [/alz_2000/,             'johnson.alz.unk.visit'],

    [/tbi_1000.*_2$/,        'johnson.tbi-1000.visit2'],
    [/tbi_1000.*_3$/,        'johnson.tbi-1000.visit3'],
    [/tbi_1000.tbi...$/,     'johnson.tbi-1000.visit1'],
    [/tbi_1000/,             'johnson.tbi-1000.unk.visit'],

    [/tbi_aware.*_2$/,       'johnson.tbi-aware.visit2'],
    [/tbi_aware.*_3$/,       'johnson.tbi-aware.visit3'],
    [/tbi_aware.tbi...$/,    'johnson.tbi-aware.visit1'],
    [/tbi_aware/,            'johnson.tbi-aware.unk.visit'],

    [/johnson.tbi-va.visit1/, 'johnson.tbi-va.visit1'],
    [/pib_pilot_mri/,        'johnson.pibmripilot.visit1.uwmr'],
    [/wrap140/,              'johnson.wrap140.visit1'],
    [/cms.uwmr/,             'johnson.cms.visit1.uwmr'],
    [/cms.wais/,             'johnson.cms.visit1.wais'],
    [/esprit.9month/,        'carlsson.esprit.visit2.9month'],
    [/esprit.baseline/,      'carlsson.esprit.visit1.baseline'],
    [/gallagher_pd/,         'gallagher.pd.visit1'],
    [/pc_4000/,              'johnson.pc4000.visit1'],
    [/ries.aware.visit1/,    'ries.aware.visit1']
  ].freeze

  # The absolute path of the visit directory, as a string.
  attr_reader :visit_directory
  # An array of :RawImageDataset objects acquired during this visit.
  attr_reader :datasets
  # Timestamp for this visit, obtained from the earliest :RawImageDataset
  attr_reader :timestamp
  # RMR number for this visit.
  attr_reader :rmr_number
  # scan_procedure name
  attr_reader :scan_procedure_name
  # The open database handle while #db_insert! is running, nil otherwise.
  attr_accessor :db

  # A new Visit instance needs to know the path to its raw data and scan_procedure name. The scan_procedure
  # name must match a name in the database, if not a new scan_procedure entry will be inserted.
  # When no name is given it is derived from the directory path.
  #
  # Raises IOError if the directory does not exist.
  def initialize(directory, scan_procedure_name=nil)
    expanded = File.expand_path(directory)
    raise(IOError, "Visit directory not found: #{directory}") unless File.exist?(expanded)
    @visit_directory = expanded
    @working_directory = Dir.tmpdir
    @datasets = []
    @timestamp = nil
    @rmr_number = nil
    @scan_procedure_name = scan_procedure_name.nil? ? get_scan_procedure_based_on_raw_directory : scan_procedure_name
    @db = nil
  end

  # Recursively walks the filesystem inside the visit directory. At each subdirectory, any and all
  # pfiles are scanned and imported in addition to one and only one dicom file. After scanning
  # @datasets will hold an array of RawImageDataset instances.
  #
  # Raises IOError if no dataset was found, or if no valid rmr is found in the datasets;
  # be prepared to catch it.
  def scan
    flash "Scanning visit raw data directory #{@visit_directory}"
    Pathname.new(@visit_directory).each_subdirectory do |subdir|
      subdir.each_pfile { |pfile| @datasets << import_dataset(pfile, subdir) }
      subdir.first_dicom { |dicom| @datasets << import_dataset(dicom, subdir) }
    end
    @timestamp = get_visit_timestamp
    @rmr_number = get_rmr_number
    flash "Completed scanning #{@visit_directory}"
  end

  # Inserts each dataset in this visit into the specified database. The specifics
  # of the database insert are handled by the #RawImageDataset class.
  #
  # Errors raised while inserting are printed to standard output and not re-raised.
  # The database handle is always closed and @db reset to nil afterwards.
  def db_insert!(db_file)
    @db = SQLite3::Database.new(db_file)
    @db.results_as_hash = true
    @db.type_translation = true

    begin
      # checks scan_procedure in db, inserts if necessary, returns id
      scan_procedure_id = fetch_or_insert_scan_procedure

      # insert or update visit as needed
      if visit_is_new?
        visit_id = insert_new_visit(scan_procedure_id)
      else
        visit_id = get_existing_visit_id
        update_existing_visit(visit_id, scan_procedure_id)
      end

      # insert each dataset from the visit, also insert an entry in series descriptions table if necessary.
      @datasets.each do |dataset|
        update_series_descriptions_table(dataset.series_description)
        if dataset_is_new?(dataset)
          insert_new_dataset(dataset, visit_id)
        else
          update_existing_dataset(dataset, get_existing_dataset_id(dataset))
        end
      end
    rescue StandardError => e
      # StandardError rather than Exception, so that interrupts and exit requests
      # are not swallowed along with database errors.
      puts e.message
    ensure
      @db.close
      @db = nil
    end
  end

  private

  # Escapes a value for use inside a single-quoted SQL string literal by doubling
  # any embedded single quotes.
  def sql_quote(value)
    value.to_s.gsub("'", "''")
  end

  def get_existing_dataset_id(ds)
    @db.execute(ds.db_fetch).first['id']
  end

  def update_existing_dataset(ds, ds_id)
    @db.execute(ds.db_update(ds_id))
  end

  def insert_new_dataset(ds, v_id)
    @db.execute(ds.db_insert(v_id))
  end

  def dataset_is_new?(ds)
    @db.execute(ds.db_fetch).empty?
  end

  def visit_is_new?
    @db.execute(sql_fetch_visit_matches).empty?
  end

  # Inserts the series description unless an identical one is already stored.
  def update_series_descriptions_table(sd)
    return unless @db.execute(sql_fetch_series_description(sd)).empty?
    @db.execute(sql_insert_series_description(sd))
  end

  # Inserts this visit and returns the id of the new row.
  def insert_new_visit(p_id)
    @db.execute(sql_insert_visit(p_id))
    @db.last_insert_row_id
  end

  def get_existing_visit_id
    @db.execute(sql_fetch_visit_matches).first['id']
  end

  def update_existing_visit(v_id, p_id)
    @db.execute(sql_update_visit(v_id, p_id))
  end

  # If the scan_procedure already exists in the db its id is returned, if not a new
  # one is inserted and the id of the new row is returned.
  def fetch_or_insert_scan_procedure
    matches = @db.execute(sql_fetch_scan_procedure_name)
    return matches.first['id'] unless matches.empty?
    @db.execute(sql_insert_scan_procedure)
    @db.last_insert_row_id
  end

  # Builds the UPDATE statement for an existing visit. p_id is accepted but the
  # scan_procedure_id column is deliberately left untouched, as it was before:
  #   scan_procedure_id = '#{p_id.to_s}',
  def sql_update_visit(v_id, p_id)
    "UPDATE visits SET
    date = '#{sql_quote(@timestamp)}',
    rmr = '#{sql_quote(@rmr_number)}',
    path = '#{sql_quote(@visit_directory)}'
    WHERE id = '#{sql_quote(v_id)}'"
  end

  def sql_insert_scan_procedure
    "INSERT INTO scan_procedures (codename) VALUES ('#{sql_quote(@scan_procedure_name)}')"
  end

  def sql_insert_series_description(sd)
    "INSERT INTO series_descriptions (long_description) VALUES ('#{sql_quote(sd)}')"
  end

  def sql_fetch_visit_matches
    "SELECT id FROM visits WHERE rmr == '#{sql_quote(@rmr_number)}'"
  end

  def sql_fetch_scan_procedure_name
    "SELECT * FROM scan_procedures WHERE codename = '#{sql_quote(@scan_procedure_name)}'"
  end

  def sql_fetch_series_description(sd)
    "SELECT * FROM series_descriptions WHERE long_description = '#{sql_quote(sd)}'"
  end

  def sql_fetch_dataset_matches(ds)
    "SELECT * FROM image_datasets WHERE rmr = '#{sql_quote(ds.rmr_number)}' AND path = '#{sql_quote(ds.directory)}' AND timestamp = '#{sql_quote(ds.timestamp)}'"
  end

  # generates an sql insert statement to insert this visit with a given scan procedure id
  def sql_insert_visit(scan_procedure_id=0)
    # Same "YYYY-MM-DDTHH:MM:SS+HH:MM" layout that DateTime#to_s produces, built from
    # Time so that the 'date' library does not have to be loaded. Computed once so
    # created_at and updated_at are identical.
    now = Time.now.strftime('%Y-%m-%dT%H:%M:%S%:z')
    "INSERT INTO visits
    (date, scan_procedure_id, scan_number, initials, rmr, radiology_outcome, notes, transfer_mri, transfer_pet,
    transfer_behavioral_log, check_imaging, check_np, check_MR5_DVD, burn_DICOM_DVD, first_score, second_score,
    enter_info_in_db, conference, compile_folder, dicom_dvd, user_id, path, created_at, updated_at)
    VALUES
    ('#{sql_quote(@timestamp)}', '#{sql_quote(scan_procedure_id)}', '', '', '#{sql_quote(@rmr_number)}', 'no', '', 'no', 'no',
    'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', NULL, '#{sql_quote(@visit_directory)}', '#{now}', '#{now}')"
  end

  # Wraps a single raw file in a RawImageDataset that is rooted at the directory the
  # file was originally found in.
  def import_dataset(rawfile, original_parent_directory)
    puts "Importing scan session: #{original_parent_directory.to_s} using raw data file: #{rawfile.basename}"
    RawImageDataset.new(original_parent_directory.to_s, [RawImageFile.new(rawfile.to_s)])
  end

  # Returns the earliest timestamp among the scanned datasets.
  # Raises IOError if there are no datasets.
  def get_visit_timestamp
    raise(IOError, "No datasets were found for this visit") if @datasets.empty?
    @datasets.min_by { |ds| ds.timestamp }.timestamp
  end

  # retrieves a valid rmr number from the visit's collection of datasets. Some datasets out there
  # have "rmr not found" set in the rmr_number attribute because their header info is incomplete.
  # Raises IOError if no valid rmr is found
  def get_rmr_number
    valid = @datasets.find { |ds| ds.rmr_number != "rmr not found" }
    raise(IOError, "No valid RMR number was found for this visit") if valid.nil?
    valid.rmr_number
  end

  # Derives the scan procedure name from the visit directory path, falling back to
  # 'unknown.scan_procedure' when no known pattern matches.
  def get_scan_procedure_based_on_raw_directory
    match = SCAN_PROCEDURE_PATTERNS.find { |pattern, _name| pattern =~ @visit_directory }
    match ? match.last : 'unknown.scan_procedure'
  end

end
291
+
292
+
293
+
294
+
295
+
296
# Extensions to Pathname used while walking a visit's raw data directory.
class Pathname
  # Default minimum size, in bytes, a pfile must have to be yielded by #each_pfile.
  MIN_PFILE_SIZE = 10_000_000

  # Recursively yields every subdirectory below this path, children before their
  # parent. Entries whose name starts with "." and symlinked directories are skipped.
  def each_subdirectory(&block)
    each_entry do |leaf|
      next if leaf.to_s =~ /^\./
      branch = self + leaf
      next unless branch.directory?
      next if branch.symlink?
      branch.each_subdirectory(&block)
      yield branch
    end
  end

  # Yields a temporary local copy of every pfile directly inside this directory whose
  # size is at least min_file_size bytes. A pfile is any regular, non-symlinked file
  # whose name starts with "P" and contains ".7" (so ".7.bz2" names match as well).
  #
  # The copy is removed once the block returns. An error raised by the block is
  # reported on standard error and the walk carries on with the next file.
  def each_pfile(min_file_size = MIN_PFILE_SIZE)
    entries.each do |leaf|
      next unless leaf.to_s =~ /^P.*\.7/
      branch = self + leaf
      next if branch.symlink?
      next unless branch.file?
      next unless branch.size >= min_file_size
      with_local_copy(branch) { |copy| yield copy }
    end
  end

  # Yields a temporary local copy of one dicom file found directly inside this
  # directory, if any: the first regular file, in name order, whose name starts
  # with "I.", or ends in ".dcm" or in ".0" followed by digits (each optionally
  # followed by ".bz2").
  #
  # The copy is removed once the block returns. An error raised by the block is
  # reported on standard error; no further dicom is tried in that case.
  def first_dicom
    entries.sort.each do |leaf|
      next unless leaf.to_s =~ /^I\.|\.dcm(\.bz2)?$|\.0[0-9]+(\.bz2)?$/
      branch = self + leaf
      next unless branch.file? # a directory with a dicom-like name cannot be copied
      with_local_copy(branch) { |copy| yield copy }
      return
    end
  end

  # Copies this file into the system temporary directory and returns the Pathname of
  # the copy. Files ending in ".bz2" are decompressed with bunzip2 and lose the
  # ".bz2" suffix. An existing file of the same name in the temporary directory is
  # overwritten.
  #
  # Raises Errno::ENOENT if bunzip2 cannot be run. A non-zero exit status from
  # bunzip2 is not treated as an error here.
  def local_copy
    compressed = to_s =~ /\.bz2$/
    name = compressed ? basename.to_s.chomp(".bz2") : basename.to_s
    tmpfile = File.join(Dir.tmpdir, name)
    if compressed
      # Arguments are passed as a list, so no shell is involved and paths with spaces
      # or shell metacharacters are safe; :out truncates any stale temporary file.
      ran = system('bunzip2', '-k', '-c', to_s, :out => tmpfile)
      raise(Errno::ENOENT, 'bunzip2') if ran.nil?
    else
      FileUtils.cp(to_s, tmpfile)
    end
    Pathname.new(tmpfile)
  end

  private

  # Makes a local copy of branch, yields it, and always removes the copy afterwards.
  # Errors raised by the block are reported with warn instead of being propagated.
  def with_local_copy(branch)
    copy = branch.local_copy
    begin
      yield copy
    rescue StandardError => e
      warn "Skipping #{branch}: #{e.message}"
    ensure
      # The block may already have moved or removed the copy.
      copy.delete if copy.exist?
    end
  end

end
@@ -0,0 +1,46 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
5
+
6
+ require 'test/unit'
7
+ require 'raw_image_dataset'
8
+ require 'raw_image_file'
9
+
10
# Exercises RawImageDataset against the raw image fixtures: header-derived
# attributes, the generated SQL, and insertion into / removal from the
# development fixture database.
class RawImageDatasetTest < Test::Unit::TestCase
  DBFILE = 'fixtures/development.sqlite3'

  # Builds one dataset out of the four fixture files (two dicoms, two pfiles).
  def setup
    fixture_paths = [
      'fixtures/I.001',
      'fixtures/P27648.7',
      'fixtures/P59392.7',
      'fixtures/S4_EFGRE3D.0001'
    ]
    @raw_files = fixture_paths.map { |path| RawImageFile.new(path) }
    @dset = RawImageDataset.new('/Data/home/kris/NetBeansProjects/ImageData/test/fixtures', @raw_files)
  end

  # The dataset exposes its files and the attributes derived from them.
  def test_raw_image_files
    assert_equal(4, @dset.raw_image_files.length)
    assert_equal('"I.*"', @dset.glob)
    assert_equal("SAG T2 W FSE 1.7 skip 0.3", @dset.series_description)
    assert_equal("ALZMRI002", @dset.rmr_number)
    assert_equal("2003-01-31T04:39:04+00:00", @dset.timestamp.to_s)
    assert_equal("ALZMRI002::2003-01-31T04:39:04+00:00", @dset.dataset_key)
    assert_equal("DELETE FROM image_datasets WHERE dataset_key = 'ALZMRI002::2003-01-31T04:39:04+00:00'", @dset.db_remove)
  end

  # Inserting the same dataset twice is expected to raise IndexError.
  def test_db_insertion
    assert_raise(IndexError) do
      2.times { @dset.db_insert!(DBFILE) }
    end
  end

  def test_raw_image_insertion
    @dset.db_insert_raw_images!(DBFILE)
  end

  # Removes whatever a test inserted, raw images first and then the dataset.
  def teardown
    @dset.db_remove_raw_images!(DBFILE)
    @dset.db_remove!(DBFILE)
  end
end
@@ -0,0 +1,135 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
5
+
6
+ require 'test/unit'
7
+ require 'raw_image_file'
8
+
9
# Exercises RawImageFile against four fixture files: a GE dicom and a dicom read
# with dicom_hdr, plus an early and a late GE pfile. Covers construction, file type
# predicates, header-derived values, and insertion into / removal from the
# development fixture database.
class RawImageFileTest < Test::Unit::TestCase
  # Fixture database used by the insert and teardown steps.
  DBFILE = 'fixtures/development.sqlite3'
  # Tolerance for header values that are floats; exact equality on floats is brittle.
  FLOAT_DELTA = 1e-6

  def setup
    @GEDicom = 'fixtures/I.001'
    @DiDicom = 'fixtures/S4_EFGRE3D.0001'
    @EarlyGEPfile = 'fixtures/P59392.7'
    @LateGEPfile = 'fixtures/P27648.7'
    @notafile = 'fixtures/XXX.XXX'
    @ged = RawImageFile.new(@GEDicom)
    @did = RawImageFile.new(@DiDicom)
    @egep = RawImageFile.new(@EarlyGEPfile)
    @lgep = RawImageFile.new(@LateGEPfile)
  end

  def test_gehdr_dicom_init
    assert_nothing_raised do
      RawImageFile.new(@GEDicom)
    end
  end

  def test_dicomhdr_dicom_init
    assert_nothing_raised do
      RawImageFile.new(@DiDicom)
    end
  end

  def test_early_gehdr_pfile_init
    assert_nothing_raised do
      RawImageFile.new(@EarlyGEPfile)
    end
  end

  def test_late_gehdr_pfile_init
    assert_nothing_raised do
      RawImageFile.new(@LateGEPfile)
    end
  end

  # A path that does not exist is expected to raise IOError.
  def test_nonfile_init
    assert_raise IOError do
      RawImageFile.new(@notafile)
    end
  end

  def test_pfile?
    assert !@ged.pfile?
    assert !@did.pfile?
    assert @egep.pfile?
    assert @lgep.pfile?
  end

  def test_dicom?
    assert @ged.dicom?
    assert @did.dicom?
    assert !@egep.dicom?
    assert !@lgep.dicom?
  end

  def test_gehdr_dicom_values
    assert_equal "I.001", @ged.filename
    assert_equal "rdgehdr", @ged.hdr_reader
    assert_equal "dicom", @ged.file_type
    assert_equal "2003-01-31T05:02:54+00:00", @ged.timestamp.to_s
    assert_equal "Andys3T", @ged.source
    assert_equal "ALZMRI002", @ged.rmr_number
    assert_in_delta 1.7, @ged.slice_thickness, FLOAT_DELTA
    assert_in_delta 0.3, @ged.slice_spacing, FLOAT_DELTA
    assert_in_delta 240.0, @ged.reconstruction_diameter, FLOAT_DELTA
    assert_equal 256, @ged.acquisition_matrix_x
    assert_equal 256, @ged.acquisition_matrix_y
    assert_in_delta 9.0, @ged.rep_time, FLOAT_DELTA
    assert_equal 2, @ged.bold_reps
  end

  def test_dicomhdr_dicom_values
    assert_equal "S4_EFGRE3D.0001", @did.filename
    assert_equal "dicom_hdr", @did.hdr_reader
    assert_equal "dicom", @did.file_type
    assert_equal "2006-11-16T10:59:23+00:00", @did.timestamp.to_s
    assert_equal "Andys3T", @did.source
    assert_equal "RMRRF2267", @did.rmr_number
    assert_in_delta 1.2, @did.slice_thickness, FLOAT_DELTA
    assert_in_delta 1.2, @did.slice_spacing, FLOAT_DELTA
    assert_in_delta 240.0, @did.reconstruction_diameter, FLOAT_DELTA
    assert_equal 256, @did.acquisition_matrix_x
    assert_equal 256, @did.acquisition_matrix_y
    assert_in_delta 8.364, @did.rep_time, FLOAT_DELTA
    assert_equal 0, @did.bold_reps
  end

  def test_early_pfile_values
    assert_equal "P59392.7", @egep.filename
    assert_equal "rdgehdr", @egep.hdr_reader
    assert_equal "pfile", @egep.file_type
    assert_equal "2003-01-31T04:39:04+00:00", @egep.timestamp.to_s
    assert_equal "Andys3T", @egep.source
    assert_equal "ALZMRI002", @egep.rmr_number
    assert_in_delta 4.0, @egep.slice_thickness, FLOAT_DELTA
    assert_in_delta 1.0, @egep.slice_spacing, FLOAT_DELTA
    assert_in_delta 240.0, @egep.reconstruction_diameter, FLOAT_DELTA
    assert_equal 64, @egep.acquisition_matrix_x
    assert_equal 64, @egep.acquisition_matrix_y
    assert_in_delta 1.999996, @egep.rep_time, FLOAT_DELTA
    assert_equal 124, @egep.bold_reps
  end

  def test_late_pfile_values
    assert_equal "P27648.7", @lgep.filename
    assert_equal "rdgehdr", @lgep.hdr_reader
    assert_equal "pfile", @lgep.file_type
    assert_equal "2006-11-16T04:35:02+00:00", @lgep.timestamp.to_s
    assert_equal "Andys3T", @lgep.source
    assert_equal "RMRRF2267", @lgep.rmr_number
    assert_in_delta 4.0, @lgep.slice_thickness, FLOAT_DELTA
    assert_in_delta 1.0, @lgep.slice_spacing, FLOAT_DELTA
    assert_in_delta 240.0, @lgep.reconstruction_diameter, FLOAT_DELTA
    assert_equal 64, @lgep.acquisition_matrix_x
    assert_equal 64, @lgep.acquisition_matrix_y
    assert_in_delta 2.000010, @lgep.rep_time, FLOAT_DELTA
    assert_equal 124, @lgep.bold_reps
  end

  # Inserts all four fixture files; teardown removes them again.
  def test_db_insert
    @ged.db_insert!(DBFILE)
    @did.db_insert!(DBFILE)
    @egep.db_insert!(DBFILE)
    @lgep.db_insert!(DBFILE)
  end

  # Runs after every test, whether or not that test inserted anything.
  def teardown
    @ged.db_remove!(DBFILE)
    @did.db_remove!(DBFILE)
    @egep.db_remove!(DBFILE)
    @lgep.db_remove!(DBFILE)
  end
end