brainmap-ImageData 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,356 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ require 'tempfile'
4
+ require 'yaml'
5
+ require 'tmpdir'
6
+ require 'fileutils'
7
+ require 'raw_image_file'
8
+ require 'raw_image_dataset'
9
+ require 'sqlite3'
10
+
11
+
12
# Prints +msg+ and the current time to standard output, each on its own
# tab-indented line, framed above and below by a 120-character rule of "+"
# signs and a blank line.
def flash(msg)
  rule = "+" * 120

  puts
  puts rule
  [msg, Time.now].each { |line| printf("\t%s\n", line) }
  puts rule
  puts
end
21
+
22
+
23
+
24
=begin rdoc
Encapsulates a directory of data acquired during one participant visit. These
are the raw data directories that are transferred directly from the scanners and
archived in the raw data section of the vtrak filesystem. After initializing, the
visit can be scanned to extract metadata for all of the images acquired during the
visit. The scanning is done in a fairly naive manner: the visit directory is recursively
walked and in each subdirectory any and all pfiles will be imported in addition to one single
dicom if any exist. Thus, only a single dicom file among many in a scan session is used to
retrieve information. Checking the individual files for data integrity must be handled
elsewhere if at all.
=end
class VisitRawDataDirectory
  # Ordered [pattern, scan_procedure name] pairs used to guess the scan
  # procedure from the visit directory path. The FIRST matching pattern wins,
  # so the specific "_2"/"_3"/visit1 patterns must stay ahead of the generic
  # catch-all for the same study.
  SCAN_PROCEDURE_PATTERNS = [
    [/alz_2000.*_2$/,         'johnson.alz.visit2'],
    [/alz_2000.*_3$/,         'johnson.alz.visit3'],
    [/alz_2000.alz...$/,      'johnson.alz.visit1'],
    [/alz_2000/,              'johnson.alz.unk.visit'],

    [/tbi_1000.*_2$/,         'johnson.tbi-1000.visit2'],
    [/tbi_1000.*_3$/,         'johnson.tbi-1000.visit3'],
    [/tbi_1000.tbi...$/,      'johnson.tbi-1000.visit1'],
    [/tbi_1000/,              'johnson.tbi-1000.unk.visit'],

    [/tbi_aware.*_2$/,        'johnson.tbi-aware.visit2'],
    [/tbi_aware.*_3$/,        'johnson.tbi-aware.visit3'],
    [/tbi_aware.tbi...$/,     'johnson.tbi-aware.visit1'],
    [/tbi_aware/,             'johnson.tbi-aware.unk.visit'],

    [/johnson.tbi-va.visit1/, 'johnson.tbi-va.visit1'],
    [/pib_pilot_mri/,         'johnson.pibmripilot.visit1.uwmr'],
    [/wrap140/,               'johnson.wrap140.visit1'],
    [/cms.uwmr/,              'johnson.cms.visit1.uwmr'],
    [/cms.wais/,              'johnson.cms.visit1.wais'],
    [/esprit.9month/,         'carlsson.esprit.visit2.9month'],
    [/esprit.baseline/,       'carlsson.esprit.visit1.baseline'],
    [/gallagher_pd/,          'gallagher.pd.visit1'],
    [/pc_4000/,               'johnson.pc4000.visit1'],
    [/ries.aware.visit1/,     'ries.aware.visit1']
  ].freeze

  # Name returned when no entry of SCAN_PROCEDURE_PATTERNS matches.
  UNKNOWN_SCAN_PROCEDURE = 'unknown.scan_procedure'

  # The absolute path of the visit directory, as a string.
  attr_reader :visit_directory
  # An array of :RawImageDataset objects acquired during this visit.
  attr_reader :datasets
  # Timestamp for this visit: the earliest timestamp among the datasets
  # (nil until #scan has found at least one dataset).
  attr_reader :timestamp
  # RMR number for this visit.
  attr_reader :rmr_number
  # scan_procedure name
  attr_reader :scan_procedure_name
  # The open database handle while #db_insert! is running, nil otherwise.
  attr_accessor :db

  # A new Visit instance needs to know the path to its raw data and scan_procedure name. The scan_procedure
  # name must match a name in the database, if not a new scan_procedure entry will be inserted.
  # When +scan_procedure_name+ is nil the name is guessed from the directory path.
  #
  # Raises IOError if +directory+ does not exist.
  def initialize(directory, scan_procedure_name=nil)
    expanded = File.expand_path(directory)
    raise(IOError, "Visit directory not found: #{directory}") unless File.exist?(expanded)
    @visit_directory = expanded
    @working_directory = Dir.tmpdir
    @datasets = Array.new
    @timestamp = nil
    @rmr_number = nil
    @scan_procedure_name = scan_procedure_name.nil? ? get_scan_procedure_based_on_raw_directory : scan_procedure_name
    @db = nil
  end

  # Recursively walks the filesystem inside the visit directory. At each subdirectory, any and all
  # pfiles are scanned and imported in addition to one and only one dicom file. After scanning
  # @datasets will hold an array of ImageDataset instances. Setting the rmr here can raise an
  # exception (IOError) if no valid rmr is found in the datasets, be prepared to catch it.
  def scan
    flash "Scanning visit raw data directory #{@visit_directory}"
    Pathname.new(@visit_directory).each_subdirectory do |subdir|
      subdir.each_pfile { |pfile| @datasets << import_dataset(pfile, subdir) }
      subdir.first_dicom { |dicom| @datasets << import_dataset(dicom, subdir) }
    end
    @timestamp = get_visit_timestamp
    @rmr_number = get_rmr_number
    flash "Completed scanning #{@visit_directory}"
  end

  # Inserts each dataset in this visit into the specified database. The specifics
  # of the database insert are handled by the #RawImageDataset class.
  #
  # This is a best-effort operation: any StandardError raised while talking to
  # the database is reported on standard output and NOT re-raised. Signals and
  # exit requests (Interrupt, SystemExit, ...) are allowed to propagate. The
  # database handle is always closed and @db reset to nil.
  def db_insert!(db_file)
    @db = SQLite3::Database.new(db_file)
    @db.results_as_hash = true
    @db.type_translation = true

    begin
      # checks scan_procedure in db, inserts if necessary, returns id
      scan_procedure_id = fetch_or_insert_scan_procedure

      # insert or update visit as needed
      visit_id = find_existing_visit_id
      if visit_id.nil? # this is a new visit
        visit_id = insert_new_visit(scan_procedure_id)
      else # visit already exists in DB
        update_existing_visit(visit_id, scan_procedure_id)
      end

      # insert each dataset from the visit, also insert an entry in series descriptions table if necessary.
      @datasets.each do |dataset|
        update_series_descriptions_table(dataset.series_description)
        dataset_id = find_existing_dataset_id(dataset)
        if dataset_id.nil?
          insert_new_dataset(dataset, visit_id)
        else # dataset is already in DB
          update_existing_dataset(dataset, dataset_id)
        end
      end
    rescue StandardError => e
      puts e.message
    ensure
      @db.close
      @db = nil
    end
  end

  private

  # Returns the id of the row matching +ds+, or nil when the dataset is not in
  # the database yet. The SQL text itself is produced by the dataset object
  # (ds.db_fetch) and is executed as-is.
  def find_existing_dataset_id(ds)
    rows = @db.execute(ds.db_fetch)
    rows.empty? ? nil : rows.first['id']
  end

  # Runs the dataset-provided UPDATE statement for the row +ds_id+.
  def update_existing_dataset(ds, ds_id)
    @db.execute(ds.db_update(ds_id))
  end

  # Runs the dataset-provided INSERT statement, attaching it to visit +v_id+.
  def insert_new_dataset(ds, v_id)
    @db.execute(ds.db_insert(v_id))
  end

  # Adds +sd+ to the series_descriptions table unless an identical
  # long_description is already present.
  def update_series_descriptions_table(sd)
    # to_s mirrors what string interpolation did before binds were used (nil => '').
    description = sd.to_s
    if @db.execute(sql_fetch_series_description, [description]).empty?
      @db.execute(sql_insert_series_description, [description])
    end
  end

  # Returns the id of the visits row with this visit's rmr, or nil if none.
  def find_existing_visit_id
    rows = @db.execute(sql_fetch_visit_matches, [@rmr_number.to_s])
    rows.empty? ? nil : rows.first['id']
  end

  # Inserts a new visits row linked to scan procedure +p_id+ and returns its id.
  def insert_new_visit(p_id)
    now = DateTime.now.to_s
    @db.execute(sql_insert_visit,
                [@timestamp.to_s, p_id.to_s, @rmr_number.to_s, @visit_directory, now, now])
    @db.last_insert_row_id
  end

  # Refreshes date, rmr and path of the existing visits row +v_id+.
  # +p_id+ is accepted but intentionally unused: the scan_procedure_id of an
  # existing visit is left untouched.
  def update_existing_visit(v_id, p_id)
    @db.execute(sql_update_visit, [@timestamp.to_s, @rmr_number.to_s, @visit_directory, v_id])
  end

  # if the scan_procedure already exists in db use it, if not insert a new one.
  # Returns the scan procedure's id either way.
  def fetch_or_insert_scan_procedure
    name = @scan_procedure_name.to_s
    matches = @db.execute(sql_fetch_scan_procedure_name, [name])
    return matches.first['id'] unless matches.empty?

    @db.execute(sql_insert_scan_procedure, [name])
    @db.last_insert_row_id
  end

  # --- SQL text -------------------------------------------------------------
  # Every statement below uses "?" placeholders; the values are supplied as
  # bind parameters at the call site so that quotes inside an rmr, path or
  # series description cannot break or alter the statement.

  # binds: date, rmr, path, id
  def sql_update_visit
    "UPDATE visits SET
          date = ?,
          rmr = ?,
          path = ?
        WHERE id = ?"
  end

  # binds: codename
  def sql_insert_scan_procedure
    "INSERT INTO scan_procedures (codename) VALUES (?)"
  end

  # binds: long_description
  def sql_insert_series_description
    "INSERT INTO series_descriptions (long_description) VALUES (?)"
  end

  # binds: rmr
  def sql_fetch_visit_matches
    "SELECT id FROM visits WHERE rmr == ?"
  end

  # binds: codename
  def sql_fetch_scan_procedure_name
    "SELECT * FROM scan_procedures WHERE codename = ?"
  end

  # binds: long_description
  def sql_fetch_series_description
    "SELECT * FROM series_descriptions WHERE long_description = ?"
  end

  # binds: rmr, path, timestamp. Not called from within this class.
  def sql_fetch_dataset_matches
    "SELECT * FROM image_datasets WHERE rmr = ? AND path = ? AND timestamp = ?"
  end

  # INSERT statement for a new visit.
  # binds: date, scan_procedure_id, rmr, path, created_at, updated_at
  def sql_insert_visit
    "INSERT INTO visits
        (date, scan_procedure_id, scan_number, initials, rmr, radiology_outcome, notes, transfer_mri, transfer_pet,
        transfer_behavioral_log, check_imaging, check_np, check_MR5_DVD, burn_DICOM_DVD, first_score, second_score,
        enter_info_in_db, conference, compile_folder, dicom_dvd, user_id, path, created_at, updated_at)
        VALUES
        (?, ?, '', '', ?, 'no', '', 'no', 'no',
        'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'no', NULL, ?, ?, ?)"
  end

  # Builds a RawImageDataset for +original_parent_directory+ from the single
  # raw file +rawfile+ (a local copy handed over by the Pathname iterators).
  def import_dataset(rawfile, original_parent_directory)
    puts "Importing scan session: #{original_parent_directory.to_s} using raw data file: #{rawfile.basename}"
    return RawImageDataset.new(original_parent_directory.to_s, [RawImageFile.new(rawfile.to_s)])
  end

  # Earliest timestamp among the scanned datasets, or nil when nothing was
  # found (so that #scan goes on to raise the documented IOError from
  # get_rmr_number instead of failing with NoMethodError on nil here).
  def get_visit_timestamp
    return nil if @datasets.empty?
    @datasets.map { |ds| ds.timestamp }.min
  end

  # retrieves a valid rmr number from the visit's collection of datasets. Some datasets out there
  # have "rmr not found" set in the rmr_number attribute because their header info is incomplete.
  # Raises IOError if no valid rmr is found
  def get_rmr_number
    @datasets.each do |ds|
      return ds.rmr_number unless ds.rmr_number == "rmr not found"
    end
    raise(IOError, "No valid RMR number was found for this visit")
  end

  # Guesses the scan procedure name from @visit_directory using
  # SCAN_PROCEDURE_PATTERNS; falls back to UNKNOWN_SCAN_PROCEDURE.
  def get_scan_procedure_based_on_raw_directory
    SCAN_PROCEDURE_PATTERNS.each do |pattern, name|
      return name if pattern =~ @visit_directory
    end
    UNKNOWN_SCAN_PROCEDURE
  end

end
291
+
292
+
293
+
294
+
295
+
296
require 'shellwords'

# Extensions to the standard Pathname class used while walking a visit's raw
# data directory: recursive subdirectory iteration, pfile / dicom discovery,
# and creation of (decompressed) working copies in the system temp directory.
class Pathname
  # Default minimum size, in bytes, for a file to be treated as a pfile by
  # #each_pfile.
  MIN_PFILE_SIZE = 10_000_000

  # Yields every directory below this one, depth first: the contents of a
  # directory are yielded before the directory itself. Entries whose name
  # starts with "." (including "." and "..") and symlinked directories are
  # skipped.
  def each_subdirectory
    each_entry do |leaf|
      next if leaf.to_s =~ /^\./
      branch = self + leaf
      next unless branch.directory?
      next if branch.symlink?
      branch.each_subdirectory { |subbranch| yield subbranch }
      yield branch
    end
  end

  # Yields a temporary local copy (see #local_copy) of each entry of this
  # directory whose name looks like a pfile and whose size is at least
  # +min_file_size+ bytes. Symlinks are skipped. The copy is deleted after the
  # block returns. A StandardError raised while copying or inside the block
  # is reported on standard error and the file is skipped.
  def each_pfile(min_file_size = MIN_PFILE_SIZE)
    entries.each do |leaf|
      # NOTE(review): this pattern is not anchored at the end, so any name
      # beginning with "P" and containing ".7" matches — confirm whether
      # /^P.*\.7(\.bz2)?$/ was intended.
      next unless leaf.to_s =~ /^P.*\.7|^P.*\.7\.bz2/
      branch = self + leaf
      next if branch.symlink?
      next unless branch.size >= min_file_size
      branch.with_local_copy { |lc| yield lc }
    end
  end

  # Yields a temporary local copy of the first regular file in this directory
  # whose name looks like a dicom ("I.*", "*.dcm", "*.0NNN", optionally
  # ".bz2"), then stops. At most one file is yielded, even if the block fails
  # for it. Directories that happen to match the name pattern are skipped
  # rather than being chosen as "the" dicom.
  def first_dicom
    entries.each do |leaf|
      next unless leaf.to_s =~ /^I\.|\.dcm(\.bz2)?$|\.0[0-9]+(\.bz2)?$/
      branch = self + leaf
      next unless branch.file?
      branch.with_local_copy { |lc| yield lc }
      return
    end
  end

  # Copies this file into Dir.tmpdir under its own basename and returns the
  # Pathname of the copy. A ".bz2" file is decompressed with bunzip2 and the
  # ".bz2" suffix dropped from the copy's name. An existing file of the same
  # name in the temp directory is overwritten.
  #
  # Raises IOError if bunzip2 fails (the partial output is removed).
  def local_copy
    source = to_s
    compressed = !(source =~ /\.bz2$/).nil?
    tfbase = compressed ? basename.to_s.chomp(".bz2") : basename.to_s
    tmpfile = File.join(Dir.tmpdir, tfbase)
    if compressed
      # Both paths are shell-escaped (names may contain spaces or shell
      # metacharacters) and ">" truncates, so a stale temp file left by an
      # earlier run cannot end up prepended to the decompressed data.
      ok = system("bunzip2 -k -c #{Shellwords.escape(source)} > #{Shellwords.escape(tmpfile)}")
      unless ok
        FileUtils.rm_f(tmpfile)
        raise IOError, "bunzip2 failed for #{source}"
      end
    else
      FileUtils.cp(source, tmpfile)
    end
    Pathname.new(tmpfile)
  end

  protected

  # Creates a local copy of this file, yields it, and always removes the copy
  # afterwards. Failures (in copying or in the block) are reported with
  # +warn+ and otherwise ignored so that one bad file does not abort a scan.
  def with_local_copy
    copy = nil
    begin
      copy = local_copy
      yield copy
    rescue StandardError => e
      warn "Skipping #{self}: #{e.class}: #{e.message}"
    ensure
      copy.delete if copy && copy.exist?
    end
  end

end
@@ -0,0 +1,46 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
5
+
6
+ require 'test/unit'
7
+ require 'raw_image_dataset'
8
+ require 'raw_image_file'
9
+
10
# Tests for RawImageDataset built from the four fixture image files.
class RawImageDatasetTest < Test::Unit::TestCase
  # Fixture locations are resolved relative to this test file so the suite
  # does not depend on the current working directory or on one developer's
  # home directory layout.
  FIXTURES = File.expand_path('fixtures', File.dirname(__FILE__))
  DBFILE = File.join(FIXTURES, 'development.sqlite3')

  # Builds one dataset from a GE dicom, two pfiles and a second dicom.
  def setup
    @aa = RawImageFile.new(File.join(FIXTURES, 'I.001'))
    @bb = RawImageFile.new(File.join(FIXTURES, 'P27648.7'))
    @cc = RawImageFile.new(File.join(FIXTURES, 'P59392.7'))
    @dd = RawImageFile.new(File.join(FIXTURES, 'S4_EFGRE3D.0001'))
    @dset = RawImageDataset.new(FIXTURES, [@aa, @bb, @cc, @dd])
  end

  # Checks the attributes the dataset derives from its raw image files.
  def test_raw_image_files
    assert_equal 4, @dset.raw_image_files.length
    assert_equal '"I.*"', @dset.glob
    assert_equal "SAG T2 W FSE 1.7 skip 0.3", @dset.series_description
    assert_equal "ALZMRI002", @dset.rmr_number
    assert_equal "2003-01-31T04:39:04+00:00", @dset.timestamp.to_s
    assert_equal "ALZMRI002::2003-01-31T04:39:04+00:00", @dset.dataset_key
    assert_equal "DELETE FROM image_datasets WHERE dataset_key = 'ALZMRI002::2003-01-31T04:39:04+00:00'", @dset.db_remove
  end

  # Inserting the same dataset twice is expected to raise IndexError.
  def test_db_insertion
    assert_raise IndexError do
      @dset.db_insert!(DBFILE)
      @dset.db_insert!(DBFILE)
    end
  end

  # Inserting the dataset's raw image rows should simply succeed.
  def test_raw_image_insertion
    assert_nothing_raised do
      @dset.db_insert_raw_images!(DBFILE)
    end
  end

  # Removes whatever the test inserted so each test starts from a clean DB.
  def teardown
    @dset.db_remove_raw_images!(DBFILE)
    @dset.db_remove!(DBFILE)
  end
end
@@ -0,0 +1,135 @@
1
+ # To change this template, choose Tools | Templates
2
+ # and open the template in the editor.
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__),'..','lib')
5
+
6
+ require 'test/unit'
7
+ require 'raw_image_file'
8
+
9
# Tests for RawImageFile: construction from each supported raw file flavour,
# the header values extracted from the fixtures, and database insert/remove.
class RawImageFileTest < Test::Unit::TestCase
  # Fixture locations are resolved relative to this test file so the suite
  # can be run from any working directory.
  FIXTURES = File.expand_path('fixtures', File.dirname(__FILE__))
  DBFILE = File.join(FIXTURES, 'development.sqlite3')
  # Floating point header values are compared within this tolerance rather
  # than with exact equality.
  FLOAT_TOLERANCE = 1e-6

  def setup
    @GEDicom = File.join(FIXTURES, 'I.001')
    @DiDicom = File.join(FIXTURES, 'S4_EFGRE3D.0001')
    @EarlyGEPfile = File.join(FIXTURES, 'P59392.7')
    @LateGEPfile = File.join(FIXTURES, 'P27648.7')
    @notafile = File.join(FIXTURES, 'XXX.XXX')
    @ged = RawImageFile.new(@GEDicom)
    @did = RawImageFile.new(@DiDicom)
    @egep = RawImageFile.new(@EarlyGEPfile)
    @lgep = RawImageFile.new(@LateGEPfile)
  end

  def test_gehdr_dicom_init
    assert_nothing_raised do
      RawImageFile.new(@GEDicom)
    end
  end

  def test_dicomhdr_dicom_init
    assert_nothing_raised do
      RawImageFile.new(@DiDicom)
    end
  end

  def test_early_gehdr_pfile_init
    assert_nothing_raised do
      RawImageFile.new(@EarlyGEPfile)
    end
  end

  def test_late_gehdr_pfile_init
    assert_nothing_raised do
      RawImageFile.new(@LateGEPfile)
    end
  end

  # A path that does not exist must be rejected with IOError.
  def test_nonfile_init
    assert_raise IOError do
      RawImageFile.new(@notafile)
    end
  end

  def test_pfile?
    assert !@ged.pfile?
    assert !@did.pfile?
    assert @egep.pfile?
    assert @lgep.pfile?
  end

  def test_dicom?
    assert @ged.dicom?
    assert @did.dicom?
    assert !@egep.dicom?
    assert !@lgep.dicom?
  end

  def test_gehdr_dicom_values
    assert_equal "I.001", @ged.filename
    assert_equal "rdgehdr", @ged.hdr_reader
    assert_equal "dicom", @ged.file_type
    assert_equal "2003-01-31T05:02:54+00:00", @ged.timestamp.to_s
    assert_equal "Andys3T", @ged.source
    assert_equal "ALZMRI002", @ged.rmr_number
    assert_in_delta 1.7, @ged.slice_thickness, FLOAT_TOLERANCE
    assert_in_delta 0.3, @ged.slice_spacing, FLOAT_TOLERANCE
    assert_in_delta 240.0, @ged.reconstruction_diameter, FLOAT_TOLERANCE
    assert_equal 256, @ged.acquisition_matrix_x
    assert_equal 256, @ged.acquisition_matrix_y
    assert_in_delta 9.0, @ged.rep_time, FLOAT_TOLERANCE
    assert_equal 2, @ged.bold_reps
  end

  def test_dicomhdr_dicom_values
    assert_equal "S4_EFGRE3D.0001", @did.filename
    assert_equal "dicom_hdr", @did.hdr_reader
    assert_equal "dicom", @did.file_type
    assert_equal "2006-11-16T10:59:23+00:00", @did.timestamp.to_s
    assert_equal "Andys3T", @did.source
    assert_equal "RMRRF2267", @did.rmr_number
    assert_in_delta 1.2, @did.slice_thickness, FLOAT_TOLERANCE
    assert_in_delta 1.2, @did.slice_spacing, FLOAT_TOLERANCE
    assert_in_delta 240.0, @did.reconstruction_diameter, FLOAT_TOLERANCE
    assert_equal 256, @did.acquisition_matrix_x
    assert_equal 256, @did.acquisition_matrix_y
    assert_in_delta 8.364, @did.rep_time, FLOAT_TOLERANCE
    assert_equal 0, @did.bold_reps
  end

  def test_early_pfile_values
    assert_equal "P59392.7", @egep.filename
    assert_equal "rdgehdr", @egep.hdr_reader
    assert_equal "pfile", @egep.file_type
    assert_equal "2003-01-31T04:39:04+00:00", @egep.timestamp.to_s
    assert_equal "Andys3T", @egep.source
    assert_equal "ALZMRI002", @egep.rmr_number
    assert_in_delta 4.0, @egep.slice_thickness, FLOAT_TOLERANCE
    assert_in_delta 1.0, @egep.slice_spacing, FLOAT_TOLERANCE
    assert_in_delta 240.0, @egep.reconstruction_diameter, FLOAT_TOLERANCE
    assert_equal 64, @egep.acquisition_matrix_x
    assert_equal 64, @egep.acquisition_matrix_y
    assert_in_delta 1.999996, @egep.rep_time, FLOAT_TOLERANCE
    assert_equal 124, @egep.bold_reps
  end

  def test_late_pfile_values
    assert_equal "P27648.7", @lgep.filename
    assert_equal "rdgehdr", @lgep.hdr_reader
    assert_equal "pfile", @lgep.file_type
    assert_equal "2006-11-16T04:35:02+00:00", @lgep.timestamp.to_s
    assert_equal "Andys3T", @lgep.source
    assert_equal "RMRRF2267", @lgep.rmr_number
    assert_in_delta 4.0, @lgep.slice_thickness, FLOAT_TOLERANCE
    assert_in_delta 1.0, @lgep.slice_spacing, FLOAT_TOLERANCE
    assert_in_delta 240.0, @lgep.reconstruction_diameter, FLOAT_TOLERANCE
    assert_equal 64, @lgep.acquisition_matrix_x
    assert_equal 64, @lgep.acquisition_matrix_y
    assert_in_delta 2.000010, @lgep.rep_time, FLOAT_TOLERANCE
    assert_equal 124, @lgep.bold_reps
  end

  # Every fixture file can be inserted into the development database.
  def test_db_insert
    [@ged, @did, @egep, @lgep].each { |image| image.db_insert!(DBFILE) }
  end

  # Removes the rows for all four fixture files after every test.
  def teardown
    [@ged, @did, @egep, @lgep].each { |image| image.db_remove!(DBFILE) }
  end
end