metamri 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,395 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ require 'tempfile'
4
+ require 'yaml'
5
+ require 'tmpdir'
6
+ require 'fileutils'
7
+ require 'raw_image_file'
8
+ require 'raw_image_dataset'
9
+ require 'sqlite3'
10
+
11
+
12
# Prints a banner to standard output: a blank line, a 120-character rule of
# "+" signs, the tab-indented message, the tab-indented current time, a second
# rule and a trailing blank line.
#
# msg - the object to announce; it is rendered with its string representation.
def flash(msg)
  rule = "+" * 120
  out = $stdout

  out << "\n" << rule << "\n"
  out << "\t#{msg}\n"
  out << "\t#{Time.now}\n"
  out << rule << "\n" << "\n"
  nil
end
21
+
22
+
23
+
24
+ =begin rdoc
25
+ Encapsulates a directory of data acquired during one participant visit. These
26
+ are the raw data directories that are transferred directly from the scanners and
27
+ archived in the raw data section of the vtrak filesystem. After initializing, the
28
+ visit can be scanned to extract metadata for all of the images acquired during the
29
+ visit. The scanning is done in a fairly naive manner: the visit directory is recursively
30
+ walked and in each subdirectory any and all pfiles will be imported in addition to one single
31
+ dicom if any exist. Thus, only a single dicom file among many in a scan session is used to
32
+ retrieve information. Checking the individual files for data integrity must be handled
33
+ elsewhere if at all.
34
+ =end
35
class VisitRawDataDirectory
  # Ordered table of [path pattern, scan procedure name] pairs used to infer a
  # scan procedure from the visit directory path. The first matching pattern
  # wins, so the more specific visit patterns must stay ahead of the catch-all
  # pattern for the same study.
  SCAN_PROCEDURE_PATTERNS = [
    [/alz_2000.*_2$/,        'johnson.alz.visit2'],
    [/alz_2000.*_3$/,        'johnson.alz.visit3'],
    [/alz_2000.alz...$/,     'johnson.alz.visit1'],
    [/alz_2000/,             'johnson.alz.unk.visit'],

    [/tbi_1000.*_2$/,        'johnson.tbi-1000.visit2'],
    [/tbi_1000.*_3$/,        'johnson.tbi-1000.visit3'],
    [/tbi_1000.tbi...$/,     'johnson.tbi-1000.visit1'],
    [/tbi_1000/,             'johnson.tbi-1000.unk.visit'],

    [/tbi_aware.*_2$/,       'johnson.tbi-aware.visit2'],
    [/tbi_aware.*_3$/,       'johnson.tbi-aware.visit3'],
    [/tbi_aware.tbi...$/,    'johnson.tbi-aware.visit1'],
    [/tbi_aware/,            'johnson.tbi-aware.unk.visit'],

    [/johnson.tbi-va.visit1/, 'johnson.tbi-va.visit1'],
    [/pib_pilot_mri/,        'johnson.pibmripilot.visit1.uwmr'],
    [/wrap140/,              'johnson.wrap140.visit1'],

    [/cms.uwmr/,             'johnson.cms.visit1.uwmr'],
    [/cms.wais/,             'johnson.cms.visit1.wais'],

    [/esprit.9month/,        'carlsson.esprit.visit2.9month'],
    [/esprit.baseline/,      'carlsson.esprit.visit1.baseline'],

    [/gallagher_pd/,         'gallagher.pd.visit1'],
    [/pc_4000/,              'johnson.pc4000.visit1'],
    [/ries.aware.visit1/,    'ries.aware.visit1']
  ].freeze

  # Name used when no entry of SCAN_PROCEDURE_PATTERNS matches the directory.
  UNKNOWN_SCAN_PROCEDURE = 'unknown.scan_procedure'

  # The absolute path of the visit directory, as a string.
  attr_reader :visit_directory
  # An array of :RawImageDataset objects acquired during this visit.
  attr_reader :datasets
  # Timestamp for this visit, obtained from the earliest :RawImageDataset
  attr_reader :timestamp
  # RMR number for this visit.
  attr_reader :rmr_number
  # scan_procedure name
  attr_reader :scan_procedure_name
  # scanner source
  attr_reader :scanner_source
  # The open database handle while db_insert! is running, nil otherwise.
  attr_accessor :db

  # A new Visit instance needs to know the path to its raw data and scan_procedure name. The scan_procedure
  # name must match a name in the database, if not a new scan_procedure entry will be inserted.
  # When no scan_procedure_name is given, one is inferred from the directory path.
  #
  # Raises IOError if the directory does not exist.
  def initialize(directory, scan_procedure_name=nil)
    expanded = File.expand_path(directory)
    raise(IOError, "Visit directory not found: #{directory}") unless File.exist?(expanded)
    @visit_directory = expanded
    @working_directory = Dir.tmpdir
    @datasets = []
    @timestamp = nil
    @rmr_number = nil
    # Set explicitly so the reader returns nil (not an unset ivar) before #scan.
    @scanner_source = nil
    @scan_procedure_name = scan_procedure_name.nil? ? get_scan_procedure_based_on_raw_directory : scan_procedure_name
    @db = nil
  end

  # Recursively walks the filesystem inside the visit directory. At each subdirectory, any and all
  # pfiles are scanned and imported in addition to one and only one dicom file. After scanning
  # @datasets will hold an array of RawImageDataset instances.
  #
  # Raises IndexError if a subdirectory fails to scan or if no dataset was found at all.
  # Raises IOError if no valid rmr or scanner source is found in the datasets, be prepared to catch it.
  def scan
    flash "Scanning visit raw data directory #{@visit_directory}"
    Pathname.new(@visit_directory).each_subdirectory do |subdir|
      begin
        subdir.each_pfile { |pfile| @datasets << import_dataset(pfile, subdir) }
        subdir.first_dicom { |dicom| @datasets << import_dataset(dicom, subdir) }
      rescue StandardError => e
        raise(IndexError, "There was an error scaning dataset #{subdir}: #{e}")
      end
    end

    if @datasets.empty?
      raise(IndexError, "No datasets could be scanned for directory #{@visit_directory}")
    end

    @timestamp = get_visit_timestamp
    @rmr_number = get_rmr_number
    @scanner_source = get_scanner_source
    flash "Completed scanning #{@visit_directory}"
  end

  # use this to initialize Visit objects in the rails app
  def attributes_for_active_record
    {
      :date => @timestamp.to_s,
      :rmr => @rmr_number,
      :path => @visit_directory,
      :scanner_source => get_scanner_source
    }
  end

  # Inserts each dataset in this visit into the specified database. The specifics
  # of the database insert are handled by the #RawImageDataset class.
  #
  # Errors raised while talking to the database are printed to standard output
  # and not re-raised; the database handle is always closed afterwards.
  def db_insert!(db_file)
    @db = SQLite3::Database.new(db_file)
    @db.results_as_hash = true
    @db.type_translation = true

    begin
      # checks scan_procedure in db, inserts if necessary, returns id
      scan_procedure_id = fetch_or_insert_scan_procedure

      # insert or update visit as needed
      if visit_is_new?
        visit_id = insert_new_visit(scan_procedure_id)
      else
        visit_id = get_existing_visit_id
        update_existing_visit(visit_id, scan_procedure_id)
      end

      # insert each dataset from the visit, also insert an entry in series descriptions table if necessary.
      @datasets.each do |dataset|
        update_series_descriptions_table(dataset.series_description)
        if dataset_is_new?(dataset)
          insert_new_dataset(dataset, visit_id)
        else
          update_existing_dataset(dataset, get_existing_dataset_id(dataset))
        end
      end
    rescue StandardError => e
      puts e.message
    ensure
      @db.close
      @db = nil
    end
  end

  private

  # Escapes a value for use inside a single-quoted SQL string literal by
  # doubling embedded single quotes. Values without quotes pass through
  # unchanged, so the generated SQL is identical for ordinary input.
  def sql_quote(value)
    value.to_s.gsub("'", "''")
  end

  # Returns the id of the row matched by the dataset's own fetch statement.
  def get_existing_dataset_id(ds)
    @db.execute(ds.db_fetch).first['id']
  end

  # Runs the dataset's own update statement against the row with id ds_id.
  def update_existing_dataset(ds, ds_id)
    @db.execute(ds.db_update(ds_id))
  end

  # Runs the dataset's own insert statement, attaching it to visit v_id.
  def insert_new_dataset(ds, v_id)
    @db.execute(ds.db_insert(v_id))
  end

  # True when the dataset's fetch statement returns no rows.
  def dataset_is_new?(ds)
    @db.execute(ds.db_fetch).empty?
  end

  # True when no visit row carries this visit's rmr number.
  def visit_is_new?
    @db.execute(sql_fetch_visit_matches).empty?
  end

  # Inserts the series description sd unless a row for it already exists.
  def update_series_descriptions_table(sd)
    return unless @db.execute(sql_fetch_series_description(sd)).empty?
    @db.execute(sql_insert_series_description(sd))
  end

  # Inserts this visit and returns the new row id.
  def insert_new_visit(p_id)
    # Build the statement once so the logged SQL is exactly what gets executed.
    sql = sql_insert_visit(p_id)
    puts sql
    @db.execute(sql)
    @db.last_insert_row_id
  end

  # Returns the id of the first visit row carrying this visit's rmr number.
  def get_existing_visit_id
    @db.execute(sql_fetch_visit_matches).first['id']
  end

  # Overwrites the visit row v_id with this visit's current attributes.
  def update_existing_visit(v_id, p_id)
    sql = sql_update_visit(v_id, p_id)
    puts sql
    @db.execute(sql)
  end

  # if the scan_procedure already exists in db use it, if not insert a new one.
  # Returns the scan_procedure id either way.
  def fetch_or_insert_scan_procedure
    matches = @db.execute(sql_fetch_scan_procedure_name)
    return matches.first['id'] unless matches.empty?

    @db.execute(sql_insert_scan_procedure)
    @db.last_insert_row_id
  end

  def sql_update_visit(v_id, p_id)
    "UPDATE visits SET
    date = '#{sql_quote(@timestamp)}',
    rmr = '#{sql_quote(@rmr_number)}',
    path = '#{sql_quote(@visit_directory)}',
    scan_procedure_id = '#{sql_quote(p_id)}',
    scanner_source = '#{sql_quote(@scanner_source)}'
    WHERE id = '#{sql_quote(v_id)}'"
  end

  def sql_insert_scan_procedure
    "INSERT INTO scan_procedures (codename) VALUES ('#{sql_quote(@scan_procedure_name)}')"
  end

  def sql_insert_series_description(sd)
    "INSERT INTO series_descriptions (long_description) VALUES ('#{sql_quote(sd)}')"
  end

  def sql_fetch_visit_matches
    "SELECT id FROM visits WHERE rmr == '#{sql_quote(@rmr_number)}'"
  end

  def sql_fetch_scan_procedure_name
    "SELECT * FROM scan_procedures WHERE codename = '#{sql_quote(@scan_procedure_name)}'"
  end

  def sql_fetch_series_description(sd)
    "SELECT * FROM series_descriptions WHERE long_description = '#{sql_quote(sd)}'"
  end

  def sql_fetch_dataset_matches(ds)
    "SELECT * FROM image_datasets WHERE rmr = '#{sql_quote(ds.rmr_number)}' AND path = '#{sql_quote(ds.directory)}' AND timestamp = '#{sql_quote(ds.timestamp)}'"
  end

  # generates an sql insert statement to insert this visit with a given scan procedure id
  def sql_insert_visit(scan_procedure_id=0)
    # One clock reading, so created_at and updated_at are guaranteed to agree.
    now = sql_quote(DateTime.now)
    "INSERT INTO visits
    (date, scan_procedure_id, scan_number, initials, rmr, radiology_outcome, notes, transfer_mri, transfer_pet,
    conference, compile_folder, dicom_dvd, user_id, path, scanner_source, created_at, updated_at)
    VALUES
    ('#{sql_quote(@timestamp)}', '#{sql_quote(scan_procedure_id)}', '', '', '#{sql_quote(@rmr_number)}', 'no', '', 'no', 'no',
    'no', 'no', 'no', NULL, '#{sql_quote(@visit_directory)}', '#{sql_quote(@scanner_source)}', '#{now}', '#{now}')"
  end

  # Builds a RawImageDataset for original_parent_directory from the single raw
  # file rawfile. Raises IOError when the raw file cannot be read.
  def import_dataset(rawfile, original_parent_directory)
    puts "Importing scan session: #{original_parent_directory.to_s} using raw data file: #{rawfile.basename}"

    begin
      rawimagefile = RawImageFile.new(rawfile.to_s)
    rescue StandardError => e
      raise(IOError, "Trouble reading raw image file #{rawfile}. #{e}")
    end

    RawImageDataset.new(original_parent_directory.to_s, [rawimagefile])
  end

  # Returns the earliest timestamp among the scanned datasets.
  def get_visit_timestamp
    @datasets.min_by { |ds| ds.timestamp }.timestamp
  end

  # retrieves a valid rmr number from the visit's collection of datasets. Some datasets out there
  # have "rmr not found" set in the rmr_number attribute because their header info is incomplete.
  # Raises IOError if no valid rmr is found
  def get_rmr_number
    valid = @datasets.find { |ds| ds.rmr_number != "rmr not found" }
    raise(IOError, "No valid RMR number was found for this visit") if valid.nil?
    valid.rmr_number
  end

  # retrieves a scanner source from the collection of datasets, raises IOError if none is found
  def get_scanner_source
    sourced = @datasets.find { |ds| !ds.scanner_source.nil? }
    raise(IOError, "No valid scanner source found for this visit") if sourced.nil?
    sourced.scanner_source
  end

  # Infers the scan procedure name from the visit directory path using the
  # first matching entry of SCAN_PROCEDURE_PATTERNS.
  def get_scan_procedure_based_on_raw_directory
    match = SCAN_PROCEDURE_PATTERNS.find { |pattern, _name| @visit_directory =~ pattern }
    match ? match.last : UNKNOWN_SCAN_PROCEDURE
  end

end
330
+
331
+
332
+
333
+
334
+
335
require 'shellwords'

# Extensions to Pathname for walking a raw data directory tree and producing
# temporary local copies of raw image files.
class Pathname
  # Default minimum size, in bytes, for a pfile to be yielded by each_pfile.
  MIN_PFILE_SIZE = 10_000_000

  # Yields every non-hidden, non-symlinked subdirectory beneath this path,
  # depth first: the contents of a directory are yielded before the directory
  # itself.
  def each_subdirectory
    each_entry do |leaf|
      next if leaf.to_s =~ /^\./
      branch = self + leaf
      next unless branch.directory?
      next if branch.symlink?
      branch.each_subdirectory { |subbranch| yield subbranch }
      yield branch
    end
  end

  # Yields a temporary local copy of each pfile (a name of the form P*.7 or
  # P*.7.bz2) in this directory whose size is at least min_file_size bytes.
  # Symlinks are skipped. Errors raised by the block are printed and
  # swallowed, and the local copy is deleted once the block returns.
  def each_pfile(min_file_size = MIN_PFILE_SIZE)
    entries.each do |leaf|
      # Anchored at both ends so that names such as "P123.7.summary" are not
      # mistaken for pfiles.
      next unless leaf.to_s =~ /\AP.*\.7(\.bz2)?\z/
      branch = self + leaf
      next if branch.symlink?
      next if branch.size < min_file_size

      lc = branch.local_copy
      begin
        yield lc
      rescue StandardError => e
        puts "#{e}"
      ensure
        lc.delete
      end
    end
  end

  # Yields a temporary local copy of the first regular file in this directory
  # whose name looks like a dicom (I.*, *.dcm, *.0NNN, optionally .bz2), then
  # stops. Errors raised by the block are printed and swallowed, and the local
  # copy is deleted once the block returns.
  def first_dicom
    entries.each do |leaf|
      next unless leaf.to_s =~ /^I\.|\.dcm(\.bz2)?$|\.0[0-9]+(\.bz2)?$/
      branch = self + leaf
      # A directory can carry a dicom-like name (e.g. "series.001"); it cannot
      # be copied as a file, so skip it.
      next unless branch.file?

      lc = branch.local_copy
      begin
        yield lc
      rescue StandardError => e
        puts "#{e}"
      ensure
        lc.delete
      end
      return
    end
  end

  # Copies this file into Dir.tmpdir, decompressing it with bunzip2 when the
  # name ends in .bz2 (the suffix is dropped from the copy's name). Returns
  # the Pathname of the copy; the caller is responsible for deleting it.
  def local_copy
    compressed = !(to_s =~ /\.bz2$/).nil?
    tfbase = compressed ? basename.to_s.chomp(".bz2") : basename.to_s
    tmpfile = File.join(Dir.tmpdir, tfbase)
    if compressed
      # Paths are shell-escaped so that spaces or metacharacters cannot break
      # the command, and ">" truncates any stale temp file rather than
      # appending to it.
      `bunzip2 -k -c #{Shellwords.escape(to_s)} > #{Shellwords.escape(tmpfile)}`
    else
      FileUtils.cp(to_s, tmpfile)
    end
    Pathname.new(tmpfile)
  end

end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for metamri 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{metamri}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-12-08}
  s.description = %q{Extraction of MRI metadata and insertion into compatible sqlite3 databases.}
  s.email = %q{kk4@medicine.wisc.edu}
  s.executables = ["import_study.rb", "import_visit.rb"]
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  s.files = [
    ".gitignore",
    "Manifest",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "bin/import_study.rb",
    "bin/import_visit.rb",
    "lib/metamri.rb",
    "lib/mysql_tools.rb",
    "lib/raw_image_dataset.rb",
    "lib/raw_image_file.rb",
    "lib/series_description_parameters.rb",
    "lib/visit_raw_data_directory.rb",
    "metamri.gemspec",
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]
  s.homepage = %q{http://github.com/brainmap/metamri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{MRI metadata}
  s.test_files = [
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]

  # The generated template also carried an empty branch on the RubyGems
  # version (via the deprecated Gem::RubyGemsVersion constant) and an unused
  # local; neither had any effect, so only the specification version remains.
  s.specification_version = 3 if s.respond_to? :specification_version
end
61
+