metamri 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,395 @@
1
+ require 'rubygems'
2
+ require 'pathname'
3
+ require 'tempfile'
4
+ require 'yaml'
5
+ require 'tmpdir'
6
+ require 'fileutils'
7
+ require 'raw_image_file'
8
+ require 'raw_image_dataset'
9
+ require 'sqlite3'
10
+
11
+
12
# Prints a banner to standard output: a blank line, a rule of 120 "+"
# characters, the message and the current time (each on its own tab-indented
# line), a second rule, and a closing blank line.
#
# msg:: the text to display; it is formatted with "%s".
def flash(msg)
  rule = "+" * 120

  puts
  puts rule
  [msg, Time.now].each { |line| printf "\t%s\n", line }
  puts rule
  puts
end
21
+
22
+
23
+
24
+ =begin rdoc
25
+ Encapsulates a directory of data acquired during one participant visit. These
26
+ are the raw data directories that are transferred directly from the scanners and
27
+ archived in the raw data section of the vtrak filesystem. After initializing, the
28
+ visit can be scanned to extract metadata for all of the images acquired during the
29
+ visit. The scanning is done in a fairly naive manner: the visit directory is recursively
30
+ walked and in each subdirectory any and all pfiles will be imported in addition to one single
31
+ dicom if any exist. Thus, only a single dicom file among many in a scan session is used to
32
+ retrieve information. Checking the individual files for data integrity must be handled
33
+ elsewhere if at all.
34
+ =end
35
class VisitRawDataDirectory
  # Ordered table used to infer a scan procedure name from the visit directory
  # path. The first pattern that matches wins, so the more specific patterns
  # (e.g. the "_2" / "_3" visit suffixes) must stay ahead of the catch-alls.
  SCAN_PROCEDURE_PATTERNS = [
    [/alz_2000.*_2$/,         'johnson.alz.visit2'],
    [/alz_2000.*_3$/,         'johnson.alz.visit3'],
    [/alz_2000.alz...$/,      'johnson.alz.visit1'],
    [/alz_2000/,              'johnson.alz.unk.visit'],

    [/tbi_1000.*_2$/,         'johnson.tbi-1000.visit2'],
    [/tbi_1000.*_3$/,         'johnson.tbi-1000.visit3'],
    [/tbi_1000.tbi...$/,      'johnson.tbi-1000.visit1'],
    [/tbi_1000/,              'johnson.tbi-1000.unk.visit'],

    [/tbi_aware.*_2$/,        'johnson.tbi-aware.visit2'],
    [/tbi_aware.*_3$/,        'johnson.tbi-aware.visit3'],
    [/tbi_aware.tbi...$/,     'johnson.tbi-aware.visit1'],
    [/tbi_aware/,             'johnson.tbi-aware.unk.visit'],

    [/johnson.tbi-va.visit1/, 'johnson.tbi-va.visit1'],
    [/pib_pilot_mri/,         'johnson.pibmripilot.visit1.uwmr'],
    [/wrap140/,               'johnson.wrap140.visit1'],

    [/cms.uwmr/,              'johnson.cms.visit1.uwmr'],
    [/cms.wais/,              'johnson.cms.visit1.wais'],

    [/esprit.9month/,         'carlsson.esprit.visit2.9month'],
    [/esprit.baseline/,       'carlsson.esprit.visit1.baseline'],

    [/gallagher_pd/,          'gallagher.pd.visit1'],
    [/pc_4000/,               'johnson.pc4000.visit1'],
    [/ries.aware.visit1/,     'ries.aware.visit1']
  ].freeze

  # Name used when no entry of SCAN_PROCEDURE_PATTERNS matches the path.
  UNKNOWN_SCAN_PROCEDURE = 'unknown.scan_procedure'

  # The absolute path of the visit directory, as a string.
  attr_reader :visit_directory
  # An array of :RawImageDataset objects acquired during this visit.
  attr_reader :datasets
  # Timestamp for this visit, obtained from the earliest :RawImageDataset
  attr_reader :timestamp
  # RMR number for this visit.
  attr_reader :rmr_number
  # scan_procedure name
  attr_reader :scan_procedure_name
  # scanner source
  attr_reader :scanner_source
  # Database handle; only set while #db_insert! is running.
  attr_accessor :db

  # A new Visit instance needs to know the path to its raw data and,
  # optionally, its scan_procedure name. When no name is given it is inferred
  # from the directory path. On #db_insert! a name that is not yet in the
  # database is inserted as a new scan_procedure entry.
  #
  # Raises IOError if +directory+ does not exist.
  def initialize(directory, scan_procedure_name=nil)
    expanded = File.expand_path(directory)
    raise(IOError, "Visit directory not found: #{directory}") unless File.exist?(expanded)

    @visit_directory = expanded
    @working_directory = Dir.tmpdir
    @datasets = []
    @timestamp = nil
    @rmr_number = nil
    @scanner_source = nil
    @scan_procedure_name = scan_procedure_name.nil? ? get_scan_procedure_based_on_raw_directory : scan_procedure_name
    @db = nil
  end

  # Recursively walks the filesystem inside the visit directory. At each
  # subdirectory, any and all pfiles are scanned and imported in addition to
  # one and only one dicom file. After scanning, @datasets holds the imported
  # datasets and the timestamp, rmr number and scanner source are populated.
  #
  # Raises IndexError if walking a subdirectory fails or if no dataset could
  # be imported at all, and IOError if no valid rmr number or scanner source
  # is found among the datasets; be prepared to catch both.
  def scan
    flash "Scanning visit raw data directory #{@visit_directory}"

    Pathname.new(@visit_directory).each_subdirectory do |subdir|
      begin
        subdir.each_pfile { |pfile| @datasets << import_dataset(pfile, subdir) }
        subdir.first_dicom { |dicom| @datasets << import_dataset(dicom, subdir) }
      rescue StandardError => e
        # StandardError rather than Exception, so that Ctrl-C and exit
        # requests are not disguised as scan errors.
        raise(IndexError, "There was an error scaning dataset #{subdir}: #{e}")
      end
    end

    if @datasets.empty?
      raise(IndexError, "No datasets could be scanned for directory #{@visit_directory}")
    end

    @timestamp = get_visit_timestamp
    @rmr_number = get_rmr_number
    @scanner_source = get_scanner_source
    flash "Completed scanning #{@visit_directory}"
  end

  # use this to initialize Visit objects in the rails app
  #
  # Returns a hash with :date, :rmr, :path and :scanner_source. The scanner
  # source is looked up from the datasets on every call, so this raises
  # IOError when no dataset provides one (e.g. before #scan has run).
  def attributes_for_active_record
    {
      :date => @timestamp.to_s,
      :rmr => @rmr_number,
      :path => @visit_directory,
      :scanner_source => get_scanner_source
    }
  end

  # Inserts this visit and each of its datasets into the sqlite3 database
  # file +db_file+, updating rows that already exist. The dataset statements
  # themselves are generated by the dataset objects (db_fetch / db_insert /
  # db_update).
  #
  # This is deliberately best-effort: an error raised while talking to the
  # database is printed rather than propagated, and the connection is always
  # closed afterwards.
  def db_insert!(db_file)
    @db = SQLite3::Database.new(db_file)
    @db.results_as_hash = true
    @db.type_translation = true

    begin
      # checks scan_procedure in db, inserts if necessary, returns id
      scan_procedure_id = fetch_or_insert_scan_procedure

      # insert or update visit as needed
      if visit_is_new?
        visit_id = insert_new_visit(scan_procedure_id)
      else
        visit_id = get_existing_visit_id
        update_existing_visit(visit_id, scan_procedure_id)
      end

      # insert each dataset from the visit, also insert an entry in the
      # series descriptions table if necessary.
      @datasets.each do |dataset|
        update_series_descriptions_table(dataset.series_description)
        if dataset_is_new?(dataset)
          insert_new_dataset(dataset, visit_id)
        else
          update_existing_dataset(dataset, get_existing_dataset_id(dataset))
        end
      end
    rescue StandardError => e
      puts e.message
    ensure
      @db.close
      @db = nil
    end
  end

  private

  # Doubles single quotes so +value+ can be embedded in a single-quoted SQL
  # string literal without terminating it early.
  def sql_escape(value)
    value.to_s.gsub("'", "''")
  end

  # Id of the row returned by the dataset's own fetch statement.
  def get_existing_dataset_id(ds)
    @db.execute(ds.db_fetch).first['id']
  end

  def update_existing_dataset(ds, ds_id)
    @db.execute(ds.db_update(ds_id))
  end

  def insert_new_dataset(ds, v_id)
    @db.execute(ds.db_insert(v_id))
  end

  # True when the dataset's fetch statement returns no rows.
  def dataset_is_new?(ds)
    @db.execute(ds.db_fetch).empty?
  end

  # True when no visit row carries this visit's rmr number.
  def visit_is_new?
    @db.execute(sql_fetch_visit_matches).empty?
  end

  # Adds +sd+ to the series_descriptions table unless it is already there.
  def update_series_descriptions_table(sd)
    return unless @db.execute(sql_fetch_series_description(sd)).empty?

    @db.execute(sql_insert_series_description(sd))
  end

  # Inserts this visit and returns the new row id. The statement is built once
  # so that what is printed is exactly what is executed.
  def insert_new_visit(p_id)
    statement = sql_insert_visit(p_id)
    puts statement
    @db.execute(statement)
    @db.last_insert_row_id
  end

  def get_existing_visit_id
    @db.execute(sql_fetch_visit_matches).first['id']
  end

  # Updates visit row +v_id+; the statement is printed before it is executed.
  def update_existing_visit(v_id, p_id)
    statement = sql_update_visit(v_id, p_id)
    puts statement
    @db.execute(statement)
  end

  # if the scan_procedure already exists in db use it, if not insert a new
  # one; either way the scan_procedure id is returned.
  def fetch_or_insert_scan_procedure
    matches = @db.execute(sql_fetch_scan_procedure_name)
    return matches.first['id'] unless matches.empty?

    @db.execute(sql_insert_scan_procedure)
    @db.last_insert_row_id
  end

  def sql_update_visit(v_id, p_id)
    "UPDATE visits SET
      date = '#{sql_escape(@timestamp)}',
      rmr = '#{sql_escape(@rmr_number)}',
      path = '#{sql_escape(@visit_directory)}',
      scan_procedure_id = '#{sql_escape(p_id)}',
      scanner_source = '#{sql_escape(@scanner_source)}'
    WHERE id = '#{sql_escape(v_id)}'"
  end

  def sql_insert_scan_procedure
    "INSERT INTO scan_procedures (codename) VALUES ('#{sql_escape(@scan_procedure_name)}')"
  end

  def sql_insert_series_description(sd)
    "INSERT INTO series_descriptions (long_description) VALUES ('#{sql_escape(sd)}')"
  end

  def sql_fetch_visit_matches
    "SELECT id FROM visits WHERE rmr == '#{sql_escape(@rmr_number)}'"
  end

  def sql_fetch_scan_procedure_name
    "SELECT * FROM scan_procedures WHERE codename = '#{sql_escape(@scan_procedure_name)}'"
  end

  def sql_fetch_series_description(sd)
    "SELECT * FROM series_descriptions WHERE long_description = '#{sql_escape(sd)}'"
  end

  def sql_fetch_dataset_matches(ds)
    "SELECT * FROM image_datasets WHERE rmr = '#{sql_escape(ds.rmr_number)}' " \
      "AND path = '#{sql_escape(ds.directory)}' AND timestamp = '#{sql_escape(ds.timestamp)}'"
  end

  # generates an sql insert statement to insert this visit with a given
  # scan_procedure id. created_at and updated_at share one timestamp, written
  # in the ISO-8601 layout that DateTime#to_s produces.
  def sql_insert_visit(scan_procedure_id=0)
    now = Time.now.strftime('%Y-%m-%dT%H:%M:%S%:z')
    "INSERT INTO visits
    (date, scan_procedure_id, scan_number, initials, rmr, radiology_outcome, notes, transfer_mri, transfer_pet,
    conference, compile_folder, dicom_dvd, user_id, path, scanner_source, created_at, updated_at)
    VALUES
    ('#{sql_escape(@timestamp)}', '#{sql_escape(scan_procedure_id)}', '', '', '#{sql_escape(@rmr_number)}', 'no', '', 'no', 'no',
    'no', 'no', 'no', NULL, '#{sql_escape(@visit_directory)}', '#{sql_escape(@scanner_source)}', '#{now}', '#{now}')"
  end

  # Builds a RawImageDataset for +original_parent_directory+ from the single
  # raw file +rawfile+. Raises IOError if the raw file cannot be read.
  def import_dataset(rawfile, original_parent_directory)
    puts "Importing scan session: #{original_parent_directory.to_s} using raw data file: #{rawfile.basename}"

    begin
      rawimagefile = RawImageFile.new(rawfile.to_s)
    rescue StandardError => e
      raise(IOError, "Trouble reading raw image file #{rawfile}. #{e}")
    end

    RawImageDataset.new(original_parent_directory.to_s, [rawimagefile])
  end

  # The earliest timestamp among the visit's datasets.
  def get_visit_timestamp
    @datasets.map { |ds| ds.timestamp }.min
  end

  # retrieves a valid rmr number from the visit's collection of datasets. Some
  # datasets out there have "rmr not found" set in the rmr_number attribute
  # because their header info is incomplete.
  # Raises IOError if no valid rmr is found.
  def get_rmr_number
    @datasets.each do |ds|
      return ds.rmr_number unless ds.rmr_number == "rmr not found"
    end
    raise(IOError, "No valid RMR number was found for this visit")
  end

  # retrieves a scanner source from the collection of datasets, raises IOError
  # if none is found
  def get_scanner_source
    @datasets.each do |ds|
      return ds.scanner_source unless ds.scanner_source.nil?
    end
    raise(IOError, "No valid scanner source found for this visit")
  end

  # Infers the scan procedure name from the visit directory path using
  # SCAN_PROCEDURE_PATTERNS, falling back to UNKNOWN_SCAN_PROCEDURE.
  def get_scan_procedure_based_on_raw_directory
    match = SCAN_PROCEDURE_PATTERNS.find { |pattern, _name| pattern =~ @visit_directory }
    match ? match.last : UNKNOWN_SCAN_PROCEDURE
  end

end
330
+
331
+
332
+
333
+
334
+
335
# Extensions to Pathname used for walking raw data directories.
class Pathname
  # Default minimum size, in bytes, for a file to be treated as a pfile.
  MIN_PFILE_SIZE = 10_000_000

  # Yields every directory below this one, depth first: the contents of a
  # subdirectory are yielded before the subdirectory itself. Entries whose
  # name starts with "." and symlinked directories are skipped.
  def each_subdirectory
    each_entry do |leaf|
      next if leaf.to_s =~ /^\./
      branch = self + leaf
      next unless branch.directory?
      next if branch.symlink?
      branch.each_subdirectory { |subbranch| yield subbranch }
      yield branch
    end
  end

  # Yields a temporary local copy of each pfile in this directory: a regular,
  # non-symlinked file named P*.7 or P*.7.bz2 of at least +min_file_size+
  # bytes. The copy is removed once the block returns. An error raised by the
  # block is printed and does not stop the iteration.
  def each_pfile(min_file_size = MIN_PFILE_SIZE)
    entries.each do |leaf|
      # Anchored at both ends so that names such as "P12345.7.summary" are
      # not mistaken for pfiles.
      next unless leaf.to_s =~ /\AP.*\.7(\.bz2)?\z/
      branch = self + leaf
      next if branch.symlink?
      next unless branch.file?
      next if branch.size < min_file_size
      with_local_copy(branch) { |lc| yield lc }
    end
  end

  # Yields a temporary local copy of the first entry in this directory whose
  # name looks like a dicom (I.*, *.dcm, *.0NNN, each optionally .bz2) and
  # then stops. Directories with such names are skipped rather than copied.
  # The copy is removed once the block returns, and an error raised by the
  # block is printed instead of propagated.
  def first_dicom
    entries.each do |leaf|
      next unless leaf.to_s =~ /^I\.|\.dcm(\.bz2)?$|\.0[0-9]+(\.bz2)?$/
      branch = self + leaf
      next unless branch.file?
      with_local_copy(branch) { |lc| yield lc }
      return
    end
  end

  # Copies this file into Dir.tmpdir under its own basename and returns the
  # Pathname of the copy. A .bz2 file is decompressed with bunzip2 and the
  # ".bz2" suffix is dropped from the copy's name. The caller is responsible
  # for deleting the copy.
  def local_copy
    compressed = to_s =~ /\.bz2$/
    tfbase = compressed ? basename.to_s.chomp(".bz2") : basename.to_s
    tmpfile = File.join(Dir.tmpdir, tfbase)
    if compressed
      # Argument-vector form: no shell is involved, so spaces or shell
      # metacharacters in the path are safe. The :out redirect truncates, so
      # a stale temp file is overwritten instead of appended to.
      system('bunzip2', '-k', '-c', '--', to_s, :out => tmpfile)
    else
      FileUtils.cp(to_s, tmpfile)
    end
    Pathname.new(tmpfile)
  end

  private

  # Makes a local copy of +source+, yields it, and always removes it again.
  # StandardErrors raised by the block are reported on standard output and
  # swallowed, which keeps a single unreadable file from ending a scan.
  def with_local_copy(source)
    copy = source.local_copy
    begin
      yield copy
    rescue StandardError => e
      puts "#{e}"
    ensure
      copy.delete if copy.exist?
    end
  end

end
@@ -0,0 +1,61 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for metamri 0.1.0.
#
# NOTE(review): lib/visit_raw_data_directory.rb requires 'sqlite3', but no
# runtime dependency is declared here — confirm whether one should be added
# through the Jeweler task in the Rakefile.
Gem::Specification.new do |s|
  s.name = %q{metamri}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-12-08}
  s.description = %q{Extraction of MRI metadata and insertion into compatible sqlite3 databases.}
  s.email = %q{kk4@medicine.wisc.edu}
  s.executables = ["import_study.rb", "import_visit.rb"]
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  s.files = [
    ".gitignore",
    "Manifest",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "bin/import_study.rb",
    "bin/import_visit.rb",
    "lib/metamri.rb",
    "lib/mysql_tools.rb",
    "lib/raw_image_dataset.rb",
    "lib/raw_image_file.rb",
    "lib/series_description_parameters.rb",
    "lib/visit_raw_data_directory.rb",
    "metamri.gemspec",
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]
  s.homepage = %q{http://github.com/brainmap/metamri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{MRI metadata}
  s.test_files = [
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]

  # The generated template also compared Gem::RubyGemsVersion against 1.2.0
  # inside a conditional whose branches were both empty. It is dropped here so
  # the spec no longer references that constant.
  s.specification_version = 3 if s.respond_to? :specification_version
end
61
+