metamri 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Manifest +16 -0
- data/README.rdoc +43 -0
- data/Rakefile +34 -0
- data/VERSION +1 -0
- data/bin/import_study.rb +170 -0
- data/bin/import_visit.rb +74 -0
- data/lib/metamri.rb +6 -0
- data/lib/mysql_tools.rb +33 -0
- data/lib/raw_image_dataset.rb +147 -0
- data/lib/raw_image_file.rb +418 -0
- data/lib/series_description_parameters.rb +81 -0
- data/lib/visit_raw_data_directory.rb +395 -0
- data/metamri.gemspec +61 -0
- data/test/raw_image_dataset_test.rb +46 -0
- data/test/raw_image_file_test.rb +135 -0
- data/test/visit_duplication_test.rb +24 -0
- data/test/visit_test.rb +77 -0
- metadata +76 -0
@@ -0,0 +1,395 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'pathname'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'yaml'
|
5
|
+
require 'tmpdir'
|
6
|
+
require 'fileutils'
|
7
|
+
require 'raw_image_file'
|
8
|
+
require 'raw_image_dataset'
|
9
|
+
require 'sqlite3'
|
10
|
+
|
11
|
+
|
12
|
+
# Prints +msg+ to standard output inside a banner of plus signs, together
# with the current date/time. Shared console-progress helper used by the
# visit-scanning scripts.
def flash(msg)
  banner = "+" * 120
  puts
  puts banner
  printf "\t%s\n", msg
  printf "\t%s\n", Time.now
  puts banner
  puts
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
=begin rdoc
Encapsulates a directory of data acquired during one participant visit. These
are the raw data directories that are transferred directly from the scanners and
archived in the raw data section of the vtrak filesystem. After initializing, the
visit can be scanned to extract metadata for all of the images acquired during the
visit. The scanning is done in a fairly naive manner: the visit directory is recursively
walked and in each subdirectory any and all pfiles will be imported in addition to one single
dicom if any exist. Thus, only a single dicom file among many in a scan session is used to
retrieve information. Checking the individual files for data integrity must be handled
elsewhere if at all.
=end
class VisitRawDataDirectory
  # The absolute path of the visit directory, as a string.
  attr_reader :visit_directory
  # An array of :RawImageDataset objects acquired during this visit.
  attr_reader :datasets
  # Timestamp for this visit, obtained from the earliest :RawImageDataset
  attr_reader :timestamp
  # RMR number for this visit.
  attr_reader :rmr_number
  # scan_procedure name
  attr_reader :scan_procedure_name
  # scanner source
  attr_reader :scanner_source
  # SQLite3 database handle; non-nil only while #db_insert! is running.
  attr_accessor :db

  # A new Visit instance needs to know the path to its raw data and scan_procedure name. The scan_procedure
  # name must match a name in the database, if not a new scan_procedure entry will be inserted.
  # Raises IOError when +directory+ does not exist.
  def initialize(directory, scan_procedure_name=nil)
    raise(IOError, "Visit directory not found: #{directory}") unless File.exist?(File.expand_path(directory))
    @visit_directory = File.expand_path(directory)
    @working_directory = Dir.tmpdir
    @datasets = []
    @timestamp = nil
    @rmr_number = nil
    # When no name is given, guess the scan procedure from the directory path.
    @scan_procedure_name = scan_procedure_name.nil? ? get_scan_procedure_based_on_raw_directory : scan_procedure_name
    @db = nil
  end

  # Recursively walks the filesystem inside the visit directory. At each subdirectory, any and all
  # pfiles are scanned and imported in addition to one and only one dicom file. After scanning
  # @datasets will hold an array of ImageDataset instances. Setting the rmr here can raise an
  # exception if no valid rmr is found in the datasets, be prepared to catch it.
  def scan
    flash "Scanning visit raw data directory #{@visit_directory}"
    d = Pathname.new(@visit_directory)
    d.each_subdirectory do |dd|
      begin
        dd.each_pfile { |pf| @datasets << import_dataset(pf, dd) }
        dd.first_dicom { |fd| @datasets << import_dataset(fd, dd) }
      rescue StandardError => e
        # Was `rescue Exception`, which also trapped signals and SystemExit.
        raise(IndexError, "There was an error scanning dataset #{dd}: #{e}")
      end
    end

    if @datasets.empty?
      raise(IndexError, "No datasets could be scanned for directory #{@visit_directory}")
    else
      @timestamp = get_visit_timestamp
      @rmr_number = get_rmr_number
      @scanner_source = get_scanner_source
      flash "Completed scanning #{@visit_directory}"
    end
  end

  # use this to initialize Visit objects in the rails app
  def attributes_for_active_record
    {
      :date => @timestamp.to_s,
      :rmr => @rmr_number,
      :path => @visit_directory,
      :scanner_source => get_scanner_source
    }
  end

  # Inserts each dataset in this visit into the specified database. The specifics
  # of the database insert are handled by the #RawImageDataset class.
  # Errors are reported to stdout (best-effort); the handle is always closed.
  def db_insert!(db_file)
    @db = SQLite3::Database.new(db_file)
    @db.results_as_hash = true
    @db.type_translation = true

    begin
      # checks scan_procedure in db, inserts if necessary, returns id
      scan_procedure_id = fetch_or_insert_scan_procedure

      # insert or update visit as needed
      if visit_is_new? # this is a new visit
        visit_id = insert_new_visit(scan_procedure_id)
      else # visit already exists in DB
        visit_id = get_existing_visit_id
        update_existing_visit(visit_id, scan_procedure_id)
      end

      # insert each dataset from the visit, also insert an entry in series descriptions table if necessary.
      @datasets.each do |dataset|
        update_series_descriptions_table(dataset.series_description)
        if dataset_is_new?(dataset)
          insert_new_dataset(dataset, visit_id)
        else # dataset is already in DB
          dataset_id = get_existing_dataset_id(dataset)
          update_existing_dataset(dataset, dataset_id)
        end
      end
    rescue StandardError => e
      # Deliberate best-effort behavior: report and continue so `ensure` runs.
      puts e.message
    ensure
      @db.close
      @db = nil
    end
  end

  private

  # Returns the id of the row in image_datasets matching dataset +ds+.
  def get_existing_dataset_id(ds)
    @db.execute(ds.db_fetch).first['id']
  end

  # Runs the dataset's own UPDATE statement against row +ds_id+.
  def update_existing_dataset(ds, ds_id)
    @db.execute(ds.db_update(ds_id))
  end

  # Runs the dataset's own INSERT statement, linking it to visit +v_id+.
  def insert_new_dataset(ds, v_id)
    @db.execute(ds.db_insert(v_id))
  end

  # True when no matching row for +ds+ exists in image_datasets.
  def dataset_is_new?(ds)
    @db.execute(ds.db_fetch).empty?
  end

  # True when no visit with this RMR number exists in the database.
  def visit_is_new?
    @db.execute(sql_fetch_visit_matches).empty?
  end

  # Inserts series description +sd+ unless it is already present.
  def update_series_descriptions_table(sd)
    if @db.execute(sql_fetch_series_description(sd)).empty?
      @db.execute(sql_insert_series_description(sd))
    end
  end

  # Inserts this visit with scan_procedure id +p_id+, returning the new row id.
  def insert_new_visit(p_id)
    puts sql_insert_visit(p_id)
    @db.execute(sql_insert_visit(p_id))
    return @db.last_insert_row_id
  end

  # Returns the id of the existing visit row matching this visit's RMR number.
  def get_existing_visit_id
    return @db.execute(sql_fetch_visit_matches).first['id']
  end

  # Updates visit row +v_id+ with this visit's data and scan_procedure +p_id+.
  def update_existing_visit(v_id, p_id)
    puts sql_update_visit(v_id, p_id)
    @db.execute(sql_update_visit(v_id, p_id))
  end

  # Returns the id of this visit's scan_procedure, inserting it first if absent.
  def fetch_or_insert_scan_procedure
    # if the scan_procedure already exists in db use it, if not insert a new one
    scan_procedure_matches = @db.execute(sql_fetch_scan_procedure_name)
    if scan_procedure_matches.empty?
      @db.execute(sql_insert_scan_procedure)
      new_scan_procedure_id = @db.last_insert_row_id
    end
    return scan_procedure_matches.empty? ? new_scan_procedure_id : scan_procedure_matches.first['id']
  end

  # NOTE(review): all sql_* builders below interpolate values directly into SQL.
  # Inputs come from scanner directories/headers (presumed trusted), but these
  # should be migrated to parameterized queries (@db.execute(sql, binds)).

  def sql_update_visit(v_id, p_id)
    "UPDATE visits SET
    date = '#{@timestamp.to_s}',
    rmr = '#{@rmr_number}',
    path = '#{@visit_directory}',
    scan_procedure_id = '#{p_id.to_s}',
    scanner_source = '#{@scanner_source}'
    WHERE id = '#{v_id}'"
  end

  def sql_insert_scan_procedure
    "INSERT INTO scan_procedures (codename) VALUES ('#{@scan_procedure_name}')"
  end

  def sql_insert_series_description(sd)
    "INSERT INTO series_descriptions (long_description) VALUES ('#{sd}')"
  end

  def sql_fetch_visit_matches
    # Standard SQL equality (the original used SQLite's nonstandard `==`).
    "SELECT id FROM visits WHERE rmr = '#{@rmr_number}'"
  end

  def sql_fetch_scan_procedure_name
    "SELECT * FROM scan_procedures WHERE codename = '#{@scan_procedure_name}'"
  end

  def sql_fetch_series_description(sd)
    "SELECT * FROM series_descriptions WHERE long_description = '#{sd}'"
  end

  def sql_fetch_dataset_matches(ds)
    "SELECT * FROM image_datasets WHERE rmr = '#{ds.rmr_number}' AND path = '#{ds.directory}' AND timestamp = '#{ds.timestamp}'"
  end

  # generates an sql insert statement to insert this visit with a given participant id
  def sql_insert_visit(scan_procedure_id=0)
    "INSERT INTO visits
    (date, scan_procedure_id, scan_number, initials, rmr, radiology_outcome, notes, transfer_mri, transfer_pet,
    conference, compile_folder, dicom_dvd, user_id, path, scanner_source, created_at, updated_at)
    VALUES
    ('#{@timestamp.to_s}', '#{scan_procedure_id.to_s}', '', '', '#{@rmr_number}', 'no', '', 'no', 'no',
    'no', 'no', 'no', NULL, '#{@visit_directory}', '#{@scanner_source}', '#{DateTime.now}', '#{DateTime.now}')"
  end

  # Builds a RawImageDataset from one raw file found in +original_parent_directory+.
  # Raises IOError when the raw file cannot be read.
  def import_dataset(rawfile, original_parent_directory)
    puts "Importing scan session: #{original_parent_directory.to_s} using raw data file: #{rawfile.basename}"

    begin
      rawimagefile = RawImageFile.new(rawfile.to_s)
    rescue StandardError => e
      raise(IOError, "Trouble reading raw image file #{rawfile}. #{e}")
    end

    return RawImageDataset.new(original_parent_directory.to_s, [rawimagefile])
  end

  # Earliest timestamp among all scanned datasets.
  def get_visit_timestamp
    @datasets.min_by { |ds| ds.timestamp }.timestamp
  end

  # retrieves a valid rmr number from the visit's collection of datasets. Some datasets out there
  # have "rmr not found" set in the rmr_number attribute because their header info is incomplete.
  # Raises IOError if no valid rmr is found.
  def get_rmr_number
    @datasets.each do |ds|
      return ds.rmr_number unless ds.rmr_number == "rmr not found"
    end
    raise(IOError, "No valid RMR number was found for this visit")
  end

  # retrieves a scanner source from the collection of datasets, raises IOError if none is found
  def get_scanner_source
    @datasets.each do |ds|
      return ds.scanner_source unless ds.scanner_source.nil?
    end
    raise(IOError, "No valid scanner source found for this visit")
  end

  # Maps known raw-directory path patterns to canonical scan_procedure names.
  # Falls back to 'unknown.scan_procedure' when no pattern matches.
  def get_scan_procedure_based_on_raw_directory
    case @visit_directory
    when /alz_2000.*_2$/
      return 'johnson.alz.visit2'
    when /alz_2000.*_3$/
      return 'johnson.alz.visit3'
    when /alz_2000.alz...$/
      return 'johnson.alz.visit1'
    when /alz_2000/
      return 'johnson.alz.unk.visit'

    when /tbi_1000.*_2$/
      return 'johnson.tbi-1000.visit2'
    when /tbi_1000.*_3$/
      return 'johnson.tbi-1000.visit3'
    when /tbi_1000.tbi...$/
      return 'johnson.tbi-1000.visit1'
    when /tbi_1000/
      return 'johnson.tbi-1000.unk.visit'

    when /tbi_aware.*_2$/
      return 'johnson.tbi-aware.visit2'
    when /tbi_aware.*_3$/
      return 'johnson.tbi-aware.visit3'
    when /tbi_aware.tbi...$/
      return 'johnson.tbi-aware.visit1'
    when /tbi_aware/
      return 'johnson.tbi-aware.unk.visit'

    when /johnson.tbi-va.visit1/
      return 'johnson.tbi-va.visit1'

    when /pib_pilot_mri/
      return 'johnson.pibmripilot.visit1.uwmr'

    when /wrap140/
      return 'johnson.wrap140.visit1'

    when /cms.uwmr/
      return 'johnson.cms.visit1.uwmr'
    when /cms.wais/
      return 'johnson.cms.visit1.wais'

    when /esprit.9month/
      return 'carlsson.esprit.visit2.9month'
    when /esprit.baseline/
      return 'carlsson.esprit.visit1.baseline'

    when /gallagher_pd/
      return 'gallagher.pd.visit1'

    when /pc_4000/
      return 'johnson.pc4000.visit1'

    when /ries.aware.visit1/
      return 'ries.aware.visit1'

    else
      return 'unknown.scan_procedure'
    end
  end

end
|
330
|
+
|
331
|
+
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
# Extensions to Pathname used while walking raw visit directories.
class Pathname
  # Pfiles smaller than this many bytes are assumed partial/junk and skipped.
  MIN_PFILE_SIZE = 10_000_000

  # Recursively yields every subdirectory beneath self, children before
  # their parent. Dot-entries and symlinks are skipped.
  def each_subdirectory
    each_entry do |leaf|
      next if leaf.to_s =~ /^\./
      branch = self + leaf
      next if not branch.directory?
      next if branch.symlink?
      branch.each_subdirectory { |subbranch| yield subbranch }
      yield branch
    end
  end

  # Yields a local (temp-dir, decompressed) copy of each GE pfile
  # (P*.7 or P*.7.bz2) in this directory of at least +min_file_size+ bytes.
  # The temporary copy is always deleted afterwards; errors raised by the
  # block are reported to stdout and iteration continues.
  def each_pfile(min_file_size = MIN_PFILE_SIZE)
    entries.each do |leaf|
      next unless leaf.to_s =~ /^P.*\.7|^P.*\.7\.bz2/
      branch = self + leaf
      next if branch.symlink?
      if branch.size >= min_file_size
        lc = branch.local_copy
        begin
          yield lc
        rescue StandardError => e
          # Was `rescue Exception`, which also trapped signals and SystemExit.
          puts "#{e}"
        ensure
          lc.delete
        end
      end
    end
  end

  # Yields a local copy of the first dicom-looking file in this directory
  # (I.* files, *.dcm[.bz2], or numeric-suffixed *.0NN[.bz2]), then returns.
  # The temporary copy is always deleted afterwards.
  def first_dicom
    entries.each do |leaf|
      branch = self + leaf
      if leaf.to_s =~ /^I\.|\.dcm(\.bz2)?$|\.0[0-9]+(\.bz2)?$/
        lc = branch.local_copy
        begin
          yield lc
        rescue StandardError => e
          puts "#{e}"
        ensure
          lc.delete
        end
        return
      end
    end
  end

  # Copies this file into Dir.tmpdir, transparently decompressing .bz2
  # files, and returns the copy as a Pathname.
  #
  # NOTE(review): the path is interpolated into a shell command, so
  # filenames containing shell metacharacters would break this; assumes
  # trusted scanner paths — confirm.
  def local_copy
    tfbase = self.to_s =~ /\.bz2$/ ? self.basename.to_s.chomp(".bz2") : self.basename.to_s
    tmpfile = File.join(Dir.tmpdir, tfbase)
    if self.to_s =~ /\.bz2$/
      # Truncate (>) instead of append (>>): appending onto a stale temp
      # file left by a previous crashed run would corrupt the copy.
      `bunzip2 -k -c #{self.to_s} > #{tmpfile}`
    else
      FileUtils.cp(self.to_s, tmpfile)
    end
    return Pathname.new(tmpfile)
  end

end
|
data/metamri.gemspec
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-

# Gem specification for metamri: MRI metadata extraction and sqlite3 import.
Gem::Specification.new do |s|
  s.name = %q{metamri}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Kristopher J. Kosmatka"]
  s.date = %q{2009-12-08}
  s.description = %q{Extraction of MRI metadata and insertion into compatible sqlite3 databases.}
  s.email = %q{kk4@medicine.wisc.edu}
  # Command-line entry points installed into the user's bin directory.
  s.executables = ["import_study.rb", "import_visit.rb"]
  s.extra_rdoc_files = [
    "README.rdoc"
  ]
  # Complete file manifest packaged into the gem.
  s.files = [
    ".gitignore",
    "Manifest",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "bin/import_study.rb",
    "bin/import_visit.rb",
    "lib/metamri.rb",
    "lib/mysql_tools.rb",
    "lib/raw_image_dataset.rb",
    "lib/raw_image_file.rb",
    "lib/series_description_parameters.rb",
    "lib/visit_raw_data_directory.rb",
    "metamri.gemspec",
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]
  s.homepage = %q{http://github.com/brainmap/metamri}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{MRI metadata}
  s.test_files = [
    "test/raw_image_dataset_test.rb",
    "test/raw_image_file_test.rb",
    "test/visit_duplication_test.rb",
    "test/visit_test.rb"
  ]

  # Jeweler emits per-RubyGems-version dependency declarations here; the
  # branches are empty because this gem declares no runtime dependencies.
  if s.respond_to? :specification_version then
    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    s.specification_version = 3

    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
    else
    end
  else
  end
end
|
61
|
+
|