RubyGems - metamri - Versions diffs - 0.1.0 - Mend

metamri 0.1.0

Files changed (19) hide show

data/.gitignore +3 -0
data/Manifest +16 -0
data/README.rdoc +43 -0
data/Rakefile +34 -0
data/VERSION +1 -0
data/bin/import_study.rb +170 -0
data/bin/import_visit.rb +74 -0
data/lib/metamri.rb +6 -0
data/lib/mysql_tools.rb +33 -0
data/lib/raw_image_dataset.rb +147 -0
data/lib/raw_image_file.rb +418 -0
data/lib/series_description_parameters.rb +81 -0
data/lib/visit_raw_data_directory.rb +395 -0
data/metamri.gemspec +61 -0
data/test/raw_image_dataset_test.rb +46 -0
data/test/raw_image_file_test.rb +135 -0
data/test/visit_duplication_test.rb +24 -0
data/test/visit_test.rb +77 -0
metadata +76 -0

data/.gitignore ADDED

@@ -0,0 +1,3 @@
+*.log
+*.gem
+._*

data/Manifest ADDED

@@ -0,0 +1,16 @@
+bin/import_study.rb
+bin/import_visit.rb
+ImageData.gemspec
+lib/metamri.rb
+lib/mysql_tools.rb
+lib/raw_image_dataset.rb
+lib/raw_image_file.rb
+lib/series_description.rb
+lib/visit_raw_data_directory.rb
+Manifest
+Rakefile
+README.rdoc
+test/raw_image_dataset_test.rb
+test/raw_image_file_test.rb
+test/visit_duplication_test.rb
+test/visit_test.rb

data/README.rdoc ADDED

@@ -0,0 +1,43 @@
+== ImageData
+A small library that can be used to extract metadata from large collections of research MR imaging data sets.  Support is also provided to insert the metadata into a Wisconsin ADRC Imaging Core compatible database.  Several
+command line utilities are provided as well as a minimal API that is useful for building ruby on rails rake tasks.
+You will most likely be interested in either:
+= import_visit.rb CLU
+== Synopsis
+ A simple utility for importing imaging data collected during one visit into the WADRC Data Tools web
+ application.  Data from a visit is contained in one big directory that may have many subdirectories.
+ Each individual imaging scan may be composed of an entire directory of dicom files or one single p-file.
+ This utility scans through all of the image data sets and retrieves meta-data about the scans from their
+ header information.
+== Examples
+ import_visit.rb /path/to/raw/mri/data study.codename /path/to/db/db.sqlite3
+== Usage
+ import_visit.rb <raw_data_directory> <scan_procedure_codename> <database_file>
+ For help use: import_visit.rb -h
+== Options
+ -h, --help          Displays help message
+ -v, --visit         Visit raw data directory, absolute path
+ -p, --scan_procedure      scan_procedure codename, e.g. johnson.alz.visit1
+ -d, --database      Database file into which information will be imported
+== Author
+ K.J. Kosmatka, kk4@medicine.wisc.edu
+== Copyright
+ Copyright (c) 2009 WADRC Imaging Core.
+or:
+= VisitRawDirectory class
+ see the doc directory

data/Rakefile ADDED

@@ -0,0 +1,34 @@
+#
+# To change this template, choose Tools | Templates
+# and open the template in the editor.
+require 'rubygems'
+require 'rake'
+# require 'echoe'
+#
+# Echoe.new('metamri', '0.1.0') do |p|
+#   p.description    = "Extraction of MRI metadata and insertion into compatible sqlite3 databases."
+#   p.url            = "http://github.com/brainmap/metamri"
+#   p.author         = "Kristopher J. Kosmatka"
+#   p.email          = "kk4@medicine.wisc.edu"
+#   p.ignore_pattern = ["nbproject/*"]
+#   p.development_dependencies = []
+# end
+#
+# Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
+begin  # Jeweler is optional; the rescue below keeps the Rakefile loadable without it.
+  require 'jeweler'
+  Jeweler::Tasks.new do |gemspec|  # gem metadata for the metamri gem
+    gemspec.name = "metamri"
+    gemspec.summary = "MRI metadata"
+    gemspec.description = "Extraction of MRI metadata and insertion into compatible sqlite3 databases."
+    gemspec.email = "kk4@medicine.wisc.edu"
+    gemspec.homepage = "http://github.com/brainmap/metamri"
+    gemspec.authors = ["Kristopher J. Kosmatka"]
+  end
+  Jeweler::GemcutterTasks.new  # NOTE(review): presumably adds the gem publishing tasks -- confirm against Jeweler docs
+rescue LoadError  # raised by the require above when jeweler is not installed
+  puts "Jeweler not available. Install it with: sudo gem install jeweler"
+end

data/VERSION ADDED

	@@ -0,0 +1 @@
1	+ 0.1.0

data/bin/import_study.rb ADDED

@@ -0,0 +1,170 @@
+#!/usr/bin/env ruby
+#
+# == Synopsis
+#   A simple utility for importing imaging data for an entire study into the WADRC Data Tools web
+#   application.  Scans each visit within a particular protocol and inserts all the appropriate meta-data
+#   into the given database. Can be run as a command line utility, or the function can be required by other packages.
+#
+# == Examples
+#   import_study.rb alz_1 /path/to/the/rails/db/production.sqlite3
+#
+# == Usage
+#   import_study.rb <study_code> <database_file>
+#
+#   Study codes are one of:
+#      alz_1, alz_2, bendlin_wmad, cms_wais, cms_uwmr, esprit_1, esprit_2, gallagher_pd, pib_pilot,
+#      ries_pilot, ries_1, tbi1000_1, tbi1000_2, tbi1000_3, tbiva, wrap140
+#
+#   For help use: import_study.rb -h
+#
+# == Options
+#   -h, --help          Displays help message
+#
+# == Author
+#   K.J. Kosmatka, kk4@medicine.wisc.edu
+#
+# == Copyright
+#   Copyright (c) 2009 WADRC Imaging Core.
+#
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+require 'visit_raw_data_directory'
+require 'pathname'
+require 'rdoc/usage'
+require 'logger'
+#:stopdoc:
+STUDIES = {  # study code (symbol) => import settings; the script looks entries up with ARGV[0].to_sym
+  :alz_1 =>        { :dir => '/Data/vtrak1/raw/alz_2000',  # directory holding the individual visit directories
+                     :logfile => 'alz.visit1.scan.log',  # file that import_study logs errors to
+                     :filter => /^alz...$|^alz..._[AB]/i,  # only visit directory names matching this are imported
+                     :codename => 'johnson.alz.visit1'  # codename handed to VisitRawDataDirectory.new
+  },
+  :alz_2 =>        { :dir => '/Data/vtrak1/raw/alz_2000',  # same directory as :alz_1; only the filter differs
+                     :logfile => 'alz.visit2.scan.log',
+                     :filter => /^alz..._2$/,
+                     :codename => 'johnson.alz.visit2'
+  },
+  :bendlin_wmad => { :dir => '/Data/vtrak1/raw/bendlin_WMAD/ge3T_750_scanner',
+                     :logfile => 'bendlin.wmad.scan.log',
+                     :filter => /^wmad/,
+                     :codename => 'bendlin.wmad.visit1'
+  },
+  :cms_wais =>     { :dir => '/Data/vtrak1/raw/cms/wais',
+                     :logfile => 'cms.wais.scan.log',
+                     :filter => /^pc/,
+                     :codename => 'johnson.cms.visit1.wais'
+  },
+  :cms_uwmr =>     { :dir => '/Data/vtrak1/raw/cms/uwmr',
+                     :logfile => 'cms.uwmr.scan.log',
+                     :filter => /^cms...$/,
+                     :codename => 'johnson.cms.visit1.uwmr'
+  },
+  :esprit_1 =>     { :dir => '/Data/vtrak1/raw/esprit/baseline',
+                     :logfile => 'esprit.baseline.scan.log',
+                     :filter => /^esp3/,
+                     :codename => 'carlsson.esprit.visit1.baseline'
+  },
+  :esprit_2 =>     { :dir => '/Data/vtrak1/raw/esprit/9month',
+                     :logfile => 'esprit.9month.scan.log',
+                     :filter => /^esp3/,
+                     :codename => 'carlsson.esprit.visit2.9month'
+  },
+  :gallagher_pd => { :dir => '/Data/vtrak1/raw/gallagher.pd',
+                     :logfile => 'gallagher.scan.log',
+                     :filter => /^pd..._/,
+                     :codename => 'gallagher.pd.visit1'
+  },
+  :pib_pilot =>    { :dir => '/Data/vtrak1/raw/pib_pilot_mri',
+                     :logfile => 'pib.mri.pilot.scan.log',
+                     :filter => /^cpr0/,
+                     :codename => 'johnson.pibmripilot.visit1.uwmr'
+  },
+  :ries_1 =>       { :dir => '/Data/vtrak1/raw/ries.aware.visit1',  # shared with :ries_pilot; the filters separate the two
+                     :logfile => 'ries.aware.visit1.scan.log',
+                     :filter => /^awr0/,
+                     :codename => 'ries.aware.visit1'
+  },
+  :ries_pilot =>   { :dir => '/Data/vtrak1/raw/ries.aware.visit1',
+                     :logfile => 'ries.aware.pilot.scan.log',
+                     :filter => /^awrP/,
+                     :codename => 'ries.aware.pilot'
+  },
+  :tbi1000_1 =>    { :dir => '/Data/vtrak1/raw/tbi_1000',  # shared with :tbi1000_2; the filters separate the two
+                     :logfile => 'tbi1000.visit1.scan.log',
+                     :filter => /^tbi...$/,
+                     :codename => 'johnson.tbi1000.visit1'
+  },
+  :tbi1000_2 =>    { :dir => '/Data/vtrak1/raw/tbi_1000',
+                     :logfile => 'tbi1000.visit2.scan.log',
+                     :filter => /^tbi..._2/,
+                     :codename => 'johnson.tbi1000.visit2'
+  },
+  :tbi1000_3 =>    { :dir => '/Data/vtrak1/raw/johnson.tbi.aware.visit3',
+                     :logfile => 'tbiaware.visit3.scan.log',
+                     :filter => /^tbi..._3$/,
+                     :codename => 'johnson.tbiaware.visit3'
+  },
+  :tbiva =>        { :dir => '/Data/vtrak1/raw/johnson.tbi-va.visit1',
+                     :logfile => 'tbiva.scan.log',
+                     :filter => /^tbi/,
+                     :codename => 'johnson.tbiva.visit1'
+  },
+  :wrap140 =>      { :dir => '/Data/vtrak1/raw/wrap140',
+                     :logfile => 'wrap140.scan.log',
+                     :filter => /^wrp/,
+                     :codename => 'johnson.wrap140.visit1'
+  }
+}
+#:startdoc:
+# == Function
+#   Imports an entire study.
+#
+# == Arguments
+# study -- a hash specifying the following keys:
+#   :dir => the directory holding all the individual visit directories for this study
+#   :logfile => a file name where logging can be written
+#   :filter => a regex that matches all of the visit directory names that should be scanned
+#   :codename => the study codename, e.g. 'johnson.alz.visit1'
+#
+# dbfile -- the database into which meta-data will be inserted
+#
+def import_study(study, dbfile)
+  studydir = Pathname.new(study[:dir])
+  # Rotating log: keep 7 old files, start a new one once the current file reaches 1 MiB.
+  log = Logger.new(study[:logfile], 7, 1048576)
+  begin
+    studydir.entries.each do |visit|
+      visit_name = visit.to_s
+      # Skip hidden entries (including '.' and '..') and anything the study filter rejects.
+      next if visit_name =~ /^\./
+      next unless visit_name =~ study[:filter]
+      visitdir = studydir + visit
+      v = VisitRawDataDirectory.new( visitdir.to_s, study[:codename] )
+      begin
+        v.scan
+        v.db_insert!(dbfile)
+      rescue StandardError => e
+        # One bad visit must not abort the whole study, so report it and move on.
+        # Only StandardError is rescued: Interrupt (Ctrl-C), SystemExit and similar
+        # still stop a long-running import instead of being logged and skipped.
+        puts "There was a problem scanning a dataset in #{visitdir}... skipping."
+        puts "Exception message: #{e.message}"
+        log.error "There was a problem scanning a dataset in #{visitdir}... skipping."
+        log.error "Exception message: #{e.message}"
+      end
+    end
+  ensure
+    # Release the log file handle even when the import is interrupted.
+    log.close
+  end
+end
+if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
+  RDoc::usage() if (ARGV[0] == '-h' or ARGV.size != 2)
+  study = STUDIES[ARGV[0].to_sym]
+  raise(IndexError, "Study Not Recognized.") if study.nil?
+  dbfile = ARGV[1]
+  raise(IOError, "DB File not writable or not existant") unless File.writable?(dbfile)
+  begin
+    import_study(study, dbfile)
+  rescue IndexError, IOError => e
+    puts "There was an error importing study #{study}. #{e}"
+    raise e
+  end
+end

data/bin/import_visit.rb ADDED

@@ -0,0 +1,74 @@
+#!/usr/bin/env ruby
+#
+# == Synopsis
+#   A simple utility for importing imaging data collected during one visit into the WADRC Data Tools web
+#   application.  Data from a visit is contained in one big directory that may have many subdirectories.
+#   Each individual imaging scan may be composed of an entire directory of dicom files or one single p-file.
+#   This utility scans through all of the image data sets and retrieves meta-data about the scans from their
+#   header information.
+#
+# == Examples
+#   import_visit.rb /Data/vtrak1/raw/alz_2000/alz001 johnson.alz.visit1 /path/to/the/rails/db/production.sqlite3
+#   import_visit.rb /Data/vtrak1/raw/wrap140/wrp001_5917_03042008 johnson.wrap140.visit1 /path/to/the/rails/db/production.sqlite3
+#
+# == Usage
+#   import_visit.rb <raw_data_directory> <scan_procedure_codename> <database_file>
+#
+#   For help use: import_visit.rb -h
+#
+# == Options
+#   -h, --help          Displays help message
+#   -v, --visit         Visit raw data directory, absolute path
+#   -p, --scan_procedure      scan_procedure codename, e.g. johnson.alz.visit1
+#   -d, --database      Database file into which information will be imported
+#
+# == Author
+#   K.J. Kosmatka, kk4@medicine.wisc.edu
+#
+# == Copyright
+#   Copyright (c) 2009 WADRC Imaging Core.
+#
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+require 'visit_raw_data_directory'
+require 'pathname'
+require 'rdoc/usage'
+require 'logger'
+# == Function
+#   Imports imaging data collected during a single visit into the WADRC Data Tools web application database.
+#
+# == Usage
+#   import_visit(raw_directory, scan_procedure_codename, database)
+#
+# == Example
+#   import_visit('/Data/vtrak1/raw/alz_2000/alz001','johnson.alz.visit1','/path/to/the/rails/db/production.sqlite3')
+#
+def import_visit(raw_directory, scan_procedure_codename, database)
+  # Errors are logged to a file named after the visit directory's basename,
+  # created relative to the current working directory.
+  log = Logger.new(File.basename(raw_directory))
+  begin
+    v = VisitRawDataDirectory.new(raw_directory, scan_procedure_codename)
+    puts "+++ Importing #{v.visit_directory} as part of #{v.scan_procedure_name} +++"
+    begin
+      v.scan
+      v.db_insert!(database)
+    rescue StandardError => e
+      # Report and log the failure rather than propagating it to the caller.
+      # Only StandardError is rescued, so Interrupt (Ctrl-C) and SystemExit
+      # still terminate the import instead of being swallowed.
+      puts "There was a problem scanning a dataset in #{v.visit_directory}... skipping."
+      puts "Exception message: #{e.message}"
+      log.error "There was a problem scanning a dataset in #{v.visit_directory}... skipping."
+      log.error "Exception message: #{e.message}"
+    end
+  ensure
+    # Release the log file handle whether or not the import succeeded.
+    log.close
+  end
+end
+if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
+  RDoc::usage() if (ARGV[0] == '-h' or ARGV.size != 3)
+  raw_directory = ARGV[0]
+  scan_procedure_codename = ARGV[1]
+  database = ARGV[2]
+  raise(IOError, "Database #{database} not writable or doesn't exist.") unless File.writable?(database)
+  import_visit(raw_directory, scan_procedure_codename, database)
+end

data/lib/metamri.rb ADDED

@@ -0,0 +1,6 @@
+require 'raw_image_file'
+require 'raw_image_dataset'
+require 'visit_raw_data_directory'
+module Metamri  # Empty namespace; requiring this file loads the three library files listed above.
+end

data/lib/mysql_tools.rb ADDED

@@ -0,0 +1,33 @@
+require 'mysql'
+class Mysql
+  def summary
+    self.list_tables.each do |tbl|
+      next if tbl =~ /^tws/
+      puts "+" * 160
+      puts "%80s" % tbl
+      puts "+" * 160
+      columns = self.query("select * from #{tbl}").fetch_hash.keys
+      columns.in_chunks_of(6).each do |chunk|
+        puts "%-25s " * chunk.size % chunk
+      end
+      puts "\n\n"
+    end
+  end
+end
+class Array
+  def chunks(number_of_chunks)
+    chunks_of( (self.size/number_of_chunks.to_f).ceil )
+  end
+  def in_chunks_of(chunk_size)
+    nchunks = (self.size/chunk_size.to_f).ceil
+    chunks = Array.new(nchunks) { [] }
+    self.each_with_index do |item,index|
+      chunks[ index/chunk_size ] << item
+    end
+    return chunks
+  end
+end

data/lib/raw_image_dataset.rb ADDED

@@ -0,0 +1,147 @@
+require 'rubygems'
+require 'sqlite3'
+=begin rdoc
+A #Dataset defines a single 3D or 4D image, i.e. either a volume or a time series
+of volumes.  This encapsulation will provide easy manipulation of groups of raw
+image files including basic reconstruction.
+=end
+class RawImageDataset
+  # The directory that contains all the raw images and related files that make up
+  # this data set.
+  attr_reader :directory
+  # An array of #RawImageFile objects that compose the complete data set.
+  attr_reader :raw_image_files
+  # From the first raw image file in the dataset
+  attr_reader :series_description
+  # From the first raw image file in the dataset
+  attr_reader :rmr_number
+  # From the first raw image file in the dataset
+  attr_reader :timestamp
+  # A key string unique to a dataset composed of the rmr number and the timestamp.
+  attr_reader :dataset_key
+  # the file scanned
+  attr_reader :scanned_file
+  # the scanner source
+  attr_reader :scanner_source
+=begin rdoc
+  * dir: The directory containing the files.
+  * files: An array of #RawImageFile objects that compose the complete data set.
+  Initialization raises errors in several cases:
+  * directory doesn't exist => IOError
+  * any of the raw image files is not actually a RawImageFile => IndexError
+  * series description, rmr number, or timestamp cannot be extracted from the first RawImageFile => IndexError
+=end
+  def initialize(directory, raw_image_files)
+    @directory = File.expand_path(directory)
+    raise(IOError, "#{@directory} not found.") if not File.directory?(@directory)
+    raise(IOError, "No raw image files supplied.") if (raw_image_files.nil? or raw_image_files.empty?)
+    raw_image_files.each do |im|
+      raise(IndexError, im.to_s + " is not a RawImageFile") if im.class.to_s != "RawImageFile"
+    end
+    @raw_image_files = raw_image_files
+    @series_description = @raw_image_files.first.series_description
+    raise(IndexError, "No series description found") if @series_description.nil?
+    @rmr_number = @raw_image_files.first.rmr_number
+    raise(IndexError, "No rmr found") if @rmr_number.nil?
+    @timestamp = get_earliest_timestamp
+    raise(IndexError, "No timestamp found") if @timestamp.nil?
+    @dataset_key = @rmr_number + "::" + @timestamp.to_s
+    @scanned_file = @raw_image_files.first.filename
+    raise(IndexError, "No scanned file found") if @scanned_file.nil?
+    @scanner_source = @raw_image_files.first.source
+    raise(IndexError, "No scanner source found") if @scanner_source.nil?
+  end
+=begin rdoc
+Generates an SQL insert statement for this dataset that can be used to populate
+the Johnson Lab rails TransferScans application database backend.  The motivation
+for this is that many dataset inserts can be collected into one db transaction
+at the visit level, or even higher when doing a whole file system scan.
+=end
+  def db_insert(visit_id)
+    "INSERT INTO image_datasets
+    (rmr, series_description, path, timestamp, created_at, updated_at, visit_id,
+    glob, rep_time, bold_reps, slices_per_volume, scanned_file)
+    VALUES ('#{@rmr_number}', '#{@series_description}', '#{@directory}', '#{@timestamp.to_s}', '#{DateTime.now}',
+    '#{DateTime.now}', '#{visit_id}', '#{self.glob}', '#{@raw_image_files.first.rep_time}',
+    '#{@raw_image_files.first.bold_reps}', '#{@raw_image_files.first.num_slices}', '#{@scanned_file}')"
+  end
+  def db_update(dataset_id)
+    "UPDATE image_datasets SET
+     rmr = '#{@rmr_number}',
+     series_description = '#{@series_description}',
+     path = '#{@directory}',
+     timestamp = '#{@timestamp.to_s}',
+     updated_at = '#{DateTime.now.to_s}',
+     glob = '#{self.glob}',
+     rep_time = '#{@raw_image_files.first.rep_time}',
+     bold_reps = '#{@raw_image_files.first.bold_reps}',
+     slices_per_volume = '#{@raw_image_files.first.num_slices}',
+     scanned_file = '#{@scanned_file}'
+     WHERE id = '#{dataset_id}'"
+  end
+  def db_fetch
+    "SELECT * FROM image_datasets
+     WHERE rmr = '#{@rmr_number}'
+     AND path = '#{@directory}'
+     AND timestamp LIKE '#{@timestamp.to_s.split(/\+|Z/).first}%'"
+  end
+  def attributes_for_active_record
+    { :rmr => @rmr_number,
+      :series_description => @series_description,
+      :path => @directory,
+      :timestamp => @timestamp.to_s,
+      :glob => glob,
+      :rep_time => @raw_image_files.first.rep_time,
+      :bold_reps => @raw_image_files.first.bold_reps,
+      :slices_per_volume => @raw_image_files.first.num_slices,
+      :scanned_file => @scanned_file }
+  end
+=begin rdoc
+Returns a globbing wildcard that is used by to3D to gather files for
+reconstruction.  If no compatible glob is found for the data set, nil is returned.
+This is always the case for pfiles. For example if the first file in a data set is I.001, then:
+<tt>dataset.glob</tt>
+<tt>=> "I.*"</tt>
+including the quotes, which are necessary becuase some data sets (functional dicoms)
+have more component files than shell commands can handle.
+=end
+  def glob
+    case @raw_image_files.first.filename
+    when /^E.*dcm$/
+      return 'E*.dcm'
+    when /\.dcm$/
+      return '*.dcm'
+    when /^I\./
+      return 'I.*'
+    when /^I/
+      return 'I*.dcm'
+    when /\.0/
+      return '*.0*'
+    else
+      return nil
+    end
+  end
+private
+  # Gets the earliest timestamp among the raw image files in this dataset.
+  def get_earliest_timestamp
+    @timestamp = (@raw_image_files.sort_by { |i| i.timestamp }).first.timestamp
+  end
+end
+#### END OF CLASS ####