RubyGems - metamri - Versions diffs - 0.1.0 - Mend

metamri 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

data/.gitignore +3 -0
data/Manifest +16 -0
data/README.rdoc +43 -0
data/Rakefile +34 -0
data/VERSION +1 -0
data/bin/import_study.rb +170 -0
data/bin/import_visit.rb +74 -0
data/lib/metamri.rb +6 -0
data/lib/mysql_tools.rb +33 -0
data/lib/raw_image_dataset.rb +147 -0
data/lib/raw_image_file.rb +418 -0
data/lib/series_description_parameters.rb +81 -0
data/lib/visit_raw_data_directory.rb +395 -0
data/metamri.gemspec +61 -0
data/test/raw_image_dataset_test.rb +46 -0
data/test/raw_image_file_test.rb +135 -0
data/test/visit_duplication_test.rb +24 -0
data/test/visit_test.rb +77 -0
metadata +76 -0

data/.gitignore ADDED

@@ -0,0 +1,3 @@
+*.log
+*.gem
+._*

data/Manifest ADDED

@@ -0,0 +1,16 @@
+bin/import_study.rb
+bin/import_visit.rb
+ImageData.gemspec
+lib/metamri.rb
+lib/mysql_tools.rb
+lib/raw_image_dataset.rb
+lib/raw_image_file.rb
+lib/series_description.rb
+lib/visit_raw_data_directory.rb
+Manifest
+Rakefile
+README.rdoc
+test/raw_image_dataset_test.rb
+test/raw_image_file_test.rb
+test/visit_duplication_test.rb
+test/visit_test.rb

data/README.rdoc ADDED

@@ -0,0 +1,43 @@
+== ImageData
+A small library that can be used to extract metadata from large collections of research MR imaging data sets.  Support is also provided to insert the metadata into a Wisconsin ADRC Imaging Core compatible database.  Several
+command line utilities are provided as well as a minimal API that is useful for building ruby on rails rake tasks.
+You will most likely be interested in either:
+= import_visit.rb CLU
+== Synopsis
+ A simple utility for importing imaging data collected during one visit into the WADRC Data Tools web
+ application.  Data from a visit is contained in one big directory that may have many subdirectories.
+ Each individual imaging scan may be composed of an entire directory of dicom files or one single p-file.
+ This utility scans through all of the image data sets and retrieves meta-data about the scans from their
+ header information.
+== Examples
+ import_visit.rb /path/to/raw/mri/data study.codename /path/to/db/db.sqlite3
+== Usage
+ import_visit.rb <raw_data_directory> <scan_procedure_codename> <database_file>
+ For help use: import_visit.rb -h
+== Options
+ -h, --help          Displays help message
+ -v, --visit         Visit raw data directory, absolute path
+ -p, --scan_procedure      scan_procedure codename, e.g. johnson.alz.visit1
+ -d, --database      Database file into which information will be imported
+== Author
+ K.J. Kosmatka, kk4@medicine.wisc.edu
+== Copyright
+ Copyright (c) 2009 WADRC Imaging Core.
+or:
+= VisitRawDirectory class
+ see the doc directory

data/Rakefile ADDED

@@ -0,0 +1,34 @@
+#
+# To change this template, choose Tools | Templates
+# and open the template in the editor.
+require 'rubygems'
+require 'rake'
+# require 'echoe'
+#
+# Echoe.new('metamri', '0.1.0') do |p|
+#   p.description    = "Extraction of MRI metadata and insertion into compatible sqlite3 databases."
+#   p.url            = "http://github.com/brainmap/metamri"
+#   p.author         = "Kristopher J. Kosmatka"
+#   p.email          = "kk4@medicine.wisc.edu"
+#   p.ignore_pattern = ["nbproject/*"]
+#   p.development_dependencies = []
+# end
+#
+# Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
+# Define the gem packaging tasks with Jeweler. Jeweler is optional: if it cannot
+# be loaded, the LoadError is rescued and an install hint is printed instead.
+begin
+  require 'jeweler'
+  # Gem metadata handed to Jeweler for the gemspec it builds.
+  Jeweler::Tasks.new do |gemspec|
+    gemspec.name = "metamri"
+    gemspec.summary = "MRI metadata"
+    gemspec.description = "Extraction of MRI metadata and insertion into compatible sqlite3 databases."
+    gemspec.email = "kk4@medicine.wisc.edu"
+    gemspec.homepage = "http://github.com/brainmap/metamri"
+    gemspec.authors = ["Kristopher J. Kosmatka"]
+  end
+  # Adds Jeweler's Gemcutter tasks alongside the packaging tasks above.
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler not available. Install it with: sudo gem install jeweler"
+end

data/VERSION ADDED

@@ -0,0 +1 @@
+0.1.0

data/bin/import_study.rb ADDED

@@ -0,0 +1,170 @@
+#!/usr/bin/env ruby
+#
+# == Synopsis
+#   A simple utility for importing imaging data for an entire study into the WADRC Data Tools web
+#   application.  Scans each visit within a particular protocol and inserts all the appropriate meta-data
+#   into the given database. Can be run as a command line utility, or the function can be required by other packages.
+#
+# == Examples
+#   import_study.rb alz_1 /path/to/the/rails/db/production.sqlite3
+#
+# == Usage
+#   import_study.rb <study_code> <database_file>
+#
+#   Study codes are one of:
+#      alz_1, alz_2, bendlin_wmad, cms_wais, cms_uwmr, esprit_1, esprit_2, gallagher_pd, pib_pilot, ries_pilot,
+#      ries_1, tbi1000_1, tbi1000_2, tbi1000_3, tbiva, wrap140
+#
+#   For help use: import_study.rb -h
+#
+# == Options
+#   -h, --help          Displays help message
+#
+# == Author
+#   K.J. Kosmatka, kk4@medicine.wisc.edu
+#
+# == Copyright
+#   Copyright (c) 2009 WADRC Imaging Core.
+#
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+require 'visit_raw_data_directory'
+require 'pathname'
+require 'rdoc/usage'
+require 'logger'
+#:stopdoc:
+STUDIES = {
+  :alz_1 =>        { :dir => '/Data/vtrak1/raw/alz_2000',
+                     :logfile => 'alz.visit1.scan.log',
+                     :filter => /^alz...$|^alz..._[AB]/i,
+                     :codename => 'johnson.alz.visit1'
+  },
+  :alz_2 =>        { :dir => '/Data/vtrak1/raw/alz_2000',
+                     :logfile => 'alz.visit2.scan.log',
+                     :filter => /^alz..._2$/,
+                     :codename => 'johnson.alz.visit2'
+  },
+  :bendlin_wmad => { :dir => '/Data/vtrak1/raw/bendlin_WMAD/ge3T_750_scanner',
+                     :logfile => 'bendlin.wmad.scan.log',
+                     :filter => /^wmad/,
+                     :codename => 'bendlin.wmad.visit1'
+  },
+  :cms_wais =>     { :dir => '/Data/vtrak1/raw/cms/wais',
+                     :logfile => 'cms.wais.scan.log',
+                     :filter => /^pc/,
+                     :codename => 'johnson.cms.visit1.wais'
+  },
+  :cms_uwmr =>     { :dir => '/Data/vtrak1/raw/cms/uwmr',
+                     :logfile => 'cms.uwmr.scan.log',
+                     :filter => /^cms...$/,
+                     :codename => 'johnson.cms.visit1.uwmr'
+  },
+  :esprit_1 =>     { :dir => '/Data/vtrak1/raw/esprit/baseline',
+                     :logfile => 'esprit.baseline.scan.log',
+                     :filter => /^esp3/,
+                     :codename => 'carlsson.esprit.visit1.baseline'
+  },
+  :esprit_2 =>     { :dir => '/Data/vtrak1/raw/esprit/9month',
+                     :logfile => 'esprit.9month.scan.log',
+                     :filter => /^esp3/,
+                     :codename => 'carlsson.esprit.visit2.9month'
+  },
+  :gallagher_pd => { :dir => '/Data/vtrak1/raw/gallagher.pd',
+                     :logfile => 'gallagher.scan.log',
+                     :filter => /^pd..._/,
+                     :codename => 'gallagher.pd.visit1'
+  },
+  :pib_pilot =>    { :dir => '/Data/vtrak1/raw/pib_pilot_mri',
+                     :logfile => 'pib.mri.pilot.scan.log',
+                     :filter => /^cpr0/,
+                     :codename => 'johnson.pibmripilot.visit1.uwmr'
+  },
+  :ries_1 =>       { :dir => '/Data/vtrak1/raw/ries.aware.visit1',
+                     :logfile => 'ries.aware.visit1.scan.log',
+                     :filter => /^awr0/,
+                     :codename => 'ries.aware.visit1'
+  },
+  :ries_pilot =>   { :dir => '/Data/vtrak1/raw/ries.aware.visit1',
+                     :logfile => 'ries.aware.pilot.scan.log',
+                     :filter => /^awrP/,
+                     :codename => 'ries.aware.pilot'
+  },
+  :tbi1000_1 =>    { :dir => '/Data/vtrak1/raw/tbi_1000',
+                     :logfile => 'tbi1000.visit1.scan.log',
+                     :filter => /^tbi...$/,
+                     :codename => 'johnson.tbi1000.visit1'
+  },
+  :tbi1000_2 =>    { :dir => '/Data/vtrak1/raw/tbi_1000',
+                     :logfile => 'tbi1000.visit2.scan.log',
+                     :filter => /^tbi..._2/,
+                     :codename => 'johnson.tbi1000.visit2'
+  },
+  :tbi1000_3 =>    { :dir => '/Data/vtrak1/raw/johnson.tbi.aware.visit3',
+                     :logfile => 'tbiaware.visit3.scan.log',
+                     :filter => /^tbi..._3$/,
+                     :codename => 'johnson.tbiaware.visit3'
+  },
+  :tbiva =>        { :dir => '/Data/vtrak1/raw/johnson.tbi-va.visit1',
+                     :logfile => 'tbiva.scan.log',
+                     :filter => /^tbi/,
+                     :codename => 'johnson.tbiva.visit1'
+  },
+  :wrap140 =>      { :dir => '/Data/vtrak1/raw/wrap140',
+                     :logfile => 'wrap140.scan.log',
+                     :filter => /^wrp/,
+                     :codename => 'johnson.wrap140.visit1'
+  }
+}
+#:startdoc:
+# == Function
+#   Imports an entire study: every entry of the study directory whose name
+#   matches the study filter is scanned and inserted into the database.
+#   A failure while scanning or inserting one visit is printed and logged, and
+#   the import continues with the next visit.
+#
+# == Arguments
+# study -- a hash specifying the following keys:
+#   :dir => the directory holding all the individual visit directories for this study
+#   :logfile => a file name where logging can be written
+#   :filter => a regex that matches all of the visit directory names that should be scanned
+#   :codename => the study codename, e.g. 'johnson.alz.visit1'
+#
+# dbfile -- the database into which meta-data will be inserted
+#
+def import_study(study, dbfile)
+  studydir = Pathname.new(study[:dir])
+  # Positional Logger arguments: shift_age of 7 and shift_size of 1048576 bytes.
+  log = Logger.new(study[:logfile], 7, 1048576)
+  studydir.entries.each do |visit|
+    visit_name = visit.to_s
+    # Skip dot entries (".", ".." and hidden files) and anything outside this study.
+    next if visit_name =~ /^\./
+    next unless visit_name =~ study[:filter]
+    visitdir = studydir + visit
+    v = VisitRawDataDirectory.new( visitdir.to_s, study[:codename] )
+    begin
+      v.scan
+      v.db_insert!(dbfile)
+    rescue StandardError => e
+      # Only ordinary errors are skipped; Interrupt and SystemExit still
+      # propagate so a long-running import can be aborted.
+      puts "There was a problem scanning a dataset in #{visitdir}... skipping."
+      puts "Exception message: #{e.message}"
+      log.error "There was a problem scanning a dataset in #{visitdir}... skipping."
+      log.error "Exception message: #{e.message}"
+    end
+  end
+end
+# Command line entry point; runs only when this file is executed directly,
+# not when it is required by another script.
+if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
+  # Show the usage text for -h or for a wrong number of arguments.
+  RDoc::usage() if ARGV[0] == '-h' || ARGV.size != 2
+  study_code = ARGV[0]
+  study = STUDIES[study_code.to_sym]
+  raise(IndexError, "Study Not Recognized.") if study.nil?
+  dbfile = ARGV[1]
+  raise(IOError, "DB File not writable or not existant") unless File.writable?(dbfile)
+  begin
+    import_study(study, dbfile)
+  rescue IndexError, IOError => e
+    # Report the study code that was requested rather than dumping the whole settings hash.
+    puts "There was an error importing study #{study_code}. #{e}"
+    raise
+  end
+end

data/bin/import_visit.rb ADDED

@@ -0,0 +1,74 @@
+#!/usr/bin/env ruby
+#
+# == Synopsis
+#   A simple utility for importing imaging data collected during one visit into the WADRC Data Tools web
+#   application.  Data from a visit is contained in one big directory that may have many subdirectories.
+#   Each individual imaging scan may be composed of an entire directory of dicom files or one single p-file.
+#   This utility scans through all of the image data sets and retrieves meta-data about the scans from their
+#   header information.
+#
+# == Examples
+#   import_visit.rb /Data/vtrak1/raw/alz_2000/alz001 johnson.alz.visit1 /path/to/the/rails/db/production.sqlite3
+#   import_visit.rb /Data/vtrak1/raw/wrap140/wrp001_5917_03042008 johnson.wrap140.visit1 /path/to/the/rails/db/production.sqlite3
+#
+# == Usage
+#   import_visit.rb <raw_data_directory> <scan_procedure_codename> <database_file>
+#
+#   For help use: import_visit.rb -h
+#
+# == Options
+#   -h, --help          Displays help message
+#   -v, --visit         Visit raw data directory, absolute path
+#   -p, --scan_procedure      scan_procedure codename, e.g. johnson.alz.visit1
+#   -d, --database      Database file into which information will be imported
+#
+# == Author
+#   K.J. Kosmatka, kk4@medicine.wisc.edu
+#
+# == Copyright
+#   Copyright (c) 2009 WADRC Imaging Core.
+#
+$:.unshift File.join(File.dirname(__FILE__),'..','lib')
+require 'visit_raw_data_directory'
+require 'pathname'
+require 'rdoc/usage'
+require 'logger'
+# == Function
+#   Imports imaging data collected during a single visit into the WADRC Data Tools web application database.
+#   A failure while scanning or inserting is printed and written to a log file
+#   named after the last component of raw_directory; it is not re-raised.
+#
+# == Usage
+#   import_visit(raw_directory, scan_procedure_codename, database)
+#
+# == Example
+#   import_visit('/Data/vtrak1/raw/alz_2000/alz001','johnson.alz.visit1','/path/to/the/rails/db/production.sqlite3')
+#
+def import_visit(raw_directory, scan_procedure_codename, database)
+  log = Logger.new(File.basename(raw_directory))
+  v = VisitRawDataDirectory.new(raw_directory, scan_procedure_codename)
+  puts "+++ Importing #{v.visit_directory} as part of #{v.scan_procedure_name} +++"
+  begin
+    v.scan
+    v.db_insert!(database)
+  rescue StandardError => e
+    # Only ordinary errors are reported and swallowed; Interrupt and SystemExit
+    # still propagate so the import can be aborted.
+    puts "There was a problem scanning a dataset in #{v.visit_directory}... skipping."
+    puts "Exception message: #{e.message}"
+    log.error "There was a problem scanning a dataset in #{v.visit_directory}... skipping."
+    log.error "Exception message: #{e.message}"
+  end
+end
+if File.basename(__FILE__) == File.basename($PROGRAM_NAME)
+  RDoc::usage() if (ARGV[0] == '-h' or ARGV.size != 3)
+  raw_directory = ARGV[0]
+  scan_procedure_codename = ARGV[1]
+  database = ARGV[2]
+  raise(IOError, "Database #{database} not writable or doesn't exist.") unless File.writable?(database)
+  import_visit(raw_directory, scan_procedure_codename, database)
+end

data/lib/metamri.rb ADDED

@@ -0,0 +1,6 @@
+require 'raw_image_file'
+require 'raw_image_dataset'
+require 'visit_raw_data_directory'
+# Top-level namespace for the metamri gem. The module body is empty here;
+# requiring this file loads raw_image_file, raw_image_dataset and
+# visit_raw_data_directory via the require lines above.
+module Metamri
+end

data/lib/mysql_tools.rb ADDED

@@ -0,0 +1,33 @@
+require 'mysql'
+class Mysql
+  def summary
+    self.list_tables.each do |tbl|
+      next if tbl =~ /^tws/
+      puts "+" * 160
+      puts "%80s" % tbl
+      puts "+" * 160
+      columns = self.query("select * from #{tbl}").fetch_hash.keys
+      columns.in_chunks_of(6).each do |chunk|
+        puts "%-25s " * chunk.size % chunk
+      end
+      puts "\n\n"
+    end
+  end
+end
+class Array
+  def chunks(number_of_chunks)
+    chunks_of( (self.size/number_of_chunks.to_f).ceil )
+  end
+  def in_chunks_of(chunk_size)
+    nchunks = (self.size/chunk_size.to_f).ceil
+    chunks = Array.new(nchunks) { [] }
+    self.each_with_index do |item,index|
+      chunks[ index/chunk_size ] << item
+    end
+    return chunks
+  end
+end

data/lib/raw_image_dataset.rb ADDED

@@ -0,0 +1,147 @@
+require 'rubygems'
+require 'sqlite3'
+=begin rdoc
+A #Dataset defines a single 3D or 4D image, i.e. either a volume or a time series
+of volumes.  This encapsulation will provide easy manipulation of groups of raw
+image files including basic reconstruction.
+=end
+class RawImageDataset
+  # The directory that contains all the raw images and related files that make up
+  # this data set.
+  attr_reader :directory
+  # An array of #RawImageFile objects that compose the complete data set.
+  attr_reader :raw_image_files
+  # From the first raw image file in the dataset
+  attr_reader :series_description
+  # From the first raw image file in the dataset
+  attr_reader :rmr_number
+  # From the first raw image file in the dataset
+  attr_reader :timestamp
+  # A key string unique to a dataset composed of the rmr number and the timestamp.
+  attr_reader :dataset_key
+  # the file scanned
+  attr_reader :scanned_file
+  # the scanner source
+  attr_reader :scanner_source
+=begin rdoc
+  * dir: The directory containing the files.
+  * files: An array of #RawImageFile objects that compose the complete data set.
+  Initialization raises errors in several cases:
+  * directory doesn't exist => IOError
+  * any of the raw image files is not actually a RawImageFile => IndexError
+  * series description, rmr number, or timestamp cannot be extracted from the first RawImageFile => IndexError
+=end
+  def initialize(directory, raw_image_files)
+    @directory = File.expand_path(directory)
+    raise(IOError, "#{@directory} not found.") if not File.directory?(@directory)
+    raise(IOError, "No raw image files supplied.") if (raw_image_files.nil? or raw_image_files.empty?)
+    raw_image_files.each do |im|
+      raise(IndexError, im.to_s + " is not a RawImageFile") if im.class.to_s != "RawImageFile"
+    end
+    @raw_image_files = raw_image_files
+    @series_description = @raw_image_files.first.series_description
+    raise(IndexError, "No series description found") if @series_description.nil?
+    @rmr_number = @raw_image_files.first.rmr_number
+    raise(IndexError, "No rmr found") if @rmr_number.nil?
+    @timestamp = get_earliest_timestamp
+    raise(IndexError, "No timestamp found") if @timestamp.nil?
+    @dataset_key = @rmr_number + "::" + @timestamp.to_s
+    @scanned_file = @raw_image_files.first.filename
+    raise(IndexError, "No scanned file found") if @scanned_file.nil?
+    @scanner_source = @raw_image_files.first.source
+    raise(IndexError, "No scanner source found") if @scanner_source.nil?
+  end
+=begin rdoc
+Generates an SQL insert statement for this dataset that can be used to populate
+the Johnson Lab rails TransferScans application database backend.  The motivation
+for this is that many dataset inserts can be collected into one db transaction
+at the visit level, or even higher when doing a whole file system scan.
+=end
+  def db_insert(visit_id)
+    "INSERT INTO image_datasets
+    (rmr, series_description, path, timestamp, created_at, updated_at, visit_id,
+    glob, rep_time, bold_reps, slices_per_volume, scanned_file)
+    VALUES ('#{@rmr_number}', '#{@series_description}', '#{@directory}', '#{@timestamp.to_s}', '#{DateTime.now}',
+    '#{DateTime.now}', '#{visit_id}', '#{self.glob}', '#{@raw_image_files.first.rep_time}',
+    '#{@raw_image_files.first.bold_reps}', '#{@raw_image_files.first.num_slices}', '#{@scanned_file}')"
+  end
+  def db_update(dataset_id)
+    "UPDATE image_datasets SET
+     rmr = '#{@rmr_number}',
+     series_description = '#{@series_description}',
+     path = '#{@directory}',
+     timestamp = '#{@timestamp.to_s}',
+     updated_at = '#{DateTime.now.to_s}',
+     glob = '#{self.glob}',
+     rep_time = '#{@raw_image_files.first.rep_time}',
+     bold_reps = '#{@raw_image_files.first.bold_reps}',
+     slices_per_volume = '#{@raw_image_files.first.num_slices}',
+     scanned_file = '#{@scanned_file}'
+     WHERE id = '#{dataset_id}'"
+  end
+  def db_fetch
+    "SELECT * FROM image_datasets
+     WHERE rmr = '#{@rmr_number}'
+     AND path = '#{@directory}'
+     AND timestamp LIKE '#{@timestamp.to_s.split(/\+|Z/).first}%'"
+  end
+  def attributes_for_active_record
+    { :rmr => @rmr_number,
+      :series_description => @series_description,
+      :path => @directory,
+      :timestamp => @timestamp.to_s,
+      :glob => glob,
+      :rep_time => @raw_image_files.first.rep_time,
+      :bold_reps => @raw_image_files.first.bold_reps,
+      :slices_per_volume => @raw_image_files.first.num_slices,
+      :scanned_file => @scanned_file }
+  end
+=begin rdoc
+Returns a globbing wildcard that is used by to3D to gather files for
+reconstruction.  If no compatible glob is found for the data set, nil is returned.
+This is always the case for pfiles. For example if the first file in a data set is I.001, then:
+<tt>dataset.glob</tt>
+<tt>=> "I.*"</tt>
+including the quotes, which are necessary becuase some data sets (functional dicoms)
+have more component files than shell commands can handle.
+=end
+  def glob
+    case @raw_image_files.first.filename
+    when /^E.*dcm$/
+      return 'E*.dcm'
+    when /\.dcm$/
+      return '*.dcm'
+    when /^I\./
+      return 'I.*'
+    when /^I/
+      return 'I*.dcm'
+    when /\.0/
+      return '*.0*'
+    else
+      return nil
+    end
+  end
+private
+  # Gets the earliest timestamp among the raw image files in this dataset.
+  def get_earliest_timestamp
+    @timestamp = (@raw_image_files.sort_by { |i| i.timestamp }).first.timestamp
+  end
+end
+#### END OF CLASS ####