RubyGems - bio-vcf - Versions diffs - 0.8.1 → 0.9.5 - Mend

bio-vcf 0.8.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

checksums.yaml +5 -5
data/.travis.yml +1 -11
data/Gemfile +2 -8
data/LICENSE.txt +1 -1
data/README.md +467 -129
data/RELEASE_NOTES.md +27 -0
data/RELEASE_NOTES.md~ +11 -0
data/Rakefile +9 -42
data/TAGS +115 -0
data/VERSION +1 -1
data/bin/bio-vcf +156 -108
data/bio-vcf.gemspec +13 -75
data/features/cli.feature +22 -4
data/features/diff_count.feature +0 -1
data/features/filter.feature +12 -0
data/features/multisample.feature +12 -0
data/features/somaticsniper.feature +2 -0
data/features/step_definitions/cli-feature.rb +15 -6
data/features/step_definitions/diff_count.rb +1 -1
data/features/step_definitions/multisample.rb +19 -0
data/features/step_definitions/somaticsniper.rb +9 -1
data/features/step_definitions/vcf_header.rb +48 -0
data/features/support/env.rb +1 -11
data/features/vcf_header.feature +35 -0
data/lib/bio-vcf.rb +1 -0
data/lib/bio-vcf/pcows.rb +303 -0
data/lib/bio-vcf/vcffile.rb +46 -0
data/lib/bio-vcf/vcfgenotypefield.rb +19 -19
data/lib/bio-vcf/vcfheader.rb +137 -5
data/lib/bio-vcf/vcfheader_line.rb +778 -0
data/lib/bio-vcf/vcfrecord.rb +56 -18
data/lib/bio-vcf/vcfsample.rb +26 -2
data/lib/regressiontest.rb +11 -0
data/lib/regressiontest/cli_exec.rb +101 -0
data/ragel/gen_vcfheaderline_parser.rl +165 -0
data/ragel/generate.sh +8 -0
data/template/vcf2json.erb +16 -16
data/template/vcf2json_full_header.erb +22 -0
data/template/vcf2json_use_meta.erb +41 -0
data/test/data/input/empty.vcf +2 -0
data/test/data/input/gatk_exome.vcf +237 -0
data/test/data/input/gatk_wgs.vcf +1000 -0
data/test/data/input/test.bed +632 -0
data/test/data/regression/empty-stderr.new +12 -0
data/test/data/regression/empty.new +2 -0
data/test/data/regression/empty.ref +2 -0
data/test/data/regression/eval_once-stderr.new +2 -0
data/test/data/regression/eval_once.new +1 -0
data/test/data/regression/eval_once.ref +1 -0
data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
data/test/data/regression/eval_r.info.dp.new +150 -0
data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
data/test/data/regression/ifilter_s.dp.new +31 -0
data/test/data/regression/pass1-stderr.new +10 -0
data/test/data/regression/pass1.new +88 -0
data/test/data/regression/pass1.ref +88 -0
data/test/data/regression/r.info.dp-stderr.new +4 -0
data/test/data/regression/r.info.dp.new +114 -0
data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
data/test/data/regression/rewrite.info.sample.new +150 -0
data/test/data/regression/s.dp-stderr.new +18 -0
data/test/data/regression/s.dp.new +145 -0
data/test/data/regression/seval_s.dp-stderr.new +10 -0
data/test/data/regression/seval_s.dp.new +36 -0
data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
data/test/data/regression/sfilter_seval_s.dp.new +31 -0
data/test/data/regression/thread4-stderr.new +10 -0
data/test/data/regression/thread4.new +150 -0
data/test/data/regression/thread4_4-stderr.new +25 -0
data/test/data/regression/thread4_4.new +130 -0
data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
data/test/data/regression/thread4_4_failed_filter.new +110 -0
data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
data/test/data/regression/vcf2json_full_header.new +225 -0
data/test/data/regression/vcf2json_full_header.ref +225 -0
data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
data/test/data/regression/vcf2json_use_meta.new +4697 -0
data/test/data/regression/vcf2json_use_meta.ref +4697 -0
data/test/performance/metrics.md +18 -1
data/test/stress/stress_test.sh +15 -0
data/test/tmp/test.vcf +12469 -0
metadata +63 -64
data/Gemfile.lock +0 -81

data/lib/bio-vcf/vcfrecord.rb CHANGED

@@ -13,38 +13,49 @@ module BioVcf
       end
     end
-    # Set INFO fields (used by --rewrite)
-    def []= k, v
-      split_fields if not @h
-      kupper = k.upcase
-      @h[kupper] = v
-      @original_key[kupper] = k
-    end
-    def method_missing(m, *args, &block)
+    def [] k
       # split_fields if not @h
       # /#{m}=(?<value>[^;])/.@info
+      kupper = k.upcase
       v = if @h
-            @h[m.to_s.upcase]
+            @h[kupper]
           else
-            @info =~ /#{m.to_s}=([^;]+)/i
+            @info =~ /[\A;]#{k}=([^;]+)/i
             value = $1
             # p [m,value]
             # m = @info.match(/#{m.to_s.upcase}=(?<value>[^;]+)/) slower!
             # value = m[:value]
             if value == nil
               split_fields # no option but to split
-              @h[m.to_s.upcase]
+              @h[kupper]
             else
               value
             end
           end
       ConvertStringToValue::convert(v)
+    end
+    # Set INFO fields (used by --rewrite)
+    def []= k, v
+      split_fields if not @h
+      kupper = k.upcase
+      @h[kupper] = v
+      @original_key[kupper] = k
+    end
+    # Return the keys of the INFO hash, splitting the INFO string first
+    def fields
+      split_fields
+      @h.keys
+    end
+    def method_missing(m, *args, &block)
+      self[m.to_s]
     end
   private
     def split_fields
+      return @h if @h
       @h = {}
       @original_key = {}
       @info.split(/;/).each do |f|
@@ -151,6 +162,10 @@ module BioVcf
       @qual ||= @fields[5].to_f
     end
+    def filter
+      @filter ||= @fields[6]
+    end
     def info
       @info ||= VcfRecordParser.get_info(@fields[7])
     end
@@ -184,15 +199,21 @@ module BioVcf
     end
     def sample_by_index i
-      # p @fields
       raise "Can not index sample on parameter <#{i}>" if not i.kind_of?(Integer)
       @sample_by_index[i] ||= VcfGenotypeField.new(@fields[i+9],format,@header,ref,alt)
     end
     # Walk the samples. list contains an Array of int (the index)
     def each_sample(list = nil)
-      list = @header.samples_index_array() if not list
-      list.each { |i| yield VcfSample::Sample.new(self,sample_by_index(i.to_i)) }
+      @header.sample_subset_index(list).each { |i|
+        yield VcfSample::Sample.new(self,sample_by_index(i))
+      }
+    end
+    # Collect every sample yielded by each_sample into an Array
+    def samples
+      collected = []
+      each_sample { |sample| collected.push(sample) }
+      collected
     end
     def missing_samples?
@@ -229,6 +250,7 @@ module BioVcf
           $stderr.print "RECORD ERROR!\n"
           $stderr.print [@fields],"\n"
           $stderr.print expr,"\n"
+          $stderr.print "To ignore this error use the -i switch!\n"
         end
         if ignore_missing_data
           $stderr.print e.message if not quiet
@@ -239,19 +261,19 @@ module BioVcf
       end
     end
-    def filter expr, ignore_missing_data: true, quiet: false
+    def gfilter expr, ignore_missing_data: true, quiet: false
       begin
         if not respond_to?(:call_cached_filter)
           code =
           """
-          def call_cached_filter(rec,fields)
+          def call_cached_gfilter(rec,fields)
             r = rec
             #{expr}
           end
           """
           self.class.class_eval(code)
         end
-        res = call_cached_filter(self,@fields)
+        res = call_cached_gfilter(self,@fields)
         if res.kind_of?(Array)
           res.join("\t")
         else
@@ -262,6 +284,7 @@ module BioVcf
           $stderr.print "RECORD ERROR!\n"
           $stderr.print [@fields],"\n"
           $stderr.print expr,"\n"
+          $stderr.print "To ignore this error use the -i switch!\n"
         end
         if ignore_missing_data
           $stderr.print e.message if not quiet
@@ -272,6 +295,21 @@ module BioVcf
       end
     end
+    def add_to_filter_field str
+      filter = @fields[6]
+      if not filter or filter == '.' or filter == 'PASS'
+        filter = str
+      else
+        values = filter.split(/;/)
+        if not values.include?(str)
+          filter = filter +';'+str
+        end
+      end
+      filter = '.' if filter == nil or filter == ''
+      @fields[6] = filter
+      filter
+    end
     # Return the sample
     def method_missing(m, *args, &block)
       name = m.to_s

data/lib/bio-vcf/vcfsample.rb CHANGED

@@ -3,7 +3,7 @@ module BioVcf
     # Check whether a sample is empty (on the raw string value)
     def VcfSample::empty? s
-      s==nil or s == './.' or s == '' or s[0..2]=='./.'
+      s==nil or s == './.' or s == '' or s[0..2]=='./.' or s[0..1] == '.:'
     end
     class Sample
@@ -40,9 +40,24 @@ module BioVcf
       # Split GT into index values
       def gti
         v = fetch_values("GT")
+        v = './.' if v == '.' #In case that you have a single missing value, make both as missing.
         v.split(/[\/\|]/).map{ |v| (v=='.' ? nil : v.to_i) }
       end
+      def gtindex
+        v = fetch_values("GT")
+        return case v
+               when nil then nil
+               when '.' then nil
+               when './.' then nil
+               when '0/0' then 0
+               when '0/1' then 1
+               when '1/1' then 2
+               else
+                 raise "Unknown genotype #{v}"
+               end
+      end
       # Split GT into a nucleotide sequence
       def gts
         gti.map { |i| (i ? @rec.get_gt(i) : nil) }
@@ -51,7 +66,16 @@ module BioVcf
       def cache_method(name, &block)
         self.class.send(:define_method, name, &block)
       end
+      def [] name
+        if @format[name]
+          v = fetch_values(name)
+          return nil if VcfValue::empty?(v)
+          return ConvertStringToValue::convert(v)
+        end
+        nil
+      end
       def method_missing(m, *args, &block)
         name = m.to_s.upcase
         # p [:here,name,m ,@values]

data/lib/regressiontest.rb ADDED

@@ -0,0 +1,11 @@
+# Please require your code below, respecting the naming conventions in the
+# bioruby directory tree.
+#
+# For example, say you have a plugin named bio-plugin, the only uncommented
+# line in this file would be
+#
+#   require 'bio/bio-plugin/plugin'
+#
+# In this file only require other files. Avoid other source code.
+require 'regressiontest/cli_exec'

data/lib/regressiontest/cli_exec.rb ADDED

@@ -0,0 +1,101 @@
+require 'fileutils'
+module RegressionTest
+  DEFAULT_TESTDIR = "test/data/regression"
+  # Regression test runner compares output in ./test/data/regression
+  # (by default).  The convention is to have a file with names .ref
+  # (reference) and create .new
+  #
+  # You can add an :ignore regex option which ignores lines in the
+  # comparison files matching a regex
+  #
+  # :timeout sets the time out for calling a system command
+  #
+  # :should_fail expects the system command to return a non-zero exit status
+  module CliExec
+    FilePair = Struct.new(:outfn,:reffn)
+    def CliExec::exec command, testname, options = {}
+      # ---- Find .ref file
+      fullname = DEFAULT_TESTDIR + "/" + testname
+      basefn = if File.exist?(testname+".ref") || File.exist?(testname+"-stderr.ref")
+                testname
+              elsif File.exist?(fullname + ".ref") || File.exist?(fullname+"-stderr.ref")
+                FileUtils.mkdir_p DEFAULT_TESTDIR
+                fullname
+              else
+                raise "Can not find reference file for #{testname} - expected #{fullname}.ref"
+              end
+      std_out = FilePair.new(basefn + ".new", basefn + ".ref")
+      std_err = FilePair.new(basefn + "-stderr.new", basefn + "-stderr.ref")
+      files = [std_out,std_err]
+      # ---- Create .new file
+      cmd = command + " > #{std_out.outfn} 2>#{std_err.outfn}"
+      $stderr.print cmd,"\n"
+      exec_ret = nil
+      if options[:timeout] && options[:timeout] > 0
+        Timeout.timeout(options[:timeout]) do
+          begin
+            exec_ret = Kernel.system(cmd)
+          rescue Timeout::Error
+            $stderr.print cmd, " failed to finish in under #{options[:timeout]}\n"
+            return false
+          end
+        end
+      else
+        exec_ret = Kernel.system(cmd)
+      end
+      expect_fail = (options[:should_fail] != nil)
+      if !expect_fail and exec_ret==0
+        $stderr.print cmd," returned an error\n"
+        return false
+      end
+      if expect_fail and exec_ret
+        $stderr.print cmd," did not return an error\n"
+        return false
+      end
+      if options[:ignore]
+        regex = options[:ignore]
+        files.each do |f|
+          outfn = f.outfn
+          outfn1 = outfn + ".1"
+          FileUtils.mv(outfn,outfn1)
+          f1 = File.open(outfn1)
+          f2 = File.open(outfn,"w")
+          f1.each_line do | line |
+            f2.print(line) if line !~ /#{regex}/
+          end
+          f1.close
+          f2.close
+          FileUtils::rm(outfn1)
+        end
+      end
+      # ---- Compare the two files
+      files.each do |f|
+        next unless File.exist?(f.reffn)
+        return false unless compare_files(f.outfn,f.reffn,options[:ignore])
+      end
+      return true
+    end
+    def CliExec::compare_files fn1, fn2, ignore = nil
+      if not File.exist?(fn2)
+        FileUtils::cp(fn1,fn2)
+        true
+      else
+        cmd = "diff #{fn2} #{fn1}"
+        $stderr.print cmd+"\n"
+        return true if Kernel.system(cmd) == true
+        # Hmmm. We have a different result. We are going to try again
+        # because sometimes threads have not completed
+        sleep 0.25
+        return true if Kernel.system(cmd) == true
+        $stderr.print "If it is correct, execute \"cp #{fn1} #{fn2}\", and run again"
+        false
+      end
+    end
+  end
+end

data/ragel/gen_vcfheaderline_parser.rl ADDED

@@ -0,0 +1,165 @@
+# Ragel lexer for VCF-header
+#
+# This is a compact parser/lexer for the VCF header format. Bio-vcf
+# uses the parser to generate meta information that can be output to
+# (for example) JSON format. The advantage of using ragel as a state
+# engine is that it allows for easy parsing of key-value pairs with
+# syntax checking and, for example, escaped quotes in quoted string
+# values. This ragel parser/lexer generates valid Ruby; it should be
+# fairly trivial to generate python/C/JAVA instead. Note that this
+# edition validates ID and Number fields only.  Other fields are
+# dumped 'AS IS'.
+#
+# Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
+#
+# by Pjotr Prins (c) 2014/2015
+module BioVcf
+  module VcfHeaderParser
+    module RagelKeyValues
+      def self.debug msg
+        # nothing
+	# $stderr.print "DEBUG: ",msg,"\n"
+      end
+=begin
+%%{
+  machine simple_lexer;
+  action mark { ts=p }
+  action endquoted {
+    emit.call(:value,data,ts,p)
+  }
+  action kw {
+    emit.call(:kw,data,ts,p)
+  }
+  squote = "'";
+  dquote = '"';
+  not_squote_or_escape = [^'\\];
+  not_dquote_or_escape = [^"\\];
+  escaped_something = /\\./;
+  ss = squote ( not_squote_or_escape | escaped_something )* >mark %endquoted squote;
+  dd = dquote ( not_dquote_or_escape | escaped_something )* >mark %endquoted dquote;
+  integer     = ('+'|'-')?digit+;
+  float       = ('+'|'-')?digit+'.'digit+;
+  assignment  = '=';
+  identifier  = ( alnum (alnum|'.'|'_')* );
+  version     = ( digit (alnum|'.'|'_'|'-')* );
+  str         = (ss|dd)* ;
+  boolean     = '.';
+  date        = str;
+  key_word    = ( ('Type'|'Description'|'Source'|identifier - ('ID'|'Number'|'length'|'Version'|'assembly'|'Date'|'CommandLineOptions')) >mark %{ emit.call(:key_word,data,ts,p) } );
+  any_value   = ( str|( integer|float|boolean|identifier >mark %{ emit.call(:value,data,ts,p) } ));
+  id_value   = ( identifier >mark %{ emit.call(:value,data,ts,p) } );
+  version_value  = ( str| ( version >mark %{ emit.call(:value,data,ts,p) } ));
+  date_value  = ( date );
+  gatk_value  = ( str );
+  number_value = ( ( integer|boolean|'A'|'R'|'G' ) >mark %{ emit.call(:value,data,ts,p) } );
+  id_kv     = ( ( ('ID'|'assembly') %kw '=' id_value ) %{ debug("ID FOUND") } @!{ error_code="Malformed ID"} );
+  version_kv = ( ( ('Version') %kw '=' version_value ) @!{ error_code="Version"} );
+  number_kv = ( ( ('Number'|'length') %kw '=' number_value ) @!{ error_code="Number"} );
+  date_kv =  ( ( ('Date') %kw '=' date_value ) %{ debug("DATE FOUND") } @!{ error_code="Date"} );
+  gatk_kv =  ( ( ('CommandLineOptions') %kw '=' gatk_value ) @!{ error_code="GATK"} );
+  key_value = ( id_kv | version_kv | date_kv | number_kv | gatk_kv | (key_word '=' any_value) ) %{ debug("KEY_VALUE found") } >mark @!{ error_code="unknown key-value " };
+  main := ( '##' ('FILTER'|'FORMAT'|'contig'|'INFO'|'ALT'|'GATKCommandLine') '=') (('<'|',') key_value )* '>';
+}%%
+=end
+%% write data;
+# %% this just fixes syntax highlighting...
+# Lex one VCF header line and return the key/value pairs found as a Hash
+# that maps keyword strings to values.
+#
+# buf     - the header line; a String is unpacked into an array of bytes
+# options - pass debug: true to trace the input, the emitted tokens and the
+#           result on stderr
+#
+# Raises a RuntimeError when error_code has been set during lexing.
+def self.run_lexer(buf, options = {})
+  do_debug = (options[:debug] == true)
+  $stderr.print "---> ",buf,"\n" if do_debug
+  # NOTE(review): data stays nil when buf is not a String, in which case
+  # data.length below fails - confirm callers always pass a String
+  data = buf.unpack("c*") if(buf.is_a?(String))
+  eof = data.length
+  values = []
+  stack = []
+  # Callback used by the grammar actions: stores [type, text] for the bytes
+  # between ts and p
+  emit = lambda { |type, data, ts, p|
+    # Print the type and text of the last read token
+    # p ts,p
+    $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
+    values << [type,data[ts...p].pack('c*')]
+  }
+  error_code = nil
+  # Ragel directives - presumably expanded into the state machine setup and
+  # main loop when the .rb version is generated from this .rl file
+  %% write init;
+  %% write exec;
+  raise "ERROR: "+error_code+" in "+buf if error_code
+  begin
+    res = {}
+    # p values
+    # Emitted tokens are consumed two at a time: a keyword token followed
+    # by its value token
+    values.each_slice(2) do | a,b |
+      $stderr.print '*',a,b if do_debug
+      keyword = a[1]
+      value = b[1]
+      # length and Epoch are stored as Integer, everything else as String
+      value = value.to_i if ['length','Epoch'].index(keyword)
+      res[keyword] = value
+      # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
+    end
+  rescue
+    # Dump the collected tokens before re-raising the original error
+    print "ERROR: "
+    p values
+    raise
+  end
+  $stderr.print(res,"\n") if do_debug
+  res
+end
+    end
+  end
+end
+if __FILE__ == $0
+gatkcommandline = <<LINE1
+##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
+LINE1
+h = {}
+s = gatkcommandline.strip
+# print s,"\n"
+result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
+# h[result['ID']] = result
+# p result
+lines = <<LINES
+##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
+##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
+##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property.  Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
+##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id.  The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
+##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical  INFO tags.">
+##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical  INFO tags.">
+##contig=<ID=XXXY12>
+##contig=<ID=Y,length=59373566>
+LINES
+h = {}
+lines.strip.split("\n").each { |s|
+  # print s,"\n"
+  result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
+  h[result['ID']] = result
+  p result
+}
+p h
+raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property.  Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id.  The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical  INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical  INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
+end # test