RubyGems - bio-vcf - Versions diffs - 0.8.0 → 0.9.4 - Mend

bio-vcf 0.8.0 → 0.9.4

Files changed (85) hide show

checksums.yaml +5 -5
data/.travis.yml +1 -11
data/Gemfile +4 -5
data/Gemfile.lock +28 -65
data/LICENSE.txt +1 -1
data/README.md +387 -107
data/RELEASE_NOTES.md +20 -0
data/RELEASE_NOTES.md~ +11 -0
data/Rakefile +3 -40
data/TAGS +115 -0
data/VERSION +1 -1
data/bin/bio-vcf +176 -109
data/bio-vcf.gemspec +14 -70
data/features/cli.feature +22 -4
data/features/diff_count.feature +0 -1
data/features/filter.feature +12 -0
data/features/multisample.feature +25 -0
data/features/somaticsniper.feature +2 -0
data/features/step_definitions/cli-feature.rb +15 -6
data/features/step_definitions/diff_count.rb +1 -1
data/features/step_definitions/multisample.rb +19 -0
data/features/step_definitions/somaticsniper.rb +9 -1
data/features/step_definitions/vcf_header.rb +48 -0
data/features/support/env.rb +0 -9
data/features/vcf_header.feature +35 -0
data/lib/bio-vcf.rb +2 -0
data/lib/bio-vcf/bedfilter.rb +43 -0
data/lib/bio-vcf/pcows.rb +303 -0
data/lib/bio-vcf/template.rb +75 -0
data/lib/bio-vcf/vcffile.rb +46 -0
data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
data/lib/bio-vcf/vcfheader.rb +146 -6
data/lib/bio-vcf/vcfheader_line.rb +778 -0
data/lib/bio-vcf/vcfrecord.rb +56 -18
data/lib/bio-vcf/vcfsample.rb +27 -3
data/ragel/gen_vcfheaderline_parser.rl +165 -0
data/ragel/generate.sh +8 -0
data/template/vcf2json.erb +19 -7
data/template/vcf2json_full_header.erb +22 -0
data/template/vcf2json_use_meta.erb +41 -0
data/template/vcf2rdf_header.erb +24 -0
data/test/data/input/empty.vcf +2 -0
data/test/data/input/gatk_exome.vcf +237 -0
data/test/data/input/gatk_wgs.vcf +1000 -0
data/test/data/input/test.bed +632 -0
data/test/data/regression/empty-stderr.new +12 -0
data/test/data/regression/empty.new +2 -0
data/test/data/regression/empty.ref +2 -0
data/test/data/regression/eval_once-stderr.new +2 -0
data/test/data/regression/eval_once.new +1 -0
data/test/data/regression/eval_once.ref +1 -0
data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
data/test/data/regression/eval_r.info.dp.new +150 -0
data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
data/test/data/regression/ifilter_s.dp.new +31 -0
data/test/data/regression/pass1-stderr.new +10 -0
data/test/data/regression/pass1.new +88 -0
data/test/data/regression/pass1.ref +88 -0
data/test/data/regression/r.info.dp-stderr.new +4 -0
data/test/data/regression/r.info.dp.new +114 -0
data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
data/test/data/regression/rewrite.info.sample.new +150 -0
data/test/data/regression/s.dp-stderr.new +18 -0
data/test/data/regression/s.dp.new +145 -0
data/test/data/regression/seval_s.dp-stderr.new +10 -0
data/test/data/regression/seval_s.dp.new +36 -0
data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
data/test/data/regression/sfilter_seval_s.dp.new +31 -0
data/test/data/regression/thread4-stderr.new +10 -0
data/test/data/regression/thread4.new +150 -0
data/test/data/regression/thread4_4-stderr.new +25 -0
data/test/data/regression/thread4_4.new +130 -0
data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
data/test/data/regression/thread4_4_failed_filter.new +110 -0
data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
data/test/data/regression/vcf2json_full_header.new +225 -0
data/test/data/regression/vcf2json_full_header.ref +225 -0
data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
data/test/data/regression/vcf2json_use_meta.new +4697 -0
data/test/data/regression/vcf2json_use_meta.ref +4697 -0
data/test/performance/metrics.md +18 -1
data/test/stress/stress_test.sh +15 -0
data/test/tmp/test.vcf +12469 -0
metadata +65 -64

@@ -13,38 +13,49 @@ module BioVcf
       end
     end
-    # Set INFO fields (used by --rewrite)
-    def []= k, v
-      split_fields if not @h
-      kupper = k.upcase
-      @h[kupper] = v
-      @original_key[kupper] = k
-    end
-    def method_missing(m, *args, &block)
+    def [] k
       # split_fields if not @h
       # /#{m}=(?<value>[^;])/.@info
+      kupper = k.upcase
       v = if @h
-            @h[m.to_s.upcase]
+            @h[kupper]
           else
-            @info =~ /#{m.to_s}=([^;]+)/i
+            @info =~ /[\A;]#{k}=([^;]+)/i
             value = $1
             # p [m,value]
             # m = @info.match(/#{m.to_s.upcase}=(?<value>[^;]+)/) slower!
             # value = m[:value]
             if value == nil
               split_fields # no option but to split
-              @h[m.to_s.upcase]
+              @h[kupper]
             else
               value
             end
           end
       ConvertStringToValue::convert(v)
+    end
+    # Set INFO fields (used by --rewrite)
+    def []= k, v
+      split_fields if not @h
+      kupper = k.upcase
+      @h[kupper] = v
+      @original_key[kupper] = k
+    end
+    def fields
+      split_fields
+      @h.keys
+    end
+    def method_missing(m, *args, &block)
+      self[m.to_s]
     end
   private
     def split_fields
+      return @h if @h
       @h = {}
       @original_key = {}
       @info.split(/;/).each do |f|
@@ -151,6 +162,10 @@ module BioVcf
       @qual ||= @fields[5].to_f
     end
+    def filter
+      @filter ||= @fields[6]
+    end
     def info
       @info ||= VcfRecordParser.get_info(@fields[7])
     end
@@ -184,15 +199,21 @@ module BioVcf
     end
     def sample_by_index i
-      # p @fields
       raise "Can not index sample on parameter <#{i}>" if not i.kind_of?(Integer)
       @sample_by_index[i] ||= VcfGenotypeField.new(@fields[i+9],format,@header,ref,alt)
     end
     # Walk the samples. list contains an Array of int (the index)
     def each_sample(list = nil)
-      list = @header.samples_index_array() if not list
-      list.each { |i| yield VcfSample::Sample.new(self,sample_by_index(i.to_i)) }
+      @header.sample_subset_index(list).each { |i|
+        yield VcfSample::Sample.new(self,sample_by_index(i))
+      }
+    end
+    def samples
+      list = []
+      each_sample { |s| list << s }
+      list
     end
     def missing_samples?
@@ -229,6 +250,7 @@ module BioVcf
           $stderr.print "RECORD ERROR!\n"
           $stderr.print [@fields],"\n"
           $stderr.print expr,"\n"
+          $stderr.print "To ignore this error use the -i switch!\n"
         end
         if ignore_missing_data
           $stderr.print e.message if not quiet
@@ -239,19 +261,19 @@ module BioVcf
       end
     end
-    def filter expr, ignore_missing_data: true, quiet: false
+    def gfilter expr, ignore_missing_data: true, quiet: false
       begin
         if not respond_to?(:call_cached_filter)
           code =
           """
-          def call_cached_filter(rec,fields)
+          def call_cached_gfilter(rec,fields)
             r = rec
             #{expr}
           end
           """
           self.class.class_eval(code)
         end
-        res = call_cached_filter(self,@fields)
+        res = call_cached_gfilter(self,@fields)
         if res.kind_of?(Array)
           res.join("\t")
         else
@@ -262,6 +284,7 @@ module BioVcf
           $stderr.print "RECORD ERROR!\n"
           $stderr.print [@fields],"\n"
           $stderr.print expr,"\n"
+          $stderr.print "To ignore this error use the -i switch!\n"
         end
         if ignore_missing_data
           $stderr.print e.message if not quiet
@@ -272,6 +295,21 @@ module BioVcf
       end
     end
+    def add_to_filter_field str
+      filter = @fields[6]
+      if not filter or filter == '.' or filter == 'PASS'
+        filter = str
+      else
+        values = filter.split(/;/)
+        if not values.include?(str)
+          filter = filter +';'+str
+        end
+      end
+      filter = '.' if filter == nil or filter == ''
+      @fields[6] = filter
+      filter
+    end
     # Return the sample
     def method_missing(m, *args, &block)
       name = m.to_s

data/lib/bio-vcf/vcfsample.rb CHANGED

@@ -3,7 +3,7 @@ module BioVcf
     # Check whether a sample is empty (on the raw string value)
     def VcfSample::empty? s
-      s==nil or s == './.' or s == '' or s[0..2]=='./.'
+      s==nil or s == './.' or s == '' or s[0..2]=='./.' or s[0..1] == '.:'
     end
     class Sample
@@ -40,9 +40,24 @@ module BioVcf
       # Split GT into index values
       def gti
         v = fetch_values("GT")
-        v.split(/\//).map{ |v| (v=='.' ? nil : v.to_i) }
+        v = './.' if v == '.' #In case that you have a single missing value, make both as missing.
+        v.split(/[\/\|]/).map{ |v| (v=='.' ? nil : v.to_i) }
       end
+      def gtindex
+        v = fetch_values("GT")
+        return case v
+               when nil then nil
+               when '.' then nil
+               when './.' then nil
+               when '0/0' then 0
+               when '0/1' then 1
+               when '1/1' then 2
+               else
+                 raise "Unknown genotype #{v}"
+               end
+      end
       # Split GT into a nucleotide sequence
       def gts
         gti.map { |i| (i ? @rec.get_gt(i) : nil) }
@@ -51,7 +66,16 @@ module BioVcf
       def cache_method(name, &block)
         self.class.send(:define_method, name, &block)
       end
+      def [] name
+        if @format[name]
+          v = fetch_values(name)
+          return nil if VcfValue::empty?(v)
+          return ConvertStringToValue::convert(v)
+        end
+        nil
+      end
       def method_missing(m, *args, &block)
         name = m.to_s.upcase
         # p [:here,name,m ,@values]

data/ragel/gen_vcfheaderline_parser.rl ADDED

@@ -0,0 +1,165 @@
+# Ragel lexer for VCF-header
+#
+# This is a compact parser/lexer for the VCF header format. Bio-vcf
+# uses the parser to generate meta information that can be output to
+# (for example) JSON format. The advantage of using ragel as a state
+# engine is that it allows for easy parsing of key-value pairs with
+# syntax checking and, for example, escaped quotes in quoted string
+# values. This ragel parser/lexer generates valid Ruby; it should be
+# fairly trivial to generate python/C/JAVA instead. Note that this
+# edition validates ID and Number fields only.  Other fields are
+# dumped 'AS IS'.
+#
+# Note the .rb version is generated from ./ragel/gen_vcfheaderline_parser.rl
+#
+# by Pjotr Prins (c) 2014/2015
+module BioVcf
+  module VcfHeaderParser
+    module RagelKeyValues
+      def self.debug msg
+        # nothing
+	# $stderr.print "DEBUG: ",msg,"\n"
+      end
+=begin
+%%{
+  machine simple_lexer;
+  action mark { ts=p }
+  action endquoted {
+    emit.call(:value,data,ts,p)
+  }
+  action kw {
+    emit.call(:kw,data,ts,p)
+  }
+  squote = "'";
+  dquote = '"';
+  not_squote_or_escape = [^'\\];
+  not_dquote_or_escape = [^"\\];
+  escaped_something = /\\./;
+  ss = squote ( not_squote_or_escape | escaped_something )* >mark %endquoted squote;
+  dd = dquote ( not_dquote_or_escape | escaped_something )* >mark %endquoted dquote;
+  integer     = ('+'|'-')?digit+;
+  float       = ('+'|'-')?digit+'.'digit+;
+  assignment  = '=';
+  identifier  = ( alnum (alnum|'.'|'_')* );
+  version     = ( digit (alnum|'.'|'_'|'-')* );
+  str         = (ss|dd)* ;
+  boolean     = '.';
+  date        = str;
+  key_word    = ( ('Type'|'Description'|'Source'|identifier - ('ID'|'Number'|'length'|'Version'|'assembly'|'Date'|'CommandLineOptions')) >mark %{ emit.call(:key_word,data,ts,p) } );
+  any_value   = ( str|( integer|float|boolean|identifier >mark %{ emit.call(:value,data,ts,p) } ));
+  id_value   = ( identifier >mark %{ emit.call(:value,data,ts,p) } );
+  version_value  = ( str| ( version >mark %{ emit.call(:value,data,ts,p) } ));
+  date_value  = ( date );
+  gatk_value  = ( str );
+  number_value = ( ( integer|boolean|'A'|'R'|'G' ) >mark %{ emit.call(:value,data,ts,p) } );
+  id_kv     = ( ( ('ID'|'assembly') %kw '=' id_value ) %{ debug("ID FOUND") } @!{ error_code="Malformed ID"} );
+  version_kv = ( ( ('Version') %kw '=' version_value ) @!{ error_code="Version"} );
+  number_kv = ( ( ('Number'|'length') %kw '=' number_value ) @!{ error_code="Number"} );
+  date_kv =  ( ( ('Date') %kw '=' date_value ) %{ debug("DATE FOUND") } @!{ error_code="Date"} );
+  gatk_kv =  ( ( ('CommandLineOptions') %kw '=' gatk_value ) @!{ error_code="GATK"} );
+  key_value = ( id_kv | version_kv | date_kv | number_kv | gatk_kv | (key_word '=' any_value) ) %{ debug("KEY_VALUE found") } >mark @!{ error_code="unknown key-value " };
+  main := ( '##' ('FILTER'|'FORMAT'|'contig'|'INFO'|'ALT'|'GATKCommandLine') '=') (('<'|',') key_value )* '>';
+}%%
+=end
+%% write data;
+# %% this just fixes syntax highlighting...
+def self.run_lexer(buf, options = {})
+  do_debug = (options[:debug] == true)
+  $stderr.print "---> ",buf,"\n" if do_debug
+  data = buf.unpack("c*") if(buf.is_a?(String))
+  eof = data.length
+  values = []
+  stack = []
+  emit = lambda { |type, data, ts, p|
+    # Print the type and text of the last read token
+    # p ts,p
+    $stderr.print "EMITTED: #{type}: #{data[ts...p].pack('c*')}\n" if do_debug
+    values << [type,data[ts...p].pack('c*')]
+  }
+  error_code = nil
+  %% write init;
+  %% write exec;
+  raise "ERROR: "+error_code+" in "+buf if error_code
+  begin
+    res = {}
+    # p values
+    values.each_slice(2) do | a,b |
+      $stderr.print '*',a,b if do_debug
+      keyword = a[1]
+      value = b[1]
+      value = value.to_i if ['length','Epoch'].index(keyword)
+      res[keyword] = value
+      # p h[:value] if h[:name]==:identifier or h[:name]==:value or h[:name]==:string
+    end
+  rescue
+    print "ERROR: "
+    p values
+    raise
+  end
+  $stderr.print(res,"\n") if do_debug
+  res
+end
+    end
+  end
+end
+if __FILE__ == $0
+gatkcommandline = <<LINE1
+##GATKCommandLine=<ID=CombineVariants,Version=3.2-2-gec30cee,Date="Thu Oct 30 13:41:59 CET 2014",Epoch=1414672919266,CommandLineOptions="analysis_type=CombineVariants input_file=[] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/hpc/cog_bioinf/GENOMES/Homo_sapiens.GRCh37.GATK.illumina/Homo_sapiens.GRCh37.GATK.illumina.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=DYNAMIC_SEEK variant_index_parameter=-1 logging_level=INFO log_to_file=null help=false version=false variant=[(RodBindingCollection [(RodBinding name=variant source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_snps.vcf)]), (RodBindingCollection [(RodBinding name=variant2 source=/hpc/cog_bioinf/data/robert/testIAP/testSubsetExome/tmp/testSubsetExome.filtered_indels.vcf)])] out=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub 
no_cmdline_in_header=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.gatk.engine.io.stubs.VariantContextWriterStub genotypemergeoption=UNSORTED filteredrecordsmergetype=KEEP_IF_ANY_UNFILTERED multipleallelesmergetype=BY_TYPE rod_priority_list=null printComplexMerges=false filteredAreUncalled=false minimalVCF=false excludeNonVariants=false setKey=set assumeIdenticalSamples=false minimumN=1 suppressCommandLineHeader=false mergeInfoWithMaxAC=false filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
+LINE1
+h = {}
+s = gatkcommandline.strip
+# print s,"\n"
+result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
+# h[result['ID']] = result
+# p result
+lines = <<LINES
+##FILTER=<ID=HaplotypeScoreHigh,Description="HaplotypeScore > 13.0">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth",Extra="Yes?">
+##FORMAT=<ID=DP4,Number=4,Type=Integer,Description="# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases">
+##INFO=<ID=PM,Number=0,Type=Flag,Description="Variant is Precious(Clinical,Pubmed Cited)">
+##INFO=<ID=VP,Number=1,Type=String,Description="Variation Property.  Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf",Source="dbsnp",Version="138">
+##INFO=<ID=GENEINFO,Number=1,Type=String,Description="Pairs each of gene symbol:gene id.  The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)">
+##INFO=<ID=CLNHGVS,Number=.,Type=String,Description="Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical  INFO tags.">
+##INFO=<ID=CLNHGVS1,Number=.,Type=String,Description="Variant names from \\"HGVS\\". The order of these 'variants' corresponds to the order of the info in the other clinical  INFO tags.">
+##contig=<ID=XXXY12>
+##contig=<ID=Y,length=59373566>
+LINES
+h = {}
+lines.strip.split("\n").each { |s|
+  # print s,"\n"
+  result = BioVcf::VcfHeaderParser::RagelKeyValues.run_lexer(s, debug: true)
+  h[result['ID']] = result
+  p result
+}
+p h
+raise "ERROR" if h != {"HaplotypeScoreHigh"=>{"ID"=>"HaplotypeScoreHigh", "Description"=>"HaplotypeScore > 13.0"}, "GT"=>{"ID"=>"GT", "Number"=>"1", "Type"=>"String", "Description"=>"Genotype"}, "DP"=>{"ID"=>"DP", "Number"=>"1", "Type"=>"Integer", "Description"=>"Total read depth", "Extra"=>"Yes?"}, "DP4"=>{"ID"=>"DP4", "Number"=>"4", "Type"=>"Integer", "Description"=>"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases"}, "PM"=>{"ID"=>"PM", "Number"=>"0", "Type"=>"Flag", "Description"=>"Variant is Precious(Clinical,Pubmed Cited)"}, "VP"=>{"ID"=>"VP", "Number"=>"1", "Type"=>"String", "Description"=>"Variation Property.  Documentation is at ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf", "Source"=>"dbsnp", "Version"=>"138"}, "GENEINFO"=>{"ID"=>"GENEINFO", "Number"=>"1", "Type"=>"String", "Description"=>"Pairs each of gene symbol:gene id.  The gene symbol and id are delimited by a colon (:), and each pair is delimited by a vertical bar (|)"}, "CLNHGVS"=>{"ID"=>"CLNHGVS", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from HGVS. The order of these variants corresponds to the order of the info in the other clinical  INFO tags."}, "CLNHGVS1"=>{"ID"=>"CLNHGVS1", "Number"=>".", "Type"=>"String", "Description"=>"Variant names from \\\"HGVS\\\". The order of these 'variants' corresponds to the order of the info in the other clinical  INFO tags."}, "XXXY12"=>{"ID"=>"XXXY12"}, "Y"=>{"ID"=>"Y", "length"=>59373566}}
+end # test

data/ragel/generate.sh ADDED

@@ -0,0 +1,8 @@
+#! /bin/bash
+ragel -R gen_vcfheaderline_parser.rl
+[ $? -ne 0 ] && exit 1
+ruby gen_vcfheaderline_parser.rb
+cp gen_vcfheaderline_parser.rb ../lib/bio-vcf/vcfheader_line.rb

data/template/vcf2json.erb CHANGED

@@ -1,8 +1,20 @@
+=HEADER
+<% require 'json' %>
 {
-  "seq:chr": "<%= rec.chrom %>" ,
-  "seq:pos": <%= rec.pos %> ,
-  "seq:ref": "<%= rec.ref %>" ,
-  "seq:alt": "<%= rec.alt[0] %>" ,
-  "seq:maf": <%= rec.info.maf[0] %> ,
-  "dp":      <%= rec.info.dp %> ,
-};
+    "HEADER": {
+	"options": <%= options.to_h.to_json %>,
+	"files": <%= ARGV %>,
+	"version": "<%= BIOVCF_VERSION %>"
+    },
+    "BODY": [
+=BODY
+	{
+	    "seq:chr": "<%= rec.chrom %>",
+	    "seq:pos": <%= rec.pos %>,
+	    "seq:ref": "<%= rec.ref %>",
+	    "seq:alt": "<%= rec.alt[0] %>",
+	    "dp":      <%= rec.info.dp %>
+	},
+=FOOTER
+    ]
+}

data/template/vcf2json_full_header.erb ADDED

@@ -0,0 +1,22 @@
+=HEADER
+<% require 'json' %>
+{
+    "HEADER": {
+	"options":  <%= options.to_h.to_json %>,
+	"files":    <%= ARGV %>,
+	"version":  "<%= BIOVCF_VERSION %>"
+    },
+    "COLUMNS": <%= header.column_names.to_json %>,
+    "META": <%= header.meta.to_json %>,
+    "BODY": [
+=BODY
+	{
+	    "seq:chr": "<%= rec.chrom %>" ,
+	    "seq:pos": <%= rec.pos %> ,
+	    "seq:ref": "<%= rec.ref %>" ,
+	    "seq:alt": "<%= rec.alt[0] %>"
+	    <% if rec.info.dp %> , "dp": <%= rec.info.dp %> <% end %>
+	},
+=FOOTER
+    ]
+}

data/template/vcf2json_use_meta.erb ADDED

@@ -0,0 +1,41 @@
+=HEADER
+<% require 'json' %>
+{
+    "HEADER": {
+	"options":<%= options.to_h.to_json %>,
+	"files": <%= ARGV %>,
+	"version": "<%= BIOVCF_VERSION %>"
+    },
+    "COLUMNS": <%= header.column_names.to_json %>,
+    "META": <%= header.meta.to_json %>,
+    "BODY": [
+=BODY
+	<% sample_num = 0
+	sample_name = nil
+	sample_size = header.samples.size
+	%>
+	{
+	    "seq:chr": "<%= rec.chrom %>" ,
+	    "seq:pos": <%= rec.pos %> ,
+	    "seq:ref": "<%= rec.ref %>" ,
+	    "seq:alt": "<%= rec.alt[0] %>"
+	    <% if rec.info.dp %> , "dp": <%= rec.info.dp %> <% end %>,
+	    "samples" : {
+		<% rec.each_sample do |s| %>
+		<% if not s.empty?
+		sample_name = header.samples[sample_num]
+		%>
+		<%= (sample_num!=0 ? "," : "" ) %>
+		<% sample_num += 1%>
+		"<%= sample_name %>": {
+		    <% header.meta['FORMAT'].each_key do |k| %>
+		    "<%= k %>": <%= s[k].to_json %><%= (k==header.meta['FORMAT'].keys.last ? "" : "," ) %>
+		    <% end %>
+		}
+		<% end %>
+		<% end %>
+	    }
+	},
+=FOOTER
+    ]
+}