RubyGems - bio-vcf - Versions diffs - 0.8.0 → 0.9.4 - Mend

bio-vcf 0.8.0 → 0.9.4

Files changed (85) hide show

checksums.yaml +5 -5
data/.travis.yml +1 -11
data/Gemfile +4 -5
data/Gemfile.lock +28 -65
data/LICENSE.txt +1 -1
data/README.md +387 -107
data/RELEASE_NOTES.md +20 -0
data/RELEASE_NOTES.md~ +11 -0
data/Rakefile +3 -40
data/TAGS +115 -0
data/VERSION +1 -1
data/bin/bio-vcf +176 -109
data/bio-vcf.gemspec +14 -70
data/features/cli.feature +22 -4
data/features/diff_count.feature +0 -1
data/features/filter.feature +12 -0
data/features/multisample.feature +25 -0
data/features/somaticsniper.feature +2 -0
data/features/step_definitions/cli-feature.rb +15 -6
data/features/step_definitions/diff_count.rb +1 -1
data/features/step_definitions/multisample.rb +19 -0
data/features/step_definitions/somaticsniper.rb +9 -1
data/features/step_definitions/vcf_header.rb +48 -0
data/features/support/env.rb +0 -9
data/features/vcf_header.feature +35 -0
data/lib/bio-vcf.rb +2 -0
data/lib/bio-vcf/bedfilter.rb +43 -0
data/lib/bio-vcf/pcows.rb +303 -0
data/lib/bio-vcf/template.rb +75 -0
data/lib/bio-vcf/vcffile.rb +46 -0
data/lib/bio-vcf/vcfgenotypefield.rb +25 -20
data/lib/bio-vcf/vcfheader.rb +146 -6
data/lib/bio-vcf/vcfheader_line.rb +778 -0
data/lib/bio-vcf/vcfrecord.rb +56 -18
data/lib/bio-vcf/vcfsample.rb +27 -3
data/ragel/gen_vcfheaderline_parser.rl +165 -0
data/ragel/generate.sh +8 -0
data/template/vcf2json.erb +19 -7
data/template/vcf2json_full_header.erb +22 -0
data/template/vcf2json_use_meta.erb +41 -0
data/template/vcf2rdf_header.erb +24 -0
data/test/data/input/empty.vcf +2 -0
data/test/data/input/gatk_exome.vcf +237 -0
data/test/data/input/gatk_wgs.vcf +1000 -0
data/test/data/input/test.bed +632 -0
data/test/data/regression/empty-stderr.new +12 -0
data/test/data/regression/empty.new +2 -0
data/test/data/regression/empty.ref +2 -0
data/test/data/regression/eval_once-stderr.new +2 -0
data/test/data/regression/eval_once.new +1 -0
data/test/data/regression/eval_once.ref +1 -0
data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
data/test/data/regression/eval_r.info.dp.new +150 -0
data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
data/test/data/regression/ifilter_s.dp.new +31 -0
data/test/data/regression/pass1-stderr.new +10 -0
data/test/data/regression/pass1.new +88 -0
data/test/data/regression/pass1.ref +88 -0
data/test/data/regression/r.info.dp-stderr.new +4 -0
data/test/data/regression/r.info.dp.new +114 -0
data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
data/test/data/regression/rewrite.info.sample.new +150 -0
data/test/data/regression/s.dp-stderr.new +18 -0
data/test/data/regression/s.dp.new +145 -0
data/test/data/regression/seval_s.dp-stderr.new +10 -0
data/test/data/regression/seval_s.dp.new +36 -0
data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
data/test/data/regression/sfilter_seval_s.dp.new +31 -0
data/test/data/regression/thread4-stderr.new +10 -0
data/test/data/regression/thread4.new +150 -0
data/test/data/regression/thread4_4-stderr.new +25 -0
data/test/data/regression/thread4_4.new +130 -0
data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -1
data/test/data/regression/thread4_4_failed_filter.new +110 -0
data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
data/test/data/regression/vcf2json_full_header.new +225 -0
data/test/data/regression/vcf2json_full_header.ref +225 -0
data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
data/test/data/regression/vcf2json_use_meta.new +4697 -0
data/test/data/regression/vcf2json_use_meta.ref +4697 -0
data/test/performance/metrics.md +18 -1
data/test/stress/stress_test.sh +15 -0
data/test/tmp/test.vcf +12469 -0
metadata +65 -64

@@ -0,0 +1,20 @@
+## ChangeLog v0.9.4 (2020????)
+This is an important maintenance release of bio-vcf:
++ Rename bioruby-vcf to bio-vcf and migrate project to [vcflib](https://github.com/vcflib/bio-vcf).
+## Older release notes
++ Getting ready for a 1.0 release
++ Released 0.9.2 as a gem
++ 0.9.1 removed a rare threading bug and cleanup on error
++ Added support for soft filters (request by Brad Chapman)
++ The outputter now writes (properly) in parallel with the parser
++ bio-vcf turns any VCF into JSON with header information, and
+  allows you to pipe that JSON directly into any JSON-supporting
+  language, including Python and JavaScript!
+## Older changes
+For older changes view the git [log](https://github.com/vcflib/bio-vcf/commits/master).

data/RELEASE_NOTES.md~ ADDED

@@ -0,0 +1,11 @@
+## RELEASE NOTES
+* Getting ready for a 1.0 release
+* Released 0.9.2 as a gem
+* 0.9.1 removed a rare threading bug and cleanup on error
+* Added support for soft filters (request by Brad Chapman)
+* The outputter now writes (properly) in parallel with the parser
+* bio-vcf turns any VCF into JSON with header information, and
+  allows you to pipe that JSON directly into any JSON-supporting
+  language, including Python and JavaScript!

data/Rakefile CHANGED

@@ -1,49 +1,12 @@
 # encoding: utf-8
 require 'rubygems'
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
 require 'rake'
-require 'jeweler'
-Jeweler::Tasks.new do |gem|
-  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
-  gem.name = "bio-vcf"
-  gem.homepage = "http://github.com/pjotrp/bioruby-vcf"
-  gem.license = "MIT"
-  gem.summary = %Q{Fast multi-threaded VCF parser}
-  gem.description = %Q{Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting}
-  gem.email = "pjotr.public01@thebird.nl"
-  gem.authors = ["Pjotr Prins"]
-  # dependencies defined in Gemfile
-end
-Jeweler::RubygemsDotOrgTasks.new
-# require 'rspec/core'
-# require 'rspec/core/rake_task'
-# RSpec::Core::RakeTask.new(:spec) do |spec|
-#   spec.pattern = FileList['spec/**/*_spec.rb']
-# end
-# RSpec::Core::RakeTask.new(:rcov) do |spec|
-#   spec.pattern = 'spec/**/*_spec.rb'
-#   spec.rcov = true
-# end
-# require 'rake/testtask'
-# Rake::TestTask.new do |t|
-#   t.pattern = "spec/*_spec.rb"
-# end
 require 'cucumber/rake/task'
-Cucumber::Rake::Task.new(:features)
+Cucumber::Rake::Task.new(:features) do |t|
+  # t.cucumber_opts = "--bundler false"
+end
 task :default => :features

data/TAGS ADDED

@@ -0,0 +1,115 @@
+./bin/bio-vcf,0
+./lib/bio-vcf.rb,0
+./lib/bio-vcf/vcfgenotypefield.rb,1553
+module BioVcf::BioVcf1,0
+  class VcfNucleotides::BioVcf::VcfNucleotides7,167
+    def initialize::BioVcf::VcfNucleotides#BioVcf::VcfNucleotides.new8,193
+    def []::BioVcf::VcfNucleotides#[]13,284
+    def to_ary::BioVcf::VcfNucleotides#to_ary27,628
+    def max::BioVcf::VcfNucleotides#max32,742
+    def min::BioVcf::VcfNucleotides#min37,856
+    def sum::BioVcf::VcfNucleotides#sum42,975
+  class VcfAltInfo::BioVcf::VcfAltInfo50,1082
+    def initialize::BioVcf::VcfAltInfo#BioVcf::VcfAltInfo.new51,1103
+    def []::BioVcf::VcfAltInfo#[]56,1194
+    def to_ary::BioVcf::VcfAltInfo#to_ary70,1512
+    def max::BioVcf::VcfAltInfo#max75,1626
+    def min::BioVcf::VcfAltInfo#min79,1702
+    def sum::BioVcf::VcfAltInfo#sum83,1783
+  class VcfGenotypeField::BioVcf::VcfGenotypeField88,1850
+    def initialize::BioVcf::VcfGenotypeField#BioVcf::VcfGenotypeField.new89,1877
+    def dp4::BioVcf::VcfGenotypeField#dp496,2020
+    def ad::BioVcf::VcfGenotypeField#ad100,2098
+    def pl::BioVcf::VcfGenotypeField#pl104,2174
+    def bcount::BioVcf::VcfGenotypeField#bcount108,2250
+    def bq::BioVcf::VcfGenotypeField#bq112,2343
+    def amq::BioVcf::VcfGenotypeField#amq116,2424
+    def method_missing::BioVcf::VcfGenotypeField#method_missing120,2507
+  class VcfGenotypeFields::BioVcf::VcfGenotypeFields130,2709
+    def initialize::BioVcf::VcfGenotypeFields#BioVcf::VcfGenotypeFields.new131,2737
+    def []::BioVcf::VcfGenotypeFields#[]141,3021
+    def method_missing::BioVcf::VcfGenotypeFields#method_missing145,3136
+./lib/bio-vcf/vcfrdf.rb,156
+module BioVcf::BioVcf1,0
+  module VcfRdf::BioVcf::VcfRdf5,93
+    def VcfRdf::BioVcf::VcfRdf#VcfRdf7,112
+    def VcfRdf::BioVcf::VcfRdf#VcfRdf18,463
+./lib/bio-vcf/vcf.rb,27
+module BioVcf::BioVcf2,1
+./lib/bio-vcf/vcfline.rb,118
+module BioVcf::BioVcf1,0
+  module VcfLine::BioVcf::VcfLine2,16
+    def VcfLine.parse::BioVcf::VcfLine.parse5,82
+./lib/bio-vcf/vcfrecord.rb,1831
+module BioVcf::BioVcf1,0
+  class VcfRecordInfo::BioVcf::VcfRecordInfo3,17
+    def initialize::BioVcf::VcfRecordInfo#BioVcf::VcfRecordInfo.new4,41
+    def method_missing::BioVcf::VcfRecordInfo#method_missing9,163
+  module VcfRecordParser::BioVcf::VcfRecordParser18,329
+    def VcfRecordParser.get_format::BioVcf::VcfRecordParser.get_format20,397
+    def VcfRecordParser.get_info::BioVcf::VcfRecordParser.get_info25,517
+  module VcfRecordCall::BioVcf::VcfRecordCall30,592
+    def call_diff::BioVcf::VcfRecordCall#call_diff31,617
+    def call_nuc::BioVcf::VcfRecordCall#call_nuc35,705
+    def call_tumor_count::BioVcf::VcfRecordCall#call_tumor_count39,764
+    def call_tumor_relative_count::BioVcf::VcfRecordCall#call_tumor_relative_count43,833
+    def call_normal_count::BioVcf::VcfRecordCall#call_normal_count47,955
+    def index::BioVcf::VcfRecordCall#index51,1026
+  class VcfRecord::BioVcf::VcfRecord56,1125
+    attr_reader :header::BioVcf::VcfRecord#header60,1173
+    def initialize::BioVcf::VcfRecord#BioVcf::VcfRecord.new62,1198
+    def chrom::BioVcf::VcfRecord#chrom67,1292
+    def pos::BioVcf::VcfRecord#pos71,1332
+    def ids::BioVcf::VcfRecord#ids75,1384
+    def id::BioVcf::VcfRecord#id79,1443
+    def ref::BioVcf::VcfRecord#ref83,1476
+    def alt::BioVcf::VcfRecord#alt87,1524
+    def qual::BioVcf::VcfRecord#qual91,1582
+    def info::BioVcf::VcfRecord#info95,1636
+    def format::BioVcf::VcfRecord#format99,1711
+    def normal::BioVcf::VcfRecord#normal104,1848
+    def tumor::BioVcf::VcfRecord#tumor109,1997
+    def sample::BioVcf::VcfRecord#sample114,2134
+    def sample_by_name::BioVcf::VcfRecord#sample_by_name118,2227
+    def missing_samples?::BioVcf::VcfRecord#missing_samples?122,2283
+    def method_missing::BioVcf::VcfRecord#method_missing126,2341
+./lib/bio-vcf/variant.rb,470
+module BioVcf::BioVcf1,0
+  module Variant::BioVcf::Variant3,17
+    def Variant.diff::BioVcf::Variant.diff5,37
+    def Variant.threshold_diff::BioVcf::Variant.threshold_diff9,132
+    def Variant.relative_diff::BioVcf::Variant.relative_diff14,269
+    def Variant.relative_threshold_diff::BioVcf::Variant.relative_threshold_diff20,497
+    def Variant.index::BioVcf::Variant.index25,652
+    def Variant.apply_threshold::BioVcf::Variant.apply_threshold31,809
+./lib/bio-vcf/vcfheader.rb,598
+module BioVcf::BioVcf2,1
+  module VcfHeaderParser::BioVcf::VcfHeaderParser4,18
+    def VcfHeaderParser.get_column_names::BioVcf::VcfHeaderParser.get_column_names5,45
+  class VcfHeader::BioVcf::VcfHeader18,339
+    attr_reader :lines::BioVcf::VcfHeader#lines20,360
+    def initialize::BioVcf::VcfHeader#BioVcf::VcfHeader.new22,384
+    def add::BioVcf::VcfHeader#add26,430
+    def version::BioVcf::VcfHeader#version30,483
+    def column_names::BioVcf::VcfHeader#column_names34,578
+    def columns::BioVcf::VcfHeader#columns38,674
+    def samples::BioVcf::VcfHeader#samples42,735
+./features/step_definitions/diff_count.rb,0
+./features/step_definitions/bio-vcf_steps.rb,0
+./features/step_definitions/somaticsniper.rb,0
+./features/step_definitions/multisample.rb,0
+./features/support/env.rb,0

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.8.0
1	+ 0.9.4

data/bin/bio-vcf CHANGED

@@ -4,7 +4,7 @@
 # Author:: Pjotr Prins
 # License:: MIT
 #
-# Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>
+# Copyright (C) 2014-2020 Pjotr Prins <pjotr.prins@thebird.nl>
 USAGE = "Vcf parser"
@@ -15,22 +15,23 @@ VERSION_FILENAME=File.join(gempath,'VERSION')
 version = File.new(VERSION_FILENAME).read.chomp
 require 'bio-vcf'
+require 'bio-vcf/pcows'
 require 'optparse'
 require 'timeout'
 require 'fileutils'
-# Uncomment when using the bio-logger
+# Uncomment when using the bio-logger
 # require 'bio-logger'
 # log = Bio::Log::LoggerPlus.new 'vcf'
-# log.outputters = Bio::Log::Outputter.stderr
+# log.outputters = Bio::Log::Outputter.stderr
 # Bio::Log::CLI.logger('stderr')
 # Bio::Log::CLI.trace('info')
-options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000 }
+options = { show_help: false, source: 'https://github.com/pjotrp/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000, timeout: 180 }
 opts = OptionParser.new do |o|
   o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g.  #{File.basename($0)} < test/data/input/somaticsniper.vcf"
-  o.on('-i','--ignore-missing', 'Ignore missing data') do
+  o.on('-i','--ignore-missing', 'Ignore missing data') do
     options[:ignore_missing] = true
   end
   o.on('--filter cmd',String, 'Evaluate filter on each record') do |cmd|
@@ -57,6 +58,13 @@ opts = OptionParser.new do |o|
   o.on("--efilter-samples list", Array, "Exclude set - overrides exclude set") do |l|
     options[:efilter_samples] = l
   end
+  o.on('--add-filter name',String, 'Set/add filter field to name') do |name|
+    options[:add_filter] = name
+  end
+  o.on("--bed bedfile", String, "Filter on BED elements") do |bed|
+    options[:bed] = bed
+  end
   o.on('-e cmd', '--eval cmd',String, 'Evaluate command on each record') do |cmd|
     options[:eval] = cmd
@@ -64,6 +72,9 @@ opts = OptionParser.new do |o|
   o.on('--eval-once cmd',String, 'Evaluate command once (usually for header info)') do |cmd|
     options[:eval_once] = true
     options[:eval] = cmd
+    # options[:num_threads] = 1
+    # options[:thread_lines] = 1
+    options[:skip_header] = true
   end
   o.on('--seval cmd',String, 'Evaluate command on each sample') do |cmd|
     options[:seval] = cmd
@@ -80,7 +91,7 @@ opts = OptionParser.new do |o|
     options[:rdf] = true
     options[:skip_header] = true
   end
-  o.on("--num-threads [num]", Integer, "Multi-core version (default #{options[:num_threads]})") do |i|
+  o.on("--num-threads [num]", Integer, "Multi-core version (default ALL)") do |i|
     options[:num_threads] = i
   end
   o.on("--thread-lines num", Integer, "Fork thread on num lines (default #{options[:thread_lines]})") do |i|
@@ -92,8 +103,8 @@ opts = OptionParser.new do |o|
   o.on_tail("--tags list", String, "Add tags") do |s|
     options[:tags] = s
   end
-  o.on("--skip-header", "Do not output VCF header info") do
+  o.on("--skip-header", "Do not output VCF header info") do
     options[:skip_header] = true
   end
@@ -108,9 +119,16 @@ opts = OptionParser.new do |o|
     options[:template] = s
     options[:skip_header] = true
   end
-  # Uncomment the following when using the bio-logger
+  o.on("--add-header-tag", "Add bio-vcf status tag to header output") do |t|
+    options[:tag] = true
+  end
+  o.on("--timeout [num]", Integer, "Timeout waiting for thread to complete (default #{options[:timeout]})") do |i|
+    options[:timeout] = i
+  end
+  # Uncomment the following when using the bio-logger
   # o.separator ""
   # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
   #   Bio::Log::CLI.logger(name)
@@ -119,7 +137,16 @@ opts = OptionParser.new do |o|
   # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
   #   Bio::Log::CLI.trace(s)
   # end
-  #
+  #
+  o.on("--names", "Output sample names") do |q|
+    options[:quiet] = true
+    options[:num_threads] = nil
+    options[:eval_once] = true
+    options[:eval] = 'header.samples.join("\t")'
+    # options[:num_threads] = 1
+    # options[:thread_lines] = 1
+    options[:skip_header] = true
+  end
   o.on("--statistics", "Output statistics") do |q|
     options[:statistics] = true
     options[:num_threads] = nil
@@ -128,14 +155,15 @@ opts = OptionParser.new do |o|
     # Bio::Log::CLI.trace('error')
     options[:quiet] = true
   end
   o.on("-v", "--verbose", "Run verbosely") do |v|
     options[:verbose] = true
   end
-  # o.on("--debug", "Show debug messages") do |v|
-  #   Bio::Log::CLI.trace('debug')
-  # end
+  o.on("--debug", "Show debug messages and keep intermediate output") do |v|
+    # Bio::Log::CLI.trace('debug')
+    options[:debug] = true
+  end
   o.separator ""
   o.on_tail('-h', '--help', 'display this help and exit') do
@@ -145,10 +173,12 @@ end
 opts.parse!(ARGV)
-$stderr.print "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n" if !options[:quiet]
+BIOVCF_VERSION=version
+BIOVCF_BANNER = "bio-vcf #{version} (biogem Ruby #{RUBY_VERSION} with pcows) by Pjotr Prins 2015-2020\n"
+$stderr.print BIOVCF_BANNER if !options[:quiet]
-if options[:show_help]
-  print opts
+if options[:show_help]
+  print opts
   print USAGE
   exit 1
 end
@@ -161,18 +191,11 @@ $stderr.print "Options: ",options,"\n" if !options[:quiet]
 if options[:template]
   include BioVcf::RDF
+  require 'bio-vcf/template'
   fn = options[:template]
   raise "No template #{fn}!" if not File.exist?(fn)
-  template = ERB.new(File.read(fn))
-end
-if options[:num_threads] != 1
-  begin
-    require 'parallel'
-  rescue LoadError
-    $stderr.print "Error: Missing 'parallel' module. Install with command 'gem install parallel' if you want multiple threads\n"
-    options[:num_threads] = 1
-  end
+  # template = ERB.new(File.read(fn))
+  template = Bio::Template.new(fn)
 end
 stats = nil
@@ -185,6 +208,8 @@ end
 raise "Missing option --ifilter" if options[:ifilter_samples] and not options[:ifilter]
 raise "Missing option --efilter" if options[:efilter_samples] and not options[:efilter]
 raise "Missing option --sfilter" if options[:sfilter_samples] and not options[:sfilter]
+# raise "Soft filter not supported with --ifilter" if options[:add_filter] and options[:ifilter]
+# raise "Soft filter not supported with --efilter" if options[:add_filter] and options[:efilter]
 if options[:samples]
   samples = options[:samples].map { |s| s.to_i }
@@ -192,13 +217,14 @@ end
 include BioVcf
-# Parse the header section of a VCF file
+# Parse the header section of a VCF file (chomping STDIN)
 def parse_header line, samples, options
-  header = VcfHeader.new
+  header = VcfHeader.new(options[:debug])
   header.add(line)
   print line if not options[:skip_header]
   STDIN.each_line do | headerline |
     if headerline !~ /^#/
+      # If no records in VCF, we never get here
       line = headerline
       break # end of header
     end
@@ -206,12 +232,19 @@ def parse_header line, samples, options
     if not options[:skip_header]
       if headerline =~ /^#CHR/
         # The header before actual data contains the sample names, first inject the BioVcf meta information
-        print header.tag(options),"\n" if not options[:skip_header]
+        print header.tag(options),"\n" if options[:tag] and not options[:skip_header]
+        # Then the additional filter(s)
+        # ##FILTER=<ID=LowQual,Description="Low quality">
+        add_filter = options[:add_filter]
+        if add_filter
+          print "##FILTER=<ID=",add_filter,",Description=\"",options[:filter],"\">\n"
+        end
         selected = header.column_names
         if samples
           newfields = selected[0..8]
           samples.each do |s|
-            newfields << selected[s+9]
+            newfields << selected[s+9]
           end
           selected = newfields
         end
@@ -223,11 +256,15 @@ def parse_header line, samples, options
   end
   print header.printable_header_line(options[:set_header]),"\n" if options[:set_header]
   VcfRdf::header if options[:rdf]
+  if line =~ /^#/
+    # We did not read a record
+    line = nil
+  end
   return header,line
 end
-# Parse a VCF line and return the result as a string
-def parse_line line,header,options,samples,template,stats=nil
+# Parse a VCF line and return the (template) result as a string buffer
+def parse_line line,header,options,bedfilter,samples,template,stats=nil
   fields = VcfLine.parse(line)
   rec = VcfRecord.new(fields,header)
   r = rec # alias
@@ -236,9 +273,11 @@ def parse_line line,header,options,samples,template,stats=nil
   sfilter = options[:sfilter]
   efilter = options[:efilter]
   ifilter = options[:ifilter]
+  add_filter = options[:add_filter] # contains a filter name (soft filter)
   seval = options[:seval]
   ignore_missing = options[:ignore_missing]
   quiet = options[:quiet]
+  set_filter_field = nil
   if sfilter or efilter or ifilter or seval
     # check for samples
@@ -248,15 +287,32 @@ def parse_line line,header,options,samples,template,stats=nil
   # --------------------------
   # Filtering and set analysis
-  return if filter and not rec.filter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
-  if sfilter
+  if bedfilter
+    bed = bedfilter.contains(rec)
+    return if not bed
+  end
+  skip = lambda { |&m|
+    matched = m.call
+    if add_filter
+      set_filter_field = true if matched
+      false  # always continue processing with an add-filter
+    else
+      not matched
+    end
+  }
+  if filter
+    return if skip.call { rec.gfilter(filter,ignore_missing_data: ignore_missing,quiet: quiet) }
+  end
+  if sfilter # sample 'or' filter
     rec.each_sample(options[:sfilter_samples]) do | sample |
-      return if not sample.sfilter(sfilter,ignore_missing_data: ignore_missing,quiet: quiet)
+      return if skip.call { sample.sfilter(sfilter,ignore_missing_data: ignore_missing,quiet: quiet) }
     end
   end
-  if ifilter
+  if ifilter # include sample filter
     found = false
     rec.each_sample(options[:ifilter_samples]) do | sample |
       if sample.ifilter(ifilter,ignore_missing_data: ignore_missing,quiet: quiet)
@@ -265,12 +321,12 @@ def parse_line line,header,options,samples,template,stats=nil
       end
     end
     # Skip if there are no matches
-    return if not found
+    return if skip.call {found}
   end
-  if efilter
+  if efilter # exclude sample filter
     rec.each_sample(options[:efilter_samples]) do | sample |
-      return if not sample.efilter(efilter,ignore_missing_data: ignore_missing,quiet: quiet)
+      return if skip.call{ sample.efilter(efilter,ignore_missing_data: ignore_missing,quiet: quiet) }
     end
   end
@@ -278,18 +334,21 @@ def parse_line line,header,options,samples,template,stats=nil
   # -----------------------------
   # From here on decide on output
+  rec.add_to_filter_field(add_filter) if set_filter_field
   if samples
     # Select certain samples for output
     newfields = fields[0..8]
     samples.each do |s|
-      newfields << fields[s+9]
+      newfields << fields[s+9]
     end
     fields = newfields
   end
   if options[:eval] or seval
     begin
       results = nil # result string
-      if options[:eval]
+      if options[:eval]
         res = rec.eval(options[:eval],ignore_missing_data: ignore_missing,quiet: quiet)
         results = res if res
       end
@@ -307,23 +366,22 @@ def parse_line line,header,options,samples,template,stats=nil
       exit 1
     end
     return results.to_s+"\n" if results
-    exit(1) if options[:eval_once]  # <--- can this be reached?
   else
     if options[:rdf]
       # Output Turtle RDF
       VcfRdf::record(options[:id],rec,options[:tags])
     elsif options[:template]
-      # Ruby ERB template
+      # Use ERB template
       begin
-        template.result(binding)
+        template.body(binding)
       rescue Exception => e
         $stderr.print e,": ",fields,"\n"
         $stderr.print e.backtrace.inspect if options[:verbose]
-        raise
+        raise
       end
     elsif options[:rewrite]
       # Default behaviour prints VCF line, but rewrite info
-      eval(options[:rewrite])
+      eval(options[:rewrite])
       (fields[0..6]+[rec.info.to_s]+fields[8..-1]).join("\t")+"\n"
     elsif stats
       # do nothing
@@ -334,88 +392,97 @@ def parse_line line,header,options,samples,template,stats=nil
   end
 end
+CHUNK_SIZE = options[:thread_lines]
+pcows = PCOWS.new(options[:num_threads],CHUNK_SIZE,'bio-vcf',options[:timeout],
+                  options[:quiet],options[:debug])
 header = nil
 header_output_completed = false
-NUM_THREADS = options[:num_threads]
-CHUNK_SIZE = options[:thread_lines]
-CHUNK_NUM = (NUM_THREADS && NUM_THREADS>6 ? NUM_THREADS*4 : 24)
-chunks = []
-lines = []
+chunk_lines = []
 line_number=0
+if options[:bed]
+  bedfilter = BedFilter.new(options[:bed])
+end
 begin
+  # Define linear parser function (going through one chunk)
   process = lambda { | lines |
     res = []
     lines.each do | line |
-      res << parse_line(line,header,options,samples,template,stats)
+      res << parse_line(line,header,options,bedfilter,samples,template,stats)
     end
     res
   }
-  output = lambda { |collection|
-    collection.each do | result |
-      result.each { |line| print line }
-    end
-  } # end output
   # ---- Main loop
   STDIN.each_line do | line |
     line_number += 1
-    # ---- In this section header information is handled
+    # ---- Skip embedded headers down the line...
     next if header_output_completed and line =~ /^#/
-    if line =~ /^##fileformat=/ or line =~ /^#CHR/
-      header,line = parse_header(line,samples,options)
-    end
-    next if line =~ /^##/ # empty file
-    header_output_completed = true
-    if not options[:efilter_samples] and options[:ifilter_samples]
-      # Create exclude set as a complement of include set
-      options[:efilter_samples] = header.column_names[9..-1].fill{|i|i.to_s}-options[:ifilter_samples]
-    end
-    # ---- In this section the VCF variant lines are parsed
-    lines << line
-    if NUM_THREADS == 1
-      $stderr.print '.' if line_number % CHUNK_SIZE == 0 and not options[:quiet]
-      if lines.size > CHUNK_SIZE
-        process.call(lines).each { | l | print l }
-        lines = []
+    # ---- In the following section header information is handled -
+    #      this only happens once.
+    # ---- Parse the header lines (consuming them from STDIN)
+    #      and return the header info and the current line
+    if line =~ /^#/
+      header, line = parse_header(line,samples,options)
+      if line.nil?
+        # No line after header, so there are no records to process
+        break
       end
-    else
-      if lines.size > CHUNK_SIZE
-        chunks << lines
-        if chunks.size > CHUNK_NUM
-          $stderr.print '.' if not options[:quiet]
-          out = Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
-            process.call(chunk)
-          }
-          chunks = []
-          # Output is forked to a separate process too
-          fork do
-            output.call out
-            STDOUT.flush
-            STDOUT.close
-            exit 0
-          end
-        end
-        lines = []
+    end
+    # p [line_number,line]
+    # ---- After the header continue processing
+    if not header_output_completed
+      # one-time post-header processing
+      if not options[:efilter_samples] and options[:ifilter_samples]
+        # Create exclude set as a complement of include set
+        options[:efilter_samples] = header.column_names[9..-1].fill{|i|i.to_s}-options[:ifilter_samples]
       end
+      print template.header(binding) if template
+      header_output_completed = true
+    end
+    if options[:eval_once]
+      # this happens if we only want one line evaluated - say to get
+      # the number of samples
+      print parse_line(line,header,options,bedfilter,samples,template,stats)
+      exit 0
+    end
+    # ---- Lines are collected in one buffer and the lines buffer
+    #      is added to the chunks list (for the threads)
+    chunk_lines << line
+    # ---- In the following section the VCF lines are parsed by chunks
+    #      The chunks may go into different threads
+    if chunk_lines.size >= CHUNK_SIZE
+      # ---- process one chunk
+      $stderr.print '.' if not options[:quiet]
+      pcows.wait_for_worker_slot()
+      pcows.submit_worker(process,chunk_lines)
+      pcows.process_output()
+      chunk_lines = []
     end
   end
-  $stderr.print '.' if not options[:quiet]
-  if NUM_THREADS == 1
-    process.call(lines).each { |l| print l}
-  else
-    chunks << lines
-    output.call Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
-      process.call(chunk)
-    }
-  end
+  pcows.submit_final_worker(process,chunk_lines)
+  pcows.wait_for_workers()
+  pcows.process_remaining_output()
+  print template.footer(binding) if template
   stats.print if stats
 rescue Exception => e
-  # $stderr.print line
-  $stderr.print e.message,"\n"
+  if e.message != 'exit'
+    $stderr.print "ERROR: "
+    $stderr.print e.message,"\n"
+  end
+  pcows.cleanup()
   raise if options[:verbose]
   exit 1
 end