RubyGems - bio-vcf - Versions diffs - 0.8.1 → 0.9.5 - Mend

bio-vcf 0.8.1 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84) hide show

checksums.yaml +5 -5
data/.travis.yml +1 -11
data/Gemfile +2 -8
data/LICENSE.txt +1 -1
data/README.md +467 -129
data/RELEASE_NOTES.md +27 -0
data/RELEASE_NOTES.md~ +11 -0
data/Rakefile +9 -42
data/TAGS +115 -0
data/VERSION +1 -1
data/bin/bio-vcf +156 -108
data/bio-vcf.gemspec +13 -75
data/features/cli.feature +22 -4
data/features/diff_count.feature +0 -1
data/features/filter.feature +12 -0
data/features/multisample.feature +12 -0
data/features/somaticsniper.feature +2 -0
data/features/step_definitions/cli-feature.rb +15 -6
data/features/step_definitions/diff_count.rb +1 -1
data/features/step_definitions/multisample.rb +19 -0
data/features/step_definitions/somaticsniper.rb +9 -1
data/features/step_definitions/vcf_header.rb +48 -0
data/features/support/env.rb +1 -11
data/features/vcf_header.feature +35 -0
data/lib/bio-vcf.rb +1 -0
data/lib/bio-vcf/pcows.rb +303 -0
data/lib/bio-vcf/vcffile.rb +46 -0
data/lib/bio-vcf/vcfgenotypefield.rb +19 -19
data/lib/bio-vcf/vcfheader.rb +137 -5
data/lib/bio-vcf/vcfheader_line.rb +778 -0
data/lib/bio-vcf/vcfrecord.rb +56 -18
data/lib/bio-vcf/vcfsample.rb +26 -2
data/lib/regressiontest.rb +11 -0
data/lib/regressiontest/cli_exec.rb +101 -0
data/ragel/gen_vcfheaderline_parser.rl +165 -0
data/ragel/generate.sh +8 -0
data/template/vcf2json.erb +16 -16
data/template/vcf2json_full_header.erb +22 -0
data/template/vcf2json_use_meta.erb +41 -0
data/test/data/input/empty.vcf +2 -0
data/test/data/input/gatk_exome.vcf +237 -0
data/test/data/input/gatk_wgs.vcf +1000 -0
data/test/data/input/test.bed +632 -0
data/test/data/regression/empty-stderr.new +12 -0
data/test/data/regression/empty.new +2 -0
data/test/data/regression/empty.ref +2 -0
data/test/data/regression/eval_once-stderr.new +2 -0
data/test/data/regression/eval_once.new +1 -0
data/test/data/regression/eval_once.ref +1 -0
data/test/data/regression/eval_r.info.dp-stderr.new +10 -0
data/test/data/regression/eval_r.info.dp.new +150 -0
data/test/data/regression/ifilter_s.dp-stderr.new +34 -0
data/test/data/regression/ifilter_s.dp.new +31 -0
data/test/data/regression/pass1-stderr.new +10 -0
data/test/data/regression/pass1.new +88 -0
data/test/data/regression/pass1.ref +88 -0
data/test/data/regression/r.info.dp-stderr.new +4 -0
data/test/data/regression/r.info.dp.new +114 -0
data/test/data/regression/rewrite.info.sample-stderr.new +10 -0
data/test/data/regression/rewrite.info.sample.new +150 -0
data/test/data/regression/s.dp-stderr.new +18 -0
data/test/data/regression/s.dp.new +145 -0
data/test/data/regression/seval_s.dp-stderr.new +10 -0
data/test/data/regression/seval_s.dp.new +36 -0
data/test/data/regression/sfilter_seval_s.dp-stderr.new +18 -0
data/test/data/regression/sfilter_seval_s.dp.new +31 -0
data/test/data/regression/thread4-stderr.new +10 -0
data/test/data/regression/thread4.new +150 -0
data/test/data/regression/thread4_4-stderr.new +25 -0
data/test/data/regression/thread4_4.new +130 -0
data/test/data/regression/thread4_4_failed_filter-stderr.new +5 -0
data/test/data/regression/thread4_4_failed_filter-stderr.ref +5 -2
data/test/data/regression/thread4_4_failed_filter.new +110 -0
data/test/data/regression/vcf2json_full_header-stderr.new +10 -0
data/test/data/regression/vcf2json_full_header.new +225 -0
data/test/data/regression/vcf2json_full_header.ref +225 -0
data/test/data/regression/vcf2json_use_meta-stderr.new +10 -0
data/test/data/regression/vcf2json_use_meta.new +4697 -0
data/test/data/regression/vcf2json_use_meta.ref +4697 -0
data/test/performance/metrics.md +18 -1
data/test/stress/stress_test.sh +15 -0
data/test/tmp/test.vcf +12469 -0
metadata +63 -64
data/Gemfile.lock +0 -81

data/RELEASE_NOTES.md ADDED

@@ -0,0 +1,27 @@
+## ChangeLog v0.9.5 (20210118)
++ Improved README and installation instructions
++ Added guix.scm build and instructions (no need for bundler)
++ Moved regressiontest into tree
+## ChangeLog v0.9.4 (20201222)
+This is an important maintenance release of bio-vcf:
++ Rename bioruby-vcf to bio-vcf and migrate project to [vcflib](https://github.com/vcflib/bio-vcf)
++ Fixed tests to match recent Ruby updates
+## Older release notes
++ Getting ready for a 1.0 release
++ Released 0.9.2 as a gem
++ 0.9.1 removed a rare threading bug and cleanup on error
++ Added support for soft filters (request by Brad Chapman)
++ The outputter now writes (properly) in parallel with the parser
++ bio-vcf turns any VCF into JSON with header information, and
+  allows you to pipe that JSON directly into any JSON-supporting
+  language, including Python and JavaScript!
+## Older changes
+For older changes view the git [log](https://github.com/vcflib/bio-vcf/commits/master).

data/RELEASE_NOTES.md~ ADDED

@@ -0,0 +1,11 @@
+## RELEASE NOTES
+* Getting ready for a 1.0 release
+* Released 0.9.2 as a gem
+* 0.9.1 removed a rare threading bug and cleanup on error
+* Added support for soft filters (request by Brad Chapman)
+* The outputter now writes (properly) in parallel with the parser
+* bio-vcf turns any VCF into JSON with header information, and
+  allows you to pipe that JSON directly into any JSON supporting
+  language, including Python and Javascript!

data/Rakefile CHANGED

@@ -1,54 +1,21 @@
 # encoding: utf-8
-require 'rubygems'
-require 'bundler'
-begin
-  Bundler.setup(:default, :development)
-rescue Bundler::BundlerError => e
-  $stderr.puts e.message
-  $stderr.puts "Run `bundle install` to install missing gems"
-  exit e.status_code
-end
+# require 'rubygems'
 require 'rake'
+# require 'cucumber/rake/task'
-require 'jeweler'
-Jeweler::Tasks.new do |gem|
-  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
-  gem.name = "bio-vcf"
-  gem.homepage = "http://github.com/pjotrp/bioruby-vcf"
-  gem.license = "MIT"
-  gem.summary = %Q{Fast multi-threaded VCF parser}
-  gem.description = %Q{Smart lazy multi-threaded parser for VCF format with useful filtering and output rewriting}
-  gem.email = "pjotr.public01@thebird.nl"
-  gem.authors = ["Pjotr Prins"]
-  gem.required_ruby_version = '>=2.0.0'
-  # dependencies defined in Gemfile
-end
-Jeweler::RubygemsDotOrgTasks.new
-# require 'rspec/core'
-# require 'rspec/core/rake_task'
-# RSpec::Core::RakeTask.new(:spec) do |spec|
-#   spec.pattern = FileList['spec/**/*_spec.rb']
-# end
-# RSpec::Core::RakeTask.new(:rcov) do |spec|
-#   spec.pattern = 'spec/**/*_spec.rb'
-#   spec.rcov = true
+# Cucumber::Rake::Task.new(:features) do |t|
+  # t.cucumber_opts = "--bundler false"
 # end
-# require 'rake/testtask'
-# Rake::TestTask.new do |t|
-#   t.pattern = "spec/*_spec.rb"
-# end
-require 'cucumber/rake/task'
-Cucumber::Rake::Task.new(:features)
+desc 'Run cucumber' # without bundler
+task :features do
+  sh 'cucumber features'
+end
 task :default => :features
-task :test => [ :features ]
+task :test => [ :features ]
 require 'rdoc/task'
 Rake::RDocTask.new do |rdoc|

data/TAGS ADDED

@@ -0,0 +1,115 @@
+./bin/bio-vcf,0
+./lib/bio-vcf.rb,0
+./lib/bio-vcf/vcfgenotypefield.rb,1553
+module BioVcf::BioVcf1,0
+  class VcfNucleotides::BioVcf::VcfNucleotides7,167
+    def initialize::BioVcf::VcfNucleotides#BioVcf::VcfNucleotides.new8,193
+    def []::BioVcf::VcfNucleotides#[]13,284
+    def to_ary::BioVcf::VcfNucleotides#to_ary27,628
+    def max::BioVcf::VcfNucleotides#max32,742
+    def min::BioVcf::VcfNucleotides#min37,856
+    def sum::BioVcf::VcfNucleotides#sum42,975
+  class VcfAltInfo::BioVcf::VcfAltInfo50,1082
+    def initialize::BioVcf::VcfAltInfo#BioVcf::VcfAltInfo.new51,1103
+    def []::BioVcf::VcfAltInfo#[]56,1194
+    def to_ary::BioVcf::VcfAltInfo#to_ary70,1512
+    def max::BioVcf::VcfAltInfo#max75,1626
+    def min::BioVcf::VcfAltInfo#min79,1702
+    def sum::BioVcf::VcfAltInfo#sum83,1783
+  class VcfGenotypeField::BioVcf::VcfGenotypeField88,1850
+    def initialize::BioVcf::VcfGenotypeField#BioVcf::VcfGenotypeField.new89,1877
+    def dp4::BioVcf::VcfGenotypeField#dp496,2020
+    def ad::BioVcf::VcfGenotypeField#ad100,2098
+    def pl::BioVcf::VcfGenotypeField#pl104,2174
+    def bcount::BioVcf::VcfGenotypeField#bcount108,2250
+    def bq::BioVcf::VcfGenotypeField#bq112,2343
+    def amq::BioVcf::VcfGenotypeField#amq116,2424
+    def method_missing::BioVcf::VcfGenotypeField#method_missing120,2507
+  class VcfGenotypeFields::BioVcf::VcfGenotypeFields130,2709
+    def initialize::BioVcf::VcfGenotypeFields#BioVcf::VcfGenotypeFields.new131,2737
+    def []::BioVcf::VcfGenotypeFields#[]141,3021
+    def method_missing::BioVcf::VcfGenotypeFields#method_missing145,3136
+./lib/bio-vcf/vcfrdf.rb,156
+module BioVcf::BioVcf1,0
+  module VcfRdf::BioVcf::VcfRdf5,93
+    def VcfRdf::BioVcf::VcfRdf#VcfRdf7,112
+    def VcfRdf::BioVcf::VcfRdf#VcfRdf18,463
+./lib/bio-vcf/vcf.rb,27
+module BioVcf::BioVcf2,1
+./lib/bio-vcf/vcfline.rb,118
+module BioVcf::BioVcf1,0
+  module VcfLine::BioVcf::VcfLine2,16
+    def VcfLine.parse::BioVcf::VcfLine.parse5,82
+./lib/bio-vcf/vcfrecord.rb,1831
+module BioVcf::BioVcf1,0
+  class VcfRecordInfo::BioVcf::VcfRecordInfo3,17
+    def initialize::BioVcf::VcfRecordInfo#BioVcf::VcfRecordInfo.new4,41
+    def method_missing::BioVcf::VcfRecordInfo#method_missing9,163
+  module VcfRecordParser::BioVcf::VcfRecordParser18,329
+    def VcfRecordParser.get_format::BioVcf::VcfRecordParser.get_format20,397
+    def VcfRecordParser.get_info::BioVcf::VcfRecordParser.get_info25,517
+  module VcfRecordCall::BioVcf::VcfRecordCall30,592
+    def call_diff::BioVcf::VcfRecordCall#call_diff31,617
+    def call_nuc::BioVcf::VcfRecordCall#call_nuc35,705
+    def call_tumor_count::BioVcf::VcfRecordCall#call_tumor_count39,764
+    def call_tumor_relative_count::BioVcf::VcfRecordCall#call_tumor_relative_count43,833
+    def call_normal_count::BioVcf::VcfRecordCall#call_normal_count47,955
+    def index::BioVcf::VcfRecordCall#index51,1026
+  class VcfRecord::BioVcf::VcfRecord56,1125
+    attr_reader :header::BioVcf::VcfRecord#header60,1173
+    def initialize::BioVcf::VcfRecord#BioVcf::VcfRecord.new62,1198
+    def chrom::BioVcf::VcfRecord#chrom67,1292
+    def pos::BioVcf::VcfRecord#pos71,1332
+    def ids::BioVcf::VcfRecord#ids75,1384
+    def id::BioVcf::VcfRecord#id79,1443
+    def ref::BioVcf::VcfRecord#ref83,1476
+    def alt::BioVcf::VcfRecord#alt87,1524
+    def qual::BioVcf::VcfRecord#qual91,1582
+    def info::BioVcf::VcfRecord#info95,1636
+    def format::BioVcf::VcfRecord#format99,1711
+    def normal::BioVcf::VcfRecord#normal104,1848
+    def tumor::BioVcf::VcfRecord#tumor109,1997
+    def sample::BioVcf::VcfRecord#sample114,2134
+    def sample_by_name::BioVcf::VcfRecord#sample_by_name118,2227
+    def missing_samples?::BioVcf::VcfRecord#missing_samples?122,2283
+    def method_missing::BioVcf::VcfRecord#method_missing126,2341
+./lib/bio-vcf/variant.rb,470
+module BioVcf::BioVcf1,0
+  module Variant::BioVcf::Variant3,17
+    def Variant.diff::BioVcf::Variant.diff5,37
+    def Variant.threshold_diff::BioVcf::Variant.threshold_diff9,132
+    def Variant.relative_diff::BioVcf::Variant.relative_diff14,269
+    def Variant.relative_threshold_diff::BioVcf::Variant.relative_threshold_diff20,497
+    def Variant.index::BioVcf::Variant.index25,652
+    def Variant.apply_threshold::BioVcf::Variant.apply_threshold31,809
+./lib/bio-vcf/vcfheader.rb,598
+module BioVcf::BioVcf2,1
+  module VcfHeaderParser::BioVcf::VcfHeaderParser4,18
+    def VcfHeaderParser.get_column_names::BioVcf::VcfHeaderParser.get_column_names5,45
+  class VcfHeader::BioVcf::VcfHeader18,339
+    attr_reader :lines::BioVcf::VcfHeader#lines20,360
+    def initialize::BioVcf::VcfHeader#BioVcf::VcfHeader.new22,384
+    def add::BioVcf::VcfHeader#add26,430
+    def version::BioVcf::VcfHeader#version30,483
+    def column_names::BioVcf::VcfHeader#column_names34,578
+    def columns::BioVcf::VcfHeader#columns38,674
+    def samples::BioVcf::VcfHeader#samples42,735
+./features/step_definitions/diff_count.rb,0
+./features/step_definitions/bio-vcf_steps.rb,0
+./features/step_definitions/somaticsniper.rb,0
+./features/step_definitions/multisample.rb,0
+./features/support/env.rb,0

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.8.1
1	+ 0.9.5

data/bin/bio-vcf CHANGED

@@ -4,7 +4,7 @@
 # Author:: Pjotr Prins
 # License:: MIT
 #
-# Copyright (C) 2014 Pjotr Prins <pjotr.prins@thebird.nl>
+# Copyright (C) 2014-2020 Pjotr Prins <pjotr.prins@thebird.nl>
 USAGE = "Vcf parser"
@@ -15,22 +15,23 @@ VERSION_FILENAME=File.join(gempath,'VERSION')
 version = File.new(VERSION_FILENAME).read.chomp
 require 'bio-vcf'
+require 'bio-vcf/pcows'
 require 'optparse'
 require 'timeout'
 require 'fileutils'
-# Uncomment when using the bio-logger
+# Uncomment when using the bio-logger
 # require 'bio-logger'
 # log = Bio::Log::LoggerPlus.new 'vcf'
-# log.outputters = Bio::Log::Outputter.stderr
+# log.outputters = Bio::Log::Outputter.stderr
 # Bio::Log::CLI.logger('stderr')
 # Bio::Log::CLI.trace('info')
-options = { show_help: false, source: 'https://github.com/CuppenResearch/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000 }
+options = { show_help: false, source: 'https://github.com/pjotrp/bioruby-vcf', version: version+' (Pjotr Prins)', date: Time.now.to_s, thread_lines: 40_000, timeout: 180 }
 opts = OptionParser.new do |o|
   o.banner = "Usage: #{File.basename($0)} [options] filename\ne.g.  #{File.basename($0)} < test/data/input/somaticsniper.vcf"
-  o.on('-i','--ignore-missing', 'Ignore missing data') do
+  o.on('-i','--ignore-missing', 'Ignore missing data') do
     options[:ignore_missing] = true
   end
   o.on('--filter cmd',String, 'Evaluate filter on each record') do |cmd|
@@ -57,6 +58,9 @@ opts = OptionParser.new do |o|
   o.on("--efilter-samples list", Array, "Exclude set - overrides exclude set") do |l|
     options[:efilter_samples] = l
   end
+  o.on('--add-filter name',String, 'Set/add filter field to name') do |name|
+    options[:add_filter] = name
+  end
   o.on("--bed bedfile", String, "Filter on BED elements") do |bed|
     options[:bed] = bed
@@ -68,6 +72,9 @@ opts = OptionParser.new do |o|
   o.on('--eval-once cmd',String, 'Evaluate command once (usually for header info)') do |cmd|
     options[:eval_once] = true
     options[:eval] = cmd
+    # options[:num_threads] = 1
+    # options[:thread_lines] = 1
+    options[:skip_header] = true
   end
   o.on('--seval cmd',String, 'Evaluate command on each sample') do |cmd|
     options[:seval] = cmd
@@ -84,7 +91,7 @@ opts = OptionParser.new do |o|
     options[:rdf] = true
     options[:skip_header] = true
   end
-  o.on("--num-threads [num]", Integer, "Multi-core version (default #{options[:num_threads]})") do |i|
+  o.on("--num-threads [num]", Integer, "Multi-core version (default ALL)") do |i|
     options[:num_threads] = i
   end
   o.on("--thread-lines num", Integer, "Fork thread on num lines (default #{options[:thread_lines]})") do |i|
@@ -96,8 +103,8 @@ opts = OptionParser.new do |o|
   o.on_tail("--tags list", String, "Add tags") do |s|
     options[:tags] = s
   end
-  o.on("--skip-header", "Do not output VCF header info") do
+  o.on("--skip-header", "Do not output VCF header info") do
     options[:skip_header] = true
   end
@@ -112,9 +119,16 @@ opts = OptionParser.new do |o|
     options[:template] = s
     options[:skip_header] = true
   end
-  # Uncomment the following when using the bio-logger
+  o.on("--add-header-tag", "Add bio-vcf status tag to header output") do |t|
+    options[:tag] = true
+  end
+  o.on("--timeout [num]", Integer, "Timeout waiting for thread to complete (default #{options[:timeout]})") do |i|
+    options[:timeout] = i
+  end
+  # Uncomment the following when using the bio-logger
   # o.separator ""
   # o.on("--logger filename",String,"Log to file (default stderr)") do | name |
   #   Bio::Log::CLI.logger(name)
@@ -123,7 +137,16 @@ opts = OptionParser.new do |o|
   # o.on("--trace options",String,"Set log level (default INFO, see bio-logger)") do | s |
   #   Bio::Log::CLI.trace(s)
   # end
-  #
+  #
+  o.on("--names", "Output sample names") do |q|
+    options[:quiet] = true
+    options[:num_threads] = nil
+    options[:eval_once] = true
+    options[:eval] = 'header.samples.join("\t")'
+    # options[:num_threads] = 1
+    # options[:thread_lines] = 1
+    options[:skip_header] = true
+  end
   o.on("--statistics", "Output statistics") do |q|
     options[:statistics] = true
     options[:num_threads] = nil
@@ -132,14 +155,15 @@ opts = OptionParser.new do |o|
     # Bio::Log::CLI.trace('error')
     options[:quiet] = true
   end
   o.on("-v", "--verbose", "Run verbosely") do |v|
     options[:verbose] = true
   end
-  # o.on("--debug", "Show debug messages") do |v|
-  #   Bio::Log::CLI.trace('debug')
-  # end
+  o.on("--debug", "Show debug messages and keep intermediate output") do |v|
+    # Bio::Log::CLI.trace('debug')
+    options[:debug] = true
+  end
   o.separator ""
   o.on_tail('-h', '--help', 'display this help and exit') do
@@ -150,11 +174,11 @@ end
 opts.parse!(ARGV)
 BIOVCF_VERSION=version
-BIOVCF_BANNER = "vcf #{version} (biogem Ruby #{RUBY_VERSION}) by Pjotr Prins 2014\n" if !options[:quiet]
-$stderr.print BIOVCF_BANNER
+BIOVCF_BANNER = "bio-vcf #{version} (biogem Ruby #{RUBY_VERSION} with pcows) by Pjotr Prins 2015-2020\n"
+$stderr.print BIOVCF_BANNER if !options[:quiet]
-if options[:show_help]
-  print opts
+if options[:show_help]
+  print opts
   print USAGE
   exit 1
 end
@@ -174,15 +198,6 @@ if options[:template]
   template = Bio::Template.new(fn)
 end
-if options[:num_threads] != 1
-  begin
-    require 'parallel'
-  rescue LoadError
-    $stderr.print "Error: Missing 'parallel' module. Install with command 'gem install parallel' if you want multiple threads\n"
-    options[:num_threads] = 1
-  end
-end
 stats = nil
 if options[:statistics]
   options[:num_threads] = nil
@@ -193,6 +208,8 @@ end
 raise "Missing option --ifilter" if options[:ifilter_samples] and not options[:ifilter]
 raise "Missing option --efilter" if options[:efilter_samples] and not options[:efilter]
 raise "Missing option --sfilter" if options[:sfilter_samples] and not options[:sfilter]
+# raise "Soft filter not supported with --ifilter" if options[:add_filter] and options[:ifilter]
+# raise "Soft filter not supported with --efilter" if options[:add_filter] and options[:efilter]
 if options[:samples]
   samples = options[:samples].map { |s| s.to_i }
@@ -200,13 +217,14 @@ end
 include BioVcf
-# Parse the header section of a VCF file
+# Parse the header section of a VCF file (chomping STDIN)
 def parse_header line, samples, options
-  header = VcfHeader.new
+  header = VcfHeader.new(options[:debug])
   header.add(line)
   print line if not options[:skip_header]
   STDIN.each_line do | headerline |
     if headerline !~ /^#/
+      # If no records in VCF, we never get here
       line = headerline
       break # end of header
     end
@@ -214,12 +232,19 @@ def parse_header line, samples, options
     if not options[:skip_header]
       if headerline =~ /^#CHR/
         # The header before actual data contains the sample names, first inject the BioVcf meta information
-        print header.tag(options),"\n" if not options[:skip_header]
+        print header.tag(options),"\n" if options[:tag] and not options[:skip_header]
+        # Then the additional filter(s)
+        # ##FILTER=<ID=LowQual,Description="Low quality">
+        add_filter = options[:add_filter]
+        if add_filter
+          print "##FILTER=<ID=",add_filter,",Description=\"",options[:filter],"\">\n"
+        end
         selected = header.column_names
         if samples
           newfields = selected[0..8]
           samples.each do |s|
-            newfields << selected[s+9]
+            newfields << selected[s+9]
           end
           selected = newfields
         end
@@ -231,10 +256,14 @@ def parse_header line, samples, options
   end
   print header.printable_header_line(options[:set_header]),"\n" if options[:set_header]
   VcfRdf::header if options[:rdf]
+  if line =~ /^#/
+    # We did not read a record
+    line = nil
+  end
   return header,line
 end
-# Parse a VCF line and return the result as a string
+# Parse a VCF line and return the (template) result as a string buffer
 def parse_line line,header,options,bedfilter,samples,template,stats=nil
   fields = VcfLine.parse(line)
   rec = VcfRecord.new(fields,header)
@@ -244,9 +273,11 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
   sfilter = options[:sfilter]
   efilter = options[:efilter]
   ifilter = options[:ifilter]
+  add_filter = options[:add_filter] # contains a filter name (soft filter)
   seval = options[:seval]
   ignore_missing = options[:ignore_missing]
   quiet = options[:quiet]
+  set_filter_field = nil
   if sfilter or efilter or ifilter or seval
     # check for samples
@@ -261,15 +292,27 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
     return if not bed
   end
-  return if filter and not rec.filter(filter,ignore_missing_data: ignore_missing,quiet: quiet)
-  if sfilter
+  skip = lambda { |&m|
+    matched = m.call
+    if add_filter
+      set_filter_field = true if matched
+      false  # always continue processing with an add-filter
+    else
+      not matched
+    end
+  }
+  if filter
+    return if skip.call { rec.gfilter(filter,ignore_missing_data: ignore_missing,quiet: quiet) }
+  end
+  if sfilter # sample 'or' filter
     rec.each_sample(options[:sfilter_samples]) do | sample |
-      return if not sample.sfilter(sfilter,ignore_missing_data: ignore_missing,quiet: quiet)
+      return if skip.call { sample.sfilter(sfilter,ignore_missing_data: ignore_missing,quiet: quiet) }
     end
   end
-  if ifilter
+  if ifilter # include sample filter
     found = false
     rec.each_sample(options[:ifilter_samples]) do | sample |
       if sample.ifilter(ifilter,ignore_missing_data: ignore_missing,quiet: quiet)
@@ -278,12 +321,12 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
       end
     end
     # Skip if there are no matches
-    return if not found
+    return if skip.call {found}
   end
-  if efilter
+  if efilter # exclude sample filter
     rec.each_sample(options[:efilter_samples]) do | sample |
-      return if not sample.efilter(efilter,ignore_missing_data: ignore_missing,quiet: quiet)
+      return if skip.call{ sample.efilter(efilter,ignore_missing_data: ignore_missing,quiet: quiet) }
     end
   end
@@ -291,18 +334,21 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
   # -----------------------------
   # From here on decide on output
+  rec.add_to_filter_field(add_filter) if set_filter_field
   if samples
     # Select certain samples for output
     newfields = fields[0..8]
     samples.each do |s|
-      newfields << fields[s+9]
+      newfields << fields[s+9]
     end
     fields = newfields
   end
   if options[:eval] or seval
     begin
       results = nil # result string
-      if options[:eval]
+      if options[:eval]
         res = rec.eval(options[:eval],ignore_missing_data: ignore_missing,quiet: quiet)
         results = res if res
       end
@@ -320,23 +366,22 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
       exit 1
     end
     return results.to_s+"\n" if results
-    exit(1) if options[:eval_once]  # <--- can this be reached?
   else
     if options[:rdf]
       # Output Turtle RDF
       VcfRdf::record(options[:id],rec,options[:tags])
     elsif options[:template]
-      # Ruby ERB template
+      # Use ERB template
       begin
         template.body(binding)
       rescue Exception => e
         $stderr.print e,": ",fields,"\n"
         $stderr.print e.backtrace.inspect if options[:verbose]
-        raise
+        raise
       end
     elsif options[:rewrite]
       # Default behaviour prints VCF line, but rewrite info
-      eval(options[:rewrite])
+      eval(options[:rewrite])
       (fields[0..6]+[rec.info.to_s]+fields[8..-1]).join("\t")+"\n"
     elsif stats
       # do nothing
@@ -347,20 +392,21 @@ def parse_line line,header,options,bedfilter,samples,template,stats=nil
   end
 end
+CHUNK_SIZE = options[:thread_lines]
+pcows = PCOWS.new(options[:num_threads],CHUNK_SIZE,'bio-vcf',options[:timeout],
+                  options[:quiet],options[:debug])
 header = nil
 header_output_completed = false
-NUM_THREADS = options[:num_threads]
-CHUNK_SIZE = options[:thread_lines]
-CHUNK_NUM = (NUM_THREADS && NUM_THREADS>6 ? NUM_THREADS*4 : 24)
-chunks = []
-lines = []
+chunk_lines = []
 line_number=0
 if options[:bed]
   bedfilter = BedFilter.new(options[:bed])
-end
+end
 begin
+  # Define linear parser function (going through one chunk)
   process = lambda { | lines |
     res = []
     lines.each do | line |
@@ -368,73 +414,75 @@ begin
     end
     res
   }
-  output = lambda { |collection|
-    collection.each do | result |
-      result.each { |line| print line }
-    end
-  } # end output
-  print template.header(binding) if template
   # ---- Main loop
   STDIN.each_line do | line |
     line_number += 1
-    # ---- In this section header information is handled
+    # ---- Skip embedded headers down the line...
     next if header_output_completed and line =~ /^#/
-    if line =~ /^##fileformat=/ or line =~ /^#CHR/
-      header,line = parse_header(line,samples,options)
-    end
-    next if line =~ /^##/ # empty file
-    header_output_completed = true
-    if not options[:efilter_samples] and options[:ifilter_samples]
-      # Create exclude set as a complement of include set
-      options[:efilter_samples] = header.column_names[9..-1].fill{|i|i.to_s}-options[:ifilter_samples]
-    end
-    # ---- In this section the VCF variant lines are parsed
-    lines << line
-    if NUM_THREADS == 1
-      $stderr.print '.' if line_number % CHUNK_SIZE == 0 and not options[:quiet]
-      if lines.size > CHUNK_SIZE
-        process.call(lines).each { | l | print l }
-        lines = []
+    # ---- In the following section header information is handled -
+    #      this only happens once.
+    # ---- Parse the header lines (chomps from STDIN)
+    #      and returns header info and the current line
+    if line =~ /^#/
+      header, line = parse_header(line,samples,options)
+      if line.nil?
+        # No line after header, so there are no records to process
+        break
       end
-    else
-      if lines.size > CHUNK_SIZE
-        chunks << lines
-        if chunks.size > CHUNK_NUM
-          $stderr.print '.' if not options[:quiet]
-          out = Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
-            process.call(chunk)
-          }
-          chunks = []
-          # Output is forked to a separate process too
-          fork do
-            output.call out
-            STDOUT.flush
-            STDOUT.close
-            exit 0
-          end
-        end
-        lines = []
+    end
+    # p [line_number,line]
+    # ---- After the header continue processing
+    if not header_output_completed
+      # one-time post-header processing
+      if not options[:efilter_samples] and options[:ifilter_samples]
+        # Create exclude set as a complement of include set
+        options[:efilter_samples] = header.column_names[9..-1].fill{|i|i.to_s}-options[:ifilter_samples]
       end
+      print template.header(binding) if template
+      header_output_completed = true
+    end
+    if options[:eval_once]
+      # this happens if we only want one line evaluated - say to get
+      # the number of samples
+      print parse_line(line,header,options,bedfilter,samples,template,stats)
+      exit 0
+    end
+    # ---- Lines are collected in one buffer and the lines buffer
+    #      is added to the chunks list (for the threads)
+    chunk_lines << line
+    # ---- In the following section the VCF lines are parsed by chunks
+    #      The chunks may go into different threads
+    if chunk_lines.size >= CHUNK_SIZE
+      # ---- process one chunk
+      $stderr.print '.' if not options[:quiet]
+      pcows.wait_for_worker_slot()
+      pcows.submit_worker(process,chunk_lines)
+      pcows.process_output()
+      chunk_lines = []
     end
   end
-  $stderr.print '.' if not options[:quiet]
-  if NUM_THREADS == 1
-    process.call(lines).each { |l| print l}
-  else
-    chunks << lines
-    output.call Parallel.map(chunks, :in_processes => NUM_THREADS) { | chunk |
-      process.call(chunk)
-    }
-  end
+  pcows.submit_final_worker(process,chunk_lines)
+  pcows.wait_for_workers()
+  pcows.process_remaining_output()
   print template.footer(binding) if template
   stats.print if stats
 rescue Exception => e
-  # $stderr.print line
-  $stderr.print e.message,"\n"
+  if e.message != 'exit'
+    $stderr.print "ERROR: "
+    $stderr.print e.message,"\n"
+  end
+  pcows.cleanup()
   raise if options[:verbose]
   exit 1
 end