seqtrimnext 2.0.29

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,74 @@
1
+ # $: << '/Users/dariogf/progs/ruby/gems/scbi_plot/lib'
2
+
3
+ require 'scbi_plot'
4
+ # require 'gnu_plot_graph'
5
+
6
# Draws one histogram image per plugin statistic into the 'graphs' directory
# (relative to the current working directory).
class GraphStats

  # Plots every statistic found in +stats+.
  #
  # stats::         Hash of plugin name => { statistic name => values }.
  # initial_stats:: Statistics handed to each plugin's +plot_setup+. When nil
  #                 they are read from 'initial_stats.json' under OUTPUT_PATH.
  def initialize(stats, initial_stats = nil)
    init_stats = initial_stats.nil? ? load_initial_stats : initial_stats

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    Dir.mkdir('graphs') unless File.exist?('graphs')

    @stats = stats

    @stats.each do |plugin_name, plugin_value|
      plugin_class = resolve_plugin_class(plugin_name)

      plugin_value.keys.each do |stats_name|
        puts "Plotting #{stats_name} from #{plugin_name}"

        # skip graphs that the plugin asks to ignore
        next if plugin_class.graph_ignored?(stats_name)

        plot_stat(plugin_class, plugin_name, stats_name, plugin_value[stats_name], init_stats)
      end
    end
  end

  private

  # Reads and parses the initial statistics stored under OUTPUT_PATH.
  def load_initial_stats
    JSON.parse(File.read(File.join(OUTPUT_PATH, 'initial_stats.json')))
  end

  # Returns the class named +plugin_name+, or the generic Plugin class when the
  # name cannot be resolved. Only StandardError is rescued so that interrupts
  # and exit requests are no longer swallowed.
  def resolve_plugin_class(plugin_name)
    Object.const_get(plugin_name)
  rescue StandardError
    Plugin
  end

  # Builds the histogram for a single statistic and renders it.
  def plot_stat(plugin_class, plugin_name, stats_name, values, init_stats)
    x = []
    y = []

    file_name = File.join('graphs', plugin_class.get_graph_filename(plugin_name, stats_name) + '.png')
    plot = ScbiPlot::Histogram.new(file_name, plugin_class.get_graph_title(plugin_name, stats_name))

    plugin_class.auto_setup(values, stats_name, x, y)

    # plot_setup returns true if it has already handled the setup of the plot;
    # otherwise the default histogram is drawn here.
    return if plugin_class.plot_setup(values, stats_name, x, y, init_stats, plot)

    # nothing to draw without matching, non-empty series
    return if x.empty? || y.empty? || x.length != y.length

    plot.x_label = "Length"
    plot.y_label = "Count"

    plot.add_x(x)
    plot.add_y(y)

    plot.do_graph
  end

end
@@ -0,0 +1,43 @@
1
+ require 'open-uri'
2
+
3
# Downloads one of the published database bundles and unpacks it into a
# local folder.
class InstallDatabase

  # Names of the database bundles that may be requested.
  DATABASE_TYPES = %w[core cont_bacteria cont_fungi cont_mitochondrias cont_plastids cont_ribosome].freeze

  # Installs the +type+ bundle into +db_path+ (created when missing).
  # An unknown +type+ only prints the list of available bundles.
  def initialize(type, db_path)
    unless DATABASE_TYPES.include?(type)
      puts "Unknown database #{type}"
      puts "Available databases:"
      puts DATABASE_TYPES.join("\n")
      return
    end

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    FileUtils.mkdir_p(db_path) unless File.exist?(db_path)

    remote_db_url = "http://www.scbi.uma.es/downloads/#{type}_db.zip"
    # name the local archive after the requested bundle instead of always 'core_db.zip'
    local_path = File.join(db_path, "#{type}_db.zip")
    puts "Install databases: #{type}"

    download_and_unzip(remote_db_url, local_path)
  end

  # Fetches +from_url+ into +to_file+, unzips the archive inside its own
  # directory and finally removes the archive.
  def download_and_unzip(from_url, to_file)
    puts "Downloading databases from #{from_url} to #{to_file}"

    # Kernel#open no longer accepts URLs on Ruby 3.0+, so URI.open (open-uri)
    # is used. The archive is streamed to disk in binary mode.
    File.open(to_file, 'wb') do |local|
      URI.open(from_url) { |remote| IO.copy_stream(remote, local) }
    end

    puts "Unzipping #{to_file}"

    # Run unzip from the archive's directory, passing only the base name so a
    # relative db_path still resolves. Arguments bypass the shell, so paths
    # with spaces are safe. unzip's standard output is discarded.
    system('unzip', File.basename(to_file), chdir: File.dirname(to_file), out: File::NULL)

    # the archive is removed whether or not unzip succeeded
    FileUtils.rm_f(to_file)
  end

end
@@ -0,0 +1,123 @@
1
+ #########################################
2
+ # Author:: Almudena Bocinos Rioboo
3
+ # This class provides the methods to check whether the necessary software is installed on the user's system
4
+ #########################################
5
+
6
# Checks that the external programs and Ruby gems registered in
# +load_requirements+ can be found on this machine.
class InstallRequirements

  def initialize
    @external_requirements = {}
    @ruby_requirements = {}
    load_requirements
  end

  # Runs the external-program check and then the gem check, printing every
  # problem found to standard error.
  #
  # Returns true when neither check reported an error, false otherwise.
  def check_install_requirements
    external_ok = report_errors(check_system_requirements, ' Unable to find these external requeriments:')
    ruby_ok = report_errors(check_ruby_requirements, ' Unable to find these Ruby requeriments:')

    external_ok && ruby_ok
  end

  private

  # Prints +header+ followed by each entry of +errors+ to standard error.
  # Returns true when +errors+ is empty.
  def report_errors(errors, header)
    return true if errors.empty?

    $stderr.puts header
    errors.each { |error| $stderr.puts ' -' + error }
    false
  end

  # Returns one message per external command that `which` cannot locate.
  def check_system_requirements
    errors = []

    @external_requirements.each do |cmd, msg|
      next if system("which #{cmd} > /dev/null ")

      errors.push "It's necessary to install #{cmd}. " + msg
    end

    errors
  end

  # Looks every required gem up in `gem list`.
  #
  # install:: when true the user is asked, for each missing gem, whether it
  #           should be installed; when false a message is collected instead.
  #
  # Returns the collected messages (always empty when +install+ is true).
  def check_ruby_requirements(install=true)
    errors = []

    @ruby_requirements.each do |cmd, msg|
      next if system("gem list #{cmd} | grep #{cmd} > /dev/null")

      if install
        puts "Are you sure you wan't to install #{cmd} gem? ([Y/n]):"
        # The original read from an undefined `stdin`, raising NameError.
        # gets returns nil at end of input, which is treated as the default answer.
        res = $stdin.gets.to_s
        if res.chomp.upcase != 'N'
          # NOTE(review): this only echoes the install command, it does not run it.
          system("echo gem install #{cmd}")
        end
      else
        errors.push "It's necessary to install #{cmd}. Issue a: gem install #{cmd} " + msg
      end
    end

    errors
  end

  # seqtrim's requirements are specified here
  def load_requirements

    @external_requirements['blastn']= "You need to install Blast+ 2.2.24 or greater and make sure it is available in your path (export PATH=$PATH:path_to_blast).\nYou can download it from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/"
    @external_requirements['cd-hit-454']= "Download from http://code.google.com/p/cdhit/downloads/list"
    @external_requirements['gnuplot']= "Download from http://www.gnuplot.info/download.html"

    # @external_requirements['pepe']= ""

    # @ruby_requirements = { 'n2array' => "" ,
    @ruby_requirements['narray'] = ''
    @ruby_requirements['gnuplot'] = ''
    @ruby_requirements['term-ansicolor'] = ''
    @ruby_requirements['xml-simple'] = ''
    @ruby_requirements['scbi_blast'] = ''
    @ruby_requirements['scbi_drb'] = ''
    @ruby_requirements['scbi_fasta'] = ''
    @ruby_requirements['scbi_fastq'] = ''
    @ruby_requirements['scbi_plot'] = ''
    @ruby_requirements['scbi_math'] = ''
    # @ruby_requirements['scbi_fastq2'] = ''

  end

  # Placeholder: currently does nothing.
  def install

    # gem install gnuplot
    # gem install narray
    # gem install scbi_blast
    # gem install scbi_drb
    # gem install scbi_fasta
    # gem install scbi_fastq
    # gem install term-ansicolor
    # gem install xml-simple

  end

end
@@ -0,0 +1,49 @@
1
+
2
+ #List all entries in a DB, by name
3
+ #
4
+ #list all DB names if db is ALL
5
+
6
# Prints the names of the entries stored in a formatted database file.
class ListDb

  # Prints, one per line, the text following '>' on every header line of
  # <path>/formatted/<db>. When that file does not exist, a notice and the
  # list of '*.fasta' files found in <path>/formatted are printed instead.
  def initialize(path,db)
    filename = File.join(path, 'formatted', db)

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    if File.exist?(filename)
      # File.foreach closes the file even if printing raises
      File.foreach(filename) do |line|
        entry_name = line[/^>(.*)$/, 1]
        puts entry_name if entry_name
      end
    else
      puts "File #{filename} doesn't exists"
      puts ''
      puts "Available databases:"
      puts '-'*20
      Dir.glob(File.join(path, 'formatted', '*.fasta')).each { |e| puts File.basename(e) }

      # cmd= "grep '^>' #{File.join(path,'formatted',db+'.fasta')}"
      # system(cmd)
    end
  end

  # Returns the base names of the '*.fasta' files in <path>/formatted,
  # or an empty array when +path+ does not exist.
  def self.list_databases(path)
    return [] unless File.exist?(path)

    Dir.glob(File.join(path, 'formatted', '*.fasta')).map { |e| File.basename(e) }
  end

end
@@ -0,0 +1,113 @@
1
+
2
# Keeps the BLAST databases found under a folder up to date: every
# sub-folder is merged into a single fasta file and formatted with
# makeblastdb whenever its listing changes.
class MakeBlastDb

  # dir:: main databases folder. Creating the object runs +update_dbs+ at once.
  def initialize(dir)
    @db_folder = dir
    @status_folder = File.join(@db_folder, 'status_info')
    @formatted_folder = File.join(@db_folder, 'formatted')

    update_dbs
  end

  # Empties +path_end+ and appends to it every non-hidden regular file found
  # under +path_start+, each one followed by blank lines.
  def catFasta(path_start,path_end)
    $LOG.debug("Cat of #{path_start}")

    # system("cat #{path_start} > #{path_end}")
    system("cat /dev/null > #{path_end}")

    system("for i in `find #{path_start} -type f ! -name '.*'`; do echo cat of $i; cat $i >> #{path_end}; echo \"\n\" >> #{path_end}; done")
  end

  # Returns true when +path_db+ holds nothing besides '.', '..' and '.DS_Store'.
  def dirEmpty?(path_db)
    ignore = ['.', '..', '.DS_Store']

    # Dir.entries does not leave a directory handle open (Dir.open did)
    (Dir.entries("#{path_db}") - ignore).empty?
  end

  # Merges the files of +path_db+ into <formatted_folder>/<db_name>.fasta.
  # Does nothing when the folder is empty.
  def merge_db_files(path_db, db_name, formatted_folder)
    return if dirEmpty?(path_db)

    # the cat is only needed when the files in the sub-folder change
    formatted_file = File.join(formatted_folder, db_name + '.fasta')
    catFasta(File.join(path_db), formatted_file)
  end

  # Runs makeblastdb on <formatted_folder>/<db_name>.fasta, appending its
  # output to logs/formatdb.log, and logs the command that was run.
  def self.format_db(path_db, db_name, formatted_folder)
    formatted_file = File.join(formatted_folder, db_name + '.fasta')
    cmd = "makeblastdb -in #{formatted_file} -parse_seqids -dbtype nucl >> logs/formatdb.log"
    system(cmd)
    $LOG.info(cmd)
  end

  #---------------------------------------------------------------------------------------------------
  # Checks whether the files of each database have been updated; makeblastdb runs only for those.
  # Assumes the following directory structure:
  #
  # @db_folder              is the main directory
  # @db_folder/<db_name>    holds all the fasta files of the database <db_name>
  # @db_folder/formatted    is where the merged and formatted databases are stored
  # @db_folder/status_info  keeps one listing per database, used to detect changes
  #---------------------------------------------------------------------------------------------------
  def update_dbs
    FileUtils.mkdir_p(@status_folder)
    FileUtils.mkdir_p(@formatted_folder)

    ignore_folders = ['.', '..', 'status_info', 'formatted']

    $LOG.info("Checking Blast databases at #{@db_folder} for updates")

    # Dir.entries does not leave a directory handle open (Dir.open did)
    Dir.entries(@db_folder).each do |db_name|
      db_folder = File.join(@db_folder, db_name)
      next if ignore_folders.include?(db_name) || !File.directory?(db_folder)

      # set status files
      new_status_file = File.join(@status_folder, 'new_' + db_name + '.txt')
      old_status_file = File.join(@status_folder, 'old_' + db_name + '.txt')

      # write the current listing to new_status_file
      cmd = "ls -lR #{db_folder} > #{new_status_file}"
      $LOG.debug(cmd)
      system(cmd)

      next unless db_outdated?(db_name, new_status_file, old_status_file)

      $LOG.info("Database #{db_name} modified. Merging and formatting")

      merge_db_files(db_folder, db_name, @formatted_folder)

      MakeBlastDb.format_db(db_folder, db_name, @formatted_folder)

      # rename new_status_file to replace the old one
      system("mv #{new_status_file} #{old_status_file}")
    end
  end

  private

  # True when there is no previous listing, when the new listing differs from
  # the previous one, or when the merged fasta file is missing.
  # File.exists? was removed in Ruby 3.2, so File.exist? is used.
  def db_outdated?(db_name, new_status_file, old_status_file)
    !File.exist?(old_status_file) ||
      !system("diff -q #{new_status_file} #{old_status_file} > /dev/null ") ||
      !File.exist?(File.join(@formatted_folder, db_name + '.fasta'))
  end

end
112
+
113
+
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Runs blastall for a single sequence through auxiliary files created in the
# current working directory.
class OneBlast

  # database::   value passed to blastall's -d option
  # blast_type:: value passed to blastall's -p option
  def initialize(database, blast_type = 'blastp')
    @database = database
    @blast_type = blast_type
    @c = 0 # number of sequences written so far; used to name them
  end

  # Writes +seq_fasta+ to 'one_blast_aux.fasta' under a generated name, runs
  # blastall on it and reads 'one_blast_aux.out' to its end.
  #
  # NOTE(review): the value returned is the still-open File on the report,
  # positioned at its end; the last line read is computed but never used.
  # Confirm what callers expect before changing this.
  def do_blast(seq_fasta)
    @f = File.new('one_blast_aux.fasta', 'w+')
    @f.puts(">SEQNAME_#{@c}")
    @f.puts(seq_fasta)
    @c += 1
    @f.close

    blast_call = '~blast/programs/x86_64/bin/blastall -p ' + @blast_type
    blast_call = blast_call + ' -d ' + @database
    blast_call = blast_call + ' -i one_blast_aux.fasta -o one_blast_aux.out'
    #puts blast_call
    system(blast_call)

    last_line = ''
    report = File.open('one_blast_aux.out')
    report.each_line do |line|
      last_line = line
    end
  end

  # Nothing to release: intentionally empty.
  def close
  end

end
40
+
41
+