seqtrimnext 2.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +114 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +159 -0
  5. data/Rakefile +38 -0
  6. data/bin/create_graphs.rb +46 -0
  7. data/bin/extract_seqs.rb +45 -0
  8. data/bin/extract_seqs_from_fasta.rb +56 -0
  9. data/bin/extract_seqs_from_fastq.rb +45 -0
  10. data/bin/fasta2fastq.rb +38 -0
  11. data/bin/fastq2fasta.rb +35 -0
  12. data/bin/gen_qual.rb +46 -0
  13. data/bin/get_seq.rb +46 -0
  14. data/bin/group_by_range.rb +17 -0
  15. data/bin/join_ilumina_paired.rb +130 -0
  16. data/bin/parse_amplicons.rb +95 -0
  17. data/bin/parse_json_results.rb +66 -0
  18. data/bin/parse_params.rb +82 -0
  19. data/bin/resume_clusters.rb +48 -0
  20. data/bin/resume_rejected.sh +9 -0
  21. data/bin/reverse_paired.rb +49 -0
  22. data/bin/seqtrimnext +368 -0
  23. data/bin/split_fastq.rb +42 -0
  24. data/bin/split_ilumina_paired.rb +65 -0
  25. data/bin/split_paired.rb +70 -0
  26. data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
  27. data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
  28. data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
  29. data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
  30. data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
  31. data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
  32. data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
  33. data/lib/seqtrimnext/actions/action_insert.rb +32 -0
  34. data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
  35. data/lib/seqtrimnext/actions/action_key.rb +30 -0
  36. data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
  37. data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
  38. data/lib/seqtrimnext/actions/action_linker.rb +30 -0
  39. data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
  40. data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
  41. data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
  42. data/lib/seqtrimnext/actions/action_mid.rb +30 -0
  43. data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
  44. data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
  45. data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
  46. data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
  47. data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
  48. data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
  49. data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
  50. data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
  51. data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
  52. data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
  53. data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
  54. data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
  55. data/lib/seqtrimnext/classes/action_manager.rb +47 -0
  56. data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
  57. data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
  58. data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
  59. data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
  60. data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
  61. data/lib/seqtrimnext/classes/install_database.rb +43 -0
  62. data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
  63. data/lib/seqtrimnext/classes/list_db.rb +49 -0
  64. data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
  65. data/lib/seqtrimnext/classes/one_blast.rb +41 -0
  66. data/lib/seqtrimnext/classes/params.rb +387 -0
  67. data/lib/seqtrimnext/classes/piro.rb +78 -0
  68. data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
  69. data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
  70. data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
  71. data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
  72. data/lib/seqtrimnext/classes/sequence.rb +55 -0
  73. data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
  74. data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
  75. data/lib/seqtrimnext/plugins/plugin.rb +267 -0
  76. data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
  77. data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
  78. data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
  79. data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
  80. data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
  81. data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
  82. data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
  83. data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
  84. data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
  85. data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
  86. data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
  87. data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
  88. data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
  89. data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
  90. data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
  91. data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
  92. data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
  93. data/lib/seqtrimnext/templates/amplicons.txt +16 -0
  94. data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
  95. data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
  96. data/lib/seqtrimnext/templates/low_quality.txt +5 -0
  97. data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
  98. data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
  99. data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
  100. data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
  101. data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
  102. data/lib/seqtrimnext/utils/global_match.rb +65 -0
  103. data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
  104. data/lib/seqtrimnext/utils/json_utils.rb +50 -0
  105. data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
  106. data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
  107. data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
  108. data/lib/seqtrimnext/utils/string_utils.rb +56 -0
  109. data/lib/seqtrimnext.rb +37 -0
  110. data/script/console +10 -0
  111. data/script/destroy +14 -0
  112. data/script/generate +14 -0
  113. data/test/test_helper.rb +3 -0
  114. data/test/test_seqtrimnext.rb +11 -0
  115. metadata +318 -0
@@ -0,0 +1,74 @@
1
+ # $: << '/Users/dariogf/progs/ruby/gems/scbi_plot/lib'
2
+
3
+ require 'scbi_plot'
4
+ # require 'gnu_plot_graph'
5
+
6
# Draws one histogram image per statistic found in a stats hash.
#
# The stats hash is keyed by plugin name; each value is a hash keyed by
# statistic name. The plugin name is resolved to a class (falling back to
# +Plugin+), and that class decides whether a statistic is plotted, which file
# name and title it gets, and how the x/y series are filled.
class GraphStats

  # Plots every non-ignored statistic into the 'graphs' directory (created in
  # the current working directory when missing).
  #
  # stats::         hash of plugin_name => { stats_name => values }
  # initial_stats:: stats handed to each plugin's plot_setup; when nil they
  #                 are read from OUTPUT_PATH/initial_stats.json
  def initialize(stats, initial_stats = nil)
    init_stats = initial_stats
    if init_stats.nil?
      raw = File.read(File.join(OUTPUT_PATH, 'initial_stats.json'))
      init_stats = JSON.parse(raw)
    end

    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    Dir.mkdir('graphs') unless File.exist?('graphs')

    @stats = stats

    @stats.each do |plugin_name, plugin_value|
      plugin_class = plugin_class_for(plugin_name)

      plugin_value.keys.each do |stats_name|
        puts "Plotting #{stats_name} from #{plugin_name}"
        next if plugin_class.graph_ignored?(stats_name)

        plot_stat(plugin_class, plugin_name, stats_name, plugin_value[stats_name], init_stats)
      end
    end
  end

  private

  # Resolves a plugin name to its class, using the generic Plugin class when
  # the name cannot be resolved to a constant.
  def plugin_class_for(plugin_name)
    Object.const_get(plugin_name)
  rescue StandardError
    # Deliberately not `rescue Exception`: interrupts and exit requests must
    # not be silently turned into "use the default plugin".
    Plugin
  end

  # Builds the histogram for a single statistic.
  def plot_stat(plugin_class, plugin_name, stats_name, values, init_stats)
    x = []
    y = []

    file_name = File.join('graphs', plugin_class.get_graph_filename(plugin_name, stats_name) + '.png')
    plot = ScbiPlot::Histogram.new(file_name, plugin_class.get_graph_title(plugin_name, stats_name))

    plugin_class.auto_setup(values, stats_name, x, y)

    # plot_setup returns true when the plugin has already handled the plot.
    return if plugin_class.plot_setup(values, stats_name, x, y, init_stats, plot)

    # Default rendering only makes sense for two non-empty, aligned series.
    return if x.empty? || y.empty? || x.length != y.length

    plot.x_label = "Length"
    plot.y_label = "Count"

    plot.add_x(x)
    plot.add_y(y)

    plot.do_graph
  end

end
@@ -0,0 +1,43 @@
1
+ require 'open-uri'
2
+
3
# Downloads one of the predefined database archives and unpacks it into a
# local database folder.
class InstallDatabase

  # Installs the database named +type+ under +db_path+.
  #
  # type::    one of the names listed in +types+ below; any other value only
  #           prints the list of available databases
  # db_path:: destination folder, created when it does not exist
  def initialize(type, db_path)
    types = ['core', 'cont_bacteria', 'cont_fungi', 'cont_mitochondrias', 'cont_plastids', 'cont_ribosome']

    if types.include?(type)
      # File.exists? was removed in Ruby 3.2.
      FileUtils.mkdir_p(db_path) unless File.exist?(db_path)

      remote_db_url = "http://www.scbi.uma.es/downloads/#{type}_db.zip"
      # Name the temporary archive after the requested database instead of
      # always calling it core_db.zip.
      local_path = File.join(db_path, "#{type}_db.zip")
      puts "Install databases: #{type}"

      download_and_unzip(remote_db_url, local_path)
    else
      puts "Unknown database #{type}"
      puts "Available databases:"
      puts types.join("\n")
    end
  end

  # Downloads +from_url+ into +to_file+, unzips it next to the archive and
  # removes the archive afterwards.
  def download_and_unzip(from_url, to_file)
    puts "Downloading databases from #{from_url} to #{to_file}"

    # Kernel#open no longer opens URLs (Ruby 3.0+); URI.open is the open-uri
    # entry point. The archive is binary, so it is written in binary mode and
    # streamed instead of being loaded into memory.
    URI.open(from_url) do |remote|
      File.open(to_file, 'wb') { |local| IO.copy_stream(remote, local) }
    end

    puts "Unzipping #{to_file}"

    # Argument-list form: no shell is involved, so paths containing spaces or
    # shell metacharacters are handled, and -d removes the need for `cd`.
    system('unzip', to_file, '-d', File.dirname(to_file), out: File::NULL)

    # The archive is removed whether or not unzip succeeded.
    File.delete(to_file) if File.exist?(to_file)
  end

end
@@ -0,0 +1,123 @@
1
+ #########################################
2
+ # Author:: Almudena Bocinos Rioboo
3
+ # This class provides the methods to check whether the necessary software is installed on the user's system
4
+ #########################################
5
+
6
# Checks that the external programs and Ruby gems listed in
# load_requirements are available on the current system.
class InstallRequirements

  def initialize
    @external_requirements = {}
    @ruby_requirements = {}
    load_requirements
  end

  # Runs the external-program check and then the gem check, printing every
  # reported problem to $stderr.
  #
  # Returns true when neither check reported an error, false otherwise.
  def check_install_requirements
    res = true

    errors = check_system_requirements
    unless errors.empty?
      $stderr.puts ' Unable to find these external requeriments:'
      errors.each { |error| $stderr.puts ' -' + error }
      res = false
    end

    errors = check_ruby_requirements
    unless errors.empty?
      $stderr.puts ' Unable to find these Ruby requeriments:'
      errors.each { |error| $stderr.puts ' -' + error }
      res = false
    end

    res
  end

  private

  # Returns one message per external command that `which` cannot find.
  def check_system_requirements
    errors = []

    @external_requirements.each do |cmd, msg|
      next if system("which #{cmd} > /dev/null ")

      errors.push "It's necessary to install #{cmd}. " + msg
    end

    errors
  end

  # Looks for every required gem in the output of `gem list`.
  #
  # install:: when true (the default) the user is asked about each missing
  #           gem and nothing is added to the returned list; when false a
  #           message per missing gem is returned instead
  def check_ruby_requirements(install = true)
    errors = []

    @ruby_requirements.each do |cmd, msg|
      next if system("gem list #{cmd} | grep #{cmd} > /dev/null")

      if install
        puts "Are you sure you wan't to install #{cmd} gem? ([Y/n]):"
        # The original read from an undefined local `stdin` (NameError).
        # gets + to_s also tolerates a closed/empty standard input.
        res = $stdin.gets.to_s
        if res.chomp.upcase != 'N'
          # NOTE(review): this only echoes the install command, it does not
          # run it - kept as is; confirm whether a real install is intended.
          system("echo gem install #{cmd}")
        end
      else
        errors.push "It's necessary to install #{cmd}. Issue a: gem install #{cmd} " + msg
      end
    end

    errors
  end

  # seqtrim's requirements are specified here
  def load_requirements
    @external_requirements['blastn'] = "You need to install Blast+ 2.2.24 or greater and make sure it is available in your path (export PATH=$PATH:path_to_blast).\nYou can download it from ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/"
    @external_requirements['cd-hit-454'] = "Download from http://code.google.com/p/cdhit/downloads/list"
    @external_requirements['gnuplot'] = "Download from http://www.gnuplot.info/download.html"

    # @external_requirements['pepe']= ""

    # @ruby_requirements = { 'n2array' => "" ,
    @ruby_requirements['narray'] = ''
    @ruby_requirements['gnuplot'] = ''
    @ruby_requirements['term-ansicolor'] = ''
    @ruby_requirements['xml-simple'] = ''
    @ruby_requirements['scbi_blast'] = ''
    @ruby_requirements['scbi_drb'] = ''
    @ruby_requirements['scbi_fasta'] = ''
    @ruby_requirements['scbi_fastq'] = ''
    @ruby_requirements['scbi_plot'] = ''
    @ruby_requirements['scbi_math'] = ''
    # @ruby_requirements['scbi_fastq2'] = ''
  end

  # Placeholder: currently does nothing. The commands below are kept as a
  # reminder of the gems involved.
  def install
    # gem install gnuplot
    # gem install narray
    # gem install scbi_blast
    # gem install scbi_drb
    # gem install scbi_fasta
    # gem install scbi_fastq
    # gem install term-ansicolor
    # gem install xml-simple
  end

end
@@ -0,0 +1,49 @@
1
+
2
+ #List all entries in a DB, by name
3
+ #
4
+ #list all DB names if db is ALL
5
+
6
# Lists the sequence names stored in a database file, or the available
# database files when the requested one does not exist.
class ListDb

  # Prints the text following '>' for every line of path/formatted/db that
  # starts with '>'. When that file does not exist, prints a notice followed
  # by the base names of the *.fasta files in path/formatted.
  def initialize(path, db)
    filename = File.join(path, 'formatted', db)

    # File.exists? was removed in Ruby 3.2.
    if File.exist?(filename)
      # File.foreach closes the file even when the block raises.
      File.foreach(filename) do |line|
        header = line[/^>(.*)$/, 1]
        puts header if header
      end
    else
      puts "File #{filename} doesn't exists"
      puts ''
      puts "Available databases:"
      puts '-' * 20
      Dir.glob(File.join(path, 'formatted', '*.fasta')).each { |e| puts File.basename(e) }
    end
  end

  # Returns the base names of the *.fasta files in path/formatted, or an
  # empty array when +path+ does not exist.
  def self.list_databases(path)
    return [] unless File.exist?(path)

    Dir.glob(File.join(path, 'formatted', '*.fasta')).map { |e| File.basename(e) }
  end

end
@@ -0,0 +1,113 @@
1
+
2
# Keeps the formatted Blast databases of a database folder up to date.
#
# Folder layout used by this class (dir is the folder given to initialize):
#
#   dir/<db_name>/            source files of one database
#   dir/formatted/<db>.fasta  all source files of a database merged in one file
#   dir/status_info/          `ls -lR` snapshots used to detect changes
class MakeBlastDb

  # Stores the folder locations and immediately updates the databases.
  def initialize(dir)
    @db_folder = dir
    @status_folder = File.join(@db_folder, 'status_info')
    @formatted_folder = File.join(@db_folder, 'formatted')

    update_dbs
  end

  # Concatenates every regular file below +path_start+ (recursively, skipping
  # files whose name starts with a dot) into +path_end+, which is truncated
  # first. Two newlines are appended after each file, and a "cat of <file>"
  # line is printed per file.
  #
  # Done in Ruby rather than with a shell `for i in \`find ...\`` loop, which
  # split file names on whitespace. Files are processed in sorted order.
  def catFasta(path_start, path_end)
    $LOG.debug("Cat of #{path_start}")

    sources = Dir.glob('**/*', File::FNM_DOTMATCH, base: path_start)
                 .map { |relative| File.join(path_start, relative) }
                 .select { |file| File.file?(file) && !File.basename(file).start_with?('.') }
                 .sort

    File.open(path_end, 'wb') do |merged|
      sources.each do |source|
        puts "cat of #{source}"
        merged.write(File.binread(source))
        merged.write("\n\n")
      end
    end
  end

  # True when +path_db+ contains nothing besides '.', '..' and '.DS_Store'.
  def dirEmpty?(path_db)
    # Dir.entries does not leave a directory handle open.
    (Dir.entries(path_db.to_s) - ['.', '..', '.DS_Store']).empty?
  end

  # Merges the files of +path_db+ into formatted_folder/db_name.fasta, unless
  # the folder is empty.
  def merge_db_files(path_db, db_name, formatted_folder)
    return if dirEmpty?(path_db)

    formatted_file = File.join(formatted_folder, db_name + '.fasta')
    catFasta(File.join(path_db), formatted_file)
  end

  # Runs makeblastdb on formatted_folder/db_name.fasta, appending its output
  # to logs/formatdb.log, and logs the command that was run.
  def self.format_db(path_db, db_name, formatted_folder)
    formatted_file = File.join(formatted_folder, db_name + '.fasta')
    cmd = "makeblastdb -in #{formatted_file} -parse_seqids -dbtype nucl >> logs/formatdb.log"
    system(cmd)
    $LOG.info(cmd)
  end

  # Checks whether the files of each database changed and only then merges
  # them and runs makeblastdb again.
  #
  # For every sub-folder of the database folder (except status_info and
  # formatted) an `ls -lR` listing is written to status_info/new_<db>.txt.
  # The database is rebuilt when there is no previous listing, when the
  # listing differs from the previous one, or when the merged fasta file is
  # missing; the new listing then replaces the old one.
  def update_dbs
    FileUtils.mkdir_p(@status_folder)
    FileUtils.mkdir_p(@formatted_folder)

    ignore_folders = ['.', '..', 'status_info', 'formatted']

    $LOG.info("Checking Blast databases at #{@db_folder} for updates")

    Dir.entries(@db_folder).each do |db_name|
      db_folder = File.join(@db_folder, db_name)
      next if ignore_folders.include?(db_name) || !File.directory?(db_folder)

      new_status_file = File.join(@status_folder, 'new_' + db_name + '.txt')
      old_status_file = File.join(@status_folder, 'old_' + db_name + '.txt')

      cmd = "ls -lR #{db_folder} > #{new_status_file}"
      $LOG.debug(cmd)
      # Argument-list form with an :out redirection: same listing, but paths
      # containing spaces are passed through intact.
      system('ls', '-lR', db_folder, out: new_status_file)

      next unless db_changed?(db_name, new_status_file, old_status_file)

      $LOG.info("Database #{db_name} modified. Merging and formatting")

      merge_db_files(db_folder, db_name, @formatted_folder)

      MakeBlastDb.format_db(db_folder, db_name, @formatted_folder)

      # the new listing becomes the reference for the next run
      File.rename(new_status_file, old_status_file)
    end
  end

  private

  # True when the database has to be merged and formatted again.
  def db_changed?(db_name, new_status_file, old_status_file)
    # File.exists? was removed in Ruby 3.2.
    return true unless File.exist?(old_status_file)
    return true unless system('diff', '-q', new_status_file, old_status_file, out: File::NULL)

    !File.exist?(File.join(@formatted_folder, db_name + '.fasta'))
  end

end
112
+
113
+
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Runs blastall for a single sequence through auxiliary files in the current
# working directory (one_blast_aux.fasta / one_blast_aux.out).
class OneBlast

  # database::   value passed to blastall's -d option
  # blast_type:: value passed to blastall's -p option
  def initialize(database, blast_type = 'blastp')
    @blast_type = blast_type
    @database = database
    @c = 0 # number of queries written so far, used to name each sequence
  end

  # Writes +seq_fasta+ as sequence SEQNAME_<n> to one_blast_aux.fasta, runs
  # blastall on it and returns the last line of one_blast_aux.out ('' when
  # the output file is empty).
  #
  # The original computed that last line but returned the still-open output
  # File object instead; both auxiliary files are now closed before returning.
  # NOTE(review): confirm no caller relied on receiving the File object.
  def do_blast(seq_fasta)
    File.open('one_blast_aux.fasta', 'w+') do |query|
      query.puts ">SEQNAME_" + @c.to_s
      query.puts seq_fasta
    end
    @c += 1

    cmd = '~blast/programs/x86_64/bin/blastall -p ' + @blast_type + ' -d ' + @database + ' -i one_blast_aux.fasta -o one_blast_aux.out'
    system(cmd)

    res = ''
    File.foreach('one_blast_aux.out') { |line| res = line }
    res
  end

  # Nothing to release; kept so callers can keep calling close.
  def close
  end

end
40
+
41
+