scbi_mapreduce 0.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +49 -0
- data/Manifest.txt +46 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +295 -0
- data/Rakefile +28 -0
- data/bin/scbi_mapreduce +52 -0
- data/lib/scbi_mapreduce.rb +15 -0
- data/lib/scbi_mapreduce/error_handler.rb +15 -0
- data/lib/scbi_mapreduce/main_worker.rb +50 -0
- data/lib/scbi_mapreduce/manager.rb +110 -0
- data/lib/scbi_mapreduce/work_manager.rb +405 -0
- data/lib/scbi_mapreduce/worker.rb +163 -0
- data/lib/scbi_mapreduce/worker_launcher.rb +96 -0
- data/lib/scbi_mapreduce/zlib_serializer.rb +32 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/skeleton/dummy_calcs/README.txt +25 -0
- data/skeleton/dummy_calcs/lib/calculations.rb +37 -0
- data/skeleton/dummy_calcs/lib/thread_pool.rb +107 -0
- data/skeleton/dummy_calcs/linear_implementation.rb +22 -0
- data/skeleton/dummy_calcs/main.rb +67 -0
- data/skeleton/dummy_calcs/my_worker.rb +56 -0
- data/skeleton/dummy_calcs/my_worker_manager.rb +52 -0
- data/skeleton/dummy_calcs/threads_implementation.rb +33 -0
- data/skeleton/remove_mids/README.txt +30 -0
- data/skeleton/remove_mids/launch_only_workers.rb +29 -0
- data/skeleton/remove_mids/lib/db/mids.fasta +120 -0
- data/skeleton/remove_mids/lib/find_mids.rb +191 -0
- data/skeleton/remove_mids/lib/global_match.rb +97 -0
- data/skeleton/remove_mids/linear_implementation.rb +87 -0
- data/skeleton/remove_mids/main.rb +89 -0
- data/skeleton/remove_mids/my_worker.rb +59 -0
- data/skeleton/remove_mids/my_worker_manager.rb +68 -0
- data/skeleton/simple/README.txt +16 -0
- data/skeleton/simple/main.rb +41 -0
- data/skeleton/simple/my_worker.rb +53 -0
- data/skeleton/simple/my_worker_manager.rb +55 -0
- data/test/drb_test/main.rb +31 -0
- data/test/drb_test/my_worker.rb +36 -0
- data/test/drb_test/my_worker_manager.rb +41 -0
- data/test/drb_test/scbi_drb_checkpoint +1 -0
- data/test/drb_test/scbi_mapreduce_checkpoint +1 -0
- data/test/test_helper.rb +3 -0
- data/test/test_scbi_drb.rb +11 -0
- metadata +127 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Sequential baseline: runs the dummy calculation 1000 times in a single
# process, one after another.

# make the local lib directory loadable
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')

require 'calculations'
include Calculations

# process
1000.times { do_dummy_calculations }

puts "final"
|
22
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby

$: << File.join(File.dirname(__FILE__))

# load required libraries
require 'socket' # Socket.ip_address_list is used below
require 'scbi_mapreduce'
require 'my_worker_manager'

# Pick the first local IPv4 address that starts with this prefix.
# When no interface matches, fall back to 0.0.0.0.
server_ip = '10.243'
ip_list = Socket.ip_address_list.select(&:ipv4?).map(&:ip_address)
ip = ip_list.find { |address| address.start_with?(server_ip) } || '0.0.0.0'

# NOTE(review): port 0 presumably lets the manager/OS choose a free port -
# confirm against ScbiMapreduce::Manager.
port = 0

# set number of workers. You can also provide an array with worker names.
# Those workers names can be read from a file produced by the existing
# queue system, if any.
workers = 4

# read optional workers parameter
input_workers = ARGV.shift
if input_workers
  workers =
    if File.exist?(input_workers)
      # it is a file: read one worker name per line into an array
      File.read(input_workers).split("\n").map { |w| w.chomp }
    else
      # otherwise it is a number of workers
      input_workers.to_i
    end
end

# we need the path to my_worker in order to launch it when necessary
custom_worker_file = File.join(File.dirname(__FILE__), 'my_worker.rb')

# initialize the work manager. Here you can pass parameters like file names
MyWorkerManager.init_work_manager

# launch processor server
mgr = ScbiMapreduce::Manager.new(ip, port, workers, MyWorkerManager, custom_worker_file, STDOUT)

# you can set additional properties
# =================================

# if you want basic checkpointing. Some performance drop should be expected
# mgr.checkpointing=true

# if you want to keep the order of input data. Some performance drop should be expected
# mgr.keep_order=true

# you can set the size of packets of data sent to workers
mgr.chunk_size = 1

# start processing
mgr.start_server

# this line is reached when all data has been processed
puts "Program finished"
|
@@ -0,0 +1,56 @@
|
|
1
|
+
$: << File.join(File.dirname(__FILE__),'lib')
|
2
|
+
|
3
|
+
require 'calculations'
|
4
|
+
include Calculations
|
5
|
+
|
6
|
+
# MyWorker defines the behaviour of workers.
# This is where the real processing takes place.
class MyWorker < ScbiMapreduce::Worker
  # starting_worker is called one time at initialization and is the place
  # to set up any variables the worker needs.
  def starting_worker
    # Worker logs can be used at any time, for example:
    # $WORKER_LOG.info "Starting a worker"
  end

  # receive_initial_config is called only once, just after the first
  # connection, when the initial parameters are received from the manager.
  def receive_initial_config(parameters)
    # Worker logs can be used at any time, for example:
    # $WORKER_LOG.info "Params received"

    # save received parameters, if any
    # @params = parameters
  end

  # process_object is called for each received chunk of work.
  # objs is always an array; iterate over it if each element must be
  # processed independently.
  #
  # The value returned here is what the work_received method of the
  # worker_manager subclass receives.
  def process_object(objs)
    # one dummy calculation per received chunk (chunk contents are not used)
    do_dummy_calculations

    # hand the received objects back to the manager
    objs
  end

  # closing_worker: nothing to clean up in this example.
  def closing_worker; end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# MyWorkerManager class is used to implement the methods
# to send and receive the data to or from workers
class MyWorkerManager < ScbiMapreduce::WorkManager
  # init_work_manager is executed at the start, prior to any processing.
  # You can use init_work_manager to initialize global variables, open files, etc...
  # Note that an instance of MyWorkerManager will be created for each
  # worker connection, and thus, all global variables here should be
  # class variables (starting with @@)
  def self.init_work_manager
    # number of work items still to hand out; each one triggers one
    # dummy calculation in a worker
    @@remaining_data = 1000
  end

  # end_work_manager is executed at the end, when all the process is done.
  # You can use it to close files opened in init_work_manager
  def self.end_work_manager
  end

  # worker_initial_config is used to send initial parameters to workers.
  # The method is executed once per each worker
  def worker_initial_config
  end

  # next_work method is called every time a worker needs a new work
  # Here you can read data from disk
  # This method must return the work data or nil if no more data is available
  #
  # Hands out the counter values from its initial value down to 1, so exactly
  # @@remaining_data work items are produced (the counter is read before it
  # is decremented; decrementing first would skip one item).
  def next_work
    return nil if @@remaining_data <= 0

    work = @@remaining_data
    @@remaining_data -= 1
    work
  end

  # work_received is executed each time a worker has finished a job.
  # Here you can write results down to disk, perform some aggregated statistics, etc...
  def work_received(results)
    # write_data_to_disk(results)
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Thread-pool variant of the dummy calculation demo: schedules the dummy
# calculation 1000 times on a pool whose size is given on the command line.

# make the local lib directory loadable
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')

require 'thread_pool'
require 'calculations'
include Calculations

# exactly one argument (the number of threads) is expected
unless ARGV.count == 1
  puts "use: #{$0} threads"
  exit
end

@pool = ThreadPool.new(ARGV[0].to_i)

# process
1000.times do
  @pool.process { do_dummy_calculations }
end

puts "wait"
@pool.join
puts "final"
|
33
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
A simple scbi_mapreduce application demo
|
2
|
+
========================================
|
3
|
+
|
4
|
+
This application is a basic sequence processing template. It processes all
|
5
|
+
sequences in fastq_file (a file in FastQ format), removing the MIDs from them. It
|
6
|
+
needs some external requisites to work:
|
7
|
+
|
8
|
+
EXTERNAL REQUISITES
|
9
|
+
===================
|
10
|
+
|
11
|
+
* scbi_blast gem installed
|
12
|
+
|
13
|
+
|
14
|
+
In lib/db you can find a preformatted MID database.
|
15
|
+
|
16
|
+
You can modify the files to perform more complicated processing.
|
17
|
+
There are other templates available, you can list them by issuing this command:
|
18
|
+
|
19
|
+
scbi_mapreduce
|
20
|
+
|
21
|
+
You can launch the application right now with the following command using 4 cpus/cores and chunks of 100 sequences at a time:
|
22
|
+
|
23
|
+
ruby main.rb fastq_file 4 100
|
24
|
+
|
25
|
+
A server and some workers will be launched, and all sequences in fastq_file will
|
26
|
+
be processed in blocks of 100 sequences.
|
27
|
+
|
28
|
+
A sequential (linear) example is also provided; you can launch it by issuing:
|
29
|
+
|
30
|
+
ruby linear_implementation.rb fastq_file
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Launches only the workers (no manager is created in this script) and waits
# on them via WorkerLauncher#launch_workers_and_wait.

$LOAD_PATH << File.dirname(__FILE__)

require "logger"

# NOTE(review): machine-specific development path; it only resolves on the
# original author's machine - confirm whether it is still needed.
$LOAD_PATH << '/Users/dariogf/progs/ruby/gems/scbi_mapreduce/lib'

require 'scbi_mapreduce'
require 'my_worker_manager'

$LOG = Logger.new(STDOUT)
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# address/port handed to the worker launcher, and how many workers to start
ip = '10.247.255.5'
port = 50000
workers = 8

# path to the worker implementation that will be launched
custom_worker_file = File.join(File.dirname(__FILE__), 'my_worker.rb')

# NOTE(review): the message says "server", but only workers are launched here
$LOG.info 'Starting server'

ScbiMapreduce::WorkerLauncher
  .new(ip, port, workers, custom_worker_file, STDOUT)
  .launch_workers_and_wait

# reached once launch_workers_and_wait returns
$LOG.info 'Closing workers'
|
@@ -0,0 +1,120 @@
|
|
1
|
+
>RL1
|
2
|
+
ACACGACGACT
|
3
|
+
>RL2
|
4
|
+
ACACGTAGTAT
|
5
|
+
>RL3
|
6
|
+
ACACTACTCGT
|
7
|
+
>RL4
|
8
|
+
ACGACACGTAT
|
9
|
+
>RL5
|
10
|
+
ACGAGTAGACT
|
11
|
+
>RL6
|
12
|
+
ACGCGTCTAGT
|
13
|
+
>RL7
|
14
|
+
ACGTACACACT
|
15
|
+
>RL8
|
16
|
+
ACGTACTGTGT
|
17
|
+
>RL9
|
18
|
+
ACGTAGATCGT
|
19
|
+
>RL10
|
20
|
+
ACTACGTCTCT
|
21
|
+
>RL11
|
22
|
+
ACTATACGAGT
|
23
|
+
>RL12
|
24
|
+
ACTCGCGTCGT
|
25
|
+
>MID1
|
26
|
+
ACGAGTGCGT
|
27
|
+
>MID2
|
28
|
+
ACGCTCGACA
|
29
|
+
>MID3
|
30
|
+
AGACGCACTC
|
31
|
+
>MID5
|
32
|
+
ATCAGACACG
|
33
|
+
>MID6
|
34
|
+
ATATCGCGAG
|
35
|
+
>MID7
|
36
|
+
CGTGTCTCTA
|
37
|
+
>MID8
|
38
|
+
CTCGCGTGTC
|
39
|
+
>MID10
|
40
|
+
TCTCTATGCG
|
41
|
+
>MID11
|
42
|
+
TGATACGTCT
|
43
|
+
>MID13
|
44
|
+
CATAGTAGTG
|
45
|
+
>MID14
|
46
|
+
CGAGAGATAC
|
47
|
+
>MID15
|
48
|
+
ATACGACGTA
|
49
|
+
>MID16
|
50
|
+
TCACGTACTA
|
51
|
+
>MID17
|
52
|
+
CGTCTAGTAC
|
53
|
+
>MID18
|
54
|
+
TCTACGTAGC
|
55
|
+
>MID19
|
56
|
+
TGTACTACTC
|
57
|
+
>MID20
|
58
|
+
ACGACTACAG
|
59
|
+
>MID21
|
60
|
+
CGTAGACTAG
|
61
|
+
>MID22
|
62
|
+
TACGAGTATG
|
63
|
+
>MID23
|
64
|
+
TACTCTCGTG
|
65
|
+
>MID24
|
66
|
+
TAGAGACGAG
|
67
|
+
>MID25
|
68
|
+
TCGTCGCTCG
|
69
|
+
>MID26
|
70
|
+
ACATACGCGT
|
71
|
+
>MID27
|
72
|
+
ACGCGAGTAT
|
73
|
+
>MID28
|
74
|
+
ACTACTATGT
|
75
|
+
>MID68
|
76
|
+
TCGCTGCGTA
|
77
|
+
>MID30
|
78
|
+
AGACTATACT
|
79
|
+
>MID31
|
80
|
+
AGCGTCGTCT
|
81
|
+
>MID32
|
82
|
+
AGTACGCTAT
|
83
|
+
>MID33
|
84
|
+
ATAGAGTACT
|
85
|
+
>MID34
|
86
|
+
CACGCTACGT
|
87
|
+
>MID35
|
88
|
+
CAGTAGACGT
|
89
|
+
>MID36
|
90
|
+
CGACGTGACT
|
91
|
+
>MID37
|
92
|
+
TACACACACT
|
93
|
+
>MID38
|
94
|
+
TACACGTGAT
|
95
|
+
>MID39
|
96
|
+
TACAGATCGT
|
97
|
+
>MID40
|
98
|
+
TACGCTGTCT
|
99
|
+
>MID69
|
100
|
+
TCTGACGTCA
|
101
|
+
>MID42
|
102
|
+
TCGATCACGT
|
103
|
+
>MID43
|
104
|
+
TCGCACTAGT
|
105
|
+
>MID44
|
106
|
+
TCTAGCGACT
|
107
|
+
>MID45
|
108
|
+
TCTATACTAT
|
109
|
+
>MID46
|
110
|
+
TGACGTATGT
|
111
|
+
>MID47
|
112
|
+
TGTGAGTAGT
|
113
|
+
>MID48
|
114
|
+
ACAGTATATA
|
115
|
+
>MID49
|
116
|
+
ACGCGATCGA
|
117
|
+
>MID50
|
118
|
+
ACTAGCAGTA
|
119
|
+
>MID67
|
120
|
+
TCGATAGTGA
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'scbi_blast'
|
2
|
+
require 'global_match'
|
3
|
+
# require 'json'
|
4
|
+
|
5
|
+
# Module to find MIDs in a set of sequences and trim them off.
#
# Sequences are passed around as arrays of [name, fasta, qual, comments];
# fasta, qual and comments are modified in place.
module FindMids
  # Known tags (name => sequence) checked by find_mid_without_blast.
  # Insertion order matters: tags are tried in this order and the first
  # sufficiently long match wins.
  KNOWN_MIDS = {
    'RL1' => 'ACACGACGACT',
    'RL2' => 'ACACGTAGTAT',
    'RL3' => 'ACACTACTCGT',
    'RL4' => 'ACGACACGTAT',
    'RL5' => 'ACGAGTAGACT',
    'RL6' => 'ACGCGTCTAGT',
    'RL7' => 'ACGTACACACT',
    'RL8' => 'ACGTACTGTGT',
    'RL9' => 'ACGTAGATCGT',
    'RL10' => 'ACTACGTCTCT',
    'RL11' => 'ACTATACGAGT',
    'RL12' => 'ACTCGCGTCGT',
    'MID1' => 'ACGAGTGCGT',
    'MID2' => 'ACGCTCGACA',
    'MID3' => 'AGACGCACTC',
    'MID5' => 'ATCAGACACG',
    'MID6' => 'ATATCGCGAG',
    'MID7' => 'CGTGTCTCTA',
    'MID8' => 'CTCGCGTGTC',
    'MID10' => 'TCTCTATGCG',
    'MID11' => 'TGATACGTCT',
    'MID13' => 'CATAGTAGTG',
    'MID14' => 'CGAGAGATAC',
    'MID15' => 'ATACGACGTA',
    'MID16' => 'TCACGTACTA',
    'MID17' => 'CGTCTAGTAC',
    'MID18' => 'TCTACGTAGC',
    'MID19' => 'TGTACTACTC',
    'MID20' => 'ACGACTACAG',
    'MID21' => 'CGTAGACTAG',
    'MID22' => 'TACGAGTATG',
    'MID23' => 'TACTCTCGTG',
    'MID24' => 'TAGAGACGAG',
    'MID25' => 'TCGTCGCTCG',
    'MID26' => 'ACATACGCGT',
    'MID27' => 'ACGCGAGTAT',
    'MID28' => 'ACTACTATGT',
    'MID68' => 'TCGCTGCGTA',
    'MID30' => 'AGACTATACT',
    'MID31' => 'AGCGTCGTCT',
    'MID32' => 'AGTACGCTAT',
    'MID33' => 'ATAGAGTACT',
    'MID34' => 'CACGCTACGT',
    'MID35' => 'CAGTAGACGT',
    'MID36' => 'CGACGTGACT',
    'MID37' => 'TACACACACT',
    'MID38' => 'TACACGTGAT',
    'MID39' => 'TACAGATCGT',
    'MID40' => 'TACGCTGTCT',
    'MID69' => 'TCTGACGTCA',
    'MID42' => 'TCGATCACGT',
    'MID43' => 'TCGCACTAGT',
    'MID44' => 'TCTAGCGACT',
    'MID45' => 'TCTATACTAT',
    'MID46' => 'TGACGTATGT',
    'MID47' => 'TGTGAGTAGT',
    'MID48' => 'ACAGTATATA',
    'MID49' => 'ACGCGATCGA',
    'MID50' => 'ACTAGCAGTA',
    'MID67' => 'TCGATAGTGA'
  }.freeze

  # Find MIDs using blast+ as an external tool.
  #
  # seqs is an array of [name, fasta, qual, comments] entries. All sequences
  # are blasted in one batch against db/mids.fasta (relative to this file) and
  # each result is handed to parse_seq together with its sequence.
  # Prints the elapsed time and the time per sequence.
  def find_mid_with_blast(seqs)
    t = Time.now

    # Create blast machine against the MID database
    # NOTE(review): BatchBlast presumably comes from the scbi_blast gem - confirm
    db_path = File.expand_path(File.join(File.dirname(__FILE__), 'db/mids.fasta'))
    blast = BatchBlast.new("-db #{db_path}", 'blastn', " -task blastn-short -perc_identity 95 -max_target_seqs 4 ") # get mids

    # build fastas to blast: a ">name" header line followed by the sequence
    fastas = seqs.flat_map { |name, fasta, _qual, _comments| [">" + name, fasta] }

    # execute blast
    blast_table_results = blast.do_blast(fastas)

    # Iterate over sequences, pairing each with the blast query at the same index
    seqs.each_with_index do |(name, fasta, qual, comments), i|
      parse_seq(blast_table_results.querys[i], name, fasta, qual, comments)
    end

    elapsed = Time.now - t

    puts "T:#{elapsed}, rate#{elapsed/seqs.count}"
  end

  # Parse the blast result of one sequence and remove the found MID.
  #
  # Uses the first hit in query.hits whose align_len is greater than 1:
  # its subject_id is appended to comments, and the first
  # q_beg + align_len elements are removed in place from both fasta and qual.
  # Hits after the first usable one are ignored. name is not used.
  def parse_seq(query, name, fasta, qual, comments)
    query.hits.each do |found_mid|
      next unless found_mid.align_len > 1

      # modify comments by appending removed mid
      comments << found_mid.subject_id

      trim_length = found_mid.q_beg + found_mid.align_len

      # keep fasta from the end of the hit onwards
      fasta.slice!(0, trim_length)

      # keep qual from the end of the hit onwards
      qual.slice!(0, trim_length)
      break
    end
  end

  # Find MIDs without blast, using String#lcs against the KNOWN_MIDS table.
  #
  # For each [name, fasta, qual, comment] entry in seqs, only the first 21
  # characters of fasta are searched. Tags are tried in KNOWN_MIDS order; the
  # first one whose lcs result is longer than 5 characters is used:
  # everything up to the end of that match is removed in place from fasta and
  # qual, and "mid_name <tag>\n" is appended to comment.
  # Prints the elapsed time and the time per sequence.
  #
  # NOTE(review): String#lcs is not part of Ruby's core; it is presumably
  # provided by the required global_match file - confirm.
  def find_mid_without_blast(seqs)
    t = Time.now

    # for each sequence
    seqs.each do |name, fasta, qual, comment|
      # only the start of the read is searched; taken once because fasta is
      # modified only right before leaving the inner loop
      prefix = fasta[0..20]

      # try every known tag until one matches
      KNOWN_MIDS.each do |mid_name, mid|
        found_mid = prefix.lcs(mid)
        next unless found_mid.length > 5

        pos = prefix.index(found_mid)
        trim_length = pos + found_mid.length

        # keep fasta from the end of the match onwards
        fasta.slice!(0, trim_length)

        # keep qual from the end of the match onwards
        qual.slice!(0, trim_length)

        comment << "mid_name #{mid_name}\n"
        break
      end
    end

    elapsed = Time.now - t

    puts "T:#{elapsed}, rate#{elapsed/seqs.count}"
  end

  # Run a CPU-bound dummy workload: number_of_calcs steps of a Fibonacci-style
  # recurrence. Prints a progress line every 100000 iterations (including
  # iteration 0) and finally the elapsed time in seconds.
  #
  # number_of_calcs defaults to 250000, the value that was previously
  # hard-coded.
  def do_dummy_calculation(number_of_calcs = 250_000)
    t = Time.now

    x1 = 1
    x2 = 1

    # do a loop with calculations
    number_of_calcs.times do |i|
      x1, x2 = x2, x1 + x2

      # puts some info at regular intervals
      # (the old message interpolated an undefined `n`, raising NameError)
      puts "Calculated #{i}" if (i % 100000) == 0
    end

    puts Time.now - t
  end
end
|