mapp2g 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/mapp2g +6 -0
- data/lib/mapp2g/report.rb +75 -0
- data/lib/mapp2g/version.rb +1 -1
- data/lib/mapp2g.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea23c58705cef813135bf96b23383cdeca589643ff08a25ba8b95fd473e26449
|
4
|
+
data.tar.gz: 7be36a76318408344b4402fafda2393ba51d6e2e36fe2c50e6ad393817ff6cbb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cfed44714ac742bd9cc8a17169f85c691292e2fe1d7c4d62960d3babe0cd61cf59b01c0b333af063975aef50f17c99aab9f06103b662da03e00813d456f45e02
|
7
|
+
data.tar.gz: c4f3711b154e3ad3d730dde9e5637061514563f49607a814c5bbf2a674de3b06a81f96926aac9d8c32bc5feedb06c7a3dab8a481a8f122b138d60530b923af7c
|
data/exe/mapp2g
CHANGED
@@ -62,6 +62,9 @@ rescue => e
|
|
62
62
|
end
|
63
63
|
|
64
64
|
begin
|
65
|
+
unless File.exist?(genome)
|
66
|
+
raise "genome file (#{genome}) not found"
|
67
|
+
end
|
65
68
|
unless File.exist?("#{genome}.nsq") && File.exist?("#{genome}.nos")
|
66
69
|
raise "genome is not indexed. Please run 'makeblastdb -in #{genome} -dbtype nucl -parse_seqids'"
|
67
70
|
end
|
@@ -87,4 +90,7 @@ Bio::FlatFile.open(Bio::FastaFormat, query).each_with_index do |fas, i|
|
|
87
90
|
res = mapper.run(query_file_path, genome)
|
88
91
|
File.open(out_file_path, "w"){|o| o.puts res}
|
89
92
|
|
93
|
+
gff3 = Mapp2g::ExonerateOutput.new(res).to_gff3()
|
94
|
+
out_file_path = "#{outdir}/#{id}.exonerate.gff3"
|
95
|
+
File.open(out_file_path, "w"){|o| o.puts gff3}
|
90
96
|
end
|
data/lib/mapp2g/report.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module Mapp2g

  # Reads the text report produced by exonerate and converts the gene/exon
  # GFF2 records it contains into GFF3 "match" / "match_part" records.
  #
  # State is kept per instance, not per alignment: every "Query:", "Target:"
  # and "cigar:" line overwrites the previous value, so after parsing the
  # readers hold the values from the LAST such line in the report, and
  # #to_gff3 uses those values for every record.
  class ExonerateOutput

    # Header line carrying the query id (first whitespace-separated token).
    QUERY_LINE = /\s+Query:\s/
    # Header line carrying the target id (first whitespace-separated token).
    TARGET_LINE = /\s+Target:\s/
    # One-line alignment summary; everything after the prefix is kept as-is.
    CIGAR_LINE = /^cigar:\s/
    # GFF2 "source" column values whose records are collected.
    GFF2_SOURCES = ["exonerate:est2genome", "exonerate:protein2genome:local"].freeze
    # GFF2 "feature" column values whose records are collected.
    GFF2_FEATURES = ["gene", "exon"].freeze
    # NOTE(review): exonerate is understood to append this marker to the id
    # on the "Target:" header line for reverse-strand hits, while the GFF
    # seqid column carries the bare id -- confirm against real output.
    REVCOMP_SUFFIX = /:\[revcomp\]\z/
    # Characters that must be percent-encoded inside a GFF3 attribute value.
    GFF3_RESERVED = /[%;=&,\x00-\x1f\x7f]/

    # Builds an instance from a file containing exonerate output.
    #
    # @param file [String] path to the exonerate report
    # @return [ExonerateOutput]
    def self.load(file)
      new(File.read(file))
    end

    # @param exonerate_out [String] exonerate output text, not file path
    def initialize(exonerate_out)
      @exonerate_out = exonerate_out
      parse
    end

    attr_reader :query_name, :target, :cigar, :gff2_lines

    # Scans the report line by line and fills query_name, target, cigar and
    # gff2_lines. All four are reset first, so calling this again does not
    # accumulate duplicate GFF2 lines.
    #
    # @param opt [Hash] accepted for interface compatibility; not used
    def parse(opt = {})
      reset_state
      @exonerate_out.each_line do |line|
        if (m = QUERY_LINE.match(line))
          @query_name = m.post_match.split[0]
        elsif (m = TARGET_LINE.match(line))
          @target = m.post_match.split[0]
        elsif (m = CIGAR_LINE.match(line))
          @cigar = m.post_match.chomp
        elsif gff2_feature_line?(line)
          @gff2_lines << line.chomp
        end
      end
    end

    # Converts the collected GFF2 records to GFF3 text.
    #
    # "gene" records become "match" records carrying ID, Target, Gap,
    # identity and similarity; "exon" records become "match_part" records
    # carrying Parent, identity and similarity. Attributes whose value is
    # missing are omitted rather than written as an empty "key=".
    #
    # @param opt [Hash] accepted for interface compatibility; not used
    # @return [String] GFF3 lines joined with "\n" ("" when nothing was collected)
    # @raise [RuntimeError] if a gene record is present but no "cigar:" line
    #   was seen, or if a collected line has an unexpected feature type
    def to_gff3(opt = {})
      @gff2_lines.map { |line| gff3_line(line) }.join("\n")
    end

    private

    # Clears everything #parse derives from the report text.
    def reset_state
      @query_name = nil
      @target = nil
      @cigar = nil
      @gff2_lines = []
    end

    # Target id as expected at the start of a GFF line: the header id without
    # the reverse-complement marker. nil until a "Target:" line has been seen.
    def target_seqid
      @target && @target.sub(REVCOMP_SUFFIX, "")
    end

    # True when the line is a gene/exon record from a supported source that
    # starts with the current target id. The id is compared literally (not as
    # a regular expression), so ids containing ".", "|", "(" etc. are safe.
    # Before any "Target:" line has been seen, no id restriction is applied.
    def gff2_feature_line?(line)
      seqid = target_seqid
      return false unless seqid.nil? || line.start_with?(seqid)

      columns = line.split("\t")
      GFF2_SOURCES.include?(columns[1]) && GFF2_FEATURES.include?(columns[2])
    end

    # Rewrites one collected GFF2 record as a GFF3 record.
    def gff3_line(line)
      columns = line.chomp.split("\t")
      # split drops trailing empty fields; pad so column 9 always exists.
      columns.concat([nil] * (9 - columns.size)) if columns.size < 9

      gff2_attributes = parse_gff2_attributes(columns[8])
      scores = { "identity" => gff2_attributes["identity"],
                 "similarity" => gff2_attributes["similarity"] }

      attributes =
        case columns[2]
        when "gene"
          columns[2] = "match"
          match_attributes.merge(scores)
        when "exon"
          columns[2] = "match_part"
          { "Parent" => @query_name }.merge(scores)
        else
          raise "unsupported feature type #{columns[2].inspect} in exonerate GFF line: #{line}"
        end

      columns[8] = format_gff3_attributes(attributes)
      columns.join("\t")
    end

    # ID / Target / Gap attributes for a "match" record, derived from the
    # stored cigar text: field 1 (+1) and field 2 give the Target range, and
    # fields 9 onward are the alignment operations.
    # NOTE(review): the +1 presumably converts a 0-based start to 1-based --
    # confirm against the exonerate cigar specification.
    def match_attributes
      if @cigar.nil?
        raise "exonerate output has no 'cigar:' line; cannot build Target/Gap attributes"
      end

      fields = @cigar.split(/\s+/)
      operations = Array(fields[9..-1]).join.scan(/[MDI]\d+/)
      { "ID" => @query_name,
        "Target" => [@query_name, fields[1].to_i + 1, fields[2]].join(" "),
        "Gap" => operations.join(" ") }
    end

    # Parses a GFF2 attribute column ("key value ; key value ; ...") into a
    # Hash. The value is everything after the first run of whitespace, so
    # multi-word values are kept intact; a key with no value maps to nil.
    def parse_gff2_attributes(text)
      text.to_s.split(";").each_with_object({}) do |chunk, attributes|
        key, value = chunk.strip.split(/\s+/, 2)
        attributes[key] = value if key
      end
    end

    # Serialises attributes as "key=value;key=value", skipping nil values and
    # percent-encoding characters that are reserved in GFF3 column 9.
    def format_gff3_attributes(attributes)
      attributes.reject { |_key, value| value.nil? }
                .map { |key, value| "#{key}=#{escape_gff3(value)}" }
                .join(";")
    end

    # Percent-encodes GFF3 reserved characters in a single attribute value.
    def escape_gff3(value)
      value.to_s.gsub(GFF3_RESERVED) { |char| format("%%%02X", char.ord) }
    end

  end

end
|
75
|
+
|
data/lib/mapp2g/version.rb
CHANGED
data/lib/mapp2g.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mapp2g
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuji Shigenobu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: mapp2g is a bioinformatics software, which map and align protein sequences
|
14
14
|
(amino acid sequences) to genome references in a splicing-aware way. mapp2g alignment
|
@@ -28,6 +28,7 @@ files:
|
|
28
28
|
- exe/mapp2g
|
29
29
|
- lib/mapp2g.rb
|
30
30
|
- lib/mapp2g/mapper.rb
|
31
|
+
- lib/mapp2g/report.rb
|
31
32
|
- lib/mapp2g/version.rb
|
32
33
|
- mapp2g.gemspec
|
33
34
|
- scripts/add_annotation_from_uniprot_fasta_to_gff.rb
|
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
57
58
|
- !ruby/object:Gem::Version
|
58
59
|
version: '0'
|
59
60
|
requirements: []
|
60
|
-
rubygems_version: 3.4.
|
61
|
+
rubygems_version: 3.4.15
|
61
62
|
signing_key:
|
62
63
|
specification_version: 4
|
63
64
|
summary: mapp2g is the tool to map protein sequences to genome references in a splicing-aware
|