mapp2g 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80ed37b4687cdd64f5a292593e2b15addadbabe49223c79835b530e94ac23be6
4
- data.tar.gz: 37a038d19321a88e10f44261a3027bda85c40c29d529b710bb93ed760ada585e
3
+ metadata.gz: ea23c58705cef813135bf96b23383cdeca589643ff08a25ba8b95fd473e26449
4
+ data.tar.gz: 7be36a76318408344b4402fafda2393ba51d6e2e36fe2c50e6ad393817ff6cbb
5
5
  SHA512:
6
- metadata.gz: 79f3e7022e8532c1bb9fc0f333927042a4eead1a40d1ecbcb9408d9391a1f21e162830132c395411aac7065076fa14eed694f6ee5b6824a3edc1c193a99ff9ab
7
- data.tar.gz: b6c93c34a6576ea1e57fdc2eecdb04e767a595b6b6a037885ab3a89ebf46f36272b6017f55441788ca1cb89dd80cbe273d6a7049f067445b48d8464a486402b2
6
+ metadata.gz: cfed44714ac742bd9cc8a17169f85c691292e2fe1d7c4d62960d3babe0cd61cf59b01c0b333af063975aef50f17c99aab9f06103b662da03e00813d456f45e02
7
+ data.tar.gz: c4f3711b154e3ad3d730dde9e5637061514563f49607a814c5bbf2a674de3b06a81f96926aac9d8c32bc5feedb06c7a3dab8a481a8f122b138d60530b923af7c
data/exe/mapp2g CHANGED
@@ -62,6 +62,9 @@ rescue => e
62
62
  end
63
63
 
64
64
  begin
65
+ unless File.exist?(genome)
66
+ raise "genome file (#{genome}) not found"
67
+ end
65
68
  unless File.exist?("#{genome}.nsq") && File.exist?("#{genome}.nos")
66
69
  raise "genome is not indexed. Please run 'makeblastdb -in #{genome} -dbtype nucl -parse_seqids'"
67
70
  end
@@ -87,4 +90,7 @@ Bio::FlatFile.open(Bio::FastaFormat, query).each_with_index do |fas, i|
87
90
  res = mapper.run(query_file_path, genome)
88
91
  File.open(out_file_path, "w"){|o| o.puts res}
89
92
 
93
+ gff3 = Mapp2g::ExonerateOutput.new(res).to_gff3()
94
+ out_file_path = "#{outdir}/#{id}.exonerate.gff3"
95
+ File.open(out_file_path, "w"){|o| o.puts gff3}
90
96
  end
data/lib/mapp2g/report.rb ADDED
@@ -0,0 +1,75 @@
1
module Mapp2g

  # In-memory view of one exonerate text report.
  #
  # The report is scanned once for the "Query:" / "Target:" header lines,
  # the "cigar:" line and the GFF2 "gene" / "exon" feature lines; #to_gff3
  # then rewrites those feature lines as GFF3 "match" / "match_part" records.
  #
  # NOTE(review): only the most recent Query/Target/cigar values are kept, so
  # a report holding several alignments would label every feature with the
  # last alignment's query name and cigar -- confirm the producer emits one
  # alignment per report.
  class ExonerateOutput

    # Tab-delimited source tags (GFF column 2) of the lines that are kept.
    SOURCE_TAGS = ["\texonerate:est2genome\t",
                   "\texonerate:protein2genome:local\t"].freeze
    # Tab-delimited feature tags (GFF column 3) of the lines that are kept.
    FEATURE_TAGS = ["\texon\t", "\tgene\t"].freeze
    private_constant :SOURCE_TAGS, :FEATURE_TAGS

    # Builds an instance from a report stored on disk.
    #
    # @param file [String] path to a file containing exonerate output
    # @return [ExonerateOutput]
    def self.load(file)
      new(File.read(file))
    end

    # @param exonerate_out [String] exonerate output text, not file path
    def initialize(exonerate_out)
      @exonerate_out = exonerate_out
      @query_name = nil
      @target = nil
      @cigar = nil
      @gff2_lines = []
      parse
    end

    attr_reader :query_name, :target, :cigar, :gff2_lines

    # Scans the report text and fills query_name, target, cigar and
    # gff2_lines. State is reset first, so calling this again does not
    # accumulate duplicate feature lines.
    #
    # @param opt [Hash] accepted for interface compatibility; currently unused
    def parse(opt = {})
      @query_name = nil
      @target = nil
      @cigar = nil
      @gff2_lines = []
      @exonerate_out.each_line do |l|
        if (m = /\s+Query:\s/.match(l))
          # first whitespace-delimited token after "Query:" is the name
          @query_name = m.post_match.chomp.split[0]
        elsif (m = /\s+Target:\s/.match(l))
          @target = m.post_match.split[0]
        elsif (m = /^cigar:\s/.match(l))
          @cigar = m.post_match.chomp
        elsif gff2_feature_line?(l)
          @gff2_lines << l.chomp
        end
      end
    end

    # Converts the collected GFF2 feature lines to GFF3 text.
    #
    # "gene" lines become "match" records carrying ID, Target, Gap, identity
    # and similarity; "exon" lines become "match_part" records carrying
    # Parent, identity and similarity.
    #
    # @param opt [Hash] accepted for interface compatibility; currently unused
    # @return [String] GFF3 lines joined with "\n" ("" when nothing was collected)
    # @raise [RuntimeError] if a "gene" line is present but the report had no
    #   "cigar:" line, or if a collected line has an unexpected feature type
    def to_gff3(opt = {})
      @gff2_lines.map { |l| gff3_line(l) }.join("\n")
    end

    private

    # True when +line+ is a gene/exon feature line for the current target.
    # The target name is compared as a literal prefix (not as a regexp), so
    # names containing characters such as "|", "(", "." or "+" match only
    # themselves. When no "Target:" header has been seen, lines are not
    # filtered by sequence name.
    def gff2_feature_line?(line)
      return false unless @target.nil? || line.start_with?(@target)

      SOURCE_TAGS.any? { |tag| line.include?(tag) } &&
        FEATURE_TAGS.any? { |tag| line.include?(tag) }
    end

    # Rewrites one GFF2 feature line as a GFF3 line.
    def gff3_line(gff2_line)
      cols = gff2_line.chomp.split(/\t/)
      # pad to the nine GFF columns so the output always has a 9th field
      cols.concat([nil] * (9 - cols.size)) if cols.size < 9
      orig_attribute = parse_gff2_attributes(cols[8])

      attribute =
        case cols[2]
        when "gene"
          cols[2] = "match"
          match_attributes(orig_attribute)
        when "exon"
          cols[2] = "match_part"
          { 'Parent' => @query_name,
            'identity' => orig_attribute['identity'],
            'similarity' => orig_attribute['similarity'] }
        else
          raise "unexpected feature type #{cols[2].inspect} in exonerate GFF2 line: #{gff2_line}"
        end

      cols[8] = attribute.map { |k, v| "#{k}=#{v}" }.join(";")
      cols.join("\t")
    end

    # Attributes for a "match" record, derived from the cigar line.
    def match_attributes(orig_attribute)
      if @cigar.nil?
        raise "cigar line not found in exonerate output (run exonerate with '--showcigar yes')"
      end

      c = @cigar.split(/\s+/)
      # fields 9.. are the operation/length tokens ("M 150 D 6 ..."); glue
      # them together and re-split into "M150", "D6", ... pairs
      cigar_pairs = Array(c[9..-1]).join.scan(/[MDI]\d+/)
      { 'ID' => @query_name,
        # c[1]/c[2] are used as query start/end; the start is shifted by one,
        # presumably from exonerate's 0-based start to a 1-based coordinate
        'Target' => [@query_name, c[1].to_i + 1, c[2]].join(" "),
        'Gap' => cigar_pairs.join(" "),
        'identity' => orig_attribute['identity'],
        'similarity' => orig_attribute['similarity'] }
    end

    # Parses a GFF2 attribute column ("key value ; key value ; ...") into a
    # Hash. Empty items are skipped and a value may contain spaces.
    def parse_gff2_attributes(field)
      field.to_s.split(";").each_with_object({}) do |item, attrs|
        key, value = item.strip.split(/\s+/, 2)
        attrs[key] = value if key
      end
    end

  end

end
75
+
data/lib/mapp2g/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Mapp2g
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
data/lib/mapp2g.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "mapp2g/mapper"
4
+ require_relative "mapp2g/report"
4
5
  require_relative "mapp2g/version"
5
6
 
6
7
  module Mapp2g
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mapp2g
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuji Shigenobu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-09 00:00:00.000000000 Z
11
+ date: 2023-07-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: mapp2g is a bioinformatics software, which map and align protein sequences
14
14
  (amino acid sequences) to genome references in a splicing-aware way. mapp2g alignment
@@ -28,6 +28,7 @@ files:
28
28
  - exe/mapp2g
29
29
  - lib/mapp2g.rb
30
30
  - lib/mapp2g/mapper.rb
31
+ - lib/mapp2g/report.rb
31
32
  - lib/mapp2g/version.rb
32
33
  - mapp2g.gemspec
33
34
  - scripts/add_annotation_from_uniprot_fasta_to_gff.rb
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.10
61
+ rubygems_version: 3.4.15
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: mapp2g is the tool to map protein sequences to genome references in a splicing-aware