mapp2g 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80ed37b4687cdd64f5a292593e2b15addadbabe49223c79835b530e94ac23be6
4
- data.tar.gz: 37a038d19321a88e10f44261a3027bda85c40c29d529b710bb93ed760ada585e
3
+ metadata.gz: ea23c58705cef813135bf96b23383cdeca589643ff08a25ba8b95fd473e26449
4
+ data.tar.gz: 7be36a76318408344b4402fafda2393ba51d6e2e36fe2c50e6ad393817ff6cbb
5
5
  SHA512:
6
- metadata.gz: 79f3e7022e8532c1bb9fc0f333927042a4eead1a40d1ecbcb9408d9391a1f21e162830132c395411aac7065076fa14eed694f6ee5b6824a3edc1c193a99ff9ab
7
- data.tar.gz: b6c93c34a6576ea1e57fdc2eecdb04e767a595b6b6a037885ab3a89ebf46f36272b6017f55441788ca1cb89dd80cbe273d6a7049f067445b48d8464a486402b2
6
+ metadata.gz: cfed44714ac742bd9cc8a17169f85c691292e2fe1d7c4d62960d3babe0cd61cf59b01c0b333af063975aef50f17c99aab9f06103b662da03e00813d456f45e02
7
+ data.tar.gz: c4f3711b154e3ad3d730dde9e5637061514563f49607a814c5bbf2a674de3b06a81f96926aac9d8c32bc5feedb06c7a3dab8a481a8f122b138d60530b923af7c
data/exe/mapp2g CHANGED
@@ -62,6 +62,9 @@ rescue => e
62
62
  end
63
63
 
64
64
  begin
65
+ unless File.exist?(genome)
66
+ raise "genome file (#{genome}) not found"
67
+ end
65
68
  unless File.exist?("#{genome}.nsq") && File.exist?("#{genome}.nos")
66
69
  raise "genome is not indexed. Please run 'makeblastdb -in #{genome} -dbtype nucl -parse_seqids'"
67
70
  end
@@ -87,4 +90,7 @@ Bio::FlatFile.open(Bio::FastaFormat, query).each_with_index do |fas, i|
87
90
  res = mapper.run(query_file_path, genome)
88
91
  File.open(out_file_path, "w"){|o| o.puts res}
89
92
 
93
+ gff3 = Mapp2g::ExonerateOutput.new(res).to_gff3()
94
+ out_file_path = "#{outdir}/#{id}.exonerate.gff3"
95
+ File.open(out_file_path, "w"){|o| o.puts gff3}
90
96
  end
@@ -0,0 +1,75 @@
1
module Mapp2g

  # Parses the text report printed by exonerate and converts the embedded
  # GFF2 "gene"/"exon" records into GFF3 "match"/"match_part" records.
  #
  # The parser recognises three kinds of header lines ("Query:", "Target:"
  # and "cigar:") plus GFF2 lines whose source column is
  # "exonerate:est2genome" or "exonerate:protein2genome:local".
  class ExonerateOutput

    QUERY_HEADER       = /\s+Query:\s/
    TARGET_HEADER      = /\s+Target:\s/
    CIGAR_HEADER       = /^cigar:\s/
    GFF_SOURCE_COLUMN  = /\texonerate:(?:est2genome|protein2genome:local)\t/
    GFF_FEATURE_COLUMN = /\t(?:exon|gene)\t/
    # NOTE(review): exonerate is understood to append ":[revcomp]" to the
    # target id on the "Target:" header for reverse-strand hits, while the GFF
    # lines carry the bare id -- confirm against real exonerate output.
    REVCOMP_SUFFIX     = /:\[revcomp\]\z/

    # Builds a parser from a file holding exonerate output.
    #
    # @param file [String] path to the exonerate output file
    # @return [ExonerateOutput]
    def self.load(file)
      new(File.read(file))
    end

    # @param exonerate_out [String] exonerate output text, not file path
    def initialize(exonerate_out)
      @exonerate_out = exonerate_out
      @query_name = nil
      @target = nil
      @cigar = nil
      @gff2_lines = []
      @gff2_contexts = []
      parse
    end

    # query_name, target and cigar hold the values of the LAST matching
    # header line seen; gff2_lines holds every collected GFF2 line (chomped).
    attr_reader :query_name, :target, :cigar, :gff2_lines

    # Scans the report line by line and fills the reader attributes.
    # State is reset first, so calling it again does not duplicate lines.
    #
    # @param opt [Hash] accepted for interface compatibility; currently unused
    def parse(opt={})
      @query_name = nil
      @target = nil
      @cigar = nil
      @gff2_lines = []
      @gff2_contexts = []
      @exonerate_out.each_line do |l|
        if (m = QUERY_HEADER.match(l))
          @query_name = m.post_match.chomp.split[0]
        elsif (m = TARGET_HEADER.match(l))
          name = m.post_match.split[0]
          @target = name && name.sub(REVCOMP_SUFFIX, "")
        elsif (m = CIGAR_HEADER.match(l))
          @cigar = m.post_match.chomp
        elsif gff2_feature_line?(l)
          @gff2_lines << l.chomp
          # Remember which alignment this line belongs to, so a report with
          # several alignments does not reuse the last cigar for every match.
          @gff2_contexts << { query_name: @query_name, cigar: @cigar }
        end
      end
    end

    # Converts the collected GFF2 lines to GFF3 text.
    #
    # "gene" records become "match" with ID/Target/Gap/identity/similarity,
    # "exon" records become "match_part" with Parent/identity/similarity.
    # Attributes whose value is unavailable (for example Target and Gap when
    # the report has no cigar line) are left out instead of raising.
    #
    # NOTE(review): every match uses the query name as its ID, so several
    # alignments of the same query share one ID -- confirm this is intended.
    #
    # @param opt [Hash] accepted for interface compatibility; currently unused
    # @return [String] GFF3 lines joined by "\n" (empty string if none)
    # @raise [RuntimeError] if a collected line has neither "gene" nor "exon"
    #   in its third column
    def to_gff3(opt={})
      records = @gff2_lines.each_with_index.map do |line, idx|
        context = @gff2_contexts[idx] || {}
        name = context[:query_name] || @query_name
        cols = line.chomp.split(/\t/)
        cols[8] = nil if cols.size < 9 # pad short records to nine columns
        source_attrs = parse_gff2_attributes(cols[8])
        scores = { 'identity' => source_attrs['identity'],
                   'similarity' => source_attrs['similarity'] }
        attrs =
          case cols[2]
          when "gene"
            cols[2] = "match"
            { 'ID' => name }
              .merge(alignment_attributes(name, context[:cigar] || @cigar))
              .merge(scores)
          when "exon"
            cols[2] = "match_part"
            { 'Parent' => name }.merge(scores)
          else
            raise "unsupported GFF2 feature type: #{cols[2].inspect}"
          end
        cols[8] = format_gff3_attributes(attrs)
        cols.join("\t")
      end
      records.join("\n")
    end

    private

    # True when the line starts with the current target name (compared
    # literally, so regex metacharacters in sequence names are harmless) and
    # carries a supported source and feature. Before any "Target:" header has
    # been seen, no prefix restriction is applied.
    def gff2_feature_line?(line)
      return false unless @target.nil? || line.start_with?(@target)
      GFF_SOURCE_COLUMN.match?(line) && GFF_FEATURE_COLUMN.match?(line)
    end

    # Splits a GFF2 attribute column ("key value ; key value") into a Hash.
    # Empty segments are skipped; a value may contain spaces.
    def parse_gff2_attributes(text)
      return {} unless text
      text.split(";").each_with_object({}) do |pair, attrs|
        key, value = pair.strip.split(/\s+/, 2)
        attrs[key] = value if key
      end
    end

    # Builds the Target and Gap attributes from a cigar line body.
    # Field 1 is used as a 0-based query start (hence +1), field 2 as the
    # query end, and fields 9.. as the operation/length pairs.
    def alignment_attributes(name, cigar)
      return {} unless cigar
      fields = cigar.split(/\s+/)
      operations = (fields[9..-1] || []).join.scan(/[MDI]\d+/)
      { 'Target' => [name, fields[1].to_i + 1, fields[2]].join(" "),
        'Gap' => operations.join(" ") }
    end

    # Serialises attributes as "key=value;key=value", dropping nil values.
    def format_gff3_attributes(attrs)
      attrs.reject { |_key, value| value.nil? }
           .map { |key, value| "#{key}=#{value}" }
           .join(";")
    end

  end

end
75
+
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Mapp2g
4
- VERSION = "0.1.4"
4
+ VERSION = "0.1.5"
5
5
  end
data/lib/mapp2g.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "mapp2g/mapper"
4
+ require_relative "mapp2g/report"
4
5
  require_relative "mapp2g/version"
5
6
 
6
7
  module Mapp2g
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mapp2g
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuji Shigenobu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-09 00:00:00.000000000 Z
11
+ date: 2023-07-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: mapp2g is a bioinformatics software, which map and align protein sequences
14
14
  (amino acid sequences) to genome references in a splicing-aware way. mapp2g alignment
@@ -28,6 +28,7 @@ files:
28
28
  - exe/mapp2g
29
29
  - lib/mapp2g.rb
30
30
  - lib/mapp2g/mapper.rb
31
+ - lib/mapp2g/report.rb
31
32
  - lib/mapp2g/version.rb
32
33
  - mapp2g.gemspec
33
34
  - scripts/add_annotation_from_uniprot_fasta_to_gff.rb
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.10
61
+ rubygems_version: 3.4.15
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: mapp2g is the tool to map protein sequences to genome references in a splicing-aware