mitab 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/mitab.rb +142 -0
- metadata +44 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: ce2fc8ccf6fdfae5dab43d559f44e67bb72bbe3c
|
4
|
+
data.tar.gz: 8abfa26555a589173fdfe8dd2b8dff019f7b4f11
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f880d900c11fc5f021fe2bf2a8cdff56e495bc3cf91df40536af5b09fef65f347dcd7cc268ff2018eb8a7f37791bef8de686b977e922222c6cd193ec85640a68
|
7
|
+
data.tar.gz: 5f996e45eeb1d0b32daf4db82f7e2197db806f07214fa39c753cab55721b0ab6c45599d1e52e11c62793160cfd3d76f458d714628e468a24b26ad0a7a6b9974c
|
data/lib/mitab.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
module Mitab
  # Parser for tab-separated MITab interaction records.
  #
  # Every input line is split on tabs and must provide at least
  # MIN_COLUMNS columns. The columns are consumed positionally:
  # 0/1 interactor ids, 2/3 alternative ids, 4/5 aliases, 6 detection
  # methods, 7 first author, 8 publications, 9/10 taxonomy, 11 interaction
  # types, 12 source databases, 13 interaction ids, 14 confidence scores.
  #
  # After construction the parser exposes:
  # * +nodes+  - Hash of node id => node Hash (see #getNode)
  # * +links+  - Array with one interaction Hash per input line
  #   (an empty Hash for a line that could not be parsed)
  # * +scores+ - Hash of score name => { name:, min:, max: }
  # * +mitab+  - summary Hash with :links, :nodes, :ids, :taxa and :scores
  class MitabParser
    attr_reader :nodes, :links, :scores, :mitab

    # Patterns are class constants rather than global variables so that
    # loading this file does not touch process-wide state.
    TEXT_IN_PARENTHESIS = /\((.*?)\)/
    TEXT_IN_QUOTES = /\"(.*?)\"/
    TEXT_IN_TAX = /\:(.*?)\(/
    GENE_NAME = /\((gene name)\)/
    GENE_NAME_SYNONYM = /\((gene name synonym)\)/

    # Minimum number of tab-separated columns a line needs to be parsed.
    MIN_COLUMNS = 15

    # Parses +text+ line by line and builds the node, link and score tables.
    #
    # @param text [String] MITab content, one interaction per line;
    #   both "\n" and "\r\n" line endings are accepted
    def initialize(text)
      @nodes = {}
      @scores = {}
      # Splitting on \r?\n keeps a stray carriage return out of the last column.
      @links = text.split(/\r?\n/).map { |line| parse(line) }

      node_list = @nodes.values
      @mitab = {
        links: @links,
        nodes: node_list,
        ids: node_list.map { |node| node[:id] },
        taxa: node_list.flat_map { |node| node[:taxonomy] }.uniq,
        scores: @scores.values
      }
    end

    # Splits a "database:identifier" cross reference.
    #
    # Only the first colon separates the two parts, so identifiers that
    # themselves contain a colon are kept whole.
    #
    # @param pubStr [String]
    # @return [Hash] { name:, value: }; value is nil when there is no colon
    def mapPub(pubStr)
      name, value = pubStr.split(':', 2)
      { name: name, value: value }
    end

    # Maps a controlled-vocabulary field of the form db:"ID"(label).
    #
    # @param fieldStr [String]
    # @return [Hash] { name: <quoted text>, value: <parenthesised text> } when
    #   both parts are present; otherwise { name:, score: } obtained by
    #   splitting on the first colon (the :score key is kept for backward
    #   compatibility with existing callers)
    def mapField(fieldStr)
      quoted = fieldStr[TEXT_IN_QUOTES, 1]
      labelled = fieldStr[TEXT_IN_PARENTHESIS, 1]
      return { name: quoted, value: labelled } if quoted && labelled

      name, score = fieldStr.split(':', 2)
      { name: name, score: score }
    end

    # Folds one score observation into the per-name min/max table.
    #
    # Observations whose value is not a valid number (for example "-" or a
    # missing value) are ignored. Bounds are always stored as Float.
    #
    # @param score [Hash] { name:, score: } as produced by #mapScore
    def addScore(score)
      value = numeric_score(score[:score])
      return if value.nil?

      name = score[:name]
      entry = @scores[name]
      if entry
        entry[:min] = value if value < entry[:min]
        entry[:max] = value if value > entry[:max]
      else
        @scores[name] = { name: name, min: value, max: value }
      end
    end

    # Splits a "name:value" confidence score and records it via #addScore.
    #
    # @param scoreStr [String]
    # @return [Hash] { name:, score: } with the score left as the raw String
    def mapScore(scoreStr)
      name, value = scoreStr.split(':', 2)
      score = { name: name, score: value }
      addScore(score)
      score
    end

    # Extracts the identifier from a taxonomy entry such as "taxid:9606(human)".
    #
    # @param taxStr [String]
    # @return [String, nil] the text between the first ':' and the following
    #   '(' when present, otherwise the text after the first ':'; nil for '-'
    def mapTaxonomy(taxStr)
      return nil if taxStr == '-'

      taxStr[TEXT_IN_TAX, 1] || taxStr.split(':')[1]
    end

    # Builds the node Hash for one interactor.
    #
    # @param idStr [String] '|'-separated primary identifiers
    # @param altIdsStr [String] '|'-separated alternative identifiers
    # @param aliasStr [String] '|'-separated aliases
    # @param taxStr [String] '|'-separated taxonomy entries
    # @return [Hash] with keys
    #   :id       - value of the first identifier (nil if there is none),
    #   :ids      - every identifier, alternative identifier and alias,
    #   :uniprot  - the entries of :ids whose name is "uniprotkb",
    #   :geneName - names of aliases tagged "(gene name)", falling back to
    #               those tagged "(gene name synonym)" when there are none,
    #   :altIds   - the alternative identifiers,
    #   :taxonomy - distinct taxonomy ids, with '-' entries dropped
    def getNode(idStr, altIdsStr, aliasStr, taxStr)
      aliases = aliasStr.split('|')
      alt_ids = altIdsStr.split('|')

      gene_aliases = aliases.select { |entry| entry =~ GENE_NAME }
      # select never returns nil, so the fallback has to test for emptiness.
      gene_aliases = aliases.select { |entry| entry =~ GENE_NAME_SYNONYM } if gene_aliases.empty?

      ids = (idStr.split('|') + alt_ids + aliases).map { |entry| mapPub(entry) }
      primary = ids.first

      {
        id: primary && primary[:value],
        ids: ids,
        uniprot: ids.select { |entry| entry[:name] == 'uniprotkb' },
        geneName: gene_aliases.map { |entry| entry[TEXT_IN_TAX, 1] }.compact,
        altIds: alt_ids.map { |entry| mapPub(entry) },
        taxonomy: taxStr.split('|').map { |entry| mapTaxonomy(entry) }.compact.uniq
      }
    end

    # Parses a single MITab line, registering both interactors in +nodes+
    # and every confidence score in +scores+.
    #
    # @param line [String] one tab-separated record
    # @return [Hash] the interaction, or an empty Hash (after a warning on
    #   stderr) when +line+ is not a String or has fewer than MIN_COLUMNS
    #   columns
    def parse(line)
      fields = line.is_a?(String) ? line.split("\t") : []
      if fields.length < MIN_COLUMNS
        warn "MITab cannot parse line #{line.inspect}"
        return {}
      end

      node_a = getNode(fields[0], fields[2], fields[4], fields[9])
      node_b = getNode(fields[1], fields[3], fields[5], fields[10])

      interaction = {
        source: node_a[:id],
        target: node_b[:id],
        detMethods: fields[6].split('|').map { |entry| mapField(entry) },
        firstAuthor: fields[7].split('|'),
        publications: fields[8].split('|').map { |entry| mapField(entry) },
        intTypes: fields[11].split('|').map { |entry| mapField(entry) },
        sourceDbs: fields[12].split('|').map { |entry| mapField(entry) },
        intIds: fields[13].split('|').map { |entry| mapPub(entry) },
        scores: fields[14].split('|').map { |entry| mapScore(entry) }
      }

      @nodes[node_a[:id]] = node_a
      @nodes[node_b[:id]] = node_b

      interaction
    end

    # Writes the summary Hash to standard output.
    def print
      puts @mitab
    end

    private

    # Converts a raw score to a Float.
    #
    # @return [Float, nil] nil when +raw+ is nil, not numeric, or NaN
    def numeric_score(raw)
      value = Float(raw)
      value.nan? ? nil : value
    rescue ArgumentError, TypeError
      nil
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mitab
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Prasun Anand
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-03-30 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A ruby parser for MITab file format.
|
14
|
+
email: prasunanand.bitsp@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/mitab.rb
|
20
|
+
homepage: http://rubygems.org/gems/mitab
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.4.6
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: MITab parser
|
44
|
+
test_files: []
|