mitab 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/mitab.rb +142 -0
  3. metadata +44 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ce2fc8ccf6fdfae5dab43d559f44e67bb72bbe3c
4
+ data.tar.gz: 8abfa26555a589173fdfe8dd2b8dff019f7b4f11
5
+ SHA512:
6
+ metadata.gz: f880d900c11fc5f021fe2bf2a8cdff56e495bc3cf91df40536af5b09fef65f347dcd7cc268ff2018eb8a7f37791bef8de686b977e922222c6cd193ec85640a68
7
+ data.tar.gz: 5f996e45eeb1d0b32daf4db82f7e2197db806f07214fa39c753cab55721b0ab6c45599d1e52e11c62793160cfd3d76f458d714628e468a24b26ad0a7a6b9974c
@@ -0,0 +1,142 @@
1
+ require 'open-uri'
2
module Mitab
  # Parser for tab-separated MITab text (one binary interaction per line,
  # at least 15 columns).
  #
  # Parsing happens eagerly in the constructor. Afterwards the results are
  # available through the readers:
  #
  # * +nodes+  - Hash of node id => node Hash (see #getNode)
  # * +links+  - Array with one interaction Hash per input line (see #parse)
  # * +scores+ - Hash of score name => { name:, min:, max: } (Float bounds)
  # * +mitab+  - summary Hash with :links, :nodes, :ids, :taxa and :scores
  class MitabParser
    attr_reader :nodes, :links, :scores, :mitab

    # Patterns used to pick values out of MITab fields. They are constants
    # rather than process-wide global variables so that they cannot clash
    # with, or be overwritten by, unrelated code.
    TEXT_IN_PARENTHESIS = /\((.*?)\)/
    TEXT_IN_QUOTES = /\"(.*?)\"/
    TEXT_IN_TAX = /\:(.*?)\(/
    GENE_NAME = /\((gene name)\)/
    GENE_NAME_SYNONYM = /\((gene name synonym)\)/

    # Parses +text+ line by line and builds the node/link/score tables.
    #
    # @param text [String] MITab content, lines separated by "\n"
    def initialize(text)
      @nodes = {}
      @scores = {}
      # parse fills @nodes and @scores as a side effect.
      @links = text.split("\n").map { |line| parse(line) }

      node_list = @nodes.values
      @mitab = {
        links: @links,
        nodes: node_list,
        ids: node_list.map { |node| node[:id] },
        # Union of all per-node taxonomy ids; nil entries come from '-'.
        taxa: node_list.reduce([]) { |union, node| union | node[:taxonomy] }.compact,
        scores: @scores.values
      }
    end

    # Splits a "database:identifier" entry.
    #
    # Only the first colon separates name from value, so identifiers that
    # themselves contain a colon (e.g. chebi:"CHEBI:15377") stay intact.
    #
    # @param pubStr [String]
    # @return [Hash] { name:, value: }
    def mapPub(pubStr)
      name, value = pubStr.split(':', 2)
      { name: name, value: value }
    end

    # Maps a field of the form db:"name"(value).
    #
    # @param fieldStr [String]
    # @return [Hash] { name:, value: } when both a quoted and a parenthesised
    #   part are present; otherwise the colon-split { name:, score: }
    #   (the :score key of the fallback is kept for compatibility).
    def mapField(fieldStr)
      quoted = fieldStr.match(TEXT_IN_QUOTES)
      bracketed = fieldStr.match(TEXT_IN_PARENTHESIS)
      if quoted.nil? || bracketed.nil?
        name, value = fieldStr.split(':')
        return { name: name, score: value }
      end
      { name: quoted[1], value: bracketed[1] }
    end

    # Folds one score into the per-name min/max table (+scores+).
    # Scores whose value is not numeric are ignored.
    #
    # @param score [Hash] { name:, score: } as produced by #mapScore
    def addScore(score)
      value = numeric_score(score[:score])
      return if value.nil?

      name = score[:name]
      entry = @scores[name]
      if entry
        entry[:min] = value if entry[:min].to_f > value
        entry[:max] = value if entry[:max].to_f < value
      else
        # Bounds are always stored as Float so min/max have one type.
        @scores[name] = { name: name, min: value, max: value }
      end
    end

    # Splits a "name:value" score entry and registers it via #addScore.
    #
    # @param scoreStr [String]
    # @return [Hash] { name:, score: } with the score left as a String
    def mapScore(scoreStr)
      name, value = scoreStr.split(':')
      score = { name: name, score: value }
      addScore(score)
      score
    end

    # Extracts the taxonomy id from an entry such as "taxid:9606(human)".
    #
    # @param taxStr [String]
    # @return [String, nil] text between ':' and '(' if present, otherwise
    #   the part after the first ':'; nil for the placeholder '-'
    def mapTaxonomy(taxStr)
      return nil if taxStr == '-'

      match = taxStr.match(TEXT_IN_TAX)
      match ? match[1] : taxStr.split(':')[1]
    end

    # Builds the node Hash for one interactor.
    #
    # @param idStr [String] '|'-separated primary ids
    # @param altIdsStr [String] '|'-separated alternative ids
    # @param aliasStr [String] '|'-separated aliases
    # @param taxStr [String] '|'-separated taxonomy entries
    # @return [Hash] { id:, ids:, uniprot:, geneName:, altIds:, taxonomy: }
    def getNode(idStr, altIdsStr, aliasStr, taxStr)
      aliases = aliasStr.split('|')

      # Prefer "(gene name)" aliases; fall back to synonyms when none exist.
      gene_names = aliases.select { |entry| entry.match(GENE_NAME) }
      gene_names = aliases.select { |entry| entry.match(GENE_NAME_SYNONYM) } if gene_names.empty?

      ids = (idStr.split('|') + altIdsStr.split('|') + aliases).map { |entry| mapPub(entry) }
      first_id = ids.first

      {
        id: first_id ? first_id[:value] : nil,
        ids: ids,
        uniprot: ids.select { |entry| entry[:name] == 'uniprotkb' },
        # Aliases without a "db:" prefix yield no match and are skipped.
        geneName: gene_names.map { |entry| (m = entry.match(TEXT_IN_TAX)) && m[1] }.compact,
        altIds: altIdsStr.split('|').map { |entry| mapPub(entry) },
        # Map first, then de-duplicate, so the list holds taxonomy ids
        # rather than the raw "taxid:..." strings.
        taxonomy: taxStr.split('|').map { |entry| mapTaxonomy(entry) }.uniq
      }
    end

    # Parses one MITab line, registering both interactors in +nodes+ and
    # all scores in +scores+.
    #
    # @param line [String] tab-separated record
    # @return [Hash] the interaction, or {} (with a message on stderr) when
    #   +line+ is not a String or has fewer than 15 columns
    def parse(line)
      unless line.is_a?(String)
        warn 'MITab cannot parse line '
        return {}
      end

      fields = line.split("\t")
      if fields.length < 15
        warn 'MITab cannot parse line '
        return {}
      end

      nodeA = getNode(fields[0], fields[2], fields[4], fields[9])
      nodeB = getNode(fields[1], fields[3], fields[5], fields[10])

      interaction = {
        source: nodeA[:id],
        target: nodeB[:id],
        detMethods: fields[6].split('|').map { |x| mapField(x) },
        firstAuthor: fields[7].split('|'),
        publications: fields[8].split('|').map { |x| mapField(x) },
        intTypes: fields[11].split('|').map { |x| mapField(x) },
        sourceDbs: fields[12].split('|').map { |x| mapField(x) },
        intIds: fields[13].split('|').map { |x| mapPub(x) },
        scores: fields[14].split('|').map { |x| mapScore(x) }
      }

      @nodes[nodeA[:id]] = nodeA
      @nodes[nodeB[:id]] = nodeB

      interaction
    end

    # Writes the summary Hash (+mitab+) to standard output.
    def print
      puts @mitab
    end

    private

    # Strictly converts +raw+ to a Float.
    #
    # String#to_f silently turns any non-numeric text into 0.0, so it cannot
    # be used to detect invalid scores; Kernel#Float raises instead.
    #
    # @param raw [String, nil]
    # @return [Float, nil] nil when +raw+ is nil or not a valid number
    def numeric_score(raw)
      Float(raw)
    rescue ArgumentError, TypeError
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mitab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Prasun Anand
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A ruby parser for MITab file format.
14
+ email: prasunanand.bitsp@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/mitab.rb
20
+ homepage: http://rubygems.org/gems/mitab
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: MITab parser
44
+ test_files: []