mitab 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/mitab.rb +142 -0
  3. metadata +44 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ce2fc8ccf6fdfae5dab43d559f44e67bb72bbe3c
4
+ data.tar.gz: 8abfa26555a589173fdfe8dd2b8dff019f7b4f11
5
+ SHA512:
6
+ metadata.gz: f880d900c11fc5f021fe2bf2a8cdff56e495bc3cf91df40536af5b09fef65f347dcd7cc268ff2018eb8a7f37791bef8de686b977e922222c6cd193ec85640a68
7
+ data.tar.gz: 5f996e45eeb1d0b32daf4db82f7e2197db806f07214fa39c753cab55721b0ab6c45599d1e52e11c62793160cfd3d76f458d714628e468a24b26ad0a7a6b9974c
@@ -0,0 +1,142 @@
1
+ require 'open-uri'
2
module Mitab
  # Parser for PSI-MITAB text (tab-separated molecular interaction records).
  #
  # Every data row must contain at least 15 tab-separated columns. The columns
  # are read positionally: 0/1 identifiers, 2/3 alternative identifiers,
  # 4/5 aliases, 6 detection methods, 7 first author, 8 publications,
  # 9/10 taxonomy, 11 interaction types, 12 source databases,
  # 13 interaction identifiers, 14 confidence scores.
  #
  # After construction the results are available through the readers:
  # * +nodes+  - Hash of node id => node Hash (see #getNode)
  # * +links+  - Array of interaction Hashes (see #parse)
  # * +scores+ - Hash of score name => { name:, min:, max: } with Float bounds
  # * +mitab+  - summary Hash with :links, :nodes, :ids, :taxa and :scores
  class MitabParser
    attr_reader :nodes, :links, :scores, :mitab

    # Patterns are constants rather than globals so they cannot be clobbered
    # by (or leak into) unrelated code.
    TEXT_IN_PARENTHESIS = /\((.*?)\)/.freeze
    TEXT_IN_QUOTES = /\"(.*?)\"/.freeze
    TEXT_IN_TAX = /\:(.*?)\(/.freeze
    GENE_NAME = /\((gene name)\)/.freeze
    GENE_NAME_SYNONYM = /\((gene name synonym)\)/.freeze

    # Minimum number of tab-separated columns a row needs to be parsed.
    MIN_FIELDS = 15

    # Parses +text+ and populates #nodes, #links, #scores and #mitab.
    #
    # Blank lines and lines starting with '#' (MITAB header/comment lines) are
    # skipped. Rows that cannot be parsed are reported on stderr and left out
    # of #links. Both "\n" and "\r\n" line endings are accepted.
    #
    # @param text [String] the MITAB document
    def initialize(text)
      @nodes = {}
      @links = []
      @scores = {}

      rows = text.to_s.split(/\r?\n/).reject do |row|
        row.strip.empty? || row.start_with?('#')
      end
      # parse returns {} for rows it rejects; those are not interactions.
      @links = rows.map { |row| parse(row) }.reject(&:empty?)

      node_list = @nodes.values
      @mitab = {
        links: @links,
        nodes: node_list,
        ids: node_list.map { |node| node[:id] },
        taxa: node_list.flat_map { |node| node[:taxonomy] }.compact.uniq,
        scores: @scores.values
      }
    end

    # Splits a "database:identifier" entry.
    #
    # Only the first colon separates name and value, so identifiers that
    # contain colons themselves (e.g. chebi:"CHEBI:45906") stay intact.
    #
    # @param pubStr [String]
    # @return [Hash] { name:, value: } (value is nil when there is no colon)
    def mapPub(pubStr)
      name, value = split_key_value(pubStr)
      { name: name, value: value }
    end

    # Maps a controlled-vocabulary entry such as
    # psi-mi:"MI:0018"(two hybrid) to { name: 'MI:0018', value: 'two hybrid' }.
    #
    # Entries without both a quoted and a parenthesised part are split on the
    # first colon and returned as { name:, score: }.
    # NOTE(review): the fallback uses the key :score rather than :value; this
    # is kept as-is because callers may already rely on it.
    #
    # @param fieldStr [String]
    # @return [Hash]
    def mapField(fieldStr)
      quoted = fieldStr[TEXT_IN_QUOTES, 1]
      bracketed = fieldStr[TEXT_IN_PARENTHESIS, 1]
      if quoted.nil? || bracketed.nil?
        name, score = split_key_value(fieldStr)
        return { name: name, score: score }
      end
      { name: quoted, value: bracketed }
    end

    # Folds one score into the per-name min/max summary held in #scores.
    # Scores whose value is not numeric (nil, '-', 'NaN', ...) are ignored.
    #
    # @param score [Hash] { name:, score: }
    def addScore(score)
      value = numeric_score(score[:score])
      return if value.nil?

      name = score[:name]
      range = @scores[name]
      if range
        range[:min] = value if range[:min].to_f > value
        range[:max] = value if range[:max].to_f < value
      else
        @scores[name] = { name: name, min: value, max: value }
      end
    end

    # Splits a "name:value" confidence score and records it via #addScore.
    #
    # @param scoreStr [String]
    # @return [Hash] { name:, score: } with the score still as a String
    def mapScore(scoreStr)
      name, value = split_key_value(scoreStr)
      score = { name: name, score: value }
      addScore(score)
      score
    end

    # Extracts the taxonomy id from entries such as "taxid:9606(human)" or
    # "taxid:9606".
    #
    # @param taxStr [String]
    # @return [String, nil] the id, or nil for the '-' placeholder
    def mapTaxonomy(taxStr)
      return nil if taxStr == '-'

      taxStr[TEXT_IN_TAX, 1] || taxStr.split(':')[1]
    end

    # Builds a node Hash for one interactor.
    #
    # @param idStr [String] '|'-separated identifiers
    # @param altIdsStr [String] '|'-separated alternative identifiers
    # @param aliasStr [String] '|'-separated aliases
    # @param taxStr [String] '|'-separated taxonomy entries
    # @return [Hash] with keys :id (value of the first identifier), :ids,
    #   :uniprot (entries whose database is "uniprotkb"), :geneName (names
    #   tagged "(gene name)", falling back to "(gene name synonym)"),
    #   :altIds and :taxonomy (unique taxonomy ids)
    def getNode(idStr, altIdsStr, aliasStr, taxStr)
      aliases = aliasStr.split('|')
      gene_aliases = aliases.select { |entry| entry =~ GENE_NAME }
      # select returns [] (never nil) when nothing matches.
      gene_aliases = aliases.select { |entry| entry =~ GENE_NAME_SYNONYM } if gene_aliases.empty?

      ids = (idStr.split('|') + altIdsStr.split('|') + aliases).map { |entry| mapPub(entry) }
      primary = ids.first
      {
        id: primary && primary[:value],
        ids: ids,
        uniprot: ids.select { |entry| entry[:name] == 'uniprotkb' },
        # Aliases without a "db:" prefix have no extractable name; skip them.
        geneName: gene_aliases.map { |entry| entry[TEXT_IN_TAX, 1] }.compact,
        altIds: altIdsStr.split('|').map { |entry| mapPub(entry) },
        taxonomy: taxStr.split('|').map { |entry| mapTaxonomy(entry) }.compact.uniq
      }
    end

    # Parses a single MITAB row, registers both interactors in #nodes and
    # updates the score summary.
    #
    # @param line [String] one tab-separated row
    # @return [Hash] the interaction, or {} (after a warning on stderr) when
    #   +line+ is not a String or has fewer than 15 columns
    def parse(line)
      fields = line.split("\t") if line.is_a?(String)
      if fields.nil? || fields.length < MIN_FIELDS
        warn "MITab cannot parse line #{line.inspect}"
        return {}
      end

      node_a = getNode(fields[0], fields[2], fields[4], fields[9])
      node_b = getNode(fields[1], fields[3], fields[5], fields[10])
      @nodes[node_a[:id]] = node_a
      @nodes[node_b[:id]] = node_b

      {
        source: node_a[:id],
        target: node_b[:id],
        detMethods: fields[6].split('|').map { |entry| mapField(entry) },
        firstAuthor: fields[7].split('|'),
        publications: fields[8].split('|').map { |entry| mapField(entry) },
        intTypes: fields[11].split('|').map { |entry| mapField(entry) },
        sourceDbs: fields[12].split('|').map { |entry| mapField(entry) },
        intIds: fields[13].split('|').map { |entry| mapPub(entry) },
        scores: fields[14].split('|').map { |entry| mapScore(entry) }
      }
    end

    # Writes the summary Hash (#mitab) to stdout.
    def print
      puts @mitab
    end

    private

    # Splits +str+ on its first colon only. An empty remainder is reported as
    # nil, matching what a plain split(':') yields for "key:".
    def split_key_value(str)
      key, rest = str.split(':', 2)
      rest = nil if rest && rest.empty?
      [key, rest]
    end

    # Strict numeric conversion: returns a Float, or nil when +raw+ is not a
    # number. String#to_f is unsuitable here because it silently yields 0.0.
    def numeric_score(raw)
      number = Float(raw)
      number.nan? ? nil : number
    rescue ArgumentError, TypeError
      nil
    end
  end
end
metadata ADDED
@@ -0,0 +1,44 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mitab
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Prasun Anand
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-03-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A ruby parser for MITab file format.
14
+ email: prasunanand.bitsp@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/mitab.rb
20
+ homepage: http://rubygems.org/gems/mitab
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.4.6
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: MITab parser
44
+ test_files: []