semtools 0.1.6 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -1
- data/README.md +2 -0
- data/bin/semtools.rb +521 -0
- data/bin/strsimnet.rb +1 -2
- data/external_data/ontologies.txt +4 -0
- data/lib/semtools/ontology.rb +1241 -2002
- data/lib/semtools/parsers/file_parser.rb +32 -0
- data/lib/semtools/parsers/json_parser.rb +84 -0
- data/lib/semtools/parsers/oboparser.rb +511 -0
- data/lib/semtools/sim_handler.rb +1 -1
- data/lib/semtools/version.rb +1 -1
- data/lib/semtools.rb +3 -1
- data/semtools.gemspec +3 -1
- metadata +40 -6
- data/lib/semtools/math_methods.rb +0 -148
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semtools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- seoanezonjic
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2025-09-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: text
|
@@ -25,6 +25,34 @@ dependencies:
|
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: down
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: expcalc
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :runtime
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
28
56
|
- !ruby/object:Gem::Dependency
|
29
57
|
name: rake
|
30
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +103,7 @@ email:
|
|
75
103
|
executables:
|
76
104
|
- console
|
77
105
|
- onto2json.rb
|
106
|
+
- semtools.rb
|
78
107
|
- setup
|
79
108
|
- strsimnet.rb
|
80
109
|
extensions: []
|
@@ -90,13 +119,17 @@ files:
|
|
90
119
|
- Rakefile
|
91
120
|
- bin/console
|
92
121
|
- bin/onto2json.rb
|
122
|
+
- bin/semtools.rb
|
93
123
|
- bin/setup
|
94
124
|
- bin/strsimnet.rb
|
125
|
+
- external_data/ontologies.txt
|
95
126
|
- lib/data/hp.obo
|
96
127
|
- lib/data/phenotype_annotation.tab
|
97
128
|
- lib/semtools.rb
|
98
|
-
- lib/semtools/math_methods.rb
|
99
129
|
- lib/semtools/ontology.rb
|
130
|
+
- lib/semtools/parsers/file_parser.rb
|
131
|
+
- lib/semtools/parsers/json_parser.rb
|
132
|
+
- lib/semtools/parsers/oboparser.rb
|
100
133
|
- lib/semtools/sim_handler.rb
|
101
134
|
- lib/semtools/version.rb
|
102
135
|
- semtools.gemspec
|
@@ -119,9 +152,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
119
152
|
- !ruby/object:Gem::Version
|
120
153
|
version: '0'
|
121
154
|
requirements: []
|
122
|
-
rubygems_version: 3.
|
155
|
+
rubygems_version: 3.3.7
|
123
156
|
signing_key:
|
124
157
|
specification_version: 4
|
125
|
-
summary: Gem to handle semantic based calculations in text and defined ontologies
|
126
|
-
as GO or HPO.
|
158
|
+
summary: 'DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/py_semtools.
|
159
|
+
Gem to handle semantic based calculations in text and defined ontologies as GO or
|
160
|
+
HPO.'
|
127
161
|
test_files: []
|
@@ -1,148 +0,0 @@
|
|
1
|
-
# TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
|
2
|
-
#to cmpute fisher exact test
|
3
|
-
#Fisher => http://www.biostathandbook.com/fishers.html
|
4
|
-
def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil, partial_weigths=true)
|
5
|
-
#puts '-', listA.inspect, listB.inspect, '-'
|
6
|
-
listA_listB = listA & listB
|
7
|
-
listA_nolistB = listA - listB
|
8
|
-
nolistA_listB = listB - listA
|
9
|
-
if weigths.nil?
|
10
|
-
listA_listB_count = listA_listB.length
|
11
|
-
listA_nolistB_count = listA_nolistB.length
|
12
|
-
nolistA_listB_count = nolistA_listB.length
|
13
|
-
nolistA_nolistB_count = all_elements_count - (listA | listB).length
|
14
|
-
else
|
15
|
-
# Fisher exact test weigthed as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
|
16
|
-
# https://academic.oup.com/bioinformatics/article/22/13/1600/193669
|
17
|
-
listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
|
18
|
-
listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
|
19
|
-
nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
|
20
|
-
|
21
|
-
if partial_weigths
|
22
|
-
nolistA_nolistB_count = all_elements_count - (listA | listB).length
|
23
|
-
all_elements_count = nolistA_nolistB_count + listA_listB_count + listA_nolistB_count + nolistA_listB_count
|
24
|
-
else
|
25
|
-
nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
|
26
|
-
all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
|
27
|
-
end
|
28
|
-
end
|
29
|
-
#puts [listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count].inspect
|
30
|
-
if tail == 'two_sided'
|
31
|
-
accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
|
32
|
-
elsif tail == 'less'
|
33
|
-
accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
|
34
|
-
end
|
35
|
-
return accumulated_prob
|
36
|
-
end
|
37
|
-
|
38
|
-
def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
|
39
|
-
#https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
|
40
|
-
accumulated_prob = 0
|
41
|
-
ref_prob = compute_hyper_prob(
|
42
|
-
listA_listB_count,
|
43
|
-
listA_nolistB_count,
|
44
|
-
nolistA_listB_count,
|
45
|
-
nolistA_nolistB_count,
|
46
|
-
all_elements_count
|
47
|
-
)
|
48
|
-
accumulated_prob += ref_prob
|
49
|
-
[listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
|
50
|
-
n += 1
|
51
|
-
prob = compute_hyper_prob(
|
52
|
-
listA_listB_count - n,
|
53
|
-
listA_nolistB_count + n,
|
54
|
-
nolistA_listB_count + n,
|
55
|
-
nolistA_nolistB_count - n,
|
56
|
-
all_elements_count
|
57
|
-
)
|
58
|
-
prob <= ref_prob ? accumulated_prob += prob : break
|
59
|
-
end
|
60
|
-
|
61
|
-
[listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
|
62
|
-
n += 1
|
63
|
-
prob = compute_hyper_prob(
|
64
|
-
listA_listB_count + n,
|
65
|
-
listA_nolistB_count - n,
|
66
|
-
nolistA_listB_count - n,
|
67
|
-
nolistA_nolistB_count + n,
|
68
|
-
all_elements_count
|
69
|
-
)
|
70
|
-
accumulated_prob += prob if prob <= ref_prob
|
71
|
-
end
|
72
|
-
|
73
|
-
return accumulated_prob
|
74
|
-
end
|
75
|
-
|
76
|
-
def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
|
77
|
-
accumulated_prob = 0
|
78
|
-
[listA_listB_count, nolistA_nolistB_count].min.times do |n|
|
79
|
-
accumulated_prob += compute_hyper_prob(
|
80
|
-
listA_listB_count - n,
|
81
|
-
listA_nolistB_count + n,
|
82
|
-
nolistA_listB_count + n,
|
83
|
-
nolistA_nolistB_count - n,
|
84
|
-
all_elements_count
|
85
|
-
)
|
86
|
-
end
|
87
|
-
return accumulated_prob
|
88
|
-
end
|
89
|
-
|
90
|
-
def compute_hyper_prob(a, b, c, d, n)
|
91
|
-
# https://en.wikipedia.org/wiki/Fisher%27s_exact_test
|
92
|
-
binomA = binom(a + b, a)
|
93
|
-
binomC = binom(c + d, c)
|
94
|
-
divisor = binom(n, a + c)
|
95
|
-
return (binomA * binomC).fdiv(divisor)
|
96
|
-
end
|
97
|
-
|
98
|
-
def binom(n,k)
|
99
|
-
if k > 0 && k < n
|
100
|
-
res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
|
101
|
-
else
|
102
|
-
res = 1
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
#to cmpute adjusted pvalues
|
107
|
-
#https://rosettacode.org/wiki/P-value_correction#Ruby
|
108
|
-
def get_benjaminiHochberg_pvalues(arr_pvalues)
|
109
|
-
n = arr_pvalues.length
|
110
|
-
arr_o = order(arr_pvalues, true)
|
111
|
-
arr_cummin_input = []
|
112
|
-
(0..(n - 1)).each do |i|
|
113
|
-
arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
|
114
|
-
end
|
115
|
-
arr_ro = order(arr_o)
|
116
|
-
arr_cummin = cummin(arr_cummin_input)
|
117
|
-
arr_pmin = pmin(arr_cummin)
|
118
|
-
return arr_pmin.values_at(*arr_ro)
|
119
|
-
end
|
120
|
-
|
121
|
-
def order(array, decreasing = false)
|
122
|
-
if decreasing == false
|
123
|
-
array.sort.map { |n| array.index(n) }
|
124
|
-
else
|
125
|
-
array.sort.map { |n| array.index(n) }.reverse
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def cummin(array)
|
130
|
-
cumulative_min = array.first
|
131
|
-
arr_cummin = []
|
132
|
-
array.each do |p|
|
133
|
-
cumulative_min = [p, cumulative_min].min
|
134
|
-
arr_cummin << cumulative_min
|
135
|
-
end
|
136
|
-
return arr_cummin
|
137
|
-
end
|
138
|
-
|
139
|
-
def pmin(array)
|
140
|
-
x = 1
|
141
|
-
pmin_array = []
|
142
|
-
array.each_index do |i|
|
143
|
-
pmin_array[i] = [array[i], x].min
|
144
|
-
abort if pmin_array[i] > 1
|
145
|
-
end
|
146
|
-
return pmin_array
|
147
|
-
end
|
148
|
-
|