semtools 0.1.6 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: semtools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - seoanezonjic
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-05-27 00:00:00.000000000 Z
12
+ date: 2025-09-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: text
@@ -25,6 +25,34 @@ dependencies:
25
25
  - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: down
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: expcalc
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
28
56
  - !ruby/object:Gem::Dependency
29
57
  name: rake
30
58
  requirement: !ruby/object:Gem::Requirement
@@ -75,6 +103,7 @@ email:
75
103
  executables:
76
104
  - console
77
105
  - onto2json.rb
106
+ - semtools.rb
78
107
  - setup
79
108
  - strsimnet.rb
80
109
  extensions: []
@@ -90,13 +119,17 @@ files:
90
119
  - Rakefile
91
120
  - bin/console
92
121
  - bin/onto2json.rb
122
+ - bin/semtools.rb
93
123
  - bin/setup
94
124
  - bin/strsimnet.rb
125
+ - external_data/ontologies.txt
95
126
  - lib/data/hp.obo
96
127
  - lib/data/phenotype_annotation.tab
97
128
  - lib/semtools.rb
98
- - lib/semtools/math_methods.rb
99
129
  - lib/semtools/ontology.rb
130
+ - lib/semtools/parsers/file_parser.rb
131
+ - lib/semtools/parsers/json_parser.rb
132
+ - lib/semtools/parsers/oboparser.rb
100
133
  - lib/semtools/sim_handler.rb
101
134
  - lib/semtools/version.rb
102
135
  - semtools.gemspec
@@ -119,9 +152,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
152
  - !ruby/object:Gem::Version
120
153
  version: '0'
121
154
  requirements: []
122
- rubygems_version: 3.2.3
155
+ rubygems_version: 3.3.7
123
156
  signing_key:
124
157
  specification_version: 4
125
- summary: Gem to handle semantic based calculations in text and defined ontologies
126
- as GO or HPO.
158
+ summary: 'DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/py_semtools.
159
+ Gem to handle semantic based calculations in text and defined ontologies as GO or
160
+ HPO.'
127
161
  test_files: []
@@ -1,148 +0,0 @@
1
- # TODO: Make a pull request to https://rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
2
- # To compute Fisher's exact test
3
- #Fisher => http://www.biostathandbook.com/fishers.html
4
# Builds the 2x2 contingency table describing the overlap of two lists and
# runs Fisher's exact test on it.
#
# listA, listB       - Arrays of elements to compare.
# all_elements_count - Total number of elements in the universe.
# tail               - 'two_sided' (default) or 'less'. Any other value makes
#                      the method return nil (legacy behavior, kept for callers).
# weigths            - Optional Hash mapping element => numeric weight. When given,
#                      each cell is the weight total of its elements, rounded up.
# partial_weigths    - Only used with weigths. When true, the "in neither list"
#                      cell is still a plain element count and the total is
#                      recomputed as the sum of the four cells. When false, that
#                      cell and the total are taken from the weights Hash itself.
#
# Returns whatever get_two_tail / get_less_tail return for the table, or nil
# when tail is not one of the supported values.
def get_fisher_exact_test(listA, listB, all_elements_count, tail = 'two_sided', weigths = nil, partial_weigths = true)
  in_both = listA & listB
  only_in_a = listA - listB
  only_in_b = listB - listA
  in_either = listA | listB

  if weigths.nil?
    listA_listB_count = in_both.length
    listA_nolistB_count = only_in_a.length
    nolistA_listB_count = only_in_b.length
    nolistA_nolistB_count = all_elements_count - in_either.length
  else
    # Weighted Fisher exact test as proposed in "Improved scoring of functional groups
    # from gene expression data by decorrelating GO graph structure"
    # https://academic.oup.com/bioinformatics/article/22/13/1600/193669
    weighted_count = ->(items) { items.sum { |item| weigths[item] }.ceil }

    listA_listB_count = weighted_count.call(in_both)
    listA_nolistB_count = weighted_count.call(only_in_a)
    nolistA_listB_count = weighted_count.call(only_in_b)

    if partial_weigths
      nolistA_nolistB_count = all_elements_count - in_either.length
      # The total must match the (partially weighted) cells, so it is rebuilt from them.
      all_elements_count = nolistA_nolistB_count + listA_listB_count + listA_nolistB_count + nolistA_listB_count
    else
      nolistA_nolistB_count = weighted_count.call(weigths.keys - in_either)
      all_elements_count = weigths.values.sum.ceil
    end
  end

  table = [listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count]
  case tail
  when 'two_sided' then get_two_tail(*table)
  when 'less' then get_less_tail(*table)
  end
end
37
-
38
# Two-sided Fisher's exact test p-value for the 2x2 table
#   [[listA_listB_count,   listA_nolistB_count],
#    [nolistA_listB_count, nolistA_nolistB_count]]
# https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
#
# Every table sharing the observed margins is visited, and the probabilities
# that do not exceed the observed table's probability are added up.
#
# The scan must not stop at the first more-probable table: when the observed
# table lies in the upper tail, the neighbouring tables toward the mode are
# more probable, yet the tables beyond the mode still belong to the p-value.
#
# Returns the accumulated probability (observed table included).
def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
  ref_prob = compute_hyper_prob(
    listA_listB_count,
    listA_nolistB_count,
    nolistA_listB_count,
    nolistA_nolistB_count,
    all_elements_count
  )

  # A shift moves elements along the main diagonal while keeping the margins fixed:
  # negative shifts shrink the overlap cell, positive shifts grow it.
  max_decrease = [listA_listB_count, nolistA_nolistB_count].min
  max_increase = [listA_nolistB_count, nolistA_listB_count].min

  other_tables = (-max_decrease..max_increase).reject(&:zero?).map do |shift|
    compute_hyper_prob(
      listA_listB_count + shift,
      listA_nolistB_count - shift,
      nolistA_listB_count - shift,
      nolistA_nolistB_count + shift,
      all_elements_count
    )
  end

  ref_prob + other_tables.select { |prob| prob <= ref_prob }.sum
end
75
-
76
# One-sided ("less") Fisher's exact test p-value: the probability of the observed
# table plus every table with the same margins and a smaller overlap cell.
#
# The overlap cell can shrink by 0..min(listA_listB_count, nolistA_nolistB_count),
# both ends included. The inclusive range matters: dropping the last shift leaves
# out the most extreme table and yields 0 when the overlap is already 0.
#
# Returns the accumulated probability.
def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
  max_decrease = [listA_listB_count, nolistA_nolistB_count].min
  (0..max_decrease).sum do |shift|
    compute_hyper_prob(
      listA_listB_count - shift,
      listA_nolistB_count + shift,
      nolistA_listB_count + shift,
      nolistA_nolistB_count - shift,
      all_elements_count
    )
  end
end
89
-
90
# Probability of the single 2x2 table [[a, b], [c, d]] with grand total n:
#   C(a + b, a) * C(c + d, c) / C(n, a + c)
# https://en.wikipedia.org/wiki/Fisher%27s_exact_test
#
# Returns a Float.
def compute_hyper_prob(a, b, c, d, n)
  first_row_ways = binom(a + b, a)
  second_row_ways = binom(c + d, c)
  column_ways = binom(n, a + c)
  (first_row_ways * second_row_ways).fdiv(column_ways)
end
97
-
98
# Binomial coefficient C(n, k) for Integer arguments.
#
# Returns 1 whenever k is not strictly between 0 and n. That covers k == 0 and
# k == n, and also out-of-range k, which is kept as-is for existing callers.
#
# Uses the multiplicative formula on the smaller of k and n - k, dividing at
# every step. Each intermediate value is itself a binomial coefficient, so the
# integer division is exact and no oversized products are built.
def binom(n, k)
  return 1 unless k > 0 && k < n

  k = [k, n - k].min
  (1..k).inject(1) { |acc, i| acc * (n - k + i) / i }
end
105
-
106
- #to cmpute adjusted pvalues
107
- #https://rosettacode.org/wiki/P-value_correction#Ruby
108
# Benjamini-Hochberg adjustment of a list of p-values.
# https://rosettacode.org/wiki/P-value_correction#Ruby
#
# arr_pvalues - Array of numeric p-values.
#
# P-values are visited from largest to smallest. Each one is scaled by
# n / (number of p-values less than or equal to it in that ordering), a running
# minimum capped at 1 is kept, and the result is written back at the p-value's
# original position.
#
# The ordering breaks ties by original index, so it is always a true
# permutation. Looking positions up by value would map tied p-values to the
# same slot and misplace the results.
#
# Returns an Array of adjusted p-values in the same order as the input.
def get_benjaminiHochberg_pvalues(arr_pvalues)
  n = arr_pvalues.length
  largest_first = arr_pvalues.each_index.sort_by { |idx| [arr_pvalues[idx], idx] }.reverse

  adjusted = Array.new(n)
  running_min = 1 # adjusted p-values never exceed 1
  largest_first.each_with_index do |original_idx, position|
    scaled = (n / (n - position).to_f) * arr_pvalues[original_idx]
    running_min = [scaled, running_min].min
    adjusted[original_idx] = running_min
  end
  adjusted
end
120
-
121
- def order(array, decreasing = false)
122
- if decreasing == false
123
- array.sort.map { |n| array.index(n) }
124
- else
125
- array.sort.map { |n| array.index(n) }.reverse
126
- end
127
- end
128
-
129
# Cumulative minimum: element i of the result is the smallest value found in
# array[0..i].
#
# Returns a new Array of the same length (empty for an empty input).
def cummin(array)
  running_min = array.first
  array.map { |value| running_min = [value, running_min].min }
end
138
-
139
# Caps every value of array at 1 (element-wise minimum against 1).
#
# Returns a new Array of the same length.
def pmin(array)
  # No further range check is needed: a value capped by [value, 1].min cannot exceed 1.
  array.map { |value| [value, 1].min }
end
148
-