semtools 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +9 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +17 -0
- data/bin/console +14 -0
- data/bin/onto2json.rb +45 -0
- data/bin/setup +8 -0
- data/bin/strsimnet.rb +134 -0
- data/lib/data/hp.obo +152267 -0
- data/lib/data/phenotype_annotation.tab +159504 -0
- data/lib/semtools.rb +8 -0
- data/lib/semtools/math_methods.rb +140 -0
- data/lib/semtools/ontology.rb +2041 -0
- data/lib/semtools/sim_handler.rb +113 -0
- data/lib/semtools/version.rb +3 -0
- data/semtools.gemspec +37 -0
- metadata +113 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
# @author Fernando Moreno Jabato <jabato(at)uma(dot)es>
|
2
|
+
# @description functionalities to handle similitude features
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
# Applies the WhiteSimilarity from the 'text' package over two given texts
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# Returns the similarity as a value between [0,1], or -1.0 when any of the
# given texts is nil, is not a String or is empty
def text_similitude(textA, textB)
  # Check special cases (nil is already rejected by the String check)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?
  # Calculate similitude
  require 'text' # loaded lazily, so the guard clauses above do not need the gem
  similarity = Text::WhiteSimilarity.new.similarity(textA.lstrip, textB.lstrip)
  # NOTE(review): WhiteSimilarity is understood to divide by the total number
  # of letter pairs of both texts, so it yields NaN when neither text contains
  # a two-letter pair (single-character or blank texts) -- confirm against the
  # 'text' version in use. NaN would break the documented [0,1] range and any
  # later comparison (e.g. Array#max), so it is replaced by an exact-match test.
  return similarity unless similarity.is_a?(Float) && similarity.nan?
  textA.strip.upcase == textB.strip.upcase ? 1.0 : 0.0
end
|
21
|
+
|
22
|
+
# Applies text_similitude over two given text sets and returns the similitudes
# of each element of the first set over the second set
# Param:
# +textsA+:: text set to be compared with textsB
# +textsB+:: text set to be compared with textsA
# Returns an array with, for each element of textsA, the maximum similarity
# obtained against all elements of textsB. Returns [-1.0] when any of the sets
# is nil, is not an Array or is empty.
# Raises the original error (after dumping the involved data to STDERR) if the
# similitudes of a fragment cannot be compared.
def ctext_AtoB(textsA, textsB)
  # Check special cases (nil is already rejected by the Array check)
  return [-1.0] unless textsA.is_a?(Array) && textsB.is_a?(Array)
  return [-1.0] if textsA.empty? || textsB.empty?
  # Calculate similitude: keep, for each fragment of A, its best match in B
  textsA.map do |fragA|
    frag_A_similitudes = textsB.map { |fragB| text_similitude(fragA, fragB) }
    begin
      frag_A_similitudes.max
    rescue => e
      # Dump the offending data and let the caller decide what to do. The error
      # is re-raised instead of exiting: a library must not terminate the host
      # process, and a bare Process.exit reports a successful (0) status.
      STDERR.puts frag_A_similitudes.inspect
      STDERR.puts textsA.inspect, textsB.inspect
      STDERR.puts e.message
      STDERR.puts e.backtrace
      raise
    end
  end
end
|
52
|
+
|
53
|
+
# Applies the text similarity over two given complex texts.
# Complex texts are split and compared one by one from A to B and from B to A
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# +splitChar+:: char used to split the complex texts into fragments
# +charsToRemove+:: set of chars to be removed from every fragment before
#                   comparing. Each char is taken literally, so regexp
#                   metacharacters such as ^ \ - ] are safe to use.
# Returns the mean of the A-to-B and B-to-A similarities, where each one is the
# mean of the values returned by ctext_AtoB. Returns -1.0 when any of the
# given texts is nil, is not a String or is empty.
def complex_text_similitude(textA, textB, splitChar = ";", charsToRemove = "")
  # Check special cases (nil is already rejected by the String check)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?
  # Split&Clean both sets
  fragments_a = textA.split(splitChar)
  fragments_b = textB.split(splitChar)
  # NOTE(review): fragments that are already empty after the split are only
  # dropped when charsToRemove is given -- confirm whether this is intended.
  unless charsToRemove.empty?
    # Built once, and from literal chars: interpolating the raw string into a
    # character class would let ^ \ - && change or break the pattern
    removable = Regexp.union(charsToRemove.chars)
    [fragments_a, fragments_b].each do |fragments|
      fragments.map! { |str| str.gsub(removable, '') }
      fragments.select! { |str| str.length > 0 }
    end
  end
  # Per each X element, compare against all Y elements
  similitudes_a = ctext_AtoB(fragments_a, fragments_b)
  similitudes_b = ctext_AtoB(fragments_b, fragments_a)
  # Average each direction
  mean_a = similitudes_a.inject { |sum, el| sum + el }.to_f / similitudes_a.size
  mean_b = similitudes_b.inject { |sum, el| sum + el }.to_f / similitudes_b.size
  # Obtain bidirectional similitude
  (mean_a + mean_b) / 2
end
|
86
|
+
|
87
|
+
# Applies complex_text_similitude over all complex texts stored into an array.
# Every item is compared against all the items placed after it
# Param:
# +items_array+:: text elements to be compared all against others (not modified)
# +splitChar+:: char used to split the complex texts into fragments
# +charsToRemove+:: char (or chars set) to be removed from texts to be compared
# +unique+:: boolean flag which indicates if repeated elements must be removed
# Returns a hash of hashes {item => {later_item => similarity}}. The last item
# never appears as a first-level key. Returns nil when items_array is nil, is
# not an Array or is empty.
def similitude_network(items_array, splitChar = ";", charsToRemove = "", unique = false)
  # Special cases (nil is already rejected by the Array check)
  return nil unless items_array.is_a?(Array)
  return nil if items_array.empty?
  # Work on a copy (without repeated elements if requested) so the array given
  # by the caller is neither deduplicated in place nor emptied
  items = unique ? items_array.uniq : items_array.dup
  # Define hash to be filled
  sims = {}
  # Per each item but the last => Calculate similitude against the following ones
  items[0...-1].each_with_index do |current, index|
    # A repeated item resets its own entry, so only its last occurrence is kept
    sims[current] = {}
    items.drop(index + 1).each do |item|
      sims[current][item] = complex_text_similitude(current, item, splitChar, charsToRemove)
    end
  end
  sims
end
|
data/semtools.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
# Make the gem's own lib/ directory loadable so the version constant can be read
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "semtools/version"

Gem::Specification.new do |spec|
  spec.name          = "semtools"
  spec.version       = Semtools::VERSION
  spec.authors       = ["seoanezonjic", "fmjabato"]
  spec.email         = ["seoanezonjic@hotmail.com", "fmjabato@gmail.com"]

  spec.summary       = %q{Gem to handle semantic based calculations in text and defined ontologies as GO or HPO.}
  spec.description   = %q{This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}
  spec.homepage      = "https://github.com/seoanezonjic/semtools"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every file tracked by git except the test suites
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "bin"
  # Everything under bin/ is exposed as a command of the installed gem, except
  # bin/console and bin/setup: presumably Bundler's development scaffolding
  # (verify), which would otherwise be installed as generic `console` and
  # `setup` commands on the user's PATH.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } - %w[console setup]
  spec.require_paths = ["lib"]

  # Runtime dependency: provides Text::WhiteSimilarity
  spec.add_dependency "text"

  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: semtools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- seoanezonjic
|
8
|
+
- fmjabato
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2021-02-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: text
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rspec
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
description: This gem allows to perform ontology based operations and calculation
|
57
|
+
of Semantic similarity and information coefficient using different implementations.
|
58
|
+
email:
|
59
|
+
- seoanezonjic@hotmail.com
|
60
|
+
- fmjabato@gmail.com
|
61
|
+
executables:
|
62
|
+
- console
|
63
|
+
- onto2json.rb
|
64
|
+
- setup
|
65
|
+
- strsimnet.rb
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- ".gitignore"
|
70
|
+
- ".rspec"
|
71
|
+
- ".travis.yml"
|
72
|
+
- CODE_OF_CONDUCT.md
|
73
|
+
- Gemfile
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.md
|
76
|
+
- Rakefile
|
77
|
+
- bin/console
|
78
|
+
- bin/onto2json.rb
|
79
|
+
- bin/setup
|
80
|
+
- bin/strsimnet.rb
|
81
|
+
- lib/data/hp.obo
|
82
|
+
- lib/data/phenotype_annotation.tab
|
83
|
+
- lib/semtools.rb
|
84
|
+
- lib/semtools/math_methods.rb
|
85
|
+
- lib/semtools/ontology.rb
|
86
|
+
- lib/semtools/sim_handler.rb
|
87
|
+
- lib/semtools/version.rb
|
88
|
+
- semtools.gemspec
|
89
|
+
homepage: https://github.com/seoanezonjic/semtools
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubygems_version: 3.2.3
|
109
|
+
signing_key:
|
110
|
+
specification_version: 4
|
111
|
+
summary: Gem to handle semantic based calculations in text and defined ontologies
|
112
|
+
as GO or HPO.
|
113
|
+
test_files: []
|