semtools 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +9 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +17 -0
- data/bin/console +14 -0
- data/bin/onto2json.rb +45 -0
- data/bin/setup +8 -0
- data/bin/strsimnet.rb +134 -0
- data/lib/data/hp.obo +152267 -0
- data/lib/data/phenotype_annotation.tab +159504 -0
- data/lib/semtools.rb +8 -0
- data/lib/semtools/math_methods.rb +140 -0
- data/lib/semtools/ontology.rb +2041 -0
- data/lib/semtools/sim_handler.rb +113 -0
- data/lib/semtools/version.rb +3 -0
- data/semtools.gemspec +37 -0
- metadata +113 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
# @author Fernando Moreno Jabato <jabato(at)uma(dot)es>
|
2
|
+
# @description functionalities to handle similitude features
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
# Computes the White similarity (from the 'text' gem) between two strings.
# Param:
# +textA+:: first text of the comparison
# +textB+:: second text of the comparison
# Returns the value given by Text::WhiteSimilarity#similarity for both texts
# (leading whitespace removed), or -1.0 when any argument is nil, is not a
# String or is empty
def text_similitude(textA, textB)
  # Invalid input (nil, non-String or empty) is reported with the -1.0 sentinel
  [textA, textB].each do |text|
    return -1.0 unless text.is_a?(String)
    return -1.0 if text.length <= 0
  end
  # The dependency is loaded lazily, only when a real comparison is needed
  require 'text'
  comparer = Text::WhiteSimilarity.new
  comparer.similarity(textA.lstrip, textB.lstrip)
end
|
21
|
+
|
22
|
+
# Scores every text of a set against all texts of another set with
# text_similitude and keeps, for each text of the first set, its best score.
# Param:
# +textsA+:: array of texts to be scored against textsB
# +textsB+:: array of texts used as reference
# Returns an array with one value per element of textsA: the maximum
# text_similitude obtained against the elements of textsB. Returns [-1.0]
# when any argument is nil, is not an Array or is empty.
# Raises the error produced while selecting the maximum (for instance when
# the scores are not comparable), after dumping the offending data to STDERR
def ctext_AtoB(textsA, textsB)
  # Check special cases
  return [-1.0] unless textsA.is_a?(Array) && textsB.is_a?(Array)
  return [-1.0] if textsA.empty? || textsB.empty?
  # Calculate similitude
  textsA.map do |fragA|
    frag_A_similitudes = textsB.map { |fragB| text_similitude(fragA, fragB) }
    begin
      frag_A_similitudes.max
    rescue => e
      # Keep the diagnostic dump, but re-raise instead of calling Process.exit:
      # a library must not terminate the host process (and Process.exit would
      # even report a success status), the caller decides how to recover
      STDERR.puts frag_A_similitudes.inspect
      STDERR.puts textsA.inspect, textsB.inspect
      STDERR.puts e.message
      STDERR.puts e.backtrace
      raise
    end
  end
end
|
52
|
+
|
53
|
+
# Applies the WhiteSimilarity from 'text' package over two given complex texts.
# Each complex text is split into fragments and the fragments are compared
# all against all, from A to B and from B to A (see ctext_AtoB).
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# +splitChar+:: separator used to split both complex texts into fragments
# +charsToRemove+:: characters to be deleted from every fragment before the
#                   comparison. They are taken literally: regular expression
#                   metacharacters such as '[', '^', '-' or '\' have no
#                   special meaning
# Returns the mean of both directional scores, where each directional score
# is the mean of the values returned by ctext_AtoB. Returns -1.0 when any text
# is nil, is not a String, is empty or has no fragment left after cleaning
def complex_text_similitude(textA, textB, splitChar = ";", charsToRemove = "")
  # Check special cases
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?
  # Split&Clean both sets
  # The character set is escaped so any character can be requested safely
  removal = charsToRemove.empty? ? nil : /[#{Regexp.escape(charsToRemove)}]/
  textA_splitted, textB_splitted = [textA, textB].map do |text|
    fragments = text.split(splitChar)
    fragments = fragments.map { |str| str.gsub(removal, '') } if removal
    # Empty fragments (for instance from "a;;b") are always discarded: they
    # would be scored with the -1.0 error sentinel and distort the mean
    fragments.reject(&:empty?)
  end
  return -1.0 if textA_splitted.empty? || textB_splitted.empty?
  # Per each X element, compare against all Y elements
  similitudesA = ctext_AtoB(textA_splitted, textB_splitted)
  similitudesB = ctext_AtoB(textB_splitted, textA_splitted)
  # Average each direction
  meanA = similitudesA.inject(:+).to_f / similitudesA.size
  meanB = similitudesB.inject(:+).to_f / similitudesB.size
  # Obtain bidirectional similitude
  (meanA + meanB) / 2
end
|
86
|
+
|
87
|
+
# Applies complex_text_similitude over all pairs of complex texts stored into
# an array. The given array is never modified.
# Param:
# +items_array+:: text elements to be compared all against others
# +splitChar+:: separator used to split the complex texts
# +charsToRemove+:: char (or chars set) to be removed from texts to be compared
# +unique+:: boolean flag which indicates if repeated elements must be removed
# Returns a nested hash where sims[a][b] stores the similarity of every pair
# (a, b) with a placed before b in the array; the last element never appears
# as a first-level key. Returns nil if items_array is nil, is not an Array or
# is empty. When unique is false and a text is repeated, the entry of its
# later occurrence replaces the earlier one
def similitude_network(items_array, splitChar = ";", charsToRemove = "", unique = false)
  # Special cases
  return nil unless items_array.is_a?(Array)
  return nil if items_array.empty?
  # Work on a copy (without repeated elements if requested) so the caller's
  # array is left untouched
  items = unique ? items_array.uniq : items_array.dup
  # Define hash to be filled
  sims = {}
  # Per each item into array => Calculate similitude against the following ones
  items.each_with_index do |current, index|
    following = items[(index + 1)..-1]
    break if following.empty?
    sims[current] = {}
    following.each do |item|
      sims[current][item] = complex_text_similitude(current, item, splitChar, charsToRemove)
    end
  end
  sims
end
|
data/semtools.gemspec
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
|
2
|
+
# Make lib/ loadable so the gem version can be read from the source tree
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "semtools/version"

# Gem specification for semtools: metadata, packaged files, executables and
# dependencies
Gem::Specification.new do |spec|
  spec.name          = "semtools"
  spec.version       = Semtools::VERSION
  spec.authors       = ["seoanezonjic", "fmjabato"]
  spec.email         = ["seoanezonjic@hotmail.com", "fmjabato@gmail.com"]

  spec.summary       = %q{Gem to handle semantic based calculations in text and defined ontologies as GO or HPO.}
  spec.description   = %q{This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}
  spec.homepage      = "https://github.com/seoanezonjic/semtools"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every file tracked by git except the test suites
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "bin"
  # bin/console and bin/setup are development helpers (presumably the ones
  # created by the Bundler gem template); publishing them as executables would
  # install generically named `console` and `setup` commands for every user
  # of the gem, so only the real tools are exposed
  dev_scripts        = %w[console setup]
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } - dev_scripts
  spec.require_paths = ["lib"]

  spec.add_dependency "text"

  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: semtools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- seoanezonjic
|
8
|
+
- fmjabato
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2021-02-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: text
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - ">="
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: rake
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
- !ruby/object:Gem::Dependency
|
43
|
+
name: rspec
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
type: :development
|
50
|
+
prerelease: false
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
description: This gem allows to perform ontology based operations and calculation
|
57
|
+
of Semantic similarity and information coefficient using different implementations.
|
58
|
+
email:
|
59
|
+
- seoanezonjic@hotmail.com
|
60
|
+
- fmjabato@gmail.com
|
61
|
+
executables:
|
62
|
+
- console
|
63
|
+
- onto2json.rb
|
64
|
+
- setup
|
65
|
+
- strsimnet.rb
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- ".gitignore"
|
70
|
+
- ".rspec"
|
71
|
+
- ".travis.yml"
|
72
|
+
- CODE_OF_CONDUCT.md
|
73
|
+
- Gemfile
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.md
|
76
|
+
- Rakefile
|
77
|
+
- bin/console
|
78
|
+
- bin/onto2json.rb
|
79
|
+
- bin/setup
|
80
|
+
- bin/strsimnet.rb
|
81
|
+
- lib/data/hp.obo
|
82
|
+
- lib/data/phenotype_annotation.tab
|
83
|
+
- lib/semtools.rb
|
84
|
+
- lib/semtools/math_methods.rb
|
85
|
+
- lib/semtools/ontology.rb
|
86
|
+
- lib/semtools/sim_handler.rb
|
87
|
+
- lib/semtools/version.rb
|
88
|
+
- semtools.gemspec
|
89
|
+
homepage: https://github.com/seoanezonjic/semtools
|
90
|
+
licenses:
|
91
|
+
- MIT
|
92
|
+
metadata: {}
|
93
|
+
post_install_message:
|
94
|
+
rdoc_options: []
|
95
|
+
require_paths:
|
96
|
+
- lib
|
97
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - ">="
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0'
|
107
|
+
requirements: []
|
108
|
+
rubygems_version: 3.2.3
|
109
|
+
signing_key:
|
110
|
+
specification_version: 4
|
111
|
+
summary: Gem to handle semantic based calculations in text and defined ontologies
|
112
|
+
as GO or HPO.
|
113
|
+
test_files: []
|