semtools 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,113 @@
1
+ # @author Fernando Moreno Jabato <jabato(at)uma(dot)es>
2
+ # @description functionalities to handle similitude features
3
+
4
+
5
+
6
# Computes the WhiteSimilarity (from the 'text' package) between two given texts.
# Leading whitespace of both texts is ignored.
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# Returns the similarity between [0,1], or -1.0 when any argument is nil,
# is not a String or is empty
def text_similitude(textA, textB)
  # Invalid input is reported with the -1.0 sentinel instead of raising
  # (nil is covered by the String check)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?

  # Loaded lazily so the 'text' gem is only needed when a comparison is actually run
  require 'text'
  cleanA = textA.lstrip
  cleanB = textB.lstrip
  score = Text::WhiteSimilarity.new.similarity(cleanA, cleanB)

  # NOTE(review): WhiteSimilarity presumably divides by the number of letter pairs,
  # so texts without any pair (e.g. single characters) can yield NaN - confirm.
  # NaN is outside the documented range and cannot be ranked or averaged, so fall
  # back to an exact, case-insensitive comparison.
  if score.is_a?(Float) && score.nan?
    return cleanA.upcase == cleanB.upcase ? 1.0 : 0.0
  end

  score
end
21
+
22
# Applies text_similitude between every element of a first text set and every
# element of a second text set, keeping the best match of each first-set element.
# Param:
# +textsA+:: text set to be compared with textsB
# +textsB+:: text set to be compared with textsA
# Returns an array with, for each element of textsA, its maximum similarity against
# all elements of textsB. Returns [-1.0] when any argument is nil, is not an Array
# or is empty
def ctext_AtoB(textsA, textsB)
  # Invalid input is reported with the [-1.0] sentinel (nil is covered by the Array check)
  return [-1.0] unless textsA.is_a?(Array) && textsB.is_a?(Array)
  return [-1.0] if textsA.empty? || textsB.empty?

  textsA.map do |fragA|
    scores = textsB.map do |fragB|
      score = text_similitude(fragA, fragB)
      # NaN cannot be ordered by max; count it as "no similarity" so a single
      # degenerate comparison never aborts the whole calculation
      score.is_a?(Float) && score.nan? ? 0.0 : score
    end
    scores.max
  end
end
52
+
53
# Applies the WhiteSimilarity from 'text' package over two given complex texts.
# Complex texts are split into fragments and compared one by one from A to B and from B to A.
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# +splitChar+:: char used to split both complex texts into fragments
# +charsToRemove+:: char (or chars set) to be removed from every fragment before comparing.
#                   It is interpolated into a regexp character class, so characters such
#                   as ], ^, - and \ keep their regexp meaning. nil or "" removes nothing
# Returns the mean of the A-to-B and B-to-A average best-match similarities, or -1.0
# when any text is nil, is not a String, is empty or has no fragment left after cleaning
def complex_text_similitude(textA, textB, splitChar = ";", charsToRemove = "")
  # Invalid input is reported with the -1.0 sentinel (nil is covered by the String check)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?

  # Build the removal pattern once instead of once per fragment
  removal = charsToRemove.nil? || charsToRemove.empty? ? nil : /[#{charsToRemove}]/
  to_fragments = lambda do |text|
    fragments = text.split(splitChar)
    fragments = fragments.map { |frag| frag.gsub(removal, '') } unless removal.nil?
    # Always drop empty fragments (e.g. from "a;;b"): they would otherwise be scored
    # with the -1.0 sentinel and drag the averages below
    fragments.reject(&:empty?)
  end
  fragsA = to_fragments.call(textA)
  fragsB = to_fragments.call(textB)
  return -1.0 if fragsA.empty? || fragsB.empty?

  # For each fragment of one side, best similarity against all fragments of the other side
  best_AtoB = ctext_AtoB(fragsA, fragsB)
  best_BtoA = ctext_AtoB(fragsB, fragsA)
  # Average each direction
  mean_AtoB = best_AtoB.inject(:+).to_f / best_AtoB.size
  mean_BtoA = best_BtoA.inject(:+).to_f / best_BtoA.size
  # Bidirectional similitude
  (mean_AtoB + mean_BtoA) / 2
end
86
+
87
# Applies complex_text_similitude to every pair of complex texts stored in an array.
# Each element is compared only against the elements placed after it, so every
# unordered pair is calculated once.
# Param:
# +items_array+:: text elements to be compared all against the others (it is not modified)
# +splitChar+:: char used to split the complex texts into fragments
# +charsToRemove+:: char (or chars set) to be removed from texts to be compared
# +unique+:: boolean flag which indicates if repeated elements must be removed before comparing
# Returns a hash of hashes {item => {later_item => similarity}}. The last element has no
# entry of its own. Returns nil when items_array is nil, is not an Array or is empty
def similitude_network(items_array, splitChar = ";", charsToRemove = "", unique = false)
  # Special cases (nil is covered by the Array check)
  return nil unless items_array.is_a?(Array)
  return nil if items_array.empty?

  # Work on a separate list so the caller's array is neither deduplicated nor emptied
  items = unique ? items_array.uniq : items_array
  sims = {}
  items[0...-1].each_with_index do |current, idx|
    # A repeated element (unique == false) restarts its entry, so only the
    # comparisons of its last occurrence are kept
    sims[current] = {}
    items[(idx + 1)..-1].each do |item|
      sims[current][item] = complex_text_similitude(current, item, splitChar, charsToRemove)
    end
  end
  sims
end
@@ -0,0 +1,3 @@
1
# Namespace of the semtools gem
module Semtools
  # Release number of the gem; frozen so the shared string cannot be mutated at runtime
  VERSION = "0.1.1".freeze
end
data/semtools.gemspec ADDED
@@ -0,0 +1,37 @@
1
+
2
# Put the gem's lib/ directory on the load path so the version constant can be
# read straight from the source tree
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "semtools/version"

Gem::Specification.new do |spec|
  # Identity and contact data
  spec.name        = "semtools"
  spec.version     = Semtools::VERSION
  spec.authors     = ["seoanezonjic", "fmjabato"]
  spec.email       = ["seoanezonjic@hotmail.com", "fmjabato@gmail.com"]
  spec.homepage    = "https://github.com/seoanezonjic/semtools"
  spec.license     = "MIT"

  spec.summary     = %q{Gem to handle semantic based calculations in text and defined ontologies as GO or HPO.}
  spec.description = %q{This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every file tracked by git except the ones under test/, spec/ or features/
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Every packaged file under bin/ is exposed as an executable
  spec.bindir        = "bin"
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependency: provides the text similarity implementation
  spec.add_dependency "text"

  # Development-only dependencies
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: semtools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - seoanezonjic
8
+ - fmjabato
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2021-02-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: text
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: This gem allows to perform ontology based operations and calculation
57
+ of Semantic similarity and information coefficient using different implementations.
58
+ email:
59
+ - seoanezonjic@hotmail.com
60
+ - fmjabato@gmail.com
61
+ executables:
62
+ - console
63
+ - onto2json.rb
64
+ - setup
65
+ - strsimnet.rb
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - ".gitignore"
70
+ - ".rspec"
71
+ - ".travis.yml"
72
+ - CODE_OF_CONDUCT.md
73
+ - Gemfile
74
+ - LICENSE.txt
75
+ - README.md
76
+ - Rakefile
77
+ - bin/console
78
+ - bin/onto2json.rb
79
+ - bin/setup
80
+ - bin/strsimnet.rb
81
+ - lib/data/hp.obo
82
+ - lib/data/phenotype_annotation.tab
83
+ - lib/semtools.rb
84
+ - lib/semtools/math_methods.rb
85
+ - lib/semtools/ontology.rb
86
+ - lib/semtools/sim_handler.rb
87
+ - lib/semtools/version.rb
88
+ - semtools.gemspec
89
+ homepage: https://github.com/seoanezonjic/semtools
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubygems_version: 3.2.3
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Gem to handle semantic based calculations in text and defined ontologies
112
+ as GO or HPO.
113
+ test_files: []