semtools 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ # @author Fernando Moreno Jabato <jabato(at)uma(dot)es>
2
+ # @description functionalities to handle similitude features
3
+
4
+
5
+
6
# Applies the WhiteSimilarity from the 'text' package to two given texts.
# Leading whitespace is removed from both texts before they are compared.
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# Returns the value computed by Text::WhiteSimilarity (documented by the original
# author as a percentage in [0,1]), or -1.0 when either argument is nil, is not a
# String, or is empty once its leading whitespace has been removed
def text_similitude(textA, textB)
  # Check special cases (nil is not a String, so it is rejected here as well)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)

  # Trim BEFORE testing for emptiness: otherwise a whitespace-only text passes the
  # guard and is then handed to the comparison as an empty string
  trimmed_a = textA.lstrip
  trimmed_b = textB.lstrip
  return -1.0 if trimmed_a.empty? || trimmed_b.empty?

  # Calculate similitude ('text' is loaded lazily, only when a comparison is needed)
  require 'text'
  Text::WhiteSimilarity.new.similarity(trimmed_a, trimmed_b)
end
21
+
22
# Applies text_similitude to every pair formed by one element of the first text set
# and one element of the second text set.
# Param:
# +textsA+:: text set to be compared with textsB
# +textsB+:: text set to be compared with textsA
# Returns an Array with, for each element of textsA, the maximum similarity obtained
# against all elements of textsB. Returns [-1.0] when either argument is nil, is not
# an Array, or is empty.
# Raises whatever error the maximum computation raises (for instance when the
# collected values cannot be compared with each other), after dumping the offending
# data to STDERR
def ctext_AtoB(textsA, textsB)
  # Check special cases (nil is not an Array, so it is rejected here as well)
  return [-1.0] unless textsA.is_a?(Array) && textsB.is_a?(Array)
  return [-1.0] if textsA.empty? || textsB.empty?

  # Calculate similitude
  textsA.map do |fragA|
    frag_A_similitudes = textsB.map { |fragB| text_similitude(fragA, fragB) }
    begin
      frag_A_similitudes.max
    rescue => e
      # Keep the diagnostic dump, but re-raise instead of terminating the whole
      # process: a library method must not exit its host program, and a bare
      # Process.exit would additionally report success to the shell
      STDERR.puts frag_A_similitudes.inspect
      STDERR.puts textsA.inspect, textsB.inspect
      STDERR.puts e.message
      STDERR.puts e.backtrace
      raise
    end
  end
end
52
+
53
# Applies the WhiteSimilarity from the 'text' package to two given complex texts.
# Complex texts are split into fragments and compared one by one from A to B and
# from B to A.
# Param:
# +textA+:: text to be compared with textB
# +textB+:: text to be compared with textA
# +splitChar+:: char used to split both complex texts into fragments
# +charsToRemove+:: char (or set of chars) to be removed from every fragment before
#                   comparing. Every char is taken literally
# Returns the mean of the A-to-B and B-to-A average similarities, or -1.0 when either
# text is nil, is not a String, is empty, or has no usable fragment left after
# splitting and cleaning
def complex_text_similitude(textA, textB, splitChar = ";", charsToRemove = "")
  # Check special cases (nil is not a String, so it is rejected here as well)
  return -1.0 unless textA.is_a?(String) && textB.is_a?(String)
  return -1.0 if textA.empty? || textB.empty?

  # Split&Clean both sets
  # Regexp.union escapes each char, so '^', ']', '-' or '\' are removed literally
  # instead of corrupting a hand-built character class
  unwanted = charsToRemove.to_s
  remover = unwanted.empty? ? nil : Regexp.union(unwanted.chars)
  fragments_a, fragments_b = [textA, textB].map do |text|
    fragments = text.split(splitChar)
    fragments = fragments.map { |fragment| fragment.gsub(remover, '') } if remover
    # Always drop fragments without content: they can only score the -1.0 error
    # sentinel (or a meaningless value), which would distort the averages below
    fragments.reject { |fragment| fragment.lstrip.empty? }
  end
  return -1.0 if fragments_a.empty? || fragments_b.empty?

  # Per each X element, compare against all Y elements
  similitudes_a = ctext_AtoB(fragments_a, fragments_b)
  similitudes_b = ctext_AtoB(fragments_b, fragments_a)

  # Average each direction, then obtain the bidirectional similitude
  mean_a = similitudes_a.inject(:+).to_f / similitudes_a.size
  mean_b = similitudes_b.inject(:+).to_f / similitudes_b.size
  (mean_a + mean_b) / 2
end
86
+
87
# Applies complex_text_similitude to every pair of complex texts stored in an array.
# Each item is compared against all the items that follow it in the array.
# Param:
# +items_array+:: text elements to be compared all against the others (not modified)
# +splitChar+:: char used to split each complex text into fragments
# +charsToRemove+:: char (or set of chars) to be removed from the texts to be compared
# +unique+:: boolean flag which indicates if repeated elements must be removed
# Returns a Hash of Hashes such that result[itemA][itemB] is the similarity between
# itemA and a later itemB, or nil when items_array is nil, is not an Array, or is
# empty. A single-element array yields an empty Hash.
def similitude_network(items_array, splitChar = ";", charsToRemove = "", unique = false)
  # Special cases (nil is not an Array, so it is rejected here as well)
  return nil unless items_array.is_a?(Array)
  return nil if items_array.empty?

  # Work on a private copy: the caller's array must be neither deduplicated
  # nor emptied as a side effect of building the network
  items = unique ? items_array.uniq : items_array.dup

  # Per each item into array => Calculate similitude against the following ones
  sims = {}
  items[0...-1].each_with_index do |current, index|
    # ||= keeps the pairs already stored when the same text appears more than once
    sims[current] ||= {}
    items[(index + 1)..-1].each do |item|
      sims[current][item] = complex_text_similitude(current, item, splitChar, charsToRemove)
    end
  end
  sims
end
@@ -0,0 +1,3 @@
1
# Namespace of the semtools gem.
module Semtools
  # Version of the gem; frozen so the constant cannot be modified in place.
  VERSION = "0.1.1".freeze
end
data/semtools.gemspec ADDED
@@ -0,0 +1,37 @@
1
+
2
# Gem specification for semtools.
# Makes the gem's own lib directory loadable so the version constant can be read.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "semtools/version"

Gem::Specification.new do |spec|
  spec.name = "semtools"
  spec.version = Semtools::VERSION
  spec.authors = ["seoanezonjic", "fmjabato"]
  spec.email = ["seoanezonjic@hotmail.com", "fmjabato@gmail.com"]

  spec.summary = %q{Gem to handle semantic based calculations in text and defined ontologies as GO or HPO.}
  spec.description = %q{This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}
  spec.homepage = "https://github.com/seoanezonjic/semtools"
  spec.license = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against " \
  #     "public gem pushes."
  # end

  # Package every file tracked by git except the test suites.
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir = "bin"
  # Every file under bin/ would otherwise be installed as a command on the user's PATH.
  # NOTE(review): bin/console and bin/setup look like the Bundler-generated development
  # scripts, so they are excluded here -- confirm they are not meant to be shipped.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } - %w[console setup]
  spec.require_paths = ["lib"]

  spec.add_dependency "text"

  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
end
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: semtools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - seoanezonjic
8
+ - fmjabato
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2021-02-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: text
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rspec
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ description: This gem allows to perform ontology based operations and calculation
57
+ of Semantic similarity and information coefficient using different implementations.
58
+ email:
59
+ - seoanezonjic@hotmail.com
60
+ - fmjabato@gmail.com
61
+ executables:
62
+ - console
63
+ - onto2json.rb
64
+ - setup
65
+ - strsimnet.rb
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - ".gitignore"
70
+ - ".rspec"
71
+ - ".travis.yml"
72
+ - CODE_OF_CONDUCT.md
73
+ - Gemfile
74
+ - LICENSE.txt
75
+ - README.md
76
+ - Rakefile
77
+ - bin/console
78
+ - bin/onto2json.rb
79
+ - bin/setup
80
+ - bin/strsimnet.rb
81
+ - lib/data/hp.obo
82
+ - lib/data/phenotype_annotation.tab
83
+ - lib/semtools.rb
84
+ - lib/semtools/math_methods.rb
85
+ - lib/semtools/ontology.rb
86
+ - lib/semtools/sim_handler.rb
87
+ - lib/semtools/version.rb
88
+ - semtools.gemspec
89
+ homepage: https://github.com/seoanezonjic/semtools
90
+ licenses:
91
+ - MIT
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubygems_version: 3.2.3
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Gem to handle semantic based calculations in text and defined ontologies
112
+ as GO or HPO.
113
+ test_files: []