semtools 0.1.2 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,2041 +1,2559 @@
1
+ require 'expcalc'
1
2
  require 'json'
3
+ require 'colorize'
2
4
 
3
5
 
4
6
  class Ontology
5
- #########################################################
6
- # AUTHOR NOTES
7
- #########################################################
8
-
9
- # 1 - Store @profiles as @stanzas[:instances]
10
- # 2 - Items values (not keys) are imported as strings, not as symbols (maybe add a flag which indicates if values are, or not, symbols?)
11
-
12
-
13
- #############################################
14
- # FIELDS
15
- #############################################
16
- # Handled class variables
17
- # => @@basic_tags :: hash with main OBO structure tags
18
- # => @@allowed_calcs :: hash with allowed ICs and similaritites calcs
19
- # => @@symbolizable_ids :: tags which can be symbolized
20
- # => @@tags_with_trailing_modifiers :: tags which can include extra info after specific text modifiers
21
- #
22
- # Handled object variables
23
- # => @header :: file header (if available)
24
- # => @stanzas :: OBO stanzas {:terms,:typedefs,:instances}
25
- # => @ancestors_index :: hash of ancestors per each term handled with any structure relationships
26
- # => @descendants_index :: hash of descendants per each term handled with any structure relationships
27
- # => @alternatives_index :: hash of alternative IDs (includes alt_id and obsoletes)
28
- # => @obsoletes_index :: hash of obsoletes and their new ids
29
- # => @special_tags :: set of special tags to be expanded (:is_a, :obsolete, :alt_id)
30
- # => @structureType :: type of ontology structure depending on ancestors relationship. Allowed: {atomic, sparse, circular, hierarchical}
31
- # => @ics :: already calculated ICs for handled terms and IC types
32
- # => @meta :: meta_information about handled terms like [ancestors, descendants, struct_freq, observed_freq]
33
- # => @max_freqs :: maximum freqs found for structural and observed freqs
34
- # => @dicts :: bidirectional dictionaries with three levels <key|value>: 1º) <tag|hash2>; 2º) <(:byTerm/:byValue)|hash3>; 3º) dictionary <k|v>
35
- # => @profiles :: set of terms assigned to an ID
36
- # => @profilesDict :: set of profile IDs assigned to a term
37
- # => @items :: hash with items relations to terms
38
- # => @removable_terms :: array of terms to not be considered
39
- # => @term_paths :: metainfo about parental paths of each term
40
-
41
- @@basic_tags = {ancestors: [:is_a], obsolete: :is_obsolete, alternative: [:alt_id,:replaced_by,:consider]}
42
- @@allowed_calcs = {ics: [:resnik, :resnik_observed, :seco, :zhou, :sanchez], sims: [:resnik, :lin, :jiang_conrath]} # hash with allowed IC and similarity calcs
43
- @@symbolizable_ids = [:id, :alt_id, :replaced_by, :consider]
44
- @@tags_with_trailing_modifiers = [:is_a, :union_of, :disjoint_from, :relationship, :subsetdef, :synonymtypedef, :property_value]
45
- @@multivalue_tags = [:alt_id, :is_a, :subset, :synonym, :xref, :intersection_of, :union_of, :disjoint_from, :relationship, :replaced_by, :consider, :subsetdef, :synonymtypedef, :property_value, :remark]
46
- @@symbolizable_ids.concat(@@tags_with_trailing_modifiers)
47
-
48
- #############################################
49
- # CONSTRUCTOR
50
- #############################################
51
-
52
- # Instantiate a OBO_Handler object
53
- # ===== Parameters
54
- # +file+:: with info to be loaded (.obo ; .json)
55
- # +load_file+:: activate load process automatically (only for .obo)
56
- # +removable_terms+:: terms to be removed from calcs
57
- def initialize(file: nil, load_file: false, removable_terms: [])
58
- # Initialize object variables
59
- @header = nil
60
- @stanzas = {terms: {}, typedefs: {}, instances: {}}
61
- @ancestors_index = {}
62
- @descendants_index = {}
63
- @alternatives_index = {}
64
- @obsoletes_index = {}
65
- @structureType = nil
66
- @ics = Hash[@@allowed_calcs[:ics].map{|ictype| [ictype, {}]}]
67
- @meta = {}
68
- @special_tags = @@basic_tags.clone
69
- @max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
70
- @dicts = {}
71
- @profiles = {}
72
- @profilesDict = {}
73
- @items = {}
74
- @removable_terms = []
75
- @term_paths = {}
76
- # Load if proceeds
77
- add_removable_terms(removable_terms) if !removable_terms.empty?
78
- load(file) if load_file
79
- end
80
-
81
-
82
- #############################################
83
- # CLASS METHODS
84
- #############################################
85
-
86
- # Expand a (starting) term using a specific tag and return all extended terms into an array and
87
- # the relationship structure observed (hierarchical or circular). If circular structure is
88
- # found, extended array will be a unique vector without starting term (no loops).
89
- # +Note+: we strongly recommend using the get_related_ids_by_tag function instead of this one (directly)
90
- # ===== Parameters
91
- # +start+:: term where start to expand
92
- # +terms+:: set to be used to expand
93
- # +target_tag+:: tag used to expand
94
- # +expansion+:: already expanded info
95
- # +split_info_char+:: special regex used to split info (if it is necessary)
96
- # +split_info_indx+:: special index to take split info (if it is necessary)
97
- # +alt_ids+:: set of alternative IDs
98
- # ===== Returns
99
- # A vector with the observed structure (string) and the array with extended terms.
100
- def self.get_related_ids(start_id, terms, target_tag, related_ids = {}, alt_ids = {})
101
- # Take start_id term available info and already accumulated info
102
- current_associations = related_ids[start_id]
103
- current_associations = [] if current_associations.nil?
104
- return [:no_term,[]] if terms[start_id].nil?
105
- id_relations = terms[start_id][target_tag]
106
- return [:source,[]] if id_relations.nil?
107
-
108
- # Prepare auxiliar variables
109
- struct = :hierarchical
110
-
111
- # Study direct extensions
112
- id_relations = id_relations.clone
113
- while id_relations.length > 0
114
- id = id_relations.shift
115
- id = alt_ids[id].first if alt_ids.include?(id) # NOTE: if you want to persist current ID instead source ID, re-implement this
116
-
117
- # Handle
118
- if current_associations.include?(id) # Check if already have been included into this expansion
119
- struct = :circular
120
- else
121
- current_associations << id
122
- if related_ids.include?(id) # Check if current already has been expanded
123
- current_associations = current_associations | related_ids[id]
124
- if current_associations.include?(start_id) # Check circular case
125
- struct = :circular
126
- [id, start_id].each{|repeated| current_associations.delete(repeated)}
127
- end
128
- else # Expand
129
- related_ids[start_id] = current_associations
130
- structExp, current_related_ids = self.get_related_ids(id, terms, target_tag, related_ids, alt_ids) # Expand current
131
- current_associations = current_associations | current_related_ids
132
- struct = :circular if structExp == :circular # Check struct
133
- if current_associations.include?(start_id) # Check circular case
134
- struct = :circular
135
- current_associations.delete(start_id)
136
- end
137
- end
138
- end
139
- end
140
- related_ids[start_id] = current_associations
141
-
142
- return struct, current_associations
143
- end
144
-
145
-
146
- # Expand terms using a specific tag and return all extended terms into an array and
147
- # the relationship structure observed (hierarchical or circular). If circular structure is
148
- # found, extended array will be a unique vector without starting term (no loops)
149
- # ===== Parameters
150
- # +terms+:: set to be used to expand
151
- # +target_tag+:: tag used to expand
152
- # +split_info_char+:: special regex used to split info (if it is necessary)
153
- # +split_info_indx+:: special index to take split info (if it is necessary)
154
- # +alt_ids+:: set of alternative IDs
155
- # +obsoletes+:: integer with the number of obsolete IDs. used to calculate structure type.
156
- # ===== Returns
157
- # A vector with the observed structure (string) and the hash with extended terms
158
- def self.get_related_ids_by_tag(terms:,target_tag:, alt_ids: {}, obsoletes: 0)
159
- # Define structure type
160
- structType = :hierarchical
161
- related_ids = {}
162
- terms.each do |id, tags|
163
- # Check if target tag is defined
164
- if !tags[target_tag].nil?
165
- # Obtain related terms
166
- set_structure, _ = self.get_related_ids(id, terms, target_tag, related_ids, alt_ids)
167
- # Check structure
168
- structType = :circular if set_structure == :circular
169
- end
170
- end
171
-
172
- # Check special case
173
- structType = :atomic if related_ids.length <= 0
174
- structType = :sparse if related_ids.length > 0 && ((terms.length - related_ids.length - obsoletes) >= 2)
175
- # Return type and hash with related_ids
176
- return structType, related_ids
177
- end
178
-
179
-
180
- # Class method to transform string with <tag : info> into hash structure
181
- # ===== Parameters
182
- # +attributes+:: array tuples with info to be transformed into hash format
183
- # ===== Returns
184
- # Attributes stored into hash structure
185
- def self.info2hash(attributes, split_char = " ! ", selected_field = 0)
186
- # Load info
187
- info_hash = {}
188
- # Only TERMS multivalue tags (future add Typedefs and Instance)
189
- # multivalue_tags = [:alt_id, :is_a, :subset, :synonym, :xref, :intersection_of, :union_of, :disjoint_from, :relationship, :replaced_by, :consider]
190
- attributes.each do |tag, value|
191
- # Check
192
- raise EncodingError, 'Info element incorrect format' if (tag.nil?) || (value.nil?)
193
- # Prepare
194
- tag = tag.lstrip.to_sym
195
- value.lstrip!
196
- value = value.split(split_char)[selected_field].to_sym if @@tags_with_trailing_modifiers.include?(tag)
197
-
198
- # Store
199
- query = info_hash[tag]
200
- if !query.nil? # Tag already exists
201
- if !query.kind_of?(Array) # Check that tag is multivalue
202
- raise('Attempt to concatenate plain text with another. The tag is not declared as multivalue. [' + tag.to_s + '](' + query + ')')
203
- else
204
- query << value # Add new value to tag
205
- end
206
- else # New entry
207
- if @@multivalue_tags.include?(tag)
208
- info_hash[tag] = [value]
209
- else
210
- info_hash[tag] = value
211
- end
212
- end
213
- end
214
- self.symbolize_ids(info_hash)
215
- return info_hash
216
- end
217
-
218
-
219
- # Class method to load an OBO format file (based on OBO 1.4 format). Specially focused on load
220
- # the Header, the Terms, the Typedefs and the Instances.
221
- # ===== Parameters
222
- # +file+:: OBO file to be loaded
223
- # ===== Returns
224
- # Hash with FILE, HEADER and STANZAS info
225
- def self.load_obo(file) #TODO: Send to obo_parser class
226
- raise("File is not defined") if file.nil?
227
- # Data variables
228
- header = ''
229
- stanzas = {terms: {}, typedefs: {}, instances: {}}
230
- # Auxiliar variables
231
- infoType = 'Header'
232
- currInfo = []
233
- stanzas_flags = %w[[Term] [Typedef] [Instance]]
234
- # Read file
235
- File.open(file).each do |line|
236
- line.chomp!
237
- next if line.empty?
238
- fields = line.split(':', 2)
239
- # Check if new instance is found
240
- if stanzas_flags.include?(line)
241
- header = self.process_entity(header, infoType, stanzas, currInfo)
242
- # Update info variables
243
- currInfo = []
244
- infoType = line.gsub!(/[\[\]]/, '')
245
- next
246
- end
247
- # Concat info
248
- currInfo << fields
249
- end
250
- # Store last loaded info
251
- header = self.process_entity(header, infoType, stanzas, currInfo) if !currInfo.empty?
252
-
253
- # Prepare to return
254
- finfo = {:file => file, :name => File.basename(file, File.extname(file))}
255
- return finfo, header, stanzas
256
- end
257
-
258
-
259
- # Handle OBO loaded info and stores it into correct container and format
260
- # ===== Parameters
261
- # +header+:: container
262
- # +infoType+:: current ontology item type detected
263
- # +stanzas+:: container
264
- # +currInfo+:: info to be stored
265
- # ===== Returns
266
- # header newly/already stored
267
- def self.process_entity(header, infoType, stanzas, currInfo)
268
- info = self.info2hash(currInfo)
269
- # Store current info
270
- if infoType.eql?('Header')
271
- header = info
272
- else
273
- id = info[:id]
274
- case infoType
275
- when 'Term'
276
- stanzas[:terms][id] = info
277
- when 'Typedef'
278
- stanzas[:typedefs][id] = info
279
- when 'Instance'
280
- stanzas[:instances][id] = info
281
- end
282
- end
283
- return header
284
- end
285
-
286
-
287
- # Symbolize all values into hashes using symbolizable tags as keys
288
- # ===== Parameters
289
- # +item_hash+:: hash to be checked
290
- def self.symbolize_ids(item_hash)
291
- @@symbolizable_ids.each do |tag|
292
- query = item_hash[tag]
293
- if !query.nil?
294
- if query.kind_of?(Array)
295
- query.map!{|item| item.to_sym}
296
- else
297
- item_hash[tag] = query.to_sym if !query.nil?
298
- end
299
- end
300
- end
301
- end
302
-
303
-
304
- #
305
- # ===== Parameters
306
- # +root+:: main term to expand
307
- # +ontology+:: to be cut
308
- # +clone+:: if true, given ontology object will not be mutated
309
- # +remove_up+:: if true, stores only the given root term and its descendants. If false, only the root's ancestors will be stored
310
- # ===== Returns
311
- # An Ontology object with terms after cut the ontology.
312
- def self.mutate(root, ontology, clone: true, remove_up: true)
313
- ontology = ontology.clone if clone
314
- # Obtain affected IDs
315
- descendants = ontology.descendants_index[root]
316
- descendants << root # Store itself to do not remove it
317
- # Remove unnecessary terms
318
- ontology.stanzas[:terms] = ontology.stanzas[:terms].select{|id,v| remove_up ? descendants.include?(id) : !descendants.include?(id)}
319
- ontology.ics = Hash[@@allowed_calcs[:ics].map{|ictype| [ictype, {}]}]
320
- ontology.max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
321
- ontology.dicts = {}
322
- ontology.removable_terms = []
323
- ontology.term_paths = {}
324
- # Recalculate metadata
325
- ontology.build_index
326
- ontology.add_observed_terms_from_profiles
327
- # Finish
328
- return ontology
329
- end
330
-
331
-
332
-
333
- #############################################
334
- # GENERAL METHODS
335
- #############################################
336
-
337
- # Include removable terms to current removable terms list
338
- # ===== Parameters
339
- # +terms+:: terms array to be concatenated
340
- def add_removable_terms(terms)
341
- terms = terms.map{|term| term.to_sym}
342
- @removable_terms.concat(terms)
343
- end
344
-
345
-
346
- # Include removable terms to current removable terms list loading new
347
- # terms from a one column plain text file
348
- # ===== Parameters
349
- # +file+:: to be loaded
350
- def add_removable_terms_from_file(file)
351
- File.open(excluded_codes_file).each do |line|
352
- line.chomp!
353
- @removable_terms << line.to_sym
354
- end
355
- end
356
-
357
-
358
- # Increase observed frequency for a specific term
359
- # ===== Parameters
360
- # +term+:: term which frequency is going to be increased
361
- # +increase+:: frequency rate to be increased. Default = 1
362
- # ===== Return
363
- # true if process ends without errors, false in other cases
364
- def add_observed_term(term:,increase: 1.0)
365
- # Check
366
- raise ArgumentError, "Term given is NIL" if term.nil?
367
- return false unless @stanzas[:terms].include?(term)
368
- return false if @removable_terms.include?(term)
369
- if @alternatives_index.include?(term)
370
- alt_id = @alternatives_index[term]
371
- @meta[alt_id] = {:ancestors => -1.0,:descendants => -1.0,:struct_freq => 0.0,:observed_freq => 0.0} if @meta[alt_id].nil?
372
- @meta[term] = @meta[alt_id]
373
- end
374
- # Check if exists
375
- @meta[term] = {:ancestors => -1.0,:descendants => -1.0,:struct_freq => 0.0,:observed_freq => 0.0} if @meta[term].nil?
376
- # Add frequency
377
- @meta[term][:observed_freq] = 0 if @meta[term][:observed_freq] == -1
378
- @meta[term][:observed_freq] += increase
379
- # Check maximum frequency
380
- @max_freqs[:observed_freq] = @meta[term][:observed_freq] if @max_freqs[:observed_freq] < @meta[term][:observed_freq]
381
- return true
382
- end
383
-
384
-
385
- # Increase the arbitrary frequency of a given term set
386
- # ===== Parameters
387
- # +terms+:: set of terms to be updated
388
- # +increase+:: amount to be increased
389
- # +transform_to_sym+:: if true, transform observed terms to symbols. Default: false
390
- # ===== Return
391
- # true if process ends without errors and false in other cases
392
- def add_observed_terms(terms:, increase: 1.0, transform_to_sym: false)
393
- # Check
394
- raise ArgumentError, 'Terms array given is NIL' if terms.nil?
395
- raise ArgumentError, 'Terms given is not an array' if !terms.is_a? Array
396
- # Add observations
397
- if transform_to_sym
398
- checks = terms.map{|id| self.add_observed_term(term: id.to_sym,increase: increase)}
399
- else
400
- checks = terms.map{|id| self.add_observed_term(term: id,increase: increase)}
401
- end
402
- return checks
403
- end
404
-
405
-
406
- # Compare two term sets
407
- # ===== Parameters
408
- # +termsA+:: set to be compared
409
- # +termsB+:: set to be compared
410
- # +sim_type+:: similitude method to be used. Default: resnik
411
- # +ic_type+:: ic type to be used. Default: resnik
412
- # +bidirectional+:: calculate bidirectional similitude. Default: false
413
- # ===== Return
414
- # similitude calculated
415
- def compare(termsA, termsB, sim_type: :resnik, ic_type: :resnik, bidirectional: true)
416
- # Check
417
- raise ArgumentError, "Terms sets given are NIL" if termsA.nil? | termsB.nil?
418
- raise ArgumentError, "Set given is empty. Aborting similarity calc" if termsA.empty? | termsB.empty?
419
- micasA = []
420
- # Compare A -> B
421
- termsA.each do |tA|
422
- micas = termsB.map{|tB| self.get_similarity(tA, tB, type: sim_type, ic_type: ic_type)}
423
- # Remove special cases
424
- [false,nil].each do |err_value| micas.delete(err_value) end
425
- # Obtain maximum value
426
- micasA << micas.max if micas.length > 0
427
- micasA << 0 if micas.length <= 0
428
- end
429
- means_sim = micasA.inject{ |sum, el| sum + el }.to_f / micasA.size
430
- # Compare B -> A
431
- if bidirectional
432
- means_simA = means_sim * micasA.size
433
- means_simB = self.compare(termsB, termsA, sim_type: sim_type, ic_type: ic_type, bidirectional: false) * termsB.size
434
- means_sim = (means_simA + means_simB) / (termsA.size + termsB.size)
435
- end
436
- # Return
437
- return means_sim
438
- end
439
-
440
-
441
- # Compare internal stored profiles against another set of profiles. If an external set is not provided, internal profiles will be compared with itself
442
- # ===== Parameters
443
- # +external_profiles+:: set of external profiles. If nil, internal profiles will be compared with itself
444
- # +sim_type+:: similitude method to be used. Default: resnik
445
- # +ic_type+:: ic type to be used. Default: resnik
446
- # +bidirectional+:: calculate bidirectional similitude. Default: false
447
- # ===== Return
448
- # Similitudes calculated
449
- def compare_profiles(external_profiles: nil, sim_type: :resnik, ic_type: :resnik, bidirectional: true)
450
- profiles_similarity = {} #calculate similarity between patients profile
451
- profiles_ids = @profiles.keys
452
- if external_profiles.nil?
453
- comp_ids = profiles_ids
454
- comp_profiles = @profiles
455
- main_ids = comp_ids
456
- main_profiles = comp_profiles
457
- else
458
- comp_ids = external_profiles.keys
459
- comp_profiles = external_profiles
460
- main_ids = profiles_ids
461
- main_profiles = @profiles
462
- end
463
- # Compare
464
- while !main_ids.empty?
465
- curr_id = main_ids.shift
466
- current_profile = main_profiles[curr_id]
467
- comp_ids.each do |id|
468
- profile = comp_profiles[id]
469
- value = compare(current_profile, profile, sim_type: sim_type, ic_type: ic_type, bidirectional: bidirectional)
470
- query = profiles_similarity[curr_id]
471
- if query.nil?
472
- profiles_similarity[curr_id] = {id => value}
473
- else
474
- query[id] = value
475
- end
476
- end
477
- end
478
- return profiles_similarity
479
- end
480
-
481
-
482
- # Expand alternative IDs around all already stored terms
483
- # ===== Parameters
484
- # +alt_tag+:: tag used to expand alternative IDs
485
- # ===== Returns
486
- # true if process ends without errors and false in other cases
487
- def get_index_alternatives(alt_tag: @@basic_tags[:alternative][0])
488
- # Check input
489
- raise('stanzas terms empty') if @stanzas[:terms].empty?
490
- # Take all alternative IDs
491
- alt_ids2add = {}
492
- @stanzas[:terms].each do |id, tags|
493
- alt_ids = tags[alt_tag]
494
- if !alt_ids.nil?
495
- alt_ids = alt_ids - @removable_terms
496
- # Update info
497
- alt_ids.each do |alt_term|
498
- @alternatives_index[alt_term] = id
499
- alt_ids2add[alt_term] = @stanzas[:terms][id] if !@stanzas[:terms].include?(alt_term)
500
- @ancestors_index[alt_term] = @ancestors_index[id] if !@ancestors_index[id].nil?
501
- end
502
- end
503
- end
504
- @stanzas[:terms].merge!(alt_ids2add)
505
- end
506
-
507
-
508
- # Executes basic expansions of tags (alternatives, obsoletes and parentals) with default values
509
- # ===== Returns
510
- # true if process ends without errors and false in other cases
511
- def build_index()
512
- self.get_index_alternatives
513
- self.get_index_obsoletes
514
- self.get_index_child_parent_relations
515
- @alternatives_index.map{|k,v| @alternatives_index[k] = self.extract_id(v)}
516
- @alternatives_index.compact!
517
- @obsoletes_index.map{|k,v| @obsoletes_index[k] = self.extract_id(v)}
518
- @obsoletes_index.compact!
519
- @ancestors_index.map{|k,v| @ancestors_index[k] = v.map{|t| self.extract_id(t)}.compact}
520
- @ancestors_index.compact!
521
- @descendants_index.map{|k,v| @descendants_index[k] = v.map{|t| self.extract_id(t)}.compact}
522
- @descendants_index.compact!
523
- self.get_index_frequencies
524
- self.calc_dictionary(:name)
525
- self.calc_dictionary(:synonym, select_regex: /\"(.*)\"/)
526
- self.calc_term_levels(calc_paths: true)
527
- end
528
-
529
-
530
- # Calculates regular frequencies based on ontology structure (using parentals)
531
- # ===== Returns
532
- # true if everything end without errors and false in other cases
533
- def get_index_frequencies()
534
- # Check
535
- if @ancestors_index.empty?
536
- warn('ancestors_index object is empty')
537
- else
538
- # Prepare useful variables
539
- alternative_terms = @alternatives_index.keys
540
- # Per each term, add frequencies
541
- @stanzas[:terms].each do |id, tags|
542
- if @alternatives_index.include?(id)
543
- alt_id = @alternatives_index[id]
544
- query = @meta[alt_id] # Check if exist
545
- if query.nil?
546
- query = {ancestors: 0.0, descendants: 0.0, struct_freq: 0.0, observed_freq: 0.0}
547
- @meta[alt_id] = query
548
- end
549
- @meta[id] = query
550
- # Note: alternative terms do not increase structural frequencies
551
- else # Official term
552
- query = @meta[id] # Check if exist
553
- if query.nil?
554
- query = {ancestors: 0.0, descendants: 0.0, struct_freq: 0.0, observed_freq: 0.0}
555
- @meta[id] = query
556
- end
557
- # Store metadata
558
- query[:ancestors] = @ancestors_index.include?(id) ? @ancestors_index[id].count{|anc| !alternative_terms.include?(anc)}.to_f : 0.0
559
- query[:descendants] = @descendants_index.include?(id) ? @descendants_index[id].count{|desc| !alternative_terms.include?(desc)}.to_f : 0.0
560
- query[:struct_freq] = query[:descendants] + 1.0
561
- # Update maximums
562
- @max_freqs[:struct_freq] = query[:struct_freq] if @max_freqs[:struct_freq] < query[:struct_freq]
563
- @max_freqs[:max_depth] = query[:descendants] if @max_freqs[:max_depth] < query[:descendants]
564
- end
565
- end
566
- end
567
- end
568
-
569
-
570
- # Expand obsoletes set and link info to their alternative IDs
571
- # ===== Parameters
572
- # +obs_tags+:: tags to be used to find obsoletes
573
- # +alt_tags+:: tags to find alternative IDs (if are available)
574
- # +reset_obsoletes+:: flag to indicate if obsoletes set must be reset. Default: true
575
- # ===== Returns
576
- # true if process ends without errors and false in other cases
577
- def get_index_obsoletes(obs_tag: @@basic_tags[:obsolete], alt_tags: @@basic_tags[:alternative])
578
- if @stanzas[:terms].empty?
579
- warn('stanzas terms empty')
580
- else
581
- # Check obsoletes
582
- @stanzas[:terms].each do |id, term_tags|
583
- next if term_tags.nil?
584
- query = term_tags[obs_tag]
585
- if !query.nil? && query == 'true' # Obsolete tag presence
586
- next if !@obsoletes_index[id].nil? # Already stored
587
- # Check if alternative value is available
588
- alt_ids = alt_tags.map{|alt| term_tags[alt]}.compact
589
- if !alt_ids.empty?
590
- alt_id = alt_ids.first.first #FIRST tag, FIRST id
591
- # Store
592
- @alternatives_index[id] = alt_id
593
- @obsoletes_index[id] = alt_id
594
- end
595
- end
596
- end
597
- end
598
- end
599
-
600
-
601
- # Expand parentals set and link all info to their alternative IDs. Also launch frequencies process
602
- # ===== Parameters
603
- # +tag+:: tag used to expand parentals
604
- # +split_info_char+:: special regex used to split info (if it is necessary)
605
- # +split_info_indx+:: special index to take split info (if it is necessary)
606
- # ===== Returns
607
- # true if process ends without errors and false in other cases
608
- def get_index_child_parent_relations(tag: @@basic_tags[:ancestors][0])
609
- # Check
610
- if @stanzas[:terms].nil?
611
- warn('stanzas terms empty')
612
- else
613
- # Expand
614
- structType, parentals = self.class.get_related_ids_by_tag(terms: @stanzas[:terms],
615
- target_tag: tag,
616
- alt_ids: @alternatives_index,
617
- obsoletes: @obsoletes_index.length)
618
- # Check
619
- raise('Error expanding parentals') if (structType.nil?) || parentals.nil?
620
- # Prepare ancestors structure
621
- anc = {}
622
- des = {}
623
- parentals.each do |id, parents|
624
- parents = parents - @removable_terms
625
- anc[id] = parents
626
- parents.each do |anc_id| # Add descendants
627
- if !des.include?(anc_id)
628
- des[anc_id] = [id]
629
- else
630
- des[anc_id] << id
631
- end
632
- end
633
- end
634
- # Store alternatives
635
- @alternatives_index.each do |id,alt|
636
- anc[id] = anc[alt] if anc.include?(alt)
637
- des[id] = des[alt] if des.include?(alt)
638
- end
639
- # Check structure
640
- if ![:atomic,:sparse].include? structType
641
- structType = structType == :circular ? :circular : :hierarchical
642
- end
643
- # Store
644
- @ancestors_index = anc
645
- @descendants_index = des
646
- @structureType = structType
647
- end
648
- # Finish
649
- end
650
-
651
-
652
- # Find ancestors of a given term
653
- # ===== Parameters
654
- # +term+:: to be checked
655
- # +filter_alternatives+:: if true, remove alternatives from final results
656
- # ===== Returns
657
- # an array with all ancestors of given term or false if parents are not available yet
658
- def get_ancestors(term, filter_alternatives = false)
659
- return self.get_familiar(term, true, filter_alternatives)
660
- end
661
-
662
-
663
- # Find descendants of a given term
664
- # ===== Parameters
665
- # +term+:: to be checked
666
- # +filter_alternatives+:: if true, remove alternatives from final results
667
- # ===== Returns
668
- # an array with all descendants of given term or false if parents are not available yet
669
- def get_descendants(term, filter_alternatives = false)
670
- return self.get_familiar(term, false, filter_alternatives)
671
- end
672
-
673
-
674
- # Find ancestors/descendants of a given term
675
- # ===== Parameters
676
- # +term+:: to be checked
677
- # +return_ancestors+:: return ancestors if true or descendants if false
678
- # +filter_alternatives+:: if true, remove alternatives from final results
679
- # ===== Returns
680
- # an array with all ancestors/descendants of given term or nil if parents are not available yet
681
- def get_familiar(term, return_ancestors = true, filter_alternatives = false)
682
- # Find into parentals
683
- familiars = return_ancestors ? @ancestors_index[term] : @descendants_index[term]
684
- if !familiars.nil?
685
- familiars = familiars.clone
686
- if filter_alternatives
687
- familiars.reject!{|fm| @alternatives_index.include?(fm)}
688
- end
689
- else
690
- familiars = []
691
- end
692
- return familiars
693
- end
694
-
695
-
696
- # Obtain IC of a specific term
697
- # ===== Parameters
698
- # +term+:: which IC will be calculated
699
- # +type+:: of IC to be calculated. Default: resnik
700
- # +force+:: force re-calculate the IC. Do not check if it is already calculated
701
- # +zhou_k+:: special coeficient for Zhou IC method
702
- # ===== Returns
703
- # the IC calculated
704
- def get_IC(termRaw, type: :resnik, force: false, zhou_k: 0.5)
705
- term = termRaw.to_sym
706
- # Check
707
- raise ArgumentError, "IC type specified (#{type}) is not allowed" if !@@allowed_calcs[:ics].include?(type)
708
- # Check if it's already calculated
709
- return @ics[type][term] if (@ics[type].include? term) && !force
710
- # Calculate
711
- ic = - 1
712
- case type # https://arxiv.org/ftp/arxiv/papers/1310/1310.8059.pdf ||| https://sci-hub.st/https://doi.org/10.1016/j.eswa.2012.01.082
713
- ###########################################
714
- #### STRUCTURE BASED METRICS
715
- ###########################################
716
- # Shortest path
717
- # Weighted Link
718
- # Hirst and St-Onge Measure
719
- # Wu and Palmer
720
- # Slimani
721
- # Li
722
- # Leacock and Chodorow
723
- ###########################################
724
- #### INFORMATION CONTENT METRICS
725
- ###########################################
726
- when :resnik # Resnik P: Using Information Content to Evaluate Semantic Similarity in a Taxonomy
727
- # -log(Freq(x) / Max_Freq)
728
- ic = -Math.log10(@meta[term][:struct_freq].fdiv(@max_freqs[:struct_freq]))
729
- when :resnik_observed
730
- # -log(Freq(x) / Max_Freq)
731
- ic = -Math.log10(@meta[term][:observed_freq].fdiv(@max_freqs[:observed_freq]))
732
- # Lin
733
- # Jiang & Conrath
734
-
735
- ###########################################
736
- #### FEATURE-BASED METRICS
737
- ###########################################
738
- # Tversky
739
- # x-similarity
740
- # Rodirguez
741
-
742
- ###########################################
743
- #### HYBRID METRICS
744
- ###########################################
745
- when :seco, :zhou # SECO:: An intrinsic information content metric for semantic similarity in WordNet
746
- # 1 - ( log(hypo(x) + 1) / log(max_nodes) )
747
- ic = 1 - Math.log10(@meta[term][:struct_freq]).fdiv(Math.log10(@stanzas[:terms].length - @alternatives_index.length))
748
- if :zhou # New Model of Semantic Similarity Measuring in Wordnet
749
- # k*(IC_Seco(x)) + (1-k)*(log(depth(x))/log(max_depth))
750
- @ics[:seco][term] = ic # Special store
751
- ic = zhou_k * ic + (1.0 - zhou_k) * (Math.log10(@meta[term][:descendants]).fdiv(Math.log10(@max_freqs[:max_depth])))
752
- end
753
- when :sanchez # Semantic similarity estimation in the biomedical domain: An ontology-based information-theoretic perspective
754
- ic = -Math.log10((@meta[term][:descendants].fdiv(@meta[term][:ancestors]) + 1.0).fdiv(@max_freqs[:max_depth] + 1.0))
755
- # Knappe
756
- end
757
- @ics[type][term] = ic
758
- return ic
759
- end
760
-
761
-
762
- # Calculates and return resnik ICs (by ontology and observed frequency) for observed terms
763
- # ===== Returns
764
- # two hashes with resnik and resnik_observed ICs for observed terms
765
- def get_observed_ics_by_onto_and_freq
766
- # Check that there are observed terms
767
- if @profiles.empty?
768
- resnik = {}
769
- resnik_observed = {}
770
- else
771
- # Calc ICs for all terms
772
- observed_terms = @profiles.values.flatten.uniq
773
- observed_terms.each{ |term| get_IC(term)}
774
- observed_terms.each{ |term| get_IC(term, type: :resnik_observed)}
775
- resnik = @ics[:resnik].select{|k,v| observed_terms.include?(k)}
776
- resnik_observed = @ics[:resnik_observed].select{|k,v| observed_terms.include?(k)}
777
- end
778
- return resnik.clone, resnik_observed.clone
779
- end
780
-
781
-
782
- # Find the IC of the Most Index Content shared Ancestor (MICA) of two given terms
783
- # ===== Parameters
784
- # +termA+:: term to be cheked
785
- # +termB+:: term to be checked
786
- # +ic_type+:: IC formula to be used
787
- # ===== Returns
788
- # the IC of the MICA(termA,termB)
789
# Find the IC of the Most Informative Common Ancestor (MICA) of two terms
# ===== Parameters
# +termA+:: term to be checked
# +termB+:: term to be checked
# +ic_type+:: IC formula to be used
# ===== Returns
# the IC of the MICA(termA,termB), or nil when no MICA exists
def get_ICMICA(termA, termB, ic_type = :resnik)
  term, ic = self.get_MICA(termA, termB, ic_type)
  term.nil? ? nil : ic
end
793
-
794
-
795
- # Find the Most Index Content shared Ancestor (MICA) of two given terms
796
- # ===== Parameters
797
- # +termA+:: term to be cheked
798
- # +termB+:: term to be checked
799
- # +ic_type+:: IC formula to be used
800
- # ===== Returns
801
- # the MICA(termA,termB) and it's IC
802
# Find the Most Informative Common Ancestor (MICA) of two given terms
# ===== Parameters
# +termA+:: term to be checked
# +termB+:: term to be checked
# +ic_type+:: IC formula to be used
# ===== Returns
# a pair [MICA term, its IC]; [nil, -1.0] when no shared ancestor exists
def get_MICA(termA, termB, ic_type = :resnik)
  # Resolve alternative IDs to their official terms first
  termA = @alternatives_index[termA] if @alternatives_index.include?(termA)
  termB = @alternatives_index[termB] if @alternatives_index.include?(termB)
  mica = [nil, -1.0]
  if termA.eql?(termB) # Trivial case: a term is its own MICA
    mica = [termA, self.get_IC(termA, type: ic_type)]
  else
    # Candidate set is the shared ancestors, each term counting as its own ancestor
    anc_A = self.get_ancestors(termA)
    anc_B = self.get_ancestors(termB)
    unless anc_A.empty? && anc_B.empty?
      anc_A << termA
      anc_B << termB
      # Keep the shared ancestor with maximum IC
      (anc_A & anc_B).each do |anc|
        ic = self.get_IC(anc, type: ic_type)
        mica = [anc, ic] if ic > mica[1]
      end
    end
  end
  return mica
end
832
-
833
-
834
- # Calculate similarity between two given terms
835
- # ===== Parameters
836
- # +termsA+:: to be compared
837
- # +termsB+:: to be compared
838
- # +type+:: similitude formula to be used
839
- # +ic_type+:: IC formula to be used
840
- # ===== Returns
841
- # the similarity between both sets or false if frequencies are not available yet
842
# Calculate similarity between two given terms
# ===== Parameters
# +termA+:: to be compared
# +termB+:: to be compared
# +type+:: similarity formula to be used (:resnik, :lin, :jiang_conrath)
# +ic_type+:: IC formula to be used
# ===== Returns
# the similarity value, or nil when no MICA is available
def get_similarity(termA, termB, type: :resnik, ic_type: :resnik)
  raise ArgumentError, "SIM type specified (#{type}) is not allowed" unless @@allowed_calcs[:sims].include?(type)
  sim = nil
  mica_ic = get_ICMICA(termA, termB, ic_type)
  unless mica_ic.nil?
    case type
    when :resnik
      sim = mica_ic
    when :lin
      sim = (2.0 * mica_ic).fdiv(self.get_IC(termA, type: ic_type) + self.get_IC(termB, type: ic_type))
    when :jiang_conrath # NOTE: this is a dissimilarity (distance), not a similarity
      sim = (self.get_IC(termA, type: ic_type) + self.get_IC(termB, type: ic_type)) - (2.0 * mica_ic)
    end
  end
  return sim
end
860
-
861
-
862
- # Method used to load information stored into an OBO file and store it into this object.
863
- # If a file is specified by input parameter, current @file value is updated
864
- # ===== Parameters
865
- # +file+:: optional file to update object stored file
866
# Load the information stored in an OBO file into this object.
# ===== Parameters
# +file+:: OBO file path to be loaded
# +build+:: when true, indexes are built after loading
def load(file, build: true)
  _, header, stanzas = self.class.load_obo(file)
  @header = header
  @stanzas = stanzas
  remove_removable()
  build_index() if build
end
874
-
875
- #
876
# Drop every term listed in @removable_terms from the loaded term stanzas.
def remove_removable()
  return if @removable_terms.empty?
  @removable_terms.each{|removable_id| @stanzas[:terms].delete(removable_id)}
end
879
-
880
-
881
- # Exports an OBO_Handler object in json format
882
- # ===== Parameters
883
- # +file+:: where info will be stored
884
# Exports this object (all indexes, profiles and metadata) in JSON format.
# ===== Parameters
# +file+:: destination path where the serialized object is written
def write(file)
  # Collect every serializable field of the object
  obj_info = {
    header: @header,
    stanzas: @stanzas,
    ancestors_index: @ancestors_index,
    descendants_index: @descendants_index,
    alternatives_index: @alternatives_index,
    obsoletes_index: @obsoletes_index,
    structureType: @structureType,
    ics: @ics,
    meta: @meta,
    special_tags: @special_tags,
    max_freqs: @max_freqs,
    dicts: @dicts,
    profiles: @profiles,
    profilesDict: @profilesDict,
    items: @items,
    removable_terms: @removable_terms,
    term_paths: @term_paths
  }
  # Serialize as JSON and write in one step
  File.write(file, obj_info.to_json)
end
906
-
907
-
908
# True when the given string parses as a Float, false otherwise.
def is_number? string
  Float(string)
  true
rescue StandardError
  false
end
911
-
912
-
913
- # Read a JSON file with an OBO_Handler object stored
914
- # ===== Parameters
915
- # +file+:: with object info
916
- # ===== Return
917
- # OBO_Handler internal fields
918
# Read a JSON file holding a serialized OBO_Handler object (see #write) and
# restore every internal field, re-symbolizing IDs where needed.
# ===== Parameters
# +file+:: path of the JSON dump to be loaded
def read(file)
  # FIX: File.read closes the handle; the previous File.open leaked it
  data = JSON.parse(File.read(file), :symbolize_names => true)
  # Re-symbolize stanza info values
  data[:stanzas][:terms].map{|id, info| self.class.symbolize_ids(info)}
  data[:stanzas][:typedefs].map{|id, info| self.class.symbolize_ids(info)}
  data[:stanzas][:instances].map{|id, info| self.class.symbolize_ids(info)}
  data[:alternatives_index] = data[:alternatives_index].map{|id, value| [id, value.to_sym]}.to_h
  data[:ancestors_index].map{|id, family_arr| family_arr.map!{|item| item.to_sym}}
  data[:descendants_index].map{|id, family_arr| family_arr.map!{|item| item.to_sym}}
  data[:obsoletes_index] = data[:obsoletes_index].map{|id, value| [id, value.to_sym]}.to_h
  data[:dicts] = data[:dicts].each do |flag, dictionaries|
    # Restore byTerm keys/values; purely numeric keys become Integers
    dictionaries[:byTerm] = dictionaries[:byTerm].map do |term, value|
      if !term.to_s.scan(/\A[-+]?[0-9]*\.?[0-9]+\Z/).empty? # Numeric dictionary
        [term.to_s.to_i, value.map{|t| t.to_sym}]
      elsif value.is_a? Numeric # Numeric dictionary
        [term.to_sym, value]
      elsif value.kind_of?(Array) && flag == :is_a
        [term.to_sym, value.map{|v| v.to_sym}]
      else
        [term.to_sym, value]
      end
    end
    dictionaries[:byTerm] = dictionaries[:byTerm].to_h
    # Restore byValue keys/values
    dictionaries[:byValue] = dictionaries[:byValue].map do |value, term|
      if value.is_a? Numeric # Numeric dictionary
        [value, term.to_sym]
      elsif term.is_a? Numeric # Numeric dictionary
        [value.to_s.to_sym, term]
      elsif flag == :is_a
        [value.to_sym, term.map{|v| v.to_sym}]
      elsif term.kind_of?(Array)
        [value.to_sym, term.map{|t| t.to_sym}]
      else
        [value.to_s, term.to_sym]
      end
    end
    dictionaries[:byValue] = dictionaries[:byValue].to_h
  end
  # Profiles: terms back to symbols, numeric-looking IDs back to Integers
  data[:profiles].map{|id, terms| terms.map!{|term| term.to_sym}}
  data[:profiles].keys.map{|id| data[:profiles][id.to_s.to_i] = data[:profiles].delete(id) if self.is_number?(id.to_s)}
  data[:profilesDict].map{|term, ids| ids.map!{|id| id.to_sym if !id.is_a?(Numeric)}}
  data[:removable_terms] = data[:removable_terms].map{|term| term.to_sym}
  data[:special_tags] = data[:special_tags].each do |k, v|
    data[:special_tags][k] = v.kind_of?(Array) ? v.map{|tag| tag.to_sym} : v.to_sym
  end
  data[:items].each{|k, v| data[:items][k] = v.map{|item| item.to_sym}}
  data[:term_paths].each{|term, info| data[:term_paths][term][:paths] = info[:paths].map{|path| path.map{|t| t.to_sym}}}
  # Assign restored fields to this object
  @header = data[:header]
  @stanzas = data[:stanzas]
  @ancestors_index = data[:ancestors_index]
  @descendants_index = data[:descendants_index]
  @alternatives_index = data[:alternatives_index]
  @obsoletes_index = data[:obsoletes_index]
  @structureType = data[:structureType].to_sym
  @ics = data[:ics]
  @meta = data[:meta]
  @special_tags = data[:special_tags]
  @max_freqs = data[:max_freqs]
  @dicts = data[:dicts]
  @profiles = data[:profiles]
  @profilesDict = data[:profilesDict]
  @items = data[:items]
  @removable_terms = data[:removable_terms]
  @term_paths = data[:term_paths]
end
992
-
993
-
994
- # Check if a given ID is stored as term into this object
995
- # ===== Parameters
996
- # +id+:: to be checked
997
- # ===== Return
998
- # True if term is allowed or false in other cases
999
# Check whether the given ID is stored as a term in this ontology.
# ===== Parameters
# +id+:: to be checked
# ===== Return
# true if the term exists, false otherwise
def exists? id
  stanzas[:terms].include?(id)
end
1002
-
1003
-
1004
- # This method assumes that a text given contains an allowed ID. And will try to obtain it splitting it
1005
- # ===== Parameters
1006
- # +text+:: to be checked
1007
- # ===== Return
1008
- # The correct ID if it can be found or nil in other cases
1009
# Try to recover an allowed term ID from free text, splitting when necessary.
# ===== Parameters
# +text+:: text assumed to contain an allowed ID
# +splitBy+:: separator used when the raw text is not itself an ID
# ===== Return
# the found ID, or nil when none could be extracted
def extract_id(text, splitBy: ' ')
  return text if self.exists?(text)
  candidate = text.to_s.split(splitBy).first.to_sym
  self.exists?(candidate) ? candidate : nil
end
1017
-
1018
-
1019
- # Generate a bidirectinal dictionary set using a specific tag and terms stanzas set
1020
- # This functions stores calculated dictionary into @dicts field.
1021
- # This functions stores first value for multivalue tags
1022
- # This function does not handle synonyms for byValue dictionaries
1023
- # ===== Parameters
1024
- # +tag+:: to be used to calculate dictionary
1025
- # +select_regex+:: gives a regfex that can be used to modify value to be stored
1026
- # +substitute_alternatives+:: flag used to indicate if alternatives must, or not, be replaced by it official ID
1027
- # +store_tag+:: flag used to store dictionary. If nil, mandatory tag given will be used
1028
- # +multiterm+:: if true, byValue will allows multi-term linkage (array)
1029
- # +self_type_references+:: if true, program assumes that refrences will be between Ontology terms, and it term IDs will be checked
1030
- # ===== Return
1031
- # void. And stores calcualted bidirectional dictonary into dictionaries main container
1032
# Build a bidirectional dictionary (term <-> tag value) over term stanzas and
# store it into @dicts[store_tag]. Keeps first value first for multivalue tags;
# synonyms are not handled for byValue dictionaries.
# ===== Parameters
# +tag+:: tag used to build the dictionary
# +select_regex+:: regex applied to extract/modify stored values
# +substitute_alternatives+:: replace alternative IDs by their official ID
# +store_tag+:: storage key inside @dicts (defaults to +tag+)
# +multiterm+:: byValue entries hold arrays, allowing several terms per value
# +self_type_references+:: values are ontology term IDs and get verified
def calc_dictionary(tag, select_regex: nil, substitute_alternatives: true, store_tag: nil, multiterm: false, self_type_references: false)
  tag = tag.to_sym
  store_tag = tag if store_tag.nil?
  if @stanzas[:terms].empty?
    warn('Terms are not already loaded. Aborting dictionary calc')
  else
    by_term = {}
    by_value = {}
    # Collect the tag values for each term
    @stanzas[:terms].each do |term, tags|
      reference_term = term
      if substitute_alternatives && @alternatives_index.include?(term)
        # Use the official ID unless it points to an obsolete term
        reference_term = @alternatives_index[term] if !@obsoletes_index.include?(@alternatives_index[term])
      end
      query_tag = tags[tag]
      next if query_tag.nil?
      # Apply the selection regex when given
      if !select_regex.nil?
        if query_tag.kind_of?(Array)
          query_tag = query_tag.map{|value| value.scan(select_regex).first}
          query_tag.flatten!
        else
          query_tag = query_tag.scan(select_regex).first
        end
        query_tag.compact!
      end
      if query_tag.kind_of?(Array) # Multivalue tag
        if !query_tag.empty?
          if by_term.include?(reference_term)
            by_term[reference_term] = (by_term[reference_term] + query_tag).uniq
          else
            by_term[reference_term] = query_tag
          end
          if multiterm
            query_tag.each do |value|
              by_value[value] = [] if by_value[value].nil?
              by_value[value] << reference_term
            end
          else
            query_tag.each{|value| by_value[value] = reference_term}
          end
        end
      else # Single value tag
        if by_term.include?(reference_term)
          by_term[reference_term] = (by_term[reference_term] + [query_tag]).uniq
        else
          by_term[reference_term] = [query_tag]
        end
        if multiterm
          by_value[query_tag] = [] if by_value[query_tag].nil?
          by_value[query_tag] << reference_term
        else
          by_value[query_tag] = reference_term
        end
      end
    end

    # Verify values which should themselves be ontology term IDs
    if self_type_references
      by_term.map do |term, references|
        corrected_references = references.map do |t|
          checked = self.extract_id(t)
          if checked.nil?
            t
          else
            # Keep byValue in sync when the reference was normalized
            by_value[checked] = by_value.delete(t) if checked != t && !by_value.keys.include?(checked)
            checked
          end
        end
        by_term[term] = corrected_references.uniq
      end
    end

    # Restore the original tag order at the head of each byTerm entry
    by_term.map do |term, values|
      next if !self.exists?(term)
      reference_value = @stanzas[:terms][term][tag]
      next if reference_value.nil?
      if !select_regex.nil?
        if reference_value.kind_of?(Array)
          reference_value = reference_value.map{|value| value.scan(select_regex).first}
          reference_value.flatten!
        else
          reference_value = reference_value.scan(select_regex).first
        end
        reference_value.compact!
      end
      if self_type_references
        if reference_value.kind_of?(Array)
          aux = reference_value.map{|t| self.extract_id(t)}
        else
          aux = self.extract_id(reference_value)
        end
        reference_value = aux if !aux.nil?
      end
      reference_value = [reference_value] if !reference_value.kind_of?(Array)
      by_term[term] = reference_value + (values - reference_value)
    end

    # Store the computed pair of dictionaries
    @dicts[store_tag] = {byTerm: by_term, byValue: by_value}
  end
end
1138
-
1139
-
1140
- # Calculates :is_a dictionary without alternatives substitution
1141
# Build the :is_a dictionary keeping alternative IDs (no substitution) and
# validating term references, allowing several terms per value.
def calc_ancestors_dictionary
  calc_dictionary(:is_a, substitute_alternatives: false, self_type_references: true, multiterm: true)
end
1144
-
1145
-
1146
- # Translate a given value using an already calcualted dictionary
1147
- # ===== Parameters
1148
- # +toTranslate+:: value to be translated using dictiontionary
1149
- # +tag+:: used to generate the dictionary
1150
- # +byValue+:: boolean flag to indicate if dictionary must be used values as keys or terms as keys. Default: values as keys = true
1151
- # ===== Return
1152
- # translation
1153
# Translate a value using an already calculated dictionary.
# ===== Parameters
# +toTranslate+:: value to be translated
# +tag+:: dictionary to be used
# +byValue+:: lookup by value (true) or by term (false, main ID is resolved first)
# ===== Return
# the translation, or nil when absent
def translate(toTranslate, tag, byValue: true)
  if byValue
    @dicts[tag][:byValue][toTranslate]
  else
    @dicts[tag][:byTerm][get_main_id(toTranslate)]
  end
end
1158
-
1159
-
1160
- # Translate a name given
1161
- # ===== Parameters
1162
- # +name+:: to be translated
1163
- # ===== Return
1164
- # translated name or nil if it's not stored into this ontology
1165
# Translate a term name (falling back to synonyms) into its term ID.
# ===== Parameters
# +name+:: to be translated
# ===== Return
# the term ID, or nil if the name is not stored in this ontology
def translate_name(name)
  term = translate(name, :name)
  term.nil? ? translate(name, :synonym) : term
end
1170
-
1171
-
1172
- # Translate several names and return translations and a list of names which couldn't be translated
1173
- # ===== Parameters
1174
- # +names+:: array to be translated
1175
- # ===== Return
1176
- # two arrays with translations and names which couldn't be translated respectively
1177
# Translate several names at once.
# ===== Parameters
# +names+:: array of names to be translated
# ===== Return
# two arrays: successful translations, and the names which failed
def translate_names(names)
  translated = []
  rejected = []
  names.each do |name|
    tr = translate_name(name)
    if tr.nil?
      rejected << name
    else
      translated << tr
    end
  end
  return translated, rejected
end
1190
-
1191
-
1192
- # Translates a given ID to it assigned name
1193
- # ===== Parameters
1194
- # +id+:: to be translated
1195
- # ===== Return
1196
- # main name or nil if it's not included into this ontology
1197
# Translate a term ID to its assigned main name.
# ===== Parameters
# +id+:: to be translated
# ===== Return
# the main name, or nil if the ID is not part of this ontology
def translate_id(id)
  names = translate(id, :name, byValue: false)
  names.nil? ? nil : names.first
end
1201
-
1202
-
1203
- # Translates several IDs and returns translations and not allowed IDs list
1204
- # ===== Parameters
1205
- # +ids+:: to be translated
1206
- # ===== Return
1207
- # two arrays with translations and names which couldn't be translated respectively
1208
# Translates several IDs and returns translations and not allowed IDs list.
# ===== Parameters
# +ids+:: term IDs to be translated
# ===== Return
# two arrays: translated names, and the IDs which could not be translated
def translate_ids(ids)
  translated = []
  rejected = []
  ids.each do |term_id|
    tr = self.translate_id(term_id.to_sym)
    if !tr.nil?
      translated << tr
    else
      # BUGFIX: previously pushed tr (always nil here) instead of the failing ID,
      # so callers only ever saw an array of nils
      rejected << term_id
    end
  end
  return translated, rejected
end
1221
-
1222
-
1223
- # ===== Returns
1224
- # the main ID assigned to a given ID. If it's a non alternative/obsolete ID itself will be returned
1225
- # ===== Parameters
1226
- # +id+:: to be translated
1227
- # ===== Return
1228
- # main ID related to a given ID. Returns nil if given ID is not an allowed ID
1229
# Resolve the main ID assigned to a given ID. A non alternative/obsolete ID
# is returned unchanged.
# ===== Parameters
# +id+:: to be translated
# ===== Return
# main ID related to the given ID; nil if the ID is not an allowed ID
def get_main_id(id)
  return nil if !@stanzas[:terms].include? id
  new_id = id
  mainID = @alternatives_index[id]
  # FIX: short-circuit && instead of bitwise & so the obsolete check is only
  # evaluated when an alternative main ID actually exists
  new_id = mainID if !mainID.nil? && !@obsoletes_index.include?(mainID)
  return new_id
end
1236
-
1237
-
1238
- # Check a pull of IDs and return allowed IDs removing which are not official terms on this ontology
1239
- # ===== Parameters
1240
- # +ids+:: to be checked
1241
- # ===== Return
1242
- # two arrays whit allowed and rejected IDs respectively
1243
# Split a pull of IDs into allowed (official) and rejected ones.
# ===== Parameters
# +ids+:: to be checked
# +substitute+:: when true, allowed IDs are replaced by their main ID
# ===== Return
# two arrays with allowed and rejected IDs respectively
def check_ids(ids, substitute: true)
  checked_codes = []
  rejected_codes = []
  ids.each do |id|
    if !@stanzas[:terms].include?(id)
      rejected_codes << id
    elsif substitute
      checked_codes << get_main_id(id)
    else
      checked_codes << id
    end
  end
  return checked_codes, rejected_codes
end
1259
-
1260
-
1261
- # Stores a given profile with an specific ID. If ID is already assigend to a profile, it will be replaced
1262
- # ===== Parameters
1263
- # +id+:: assigned to profile
1264
- # +terms+:: array of terms
1265
- # +substitute+:: subsstitute flag from check_ids
1266
# Store a profile under the given ID; an existing profile with the same ID is
# replaced (with a warning). Numeric IDs are kept as-is, others symbolized.
# ===== Parameters
# +id+:: assigned to the profile
# +terms+:: array of terms
# +substitute+:: substitute flag forwarded to check_ids
def add_profile(id, terms, substitute: true)
  warn("Profile assigned to ID (#{id}) is going to be replaced") if @profiles.include? id
  correct_terms, rejected_terms = check_ids(terms, substitute: substitute)
  warn('Given terms contains erroneus IDs. These IDs will be removed') if !rejected_terms.empty?
  key = id.is_a?(Numeric) ? id : id.to_sym
  @profiles[key] = correct_terms
end
1278
-
1279
-
1280
- # Method used to store a pull of profiles
1281
- # ===== Parameters
1282
- # +profiles+:: array/hash of profiles to be stored. If it's an array, numerical IDs will be assigned starting at 1
1283
- # +calc_metadata+:: if true, launch calc_profiles_dictionary process
1284
- # +reset_stored+:: if true, remove already stored profiles
1285
- # +substitute+:: subsstitute flag from check_ids
1286
# Store a pull of profiles. Arrays get numerical IDs assigned by index;
# hashes keep their keys as IDs.
# ===== Parameters
# +profiles+:: array/hash of profiles to be stored
# +calc_metadata+:: if true, launch calc_profiles_dictionary afterwards
# +reset_stored+:: if true, remove already stored profiles first
# +substitute+:: substitute flag forwarded to check_ids
def load_profiles(profiles, calc_metadata: true, reset_stored: false, substitute: false)
  reset_profiles if reset_stored
  if profiles.kind_of?(Array)
    profiles.each_with_index do |items, i|
      add_profile(i, items.map{|item| item.to_sym}, substitute: substitute)
    end
  else # Hash of ID => profile
    if !profiles.keys.select{|id| @profiles.include?(id)}.empty?
      warn('Some profiles given are already stored. Stored version will be replaced')
    end
    profiles.each{|id, prof| add_profile(id, prof, substitute: substitute)}
  end
  add_observed_terms_from_profiles(reset: true)
  calc_profiles_dictionary if calc_metadata
end
1306
-
1307
-
1308
- # Internal method used to remove already stored profiles and restore observed frequencies
1309
# Remove already stored profiles and restore observed frequencies to zero.
def reset_profiles
  @profiles = {}
  @meta.each_value{|info| info[:observed_freq] = 0}
  @max_freqs[:observed_freq] = 0
end
1316
-
1317
-
1318
- # ===== Returns
1319
- # profiles assigned to a given ID
1320
- # ===== Parameters
1321
- # +id+:: profile ID
1322
- # ===== Return
1323
- # specific profile or nil if it's not stored
1324
# Fetch the profile stored under the given ID.
# ===== Parameters
# +id+:: profile ID
# ===== Return
# the profile, or nil when it is not stored
def get_profile(id)
  @profiles[id]
end
1327
-
1328
-
1329
- # ===== Returns
1330
- # an array of sizes for all stored profiles
1331
- # ===== Return
1332
- # array of profile sizes
1333
# Sizes (term counts) of every stored profile.
# ===== Return
# array of profile sizes
def get_profiles_sizes()
  @profiles.values.map(&:length)
end
1336
-
1337
-
1338
- # ===== Returns
1339
- # mean size of stored profiles
1340
- # ===== Parameters
1341
- # +round_digits+:: number of digits to round result. Default: 4
1342
- # ===== Returns
1343
- # mean size of stored profiles
1344
# Mean size of stored profiles.
# ===== Parameters
# +round_digits+:: number of digits to round the result. Default: 4
# ===== Returns
# mean size of stored profiles
def get_profiles_mean_size(round_digits: 4)
  get_profiles_sizes.sum.fdiv(@profiles.length).round(round_digits)
end
1348
-
1349
-
1350
- # Calculates profiles sizes and returns size assigned to percentile given
1351
- # ===== Parameters
1352
- # +perc+:: percentile to be returned
1353
- # +increasing_sort+:: flag to indicate if sizes order must be increasing. Default: false
1354
- # ===== Returns
1355
- # values assigned to percentile asked
1356
# Size of the profile sitting at a given percentile of the size distribution.
# ===== Parameters
# +perc+:: percentile to be returned
# +increasing_sort+:: sort sizes ascending instead of descending. Default: false
# ===== Returns
# the profile length assigned to the asked percentile
def get_profile_length_at_percentile(perc=50, increasing_sort: false)
  lengths = get_profiles_sizes.sort
  lengths.reverse! unless increasing_sort
  # Take the length which does not overpass the selected percentile
  idx = ((perc * (lengths.length - 1)).fdiv(100) - 0.5).round
  idx = 0 if idx < 0 # Guard against negative index from the literal calc
  lengths[idx]
end
1364
-
1365
-
1366
- # Translate a given profile to terms names
1367
- # ===== Parameters
1368
- # +prof+:: array of terms to be translated
1369
- # ===== Returns
1370
- # array of translated terms. Can include nils if some IDs are not allowed
1371
# Translate a profile's term IDs to names.
# ===== Parameters
# +prof+:: array of term IDs
# ===== Returns
# array of names; entries are nil for IDs which are not allowed
def profile_names(prof)
  prof.map{|term| translate_id(term)}
end
1374
-
1375
-
1376
- # Trnaslates a bunch of profiles to it sets of term names
1377
- # ===== Parameters
1378
- # +profs+:: array of profiles
1379
- # +asArray+:: flag to indicate if results must be returned as: true => an array of tuples [ProfID, ArrayOdNames] or ; false => hashs of translations
1380
- # ===== Returns
1381
- # translated profiles
1382
# Translate a bunch of profiles to their sets of term names.
# ===== Parameters
# +profs+:: array/hash of profiles (stored profiles are used when empty)
# +asArray+:: true => array of name arrays; false => hash ID => names
# ===== Returns
# the translated profiles
def translate_profiles_ids(profs = [], asArray: true)
  profs = @profiles if profs.empty?
  profs = profs.each_with_index.map{|terms, index| [index, terms]}.to_h if profs.kind_of?(Array)
  named = profs.map{|id, terms| [id, profile_names(terms)]}.to_h
  asArray ? named.values : named
end
1388
-
1389
-
1390
- # Includes as "observed_terms" all terms included into stored profiles
1391
- # ===== Parameters
1392
- # +reset+:: if true, reset observed freqs alreeady stored befor re-calculate
1393
# Register every term of every stored profile as an observed term.
# ===== Parameters
# +reset+:: if true, reset already stored observed freqs before re-counting
def add_observed_terms_from_profiles(reset: false)
  @meta.each_value{|freqs| freqs[:observed_freq] = -1} if reset
  @profiles.each_value{|terms| add_observed_terms(terms: terms)}
end
1397
-
1398
-
1399
- # Get a term frequency
1400
- # ===== Parameters
1401
- # +term+:: term to be checked
1402
- # +type+:: type of frequency to be returned. Allowed: [:struct_freq, :observed_freq]
1403
- # ===== Returns
1404
- # frequency of term given or nil if term is not allowed
1405
# Get a term frequency.
# ===== Parameters
# +term+:: term to be checked
# +type+:: frequency kind, :struct_freq or :observed_freq
# ===== Returns
# the requested frequency, or nil when the term is not allowed
def get_frequency(term, type: :struct_freq)
  info = @meta[term]
  info.nil? ? nil : info[type]
end
1409
-
1410
-
1411
- # Geys structural frequency of a term given
1412
- # ===== Parameters
1413
- # +term+:: to be checked
1414
- # ===== Returns
1415
- # structural frequency of given term or nil if term is not allowed
1416
# Structural frequency of a given term.
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# structural frequency, or nil when the term is not allowed
def get_structural_frequency(term)
  get_frequency(term, type: :struct_freq)
end
1419
-
1420
-
1421
- # Gets observed frequency of a term given
1422
- # ===== Parameters
1423
- # +term+:: to be checked
1424
- # ===== Returns
1425
- # observed frequency of given term or nil if term is not allowed
1426
# Observed frequency of a given term.
# ===== Parameters
# +term+:: to be checked
# ===== Returns
# observed frequency, or nil when the term is not allowed
def get_observed_frequency(term)
  get_frequency(term, type: :observed_freq)
end
1429
-
1430
-
1431
- # Calculates frequencies of stored profiles terms
1432
- # ===== Parameters
1433
- # +ratio+:: if true, frequencies will be returned as ratios between 0 and 1.
1434
- # +literal+:: if true, literal terms will be used to calculate frequencies instead translate alternative terms
1435
- # +asArray+:: used to transform returned structure format from hash of Term-Frequency to an array of tuples [Term, Frequency]
1436
- # +translate+:: if true, term IDs will be translated to
1437
- # ===== Returns
1438
- # stored profiles terms frequencies
1439
# Frequencies of terms across stored profiles.
# ===== Parameters
# +ratio+:: return frequencies as ratios (0..1) over the number of profiles
# +literal+:: count profile terms literally instead of using observed freqs
# +asArray+:: return [term, freq] tuples sorted by decreasing freq
# +translate+:: replace term IDs by their names when a translation exists
# ===== Returns
# the stored profiles terms frequencies
def get_profiles_terms_frequency(ratio: true, literal: true, asArray: true, translate: true)
  n_profiles = @profiles.length
  if literal
    counts = {}
    # Tally every literal term occurrence across profiles
    @profiles.each do |id, terms|
      terms.each do |literal_term|
        counts[literal_term] = (counts[literal_term] || 0) + 1
      end
    end
    if ratio || translate
      counts.keys.each do |term|
        counts[term] = counts[term].fdiv(n_profiles) if ratio
        if translate
          tr = translate_id(term)
          counts[tr] = counts.delete(term) if !tr.nil?
        end
      end
    end
    if asArray
      counts = counts.map{|term, freq| [term, freq]}
      counts.sort!{|a, b| b[1] <=> a[1]}
    end
  else # Frequencies with alternatives already translated (from @meta)
    counts = @meta.select{|id, freqs| freqs[:observed_freq] > 0}.map{|id, freqs| [id, ratio ? freqs[:observed_freq].fdiv(n_profiles) : freqs[:observed_freq]]}
    counts = counts.to_h if !asArray
    if translate
      counts = counts.map do |term, freq|
        tr = translate_id(term)
        tr.nil? ? [term, freq] : [tr, freq]
      end
    end
    if asArray
      counts = counts.map{|term, freq| [term, freq]}
      counts.sort!{|a, b| b[1] <=> a[1]}
    else
      counts = counts.to_h
    end
  end
  return counts
end
1484
-
1485
-
1486
- # Clean a given profile returning cleaned set of terms and removed ancestors term.
1487
- # ===== Parameters
1488
- # +prof+:: array of terms to be checked
1489
- # ===== Returns
1490
- # two arrays, first is the cleaned profile and second is the removed elements array
1491
# Clean a given profile, removing terms which are ancestors of other terms.
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays: the cleaned profile, and the removed (redundant) terms
def remove_ancestors_from_profile(prof)
  ancestors = prof.flat_map{|term| get_ancestors(term)}.uniq
  redundant = prof.select{|term| ancestors.include?(term)}
  return prof - redundant, redundant
end
1496
-
1497
-
1498
- # Remove alternative IDs if official ID is present. DOES NOT REMOVE synonyms or alternative IDs of the same official ID
1499
- # ===== Parameters
1500
- # +prof+:: array of terms to be checked
1501
- # ===== Returns
1502
- # two arrays, first is the cleaned profile and second is the removed elements array
1503
# Remove alternative IDs whose official ID is also present in the profile.
# Synonyms and alternatives of the same official ID are NOT removed.
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays: the cleaned profile, and the removed (redundant) terms
def remove_alternatives_from_profile(prof)
  redundant = prof.select do |term|
    @alternatives_index.include?(term) && prof.include?(@alternatives_index[term])
  end
  return prof - redundant, redundant
end
1508
-
1509
-
1510
- # Remove alternatives (if official term is present) and ancestors terms of a given profile
1511
- # ===== Parameters
1512
- # +profile+:: profile to be cleaned
1513
- # +remove_alternatives+:: if true, clenaed profiles will replace already stored profiles
1514
- # ===== Returns
1515
- # cleaned profile
1516
# Remove ancestor terms and (optionally) redundant alternative IDs from a profile.
# ===== Parameters
# +profile+:: profile to be cleaned
# +remove_alternatives+:: also drop alternatives whose official ID is present
# ===== Returns
# the cleaned profile
def clean_profile(profile, remove_alternatives: true)
  cleaned, _ = remove_ancestors_from_profile(profile)
  cleaned, _ = remove_alternatives_from_profile(cleaned) if remove_alternatives
  cleaned
end
1525
-
1526
-
1527
- # Remove alternatives (if official term is present) and ancestors terms of stored profiles
1528
- # ===== Parameters
1529
- # +store+:: if true, clenaed profiles will replace already stored profiles
1530
- # +remove_alternatives+:: if true, clenaed profiles will replace already stored profiles
1531
- # ===== Returns
1532
- # a hash with cleaned profiles
1533
# Clean every stored profile (see clean_profile).
# ===== Parameters
# +store+:: if true, cleaned profiles replace the stored ones
# +remove_alternatives+:: forwarded to clean_profile
# ===== Returns
# a hash with the cleaned profiles
def clean_profiles(store: false, remove_alternatives: true)
  cleaned_profiles = {}
  @profiles.each{|id, terms| cleaned_profiles[id] = clean_profile(terms, remove_alternatives: remove_alternatives)}
  @profiles = cleaned_profiles if store
  cleaned_profiles
end
1539
-
1540
-
1541
- # Calculates number of ancestors present (redundant) in each profile stored
1542
- # ===== Returns
1543
- # array of parentals for each profile
1544
# Number of redundant (parental) terms present in each stored profile.
# ===== Returns
# array with the count of parentals for each profile
def parentals_per_profile
  cleaned = clean_profiles(remove_alternatives: false)
  @profiles.map{|id, terms| terms.length - cleaned[id].length}
end
1549
-
1550
-
1551
- # Calculates mean IC of a given profile
1552
- # ===== Parameters
1553
- # +prof+:: profile to be checked
1554
- # +ic_type+:: ic_type to be used
1555
- # +zhou_k+:: special coeficient for Zhou IC method
1556
- # ===== Returns
1557
- # mean IC for a given profile
1558
# Mean IC of a given profile.
# ===== Parameters
# +prof+:: profile to be checked
# +ic_type+:: IC formula to be used
# +zhou_k+:: special coefficient for the Zhou IC method
# ===== Returns
# the mean IC of the profile
def get_profile_mean_IC(prof, ic_type: :resnik, zhou_k: 0.5)
  prof.sum{|term| get_IC(term, type: ic_type, zhou_k: zhou_k)}.fdiv(prof.length)
end
1561
-
1562
-
1563
- # Calculates resnik ontology, and resnik observed mean ICs for all profiles stored
1564
- # ===== Returns
1565
- # two hashes with Profiles and IC calculated for resnik and observed resnik respectively
1566
# Mean resnik (structural) and resnik_observed ICs for every stored profile.
# ===== Returns
# two hashes (cloned): profile ID => structural IC, profile ID => observed IC
def get_profiles_resnik_dual_ICs
  struct_ics = {}
  observ_ics = {}
  @profiles.each do |id, terms|
    struct_ics[id] = get_profile_mean_IC(terms, ic_type: :resnik)
    observ_ics[id] = get_profile_mean_IC(terms, ic_type: :resnik_observed)
  end
  return struct_ics.clone, observ_ics.clone
end
1575
-
1576
-
1577
- # Calculates ontology structural levels for all ontology terms
1578
- # ===== Parameters
1579
- # +calc_paths+:: calculates term paths if it's not already calculated
1580
- # +shortest_path+:: if true, level is calculated with shortest path, largest path will be used in other cases
1581
# Compute the structural level of every term from its stored paths and store
# the result as the :level dictionary; also updates @max_freqs[:max_depth].
# ===== Parameters
# +calc_paths+:: compute term paths first if they are missing
# +shortest_path+:: level comes from the shortest path (largest otherwise)
def calc_term_levels(calc_paths: false, shortest_path: true)
  if @term_paths.empty?
    if calc_paths
      calc_term_paths
    else
      warn('Term paths are not already loaded. Aborting dictionary calc')
    end
  end
  return if @term_paths.empty?
  by_term = {}
  by_value = {}
  @term_paths.each do |term, info|
    level = shortest_path ? info[:shortest_path] : info[:largest_path]
    level = level.nil? ? -1 : level.round(0)
    by_term[term] = level
    (by_value[level] ||= []) << term
  end
  # Keys are swapped on purpose: level has multiplicity, term is unique
  @dicts[:level] = {byTerm: by_value, byValue: by_term}
  # Update maximum depth
  @max_freqs[:max_depth] = by_value.keys.max
end
1613
-
1614
-
1615
- # Check if a term given is marked as obsolete
1616
# True when the given term is marked as obsolete.
def is_obsolete? term
  @obsoletes_index.include?(term)
end
1619
-
1620
- # Check if a term given is marked as alternative
1621
# True when the given term is marked as an alternative ID.
def is_alternative? term
  @alternatives_index.include?(term)
end
1624
-
1625
- # Find paths of a term following it ancestors and stores all possible paths for it and it's parentals.
1626
- # Also calculates paths metadata and stores into @term_paths
1627
- def calc_term_paths
1628
- self.calc_ancestors_dictionary if !@dicts.keys.include?(:is_a) # Calculate direct parentals dictionary if it's not already calculated
1629
- visited_terms = []
1630
- @term_paths = {}
1631
- if [:hierarchical, :sparse].include? @structureType
1632
- terms = @stanzas[:terms].keys
1633
- terms.each do |term|
1634
- if self.is_obsolete?(term) || self.is_alternative?(term) # Special case (obsoletes)
1635
- special_term = term
1636
- term = self.is_obsolete?(term) ? @obsoletes_index[term] : @alternatives_index[term]
1637
- @term_paths[term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if !@term_paths.include?(term)
1638
- @term_paths[special_term] = @term_paths[term]
1639
- visited_terms << special_term
1640
- end
1641
-
1642
- if !visited_terms.include?(term)
1643
- @term_paths[term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if !@term_paths.include?(term)
1644
- parentals = @dicts[:is_a][:byTerm][term]
1645
- if parentals.nil?
1646
- @term_paths[term][:paths] << [term]
1647
- else
1648
- parentals.each do |direct_parental|
1649
- if visited_terms.include? direct_parental # Use direct_parental already calculated paths
1650
- new_paths = @term_paths[direct_parental][:paths].map{|path| [term, path].flatten}
1651
- else # Calculate new paths
1652
- self.expand_path(direct_parental, visited_terms)
1653
- new_paths = @term_paths[direct_parental][:paths].map{|path| [term, path].flatten}
1654
- end
1655
- new_paths.each{|path| @term_paths[term][:paths] << path}
1656
- end
1657
- end
1658
- visited_terms << term
1659
- end
1660
- # Update metadata
1661
- @term_paths[term][:total_paths] = @term_paths[term][:paths].length
1662
- paths_sizes = @term_paths[term][:paths].map{|path| path.length}
1663
- @term_paths[term][:largest_path] = paths_sizes.max
1664
- @term_paths[term][:shortest_path] = paths_sizes.min
1665
- end
1666
- else
1667
- warn('Ontology structure must be hierarchical or sparse to calculate term levels. Aborting paths calculation')
1668
- end
1669
- end
1670
-
1671
-
1672
- # Recursive function whic finds paths of a term following it ancestors and stores all possible paths for it and it's parentals
1673
- # ===== Parameters
1674
- # +curr_term+:: current visited term
1675
- # +visited_terms+:: already expanded terms
1676
- def expand_path(curr_term, visited_terms)
1677
- if !visited_terms.include?(curr_term) # Not already expanded
1678
- @term_paths[curr_term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if @term_paths[curr_term].nil?
1679
- direct_parentals = @dicts[:is_a][:byTerm][curr_term]
1680
- if direct_parentals.nil? # No parents :: End of recurrence
1681
- @term_paths[curr_term][:paths] << [curr_term]
1682
- else # Expand and concat
1683
- direct_parentals.each do |ancestor|
1684
- self.expand_path(ancestor,visited_terms) if !visited_terms.include?(ancestor)
1685
- new_paths = @term_paths[ancestor][:paths].map{|path| [curr_term, path].flatten}
1686
- new_paths.each{|path| @term_paths[curr_term][:paths] << path}
1687
- end
1688
- end
1689
- visited_terms << curr_term
1690
- end
1691
- end
1692
-
1693
-
1694
- # Gets ontology levels calculated
1695
- # ===== Returns
1696
- # ontology levels calculated
1697
- def get_ontology_levels
1698
- return @dicts[:level][:byTerm].clone # By term, in this case, is Key::Level, Value::Terms
1699
- end
1700
-
1701
-
1702
- # Gets ontology level of a specific term
1703
- # ===== Returns
1704
- # Term level
1705
- def get_term_level(term)
1706
- return @dicts[:level][:byValue][term]
1707
- end
1708
-
1709
-
1710
- # Return ontology levels from profile terms
1711
- # ===== Returns
1712
- # hash of term levels (Key: level; Value: array of term IDs)
1713
- def get_ontology_levels_from_profiles(uniq = true) # TODO: remove uniq and check dependencies
1714
- profiles_terms = @profiles.values.flatten
1715
- profiles_terms.uniq! if uniq
1716
- term_freqs_byProfile = {}
1717
- profiles_terms.each do |term|
1718
- query = term_freqs_byProfile[term]
1719
- if query.nil?
1720
- term_freqs_byProfile[term] = 1
1721
- else
1722
- term_freqs_byProfile[term] += 1
1723
- end
1724
- end
1725
- levels_filtered = @dicts[:level][:byTerm].map{|level, terms| [level,terms.map{|t| profiles_terms.include?(t) ? Array.new(term_freqs_byProfile[t], t) : nil}.flatten.compact]}.select{|level, filteredTerms| !filteredTerms.empty?}.to_h
1726
- return levels_filtered
1727
- end
1728
-
1729
-
1730
- # Calculate profiles dictionary with Key= Term; Value = Profiles
1731
- def calc_profiles_dictionary
1732
- if @profiles.empty?
1733
- warn('Profiles are not already loaded. Aborting dictionary calc')
1734
- else
1735
- byTerm = {} # Key: Terms
1736
- # byValue -- Key: Profile == @profiles
1737
- @profiles.each do |id, terms|
1738
- terms.each do |term|
1739
- if byTerm.include?(term)
1740
- byTerm[term] << id
1741
- else
1742
- byTerm[term] = [id]
1743
- end
1744
- end
1745
- end
1746
- @profilesDict = byTerm
1747
- end
1748
- end
1749
-
1750
-
1751
- # Gets profiles dictionary calculated
1752
- # ===== Return
1753
- # profiles dictionary (clone)
1754
- def get_terms_linked_profiles
1755
- return @profilesDict.clone
1756
- end
1757
-
1758
-
1759
- # Get related profiles to a given term
1760
- # ===== Parameters
1761
- # +term+:: to be checked
1762
- # ===== Returns
1763
- # profiles which contains given term
1764
- def get_term_linked_profiles(term)
1765
- return @profilesDict[term]
1766
- end
1767
-
1768
-
1769
- # Gets metainfo table from a set of terms
1770
- # ===== Parameters
1771
- # +terms+:: IDs to be expanded
1772
- # +filter_alternatives+:: flag to be used in get_descendants method
1773
- # ===== Returns
1774
- # an array with triplets [TermID, TermName, DescendantsNames]
1775
- def get_childs_table(terms, filter_alternatives = false)
1776
- expanded_terms = []
1777
- terms.each do |t|
1778
- expanded_terms << [[t, self.translate_id(t)], self.get_descendants(t, filter_alternatives).map{|child| [child, self.translate_id(child)]}]
1779
- end
1780
- return expanded_terms
1781
- end
1782
-
1783
-
1784
- # Store specific relations hash given into ITEMS structure
1785
- # ===== Parameters
1786
- # +relations+:: to be stored
1787
- # +remove_old_relations+:: substitute ITEMS structure instead of merge new relations
1788
- # +expand+:: if true, already stored keys will be updated with the unique union of both sets
1789
- def load_item_relations_to_terms(relations, remove_old_relations = false, expand = false)
1790
- @items = {} if remove_old_relations
1791
- if !relations.select{|term, items| !@stanzas[:terms].include?(term)}.empty?
1792
- warn('Some terms specified are not stored into this ontology. These not correct terms will be stored too')
1793
- end
1794
- if !remove_old_relations
1795
- if !relations.select{|term, items| @items.include?(term)}.empty? && !expand
1796
- warn('Some terms given are already stored. Stored version will be replaced')
1797
- end
1798
- end
1799
- if expand
1800
- relations.each do |k,v|
1801
- if @items.keys.include?(k)
1802
- @items[k] = (@items[k] + v).uniq
1803
- else
1804
- @items[k] = v
1805
- end
1806
- end
1807
- else
1808
- @items.merge!(relations)
1809
- end
1810
- end
1811
-
1812
-
1813
- # Assign a dictionary already calculated as a items set.
1814
- # ===== Parameters
1815
- # +dictID+:: dictionary ID to be stored (:byTerm will be used)
1816
- def set_items_from_dict(dictID, remove_old_relations = false)
1817
- @items = {} if remove_old_relations
1818
- if(@dicts.keys.include?(dictID))
1819
- @items.merge(@dicts[dictID][:byTerm])
1820
- else
1821
- warn('Specified ID is not calculated. Dict will not be added as a items set')
1822
- end
1823
- end
1824
-
1825
-
1826
- # This method computes childs similarity and impute items to it parentals. To do that Item keys must be this ontology allowed terms.
1827
- # Similarity will be calculated by text extact similarity unless an ontology object will be provided. In this case, MICAs will be used
1828
- # ===== Parameters
1829
- # +ontology+:: (Optional) ontology object which items given belongs
1830
- # +minimum_childs+:: minimum of childs needed to infer relations to parental. Default: 2
1831
- # +clean_profiles+:: if true, clena_profiles ontology method will be used over inferred profiles. Only if an ontology object is provided
1832
- # ===== Returns
1833
- # void and update items object
1834
- def expand_items_to_parentals(ontology: nil, minimum_childs: 2, clean_profiles: true)
1835
- # Check item keys
1836
- if @items.empty?
1837
- warn('Items have been not provided yet')
1838
- return nil
1839
- end
1840
- targetKeys = @items.keys.select{|k| self.exists?(k)}
1841
- if targetKeys.length == 0
1842
- warn('Any item key is allowed')
1843
- return nil
1844
- elsif targetKeys.length < @items.keys.length
1845
- warn('Some item keys are not allowed')
1846
- end
1847
-
1848
- # Expand to parentals
1849
- targetKeys << targetKeys.map{|t| self.get_ancestors(t, true)}
1850
- targetKeys.flatten!
1851
- targetKeys.uniq!
1852
-
1853
- # Obtain levels (go from leaves to roots)
1854
- levels = targetKeys.map{|term| self.get_term_level(term)}
1855
- levels.compact!
1856
- levels.uniq!
1857
- levels.sort!
1858
- levels.reverse!
1859
- levels.shift # Leaves are not expandable
1860
-
1861
- # Expand from leaves to roots
1862
- levels.map do |lvl|
1863
- curr_keys = targetKeys.select{|k| self.get_term_level(k) == lvl}
1864
- curr_keys.map do |term_expand|
1865
- to_infer = []
1866
- # Obtain childs
1867
- childs = self.get_descendants(term_expand,true).select{|t| @items.keys.include?(t)}
1868
- # Expand
1869
- if childs.length > 0 && minimum_childs == 1 # Special case
1870
- to_infer = childs.map{|c| @items[c]}.flatten.compact.uniq
1871
- elsif childs.length >= minimum_childs
1872
- to_infer = Hash.new(0)
1873
- # Compare
1874
- while childs.length > 1
1875
- curr_term = childs.shift
1876
- childs.each do |compare_term|
1877
- pivot_items = @items[curr_term]
1878
- compare_items = @items[compare_term]
1879
- if ontology.nil? # Exact match
1880
- pivot_items.map do |pitem|
1881
- if compare_items.include?(pitem)
1882
- to_infer[pitem] += 2
1883
- end
1884
- end
1885
- else # Find MICAs
1886
- local_infer = Hash.new(0)
1887
- pivot_items.map do |pitem|
1888
- micas = compare_items.map{|citem| ontology.get_MICA(pitem, citem)}
1889
- maxmica = micas[0]
1890
- micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
1891
- local_infer[maxmica.first] += 1
1892
- end
1893
- compare_items.map do |citem|
1894
- micas = pivot_items.map{|pitem| ontology.get_MICA(pitem, citem)}
1895
- maxmica = micas[0]
1896
- micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
1897
- local_infer[maxmica.first] += 1
1898
- end
1899
- local_infer.each{|t,freq| to_infer[t] += freq if freq >= 2}
1900
- end
1901
- end
1902
- end
1903
- # Filter infer
1904
- to_infer = to_infer.select{|k,v| v >= minimum_childs}
1905
- end
1906
- # Infer
1907
- if to_infer.length > 0
1908
- @items[term_expand] = [] if @items[term_expand].nil?
1909
- if to_infer.kind_of?(Array)
1910
- @items[term_expand] = (@items[term_expand] + to_infer).uniq
1911
- else
1912
- @items[term_expand] = (@items[term_expand] + to_infer.keys).uniq
1913
- end
1914
- @items[term_expand] = ontology.clean_profile(@items[term_expand]) if clean_profiles && !ontology.nil?
1915
- elsif !@items.include?(term_expand)
1916
- targetKeys.delete(term_expand)
1917
- end
1918
- end
1919
- end
1920
- end
1921
-
1922
-
1923
-
1924
- # NO IDEA WHAT THIS DOES. DON'T USE THIS METHODS IS NOT CHECKED
1925
- # ===== Parameters
1926
- # ++::
1927
- # ===== Returns
1928
- # ...
1929
- def compute_relations_to_items(external_item_list, mode, thresold)
1930
- results = []
1931
- penalized_terms = {}
1932
- # terms_levels = get_terms_levels(@items_relations.keys)
1933
- terms_with_items_levels = @items_relations.keys.map{|term| self.get_term_level(term)}.uniq
1934
- terms_levels = self.get_ontology_levels().select{|k,v| terms_with_items_levels.include?(k)}
1935
- terms_levels = terms_levels.each{|level,terms| [level, terms.select{|t| @items_relations.keys.include?(t)}] } # Use only items terms. MAYBE IT'S NOT OUR TARGET (line added by fmj)
1936
- levels = terms_levels.keys.sort
1937
- levels.reverse_each do |level|
1938
- terms_levels[level].each do |term|
1939
- associated_items = @items_relations[term]
1940
- if mode == :elim
1941
- items_to_remove = penalized_terms[term]
1942
- items_to_remove = [] if items_to_remove.nil?
1943
- pval = get_fisher_exact_test(
1944
- external_item_list - items_to_remove,
1945
- associated_items - items_to_remove,
1946
- ((associated_items | external_item_list) - items_to_remove).length
1947
- )
1948
- if pval <= thresold
1949
- parents = get_parents(term) # Save the items for each parent term to remove them later in the fisher test
1950
- parents.each do |prnt|
1951
- query = penalized_terms[prnt]
1952
- if query.nil?
1953
- penalized_terms[prnt] = @items_relations[term].clone # We need a new array to store the following iterations
1954
- else
1955
- query.concat(@items_relations[term])
1956
- end
1957
- end
1958
- end
1959
- end
1960
- results << [term, pval]
1961
- end
1962
- end
1963
- return results
1964
- end
1965
-
1966
-
1967
- # Check if a given ID is a removable (blacklist) term.
1968
- # +DEPRECATED+ use is_removable? instead
1969
- # ===== Parameters
1970
- # +id+:: to be checked
1971
- # ===== Returns
1972
- # true if given term is a removable (blacklist) term or false in other cases
1973
- def is_removable(id)
1974
- warn "[DEPRECATION] `is_removable` is deprecated. Please use `is_removable?` instead."
1975
- return @removable_terms.include?(id.to_sym)
1976
- end
1977
-
1978
- # Check if a given ID is a removable (blacklist) term
1979
- # ===== Parameters
1980
- # +id+:: to be checked
1981
- # ===== Returns
1982
- # true if given term is a removable (blacklist) term or false in other cases
1983
- def is_removable? id
1984
- return @removable_terms.include?(id.to_sym)
1985
- end
1986
-
1987
- ############################################
1988
- # SPECIAL METHODS
1989
- #############################################
1990
- def ==(other)
1991
- self.header == other.header &&
1992
- self.stanzas == other.stanzas &&
1993
- self.ancestors_index == other.ancestors_index &&
1994
- self.alternatives_index == other.alternatives_index &&
1995
- self.obsoletes_index == other.obsoletes_index &&
1996
- self.structureType == other.structureType &&
1997
- self.ics == other.ics &&
1998
- self.meta == other.meta &&
1999
- self.dicts == other.dicts &&
2000
- self.profiles == other.profiles &&
2001
- self.profilesDict == other.profilesDict &&
2002
- (self.items.keys - other.items.keys).empty? &&
2003
- self.removable_terms == other.removable_terms &&
2004
- self.special_tags == other.special_tags &&
2005
- self.items == other.items &&
2006
- self.term_paths == other.term_paths &&
2007
- self.max_freqs == other.max_freqs
7
+ #########################################################
8
+ # AUTHOR NOTES
9
+ #########################################################
10
+
11
+ # 1 - Store @profiles as @stanzas[:instances]
12
+ # 2 - Items values (not keys) are imported as strings, not as symbols (maybe add a flag which indicates if values are, or not, symbols?)
13
+
14
+
15
+ #############################################
16
+ # FIELDS
17
+ #############################################
18
+ # Handled class variables
19
+ # => @@basic_tags :: hash with main OBO structure tags
20
+ # => @@allowed_calcs :: hash with allowed ICs and similaritites calcs
21
+ # => @@symbolizable_ids :: tags which can be symbolized
22
+ # => @@tags_with_trailing_modifiers :: tags which can include extra info after specific text modifiers
23
+ #
24
+ # Handled object variables
25
+ # => @header :: file header (if is available)
26
+ # => @stanzas :: OBO stanzas {:terms,:typedefs,:instances}
27
+ # => @ancestors_index :: hash of ancestors per each term handled with any structure relationships
28
+ # => @descendants_index :: hash of descendants per each term handled with any structure relationships
29
+ # => @alternatives_index :: has of alternative IDs (include alt_id and obsoletes)
30
+ # => @obsoletes_index :: hash of obsoletes and it's new ids
31
+ # => @special_tags :: set of special tags to be expanded (:is_a, :obsolete, :alt_id)
32
+ # => @structureType :: type of ontology structure depending on ancestors relationship. Allowed: {atomic, sparse, circular, hierarchical}
33
+ # => @ics :: already calculated ICs for handled terms and IC types
34
+ # => @meta :: meta_information about handled terms like [ancestors, descendants, struct_freq, observed_freq]
35
+ # => @max_freqs :: maximum freqs found for structural and observed freqs
36
+ # => @dicts :: bidirectional dictionaries with three levels <key|value>: 1º) <tag|hash2>; 2º) <(:byTerm/:byValue)|hash3>; 3º) dictionary <k|v>
37
+ # => @profiles :: set of terms assigned to an ID
38
+ # => @profilesDict :: set of profile IDs assigned to a term
39
+ # => @items :: hash with items relations to terms
40
+ # => @removable_terms :: array of terms to not be considered
41
+ # => @term_paths :: metainfo about parental paths of each term
42
+
43
+ @@basic_tags = {ancestors: [:is_a], obsolete: :is_obsolete, alternative: [:replaced_by,:consider,:alt_id]}
44
+ @@allowed_calcs = {ics: [:resnik, :resnik_observed, :seco, :zhou, :sanchez], sims: [:resnik, :lin, :jiang_conrath]}
45
+ @@symbolizable_ids = [:id, :alt_id, :replaced_by, :consider]
46
+ @@tags_with_trailing_modifiers = [:is_a, :union_of, :disjoint_from, :relationship, :subsetdef, :synonymtypedef, :property_value]
47
+ @@multivalue_tags = [:alt_id, :is_a, :subset, :synonym, :xref, :intersection_of, :union_of, :disjoint_from, :relationship, :replaced_by, :consider, :subsetdef, :synonymtypedef, :property_value, :remark]
48
+ @@symbolizable_ids.concat(@@tags_with_trailing_modifiers)
49
+
50
+ #############################################
51
+ # CONSTRUCTOR
52
+ #############################################
53
+
54
+ # Instantiate a OBO_Handler object
55
+ # ===== Parameters
56
+ # +file+:: with info to be loaded (.obo ; .json)
57
+ # +load_file+:: activate load process automatically
58
+ # +removable_terms+: term to be removed from calcs
59
+ # +build+: flag to launch metainfo calculation
60
+ # +file_format+: force format type despite file extension. Can be :obo or :json
61
+ def initialize(file: nil, load_file: false, removable_terms: [], build: true, file_format: nil)
62
+ # Initialize object variables
63
+ @header = nil
64
+ @stanzas = {terms: {}, typedefs: {}, instances: {}}
65
+ @ancestors_index = {}
66
+ @descendants_index = {}
67
+ @alternatives_index = {}
68
+ @obsoletes_index = {}
69
+ @structureType = nil
70
+ @ics = Hash[@@allowed_calcs[:ics].map{|ictype| [ictype, {}]}]
71
+ @meta = {}
72
+ @special_tags = @@basic_tags.clone
73
+ @max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
74
+ @dicts = {}
75
+ @profiles = {}
76
+ @profilesDict = {}
77
+ @items = {}
78
+ @removable_terms = []
79
+ @term_paths = {}
80
+ add_removable_terms(removable_terms) if !removable_terms.empty?
81
+ load_file = true unless file.nil? # This should remove load_file argument, keep it for old scripts
82
+ # Load if proceeds
83
+ if load_file
84
+ fformat = file_format
85
+ fformat = File.extname(file) if fformat.nil? && !file.nil?
86
+ if fformat == :obo || fformat == ".obo"
87
+ load(file, build: build)
88
+ elsif fformat == :json || fformat == ".json"
89
+ self.read(file, build: build)
90
+ elsif !fformat.nil?
91
+ warn 'Format not allowed. Loading process will not be performed'
92
+ end
93
+ end
94
+ end
95
+
96
+
97
+ #############################################
98
+ # CLASS METHODS
99
+ #############################################
100
+
101
+ # Expand a (starting) term using a specific tag and return all extended terms into an array and
102
+ # the relationship structuture observed (hierarchical or circular). If circular structure is
103
+ # foumd, extended array will be an unique vector without starting term (no loops).
104
+ # +Note+: we extremly recomend use get_related_ids_by_tag function instead of it (directly)
105
+ # ===== Parameters
106
+ # +start+:: term where start to expand
107
+ # +terms+:: set to be used to expand
108
+ # +target_tag+:: tag used to expand
109
+ # +eexpansion+:: already expanded info
110
+ # +split_info_char+:: special regex used to split info (if it is necessary)
111
+ # +split_info_indx+:: special index to take splitted info (if it is necessary)
112
+ # +alt_ids+:: set of alternative IDs
113
+ # ===== Returns
114
+ # A vector with the observed structure (string) and the array with extended terms.
115
+ def self.get_related_ids(start_id, terms, target_tag, related_ids = {}, alt_ids = {})
116
+ # Take start_id term available info and already accumulated info
117
+ current_associations = related_ids[start_id]
118
+ current_associations = [] if current_associations.nil?
119
+ return [:no_term,[]] if terms[start_id].nil?
120
+ id_relations = terms[start_id][target_tag]
121
+ return [:source,[]] if id_relations.nil?
122
+
123
+ # Prepare auxiliar variables
124
+ struct = :hierarchical
125
+
126
+ # Study direct extensions
127
+ id_relations = id_relations.clone
128
+ while id_relations.length > 0
129
+ id = id_relations.shift
130
+ id = alt_ids[id].first if alt_ids.include?(id) # NOTE: if you want to persist current ID instead source ID, re-implement this
131
+
132
+ # Handle
133
+ if current_associations.include?(id) # Check if already have been included into this expansion
134
+ struct = :circular
135
+ else
136
+ current_associations << id
137
+ if related_ids.include?(id) # Check if current already has been expanded
138
+ current_associations = current_associations | related_ids[id]
139
+ if current_associations.include?(start_id) # Check circular case
140
+ struct = :circular
141
+ [id, start_id].each{|repeated| current_associations.delete(repeated)}
142
+ end
143
+ else # Expand
144
+ related_ids[start_id] = current_associations
145
+ structExp, current_related_ids = self.get_related_ids(id, terms, target_tag, related_ids, alt_ids) # Expand current
146
+ current_associations = current_associations | current_related_ids
147
+ struct = :circular if structExp == :circular # Check struct
148
+ if current_associations.include?(start_id) # Check circular case
149
+ struct = :circular
150
+ current_associations.delete(start_id)
151
+ end
152
+ end
153
+ end
154
+ end
155
+ related_ids[start_id] = current_associations
156
+
157
+ return struct, current_associations
158
+ end
159
+
160
+
161
+ # Expand terms using a specific tag and return all extended terms into an array and
162
+ # the relationship structuture observed (hierarchical or circular). If circular structure is
163
+ # foumd, extended array will be an unique vector without starting term (no loops)
164
+ # ===== Parameters
165
+ # +terms+:: set to be used to expand
166
+ # +target_tag+:: tag used to expand
167
+ # +split_info_char+:: special regex used to split info (if it is necessary)
168
+ # +split_info_indx+:: special index to take splitted info (if it is necessary)
169
+ # +alt_ids+:: set of alternative IDs
170
+ # +obsoletes+:: integer with the number of obsolete IDs. used to calculate structure type.
171
+ # ===== Returns
172
+ # A vector with the observed structure (string) and the hash with extended terms
173
+ def self.get_related_ids_by_tag(terms:,target_tag:, alt_ids: {}, obsoletes: 0)
174
+ # Define structure type
175
+ structType = :hierarchical
176
+ related_ids = {}
177
+ terms.each do |id, tags|
178
+ # Check if target tag is defined
179
+ if !tags[target_tag].nil?
180
+ # Obtain related terms
181
+ set_structure, _ = self.get_related_ids(id, terms, target_tag, related_ids, alt_ids)
182
+ # Check structure
183
+ structType = :circular if set_structure == :circular
184
+ end
185
+ end
186
+
187
+ # Check special case
188
+ structType = :atomic if related_ids.length <= 0
189
+ structType = :sparse if related_ids.length > 0 && ((terms.length - related_ids.length - obsoletes) >= 2)
190
+ # Return type and hash with related_ids
191
+ return structType, related_ids
192
+ end
193
+
194
+
195
+ # Class method to transform string with <tag : info> into hash structure
196
+ # ===== Parameters
197
+ # +attributes+:: array tuples with info to be transformed into hash format
198
+ # ===== Returns
199
+ # Attributes stored into hash structure
200
+ def self.info2hash(attributes, split_char = " ! ", selected_field = 0)
201
+ # Load info
202
+ info_hash = {}
203
+ # Only TERMS multivalue tags (future add Typedefs and Instance)
204
+ # multivalue_tags = [:alt_id, :is_a, :subset, :synonym, :xref, :intersection_of, :union_of, :disjoint_from, :relationship, :replaced_by, :consider]
205
+ attributes.each do |tag, value|
206
+ value.gsub!(/{source=[\\\":A-Za-z0-9\/\.\-, =]+} /, '') if tag == 'is_a' # To delete "source" attributes in is_a tag of MONDO ontology
207
+ # Check
208
+ raise EncodingError, 'Info element incorrect format' if (tag.nil?) || (value.nil?)
209
+ # Prepare
210
+ tag = tag.lstrip.to_sym
211
+ value.lstrip!
212
+ value = value.split(split_char)[selected_field].to_sym if @@tags_with_trailing_modifiers.include?(tag)
213
+
214
+ # Store
215
+ query = info_hash[tag]
216
+ if !query.nil? # Tag already exists
217
+ if !query.kind_of?(Array) # Check that tag is multivalue
218
+ raise('Attempt to concatenate plain text with another. The tag is not declared as multivalue. [' + tag.to_s + '](' + query + ')')
219
+ else
220
+ query << value # Add new value to tag
221
+ end
222
+ else # New entry
223
+ if @@multivalue_tags.include?(tag)
224
+ info_hash[tag] = [value]
225
+ else
226
+ info_hash[tag] = value
227
+ end
228
+ end
229
+ end
230
+ self.symbolize_ids(info_hash)
231
+ return info_hash
232
+ end
233
+
234
+
235
+ # Class method to load an OBO format file (based on OBO 1.4 format). Specially focused on load
236
+ # the Header, the Terms, the Typedefs and the Instances.
237
+ # ===== Parameters
238
+ # +file+:: OBO file to be loaded
239
+ # ===== Returns
240
+ # Hash with FILE, HEADER and STANZAS info
241
+ def self.load_obo(file) #TODO: Send to obo_parser class
242
+ raise("File is not defined") if file.nil?
243
+ # Data variables
244
+ header = ''
245
+ stanzas = {terms: {}, typedefs: {}, instances: {}}
246
+ # Auxiliar variables
247
+ infoType = 'Header'
248
+ currInfo = []
249
+ stanzas_flags = %w[[Term] [Typedef] [Instance]]
250
+ # Read file
251
+ File.open(file).each do |line|
252
+ line.chomp!
253
+ next if line.empty?
254
+ fields = line.split(':', 2)
255
+ # Check if new instance is found
256
+ if stanzas_flags.include?(line)
257
+ header = self.process_entity(header, infoType, stanzas, currInfo)
258
+ # Update info variables
259
+ currInfo = []
260
+ infoType = line.gsub!(/[\[\]]/, '')
261
+ next
262
+ end
263
+ # Concat info
264
+ currInfo << fields
265
+ end
266
+ # Store last loaded info
267
+ header = self.process_entity(header, infoType, stanzas, currInfo) if !currInfo.empty?
268
+
269
+ # Prepare to return
270
+ finfo = {:file => file, :name => File.basename(file, File.extname(file))}
271
+ return finfo, header, stanzas
272
+ end
273
+
274
+
275
+ # Handle OBO loaded info and stores it into correct container and format
276
+ # ===== Parameters
277
+ # +header+:: container
278
+ # +infoType+:: current ontology item type detected
279
+ # +stanzas+:: container
280
+ # +currInfo+:: info to be stored
281
+ # ===== Returns
282
+ # header newly/already stored
283
+ def self.process_entity(header, infoType, stanzas, currInfo)
284
+ info = self.info2hash(currInfo)
285
+ # Store current info
286
+ if infoType.eql?('Header')
287
+ header = info
288
+ else
289
+ id = info[:id]
290
+ case infoType
291
+ when 'Term'
292
+ stanzas[:terms][id] = info
293
+ when 'Typedef'
294
+ stanzas[:typedefs][id] = info
295
+ when 'Instance'
296
+ stanzas[:instances][id] = info
297
+ end
298
+ end
299
+ return header
300
+ end
301
+
302
+
303
+ # Symboliza all values into hashs using symbolizable tags as keys
304
+ # ===== Parameters
305
+ # +item_hash+:: hash to be checked
306
+ def self.symbolize_ids(item_hash)
307
+ @@symbolizable_ids.each do |tag|
308
+ query = item_hash[tag]
309
+ if !query.nil?
310
+ if query.kind_of?(Array)
311
+ query.map!{|item| item.to_sym}
312
+ else
313
+ item_hash[tag] = query.to_sym if !query.nil?
314
+ end
315
+ end
316
+ end
317
+ end
318
+
319
+
320
+ #
321
+ # ===== Parameters
322
+ # +root+:: main term to expand
323
+ # +ontology+:: to be cutted
324
+ # +clone+:: if true, given ontology object will not be mutated
325
+ # +remove_up+:: if true, stores only the root term given an it descendants. If false, only root ancestors will be stored
326
+ # ===== Returns
327
+ # An Ontology object with terms after cut the ontology.
328
+ def self.mutate(root, ontology, clone: true, remove_up: true)
329
+ ontology = ontology.clone if clone
330
+ # Obtain affected IDs
331
+ descendants = ontology.descendants_index[root]
332
+ descendants << root # Store itself to do not remove it
333
+ # Remove unnecesary terms
334
+ ontology.stanzas[:terms] = ontology.stanzas[:terms].select{|id,v| remove_up ? descendants.include?(id) : !descendants.include?(id)}
335
+ ontology.ics = Hash[@@allowed_calcs[:ics].map{|ictype| [ictype, {}]}]
336
+ ontology.max_freqs = {:struct_freq => -1.0, :observed_freq => -1.0, :max_depth => -1.0}
337
+ ontology.dicts = {}
338
+ ontology.removable_terms = []
339
+ ontology.term_paths = {}
340
+ # Recalculate metadata
341
+ ontology.build_index
342
+ ontology.add_observed_terms_from_profiles
343
+ # Finish
344
+ return ontology
345
+ end
346
+
347
+
348
+
349
+ #############################################
350
+ # GENERAL METHODS
351
+ #############################################
352
+
353
+ # Include removable terms to current removable terms list
354
+ # ===== Parameters
355
+ # +terms+:: terms array to be concatenated
356
+ def add_removable_terms(terms)
357
+ terms = terms.map{|term| term.to_sym}
358
+ @removable_terms.concat(terms)
359
+ end
360
+
361
+
362
+ # Include removable terms to current removable terms list loading new
363
+ # terms from a one column plain text file
364
+ # ===== Parameters
365
+ # +file+:: to be loaded
366
+ def add_removable_terms_from_file(file)
367
+ File.open(excluded_codes_file).each do |line|
368
+ line.chomp!
369
+ @removable_terms << line.to_sym
370
+ end
371
+ end
372
+
373
+
374
+ # Increase observed frequency for a specific term
375
+ # ===== Parameters
376
+ # +term+:: term which frequency is going to be increased
377
+ # +increas+:: frequency rate to be increased. Default = 1
378
+ # ===== Return
379
+ # true if process ends without errors, false in other cases
380
+ def add_observed_term(term:,increase: 1.0)
381
+ # Check
382
+ raise ArgumentError, "Term given is NIL" if term.nil?
383
+ return false unless @stanzas[:terms].include?(term)
384
+ return false if @removable_terms.include?(term)
385
+ if @alternatives_index.include?(term)
386
+ alt_id = @alternatives_index[term]
387
+ @meta[alt_id] = {:ancestors => -1.0,:descendants => -1.0,:struct_freq => 0.0,:observed_freq => 0.0} if @meta[alt_id].nil?
388
+ @meta[term] = @meta[alt_id]
389
+ end
390
+ # Check if exists
391
+ @meta[term] = {:ancestors => -1.0,:descendants => -1.0,:struct_freq => 0.0,:observed_freq => 0.0} if @meta[term].nil?
392
+ # Add frequency
393
+ @meta[term][:observed_freq] = 0 if @meta[term][:observed_freq] == -1
394
+ @meta[term][:observed_freq] += increase
395
+ # Check maximum frequency
396
+ @max_freqs[:observed_freq] = @meta[term][:observed_freq] if @max_freqs[:observed_freq] < @meta[term][:observed_freq]
397
+ return true
398
+ end
399
+
400
+
401
+ # Increase the arbitrary frequency of a given term set
402
+ # ===== Parameters
403
+ # +terms+:: set of terms to be updated
404
+ # +increase+:: amount to be increased
405
+ # +transform_to_sym+:: if true, transform observed terms to symbols. Default: false
406
+ # ===== Return
407
+ # true if process ends without errors and false in other cases
408
+ def add_observed_terms(terms:, increase: 1.0, transform_to_sym: false)
409
+ # Check
410
+ raise ArgumentError, 'Terms array given is NIL' if terms.nil?
411
+ raise ArgumentError, 'Terms given is not an array' if !terms.is_a? Array
412
+ # Add observations
413
+ if transform_to_sym
414
+ checks = terms.map{|id| self.add_observed_term(term: id.to_sym,increase: increase)}
415
+ else
416
+ checks = terms.map{|id| self.add_observed_term(term: id,increase: increase)}
417
+ end
418
+ return checks
419
+ end
420
+
421
+
422
  # Compare two term sets (semantic similarity between groups of terms).
  # ===== Parameters
  # +termsA+:: set to be compared
  # +termsB+:: set to be compared
  # +sim_type+:: similitude method to be used. Default: resnik
  # +ic_type+:: ic type to be used. Default: resnik
  # +bidirectional+:: calculate bidirectional similitude. Default: true
  # +store_mica+:: cache each pairwise result into @mica_index to avoid recomputation. Default: false
  # ===== Return
  # similitude calculated
  def compare(termsA, termsB, sim_type: :resnik, ic_type: :resnik, bidirectional: true, store_mica: false)
    # Check
    raise ArgumentError, "Terms sets given are NIL" if termsA.nil? | termsB.nil?
    raise ArgumentError, "Set given is empty. Aborting similarity calc" if termsA.empty? | termsB.empty?
    micasA = []
    # Compare A -> B: keep, for each term of A, the best similarity against any term of B
    termsA.each do |tA|
      micas = []
      termsB.each do |tB|
        if store_mica
          value = @mica_index.dig(tA, tB) # cached result, if any
        else
          value = nil
        end
        if value.nil?
          value = self.get_similarity(tA, tB, type: sim_type, ic_type: ic_type)
          if store_mica
            value = true if value.nil? # We use true to save that the operation was made but there is not mica value
            add2nestHash(@mica_index, tA, tB, value)
          end
        end
        micas << value if value.class == Float # `true` sentinels and nils are skipped
      end
      if !micas.empty?
        micasA << micas.max # Obtain maximum value
      else
        micasA << 0 # no comparable pair found for this term
      end
    end
    means_sim = micasA.inject{ |sum, el| sum + el }.fdiv(micasA.size)
    # Compare B -> A and average both directions, weighted by set sizes
    if bidirectional
      means_simA = means_sim * micasA.size
      means_simB = self.compare(termsB, termsA, sim_type: sim_type, ic_type: ic_type, bidirectional: false, store_mica: store_mica) * termsB.size
      means_sim = (means_simA + means_simB).fdiv(termsA.size + termsB.size)
    end
    # Return
    return means_sim
  end
470
+
471
+ def add2nestHash(h, key1, key2, val)
472
+ query1 = h[key1]
473
+ if query1.nil?
474
+ h[key1] = {key2 => val}
475
+ else
476
+ query1[key2] = val
477
+ end
478
+ end
479
+
480
  # Compare internal stored profiles against another set of profiles. If an external set is not provided, internal profiles will be compared with itself
  # ===== Parameters
  # +external_profiles+:: set of external profiles. If nil, internal profiles will be compared with itself
  # +sim_type+:: similitude method to be used. Default: resnik
  # +ic_type+:: ic type to be used. Default: resnik
  # +bidirectional+:: calculate bidirectional similitude. Default: true
  # ===== Return
  # Similitudes calculated
  def compare_profiles(external_profiles: nil, sim_type: :resnik, ic_type: :resnik, bidirectional: true)
    profiles_similarity = {} #calculate similarity between patients profile
    profiles_ids = @profiles.keys
    if external_profiles.nil?
      comp_ids = profiles_ids
      comp_profiles = @profiles
      main_ids = comp_ids
      main_profiles = comp_profiles
    else
      comp_ids = external_profiles.keys
      comp_profiles = external_profiles
      main_ids = profiles_ids
      main_profiles = @profiles
    end
    # Compare
    @mica_index = {} # reset the pairwise term cache used by compare(store_mica: true)
    # NOTE: main_ids is consumed destructively; in the internal case main_ids and
    # comp_ids are the SAME array, so each profile pair is compared only once
    # (no self-comparison, no symmetric duplicates).
    while !main_ids.empty?
      curr_id = main_ids.shift
      current_profile = main_profiles[curr_id]
      comp_ids.each do |id|
        profile = comp_profiles[id]
        value = compare(current_profile, profile, sim_type: sim_type, ic_type: ic_type, bidirectional: bidirectional, store_mica: true)
        query = profiles_similarity[curr_id]
        if query.nil?
          profiles_similarity[curr_id] = {id => value}
        else
          query[id] = value
        end
      end
    end
    return profiles_similarity
  end
520
+
521
+
522
  # Expand alternative IDs around all already stored terms, filling
  # @alternatives_index and simulating stanzas for the alternative IDs.
  # ===== Parameters
  # +alt_tag+:: tag used to expand alternative IDs
  # ===== Returns
  # the term stanzas hash after merging the simulated alternative entries
  def get_index_alternatives(alt_tag: @@basic_tags[:alternative].last)
    # Check input
    raise('stanzas terms empty') if @stanzas[:terms].empty?
    # Take all alternative IDs
    alt_ids2add = {}
    @stanzas[:terms].each do |id, tags|
      if id == tags[:id] # Avoid simulated alternative terms
        alt_ids = tags[alt_tag]
        if !alt_ids.nil?
          # Never alias a term to itself or to a removable term
          alt_ids = alt_ids - @removable_terms - [id]
          # Update info
          alt_ids.each do |alt_term|
            @alternatives_index[alt_term] = id
            # Simulate a stanza for the alternative, sharing the official term's tags
            alt_ids2add[alt_term] = @stanzas[:terms][id] if !@stanzas[:terms].include?(alt_term)
            @ancestors_index[alt_term] = @ancestors_index[id] if !@ancestors_index[id].nil?
          end
        end
      end
    end
    @stanzas[:terms].merge!(alt_ids2add)
  end
549
+
550
+
551
  # Executes basic expansions of tags (alternatives, obsoletes and parentals) with default values,
  # normalizes every index to canonical IDs and recalculates frequencies, dictionaries and term levels.
  # ===== Returns
  # result of the last indexation step (term levels calculation)
  def build_index()
    self.get_index_obsoletes
    self.get_index_alternatives
    self.get_index_child_parent_relations
    # Normalize every stored ID to its canonical form; unknown IDs become nil and are purged
    @alternatives_index.each{|k,v| @alternatives_index[k] = self.extract_id(v)}
    @alternatives_index.compact!
    @obsoletes_index.each{|k,v| @obsoletes_index[k] = self.extract_id(v)}
    @obsoletes_index.compact!
    @ancestors_index.each{|k,v| @ancestors_index[k] = v.map{|t| self.extract_id(t)}.compact}
    @ancestors_index.compact!
    @descendants_index.each{|k,v| @descendants_index[k] = v.map{|t| self.extract_id(t)}.compact}
    @descendants_index.compact!
    self.get_index_frequencies
    self.calc_dictionary(:name)
    self.calc_dictionary(:synonym, select_regex: /\"(.*)\"/) # keep only the quoted synonym text
    self.calc_term_levels(calc_paths: true)
  end
572
+
573
+
574
  # Calculates regular frequencies based on ontology structure (using parentals),
  # storing them into @meta and updating the @max_freqs maximums.
  # ===== Returns
  # nil (warns if the ancestors index has not been built yet)
  def get_index_frequencies()
    # Check
    if @ancestors_index.empty?
      warn('ancestors_index object is empty')
    else
      # Per each term, add frequencies
      @stanzas[:terms].each do |id, tags|
        if @alternatives_index.include?(id)
          alt_id = @alternatives_index[id]
          query = @meta[alt_id] # Check if exist
          if query.nil?
            query = {ancestors: 0.0, descendants: 0.0, struct_freq: 0.0, observed_freq: 0.0}
            @meta[alt_id] = query
          end
          @meta[id] = query # alternative shares the official term's record
          # Note: alternative terms do not increase structural frequencies
        else # Official term
          query = @meta[id] # Check if exist
          if query.nil?
            query = {ancestors: 0.0, descendants: 0.0, struct_freq: 0.0, observed_freq: 0.0}
            @meta[id] = query
          end
          # Store metadata; alternative IDs are excluded from the relative counts
          query[:ancestors] = @ancestors_index.include?(id) ? @ancestors_index[id].count{|anc| !@alternatives_index.include?(anc)}.to_f : 0.0
          query[:descendants] = @descendants_index.include?(id) ? @descendants_index[id].count{|desc| !@alternatives_index.include?(desc)}.to_f : 0.0
          query[:struct_freq] = query[:descendants] + 1.0 # the term itself counts too
          # Update maximums
          @max_freqs[:struct_freq] = query[:struct_freq] if @max_freqs[:struct_freq] < query[:struct_freq]
          @max_freqs[:max_depth] = query[:descendants] if @max_freqs[:max_depth] < query[:descendants]
        end
      end
    end
  end
610
+
611
+
612
  # Expand obsoletes set and link info to their alternative IDs
  # ===== Parameters
  # +obs_tag+:: tag to be used to find obsoletes
  # +alt_tags+:: tags to find alternative IDs (if are available)
  # ===== Returns
  # nil (warns if there are no term stanzas loaded)
  def get_index_obsoletes(obs_tag: @@basic_tags[:obsolete], alt_tags: @@basic_tags[:alternative])
    if @stanzas[:terms].empty?
      warn('stanzas terms empty')
    else
      # Check obsoletes
      @stanzas[:terms].each do |id, term_tags|
        next if term_tags.nil?
        next if self.is_alternative?(id)
        query = term_tags[obs_tag]
        if !query.nil? && query == 'true' # Obsolete tag presence
          next if !@obsoletes_index[id].nil? # Already stored
          # Check if alternative value is available
          alt_ids = alt_tags.map{|alt| term_tags[alt]}.compact
          if !alt_ids.empty?
            alt_id = alt_ids.first.first #FIRST tag, FIRST id
            # Store the replacement in both indexes
            @alternatives_index[id] = alt_id
            @obsoletes_index[id] = alt_id
          end
        end
      end
    end
  end
642
+
643
+
644
  # Expand parentals set, filling the ancestors/descendants indexes and storing
  # the detected ontology structure type.
  # ===== Parameters
  # +tag+:: tag used to expand parentals
  # ===== Returns
  # nil (warns if term stanzas are not loaded; raises if the expansion fails)
  def get_index_child_parent_relations(tag: @@basic_tags[:ancestors][0])
    # Check
    if @stanzas[:terms].nil?
      warn('stanzas terms empty')
    else
      # Expand the parental relationships using the class-level helper
      structType, parentals = self.class.get_related_ids_by_tag(terms: @stanzas[:terms],
        target_tag: tag,
        alt_ids: @alternatives_index,
        obsoletes: @obsoletes_index.length)
      # Check
      raise('Error expanding parentals') if (structType.nil?) || parentals.nil?
      # Prepare ancestors structure
      anc = {}
      des = {}
      parentals.each do |id, parents|
        parents = parents - @removable_terms
        anc[id] = parents
        parents.each do |anc_id| # Add descendants (reverse relation)
          if !des.include?(anc_id)
            des[anc_id] = [id]
          else
            des[anc_id] << id
          end
        end
      end
      # Check structure: anything not atomic/sparse/circular is hierarchical
      if ![:atomic,:sparse].include? structType
        structType = structType == :circular ? :circular : :hierarchical
      end
      # Store
      @ancestors_index = anc
      @descendants_index = des
      @structureType = structType
    end
    # Finish
  end
693
+
694
+
695
+ # Find ancestors of a given term
696
+ # ===== Parameters
697
+ # +term+:: to be checked
698
+ # +filter_alternatives+:: if true, remove alternatives from final results
699
+ # ===== Returns
700
+ # an array with all ancestors of given term or false if parents are not available yet
701
+ def get_ancestors(term, filter_alternatives = false)
702
+ return self.get_familiar(term, true, filter_alternatives)
703
+ end
704
+
705
+
706
+ # Find descendants of a given term
707
+ # ===== Parameters
708
+ # +term+:: to be checked
709
+ # +filter_alternatives+:: if true, remove alternatives from final results
710
+ # ===== Returns
711
+ # an array with all descendants of given term or false if parents are not available yet
712
+ def get_descendants(term, filter_alternatives = false)
713
+ return self.get_familiar(term, false, filter_alternatives)
714
+ end
715
+
716
+
717
+ # Find ancestors/descendants of a given term
718
+ # ===== Parameters
719
+ # +term+:: to be checked
720
+ # +return_ancestors+:: return ancestors if true or descendants if false
721
+ # +filter_alternatives+:: if true, remove alternatives from final results
722
+ # ===== Returns
723
+ # an array with all ancestors/descendants of given term or nil if parents are not available yet
724
+ def get_familiar(term, return_ancestors = true, filter_alternatives = false)
725
+ # Find into parentals
726
+ familiars = return_ancestors ? @ancestors_index[term] : @descendants_index[term]
727
+ if !familiars.nil?
728
+ familiars = familiars.clone
729
+ if filter_alternatives
730
+ familiars.reject!{|fm| @alternatives_index.include?(fm)}
731
+ end
732
+ else
733
+ familiars = []
734
+ end
735
+ return familiars
736
+ end
737
+
738
+
739
+ # Obtain IC of an specific term
740
+ # ===== Parameters
741
+ # +term+:: which IC will be calculated
742
+ # +type+:: of IC to be calculated. Default: resnik
743
+ # +force+:: force re-calculate the IC. Do not check if it is already calculated
744
+ # +zhou_k+:: special coeficient for Zhou IC method
745
+ # ===== Returns
746
+ # the IC calculated
747
+ def get_IC(termRaw, type: :resnik, force: false, zhou_k: 0.5)
748
+ term = termRaw.to_sym
749
+ curr_ics = @ics[type]
750
+ # Check
751
+ raise ArgumentError, "IC type specified (#{type}) is not allowed" if !@@allowed_calcs[:ics].include?(type)
752
+ # Check if it's already calculated
753
+ return curr_ics[term] if (curr_ics.include? term) && !force
754
+ # Calculate
755
+ ic = - 1
756
+ term_meta = @meta[term]
757
+ case type # https://arxiv.org/ftp/arxiv/papers/1310/1310.8059.pdf ||| https://sci-hub.st/https://doi.org/10.1016/j.eswa.2012.01.082
758
+ ###########################################
759
+ #### STRUCTURE BASED METRICS
760
+ ###########################################
761
+ # Shortest path
762
+ # Weighted Link
763
+ # Hirst and St-Onge Measure
764
+ # Wu and Palmer
765
+ # Slimani
766
+ # Li
767
+ # Leacock and Chodorow
768
+ ###########################################
769
+ #### INFORMATION CONTENT METRICS
770
+ ###########################################
771
+ when :resnik # Resnik P: Using Information Content to Evaluate Semantic Similarity in a Taxonomy
772
+ # -log(Freq(x) / Max_Freq)
773
+ ic = -Math.log10(term_meta[:struct_freq].fdiv(@max_freqs[:struct_freq]))
774
+ when :resnik_observed
775
+ # -log(Freq(x) / Max_Freq)
776
+ ic = -Math.log10(term_meta[:observed_freq].fdiv(@max_freqs[:observed_freq]))
777
+ # Lin
778
+ # Jiang & Conrath
779
+
780
+ ###########################################
781
+ #### FEATURE-BASED METRICS
782
+ ###########################################
783
+ # Tversky
784
+ # x-similarity
785
+ # Rodirguez
786
+
787
+ ###########################################
788
+ #### HYBRID METRICS
789
+ ###########################################
790
+ when :seco, :zhou # SECO:: An intrinsic information content metric for semantic similarity in WordNet
791
+ # 1 - ( log(hypo(x) + 1) / log(max_nodes) )
792
+ ic = 1 - Math.log10(term_meta[:struct_freq]).fdiv(Math.log10(@stanzas[:terms].length - @alternatives_index.length))
793
+ if :zhou # New Model of Semantic Similarity Measuring in Wordnet
794
+ # k*(IC_Seco(x)) + (1-k)*(log(depth(x))/log(max_depth))
795
+ @ics[:seco][term] = ic # Special store
796
+ ic = zhou_k * ic + (1.0 - zhou_k) * (Math.log10(term_meta[:descendants]).fdiv(Math.log10(@max_freqs[:max_depth])))
797
+ end
798
+ when :sanchez # Semantic similarity estimation in the biomedical domain: An ontology-basedinformation-theoretic perspective
799
+ ic = -Math.log10((term_meta[:descendants].fdiv(term_meta[:ancestors]) + 1.0).fdiv(@max_freqs[:max_depth] + 1.0))
800
+ # Knappe
801
+ end
802
+ curr_ics[term] = ic
803
+ return ic
804
+ end
805
+
806
+
807
+ # Calculates and return resnik ICs (by ontology and observed frequency) for observed terms
808
+ # ===== Returns
809
+ # two hashes with resnik and resnik_observed ICs for observed terms
810
+ def get_observed_ics_by_onto_and_freq
811
+ # Chech there are observed terms
812
+ if @profiles.empty?
813
+ resnik = {}
814
+ resnik_observed = {}
815
+ else
816
+ # Calc ICs for all terms
817
+ observed_terms = @profiles.values.flatten.uniq
818
+ observed_terms.each{ |term| get_IC(term)}
819
+ observed_terms.each{ |term| get_IC(term, type: :resnik_observed)}
820
+ resnik = @ics[:resnik].select{|k,v| observed_terms.include?(k)}
821
+ resnik_observed = @ics[:resnik_observed].select{|k,v| observed_terms.include?(k)}
822
+ end
823
+ return resnik.clone, resnik_observed.clone
824
+ end
825
+
826
+
827
+ # Find the IC of the Most Index Content shared Ancestor (MICA) of two given terms
828
+ # ===== Parameters
829
+ # +termA+:: term to be cheked
830
+ # +termB+:: term to be checked
831
+ # +ic_type+:: IC formula to be used
832
+ # ===== Returns
833
+ # the IC of the MICA(termA,termB)
834
+ def get_ICMICA(termA, termB, ic_type = :resnik)
835
+ term, ic = self.get_MICA(termA, termB, ic_type)
836
+ return term.nil? ? nil : ic
837
+ end
838
+
839
+
840
  # Find the Most Informative Common Ancestor (MICA) of two given terms.
  # ===== Parameters
  # +termA+:: term to be checked
  # +termB+:: term to be checked
  # +ic_type+:: IC formula to be used
  # ===== Returns
  # the MICA(termA,termB) and its IC as a [term, ic] pair ([nil, -1.0] when no shared ancestor exists)
  def get_MICA(termA, termB, ic_type = :resnik)
    # Resolve alternative IDs to their official terms first
    termA = @alternatives_index[termA] if @alternatives_index.include?(termA)
    termB = @alternatives_index[termB] if @alternatives_index.include?(termB)
    mica = [nil,-1.0]
    # Special case
    if termA.eql?(termB)
      ic = self.get_IC(termA, type: ic_type)
      mica = [termA, ic]
    else
      # Obtain ancestors (include itselfs too)
      anc_A = self.get_ancestors(termA)
      anc_B = self.get_ancestors(termB)
      if !(anc_A.empty? && anc_B.empty?)
        anc_A << termA
        anc_B << termB
        (anc_A & anc_B).each do |anc| # Find MICA in shared ancestors
          ic = self.get_IC(anc, type: ic_type)
          mica = [anc,ic] if ic > mica[1] # keep the highest-IC common ancestor
        end
      end
    end
    return mica
  end
870
+
871
+
872
  # Calculate similarity between two given terms.
  # ===== Parameters
  # +termA+:: to be compared
  # +termB+:: to be compared
  # +type+:: similitude formula to be used
  # +ic_type+:: IC formula to be used
  # ===== Returns
  # the similarity between both terms, or nil if there is no shared ancestor
  def get_similarity(termA, termB, type: :resnik, ic_type: :resnik)
    # Check
    raise ArgumentError, "SIM type specified (#{type}) is not allowed" if !@@allowed_calcs[:sims].include?(type)
    sim = nil
    mica, sim_res = get_MICA(termA, termB, ic_type)
    if !mica.nil?
      case type
      when :resnik
        sim = sim_res # the MICA IC itself
      when :lin
        sim = (2.0 * sim_res).fdiv(self.get_IC(termA,type: ic_type) + self.get_IC(termB,type: ic_type))
      when :jiang_conrath # This is not a similarity, this is a disimilarity (distance)
        sim = (self.get_IC(termA, type: ic_type) + self.get_IC(termB, type: ic_type)) - (2.0 * sim_res)
      end
    end
    return sim
  end
897
+
898
+
899
  # Method used to load information stored into an OBO file and store it into this object.
  # ===== Parameters
  # +file+:: OBO file path to be loaded
  # +build+:: if true, launch full indexation after loading. Default: true
  def load(file, build: true)
    _, header, stanzas = self.class.load_obo(file)
    @header = header
    @stanzas = stanzas
    self.remove_removable() # drop terms flagged as removable before indexing
    self.build_index() if build
  end
911
+
912
+ #
913
+ def remove_removable()
914
+ @removable_terms.each{|removableID| @stanzas[:terms].delete(removableID)} if !@removable_terms.empty? # Remove if proceed
915
+ end
916
+
917
+
918
+ # Exports an OBO_Handler object in json format
919
+ # ===== Parameters
920
+ # +file+:: where info will be stored
921
+ def write(file)
922
+ # Take object stored info
923
+ obj_info = {header: @header,
924
+ stanzas: @stanzas,
925
+ ancestors_index: @ancestors_index,
926
+ descendants_index: @descendants_index,
927
+ alternatives_index: @alternatives_index,
928
+ obsoletes_index: @obsoletes_index,
929
+ structureType: @structureType,
930
+ ics: @ics,
931
+ meta: @meta,
932
+ special_tags: @special_tags,
933
+ max_freqs: @max_freqs,
934
+ dicts: @dicts,
935
+ profiles: @profiles,
936
+ profilesDict: @profilesDict,
937
+ items: @items,
938
+ removable_terms: @removable_terms,
939
+ term_paths: @term_paths}
940
+ # Convert to JSON format & write
941
+ File.open(file, "w") { |f| f.write obj_info.to_json }
942
+ end
943
+
944
+
945
+ def is_number? string
946
+ true if Float(string) rescue false
947
+ end
948
+
949
+
950
+ # Read a JSON file with an OBO_Handler object stored
951
+ # ===== Parameters
952
+ # +file+:: with object info
953
+ # +file+:: if true, calculate indexes. Default: true
954
+ # ===== Return
955
+ # OBO_Handler internal fields
956
+ def read(file, build: true)
957
+ # Read file
958
+ jsonFile = File.open(file)
959
+ jsonInfo = JSON.parse(jsonFile.read, :symbolize_names => true)
960
+ # Pre-process (Symbolize some hashs values)
961
+ if !jsonInfo[:header].nil?
962
+ aux = jsonInfo[:header].map do |entry,info|
963
+ if info.kind_of?(Array) && @@symbolizable_ids.include?(entry)
964
+ [entry,info.map{|item| item.to_sym}]
965
+ else
966
+ [entry,info]
967
+ end
968
+ end
969
+ jsonInfo[:header] = aux.to_h
970
+ end
971
+ jsonInfo[:stanzas][:terms].map{|id,info| self.class.symbolize_ids(info)} # STANZAS
972
+ jsonInfo[:stanzas][:typedefs].map{|id,info| self.class.symbolize_ids(info)}
973
+ jsonInfo[:stanzas][:instances].map{|id,info| self.class.symbolize_ids(info)}
974
+ # Optional
975
+ jsonInfo[:alternatives_index] = jsonInfo[:alternatives_index].map{|id,value| [id, value.to_sym]}.to_h unless jsonInfo[:alternatives_index].nil?
976
+ jsonInfo[:ancestors_index].map {|id,family_arr| family_arr.map!{|item| item.to_sym}} unless jsonInfo[:ancestors_index].nil?
977
+ jsonInfo[:descendants_index].map {|id,family_arr| family_arr.map!{|item| item.to_sym}} unless jsonInfo[:descendants_index].nil?
978
+ jsonInfo[:obsoletes_index] = jsonInfo[:obsoletes_index].map{|id,value| [id, value.to_sym]}.to_h unless jsonInfo[:obsoletes_index].nil?
979
+ jsonInfo[:dicts] = jsonInfo[:dicts].each do |flag, dictionaries|
980
+ next if dictionaries.nil?
981
+ # Special case: byTerm
982
+ dictionaries[:byTerm] = dictionaries[:byTerm].map do |term, value|
983
+ if !term.to_s.scan(/\A[-+]?[0-9]*\.?[0-9]+\Z/).empty? # Numeric dictionary
984
+ [term.to_s.to_i, value.map{|term| term.to_sym}]
985
+ elsif value.is_a? Numeric # Numeric dictionary
986
+ [term.to_sym, value]
987
+ elsif value.kind_of?(Array) && flag == :is_a
988
+ [term.to_sym, value.map{|v| v.to_sym}]
989
+ else
990
+ [term.to_sym, value]
991
+ end
992
+ end
993
+ dictionaries[:byTerm] = dictionaries[:byTerm].to_h
994
+ # By value
995
+ dictionaries[:byValue] = dictionaries[:byValue].map do |value, term|
996
+ if value.is_a? Numeric # Numeric dictionary
997
+ [value, term.to_sym]
998
+ elsif term.is_a? Numeric # Numeric dictionary
999
+ [value.to_s.to_sym, term]
1000
+ elsif flag == :is_a
1001
+ [value.to_sym, term.map{|v| v.to_sym}]
1002
+ elsif term.kind_of?(Array)
1003
+ [value.to_sym, term.map{|t| t.to_sym}]
1004
+ else
1005
+ [value.to_s, term.to_sym]
1006
+ end
1007
+ end
1008
+ dictionaries[:byValue] = dictionaries[:byValue].to_h
1009
+ end
1010
+ if !jsonInfo[:profiles].nil?
1011
+ jsonInfo[:profiles].map{|id,terms| terms.map!{|term| term.to_sym}}
1012
+ jsonInfo[:profiles].keys.map{|id| jsonInfo[:profiles][id.to_s.to_i] = jsonInfo[:profiles].delete(id) if self.is_number?(id.to_s)}
1013
+ end
1014
+ jsonInfo[:profilesDict].map{|term,ids| ids.map!{|id| id.to_sym if !id.is_a?(Numeric)}} unless jsonInfo[:profilesDict].nil?
1015
+ jsonInfo[:removable_terms] = jsonInfo[:removable_terms].map{|term| term.to_sym} unless jsonInfo[:removable_terms].nil?
1016
+ jsonInfo[:special_tags] = jsonInfo[:special_tags].each do |k, v|
1017
+ next if v.nil?
1018
+ if v.kind_of?(Array)
1019
+ jsonInfo[:special_tags][k] = v.map{|tag| tag.to_sym}
1020
+ else
1021
+ jsonInfo[:special_tags][k] = v.to_sym
1022
+ end
1023
+ end
1024
+ jsonInfo[:items].each{|k,v| jsonInfo[:items][k] = v.map{|item| item.to_sym}} unless jsonInfo[:items].nil?
1025
+ jsonInfo[:term_paths].each{|term,info| jsonInfo[:term_paths][term][:paths] = info[:paths].map{|path| path.map{|t| t.to_sym}}} unless jsonInfo[:term_paths].nil?
1026
+
1027
+ # Store info
1028
+ @header = jsonInfo[:header]
1029
+ @stanzas = jsonInfo[:stanzas]
1030
+ @ancestors_index = jsonInfo[:ancestors_index]
1031
+ @descendants_index = jsonInfo[:descendants_index]
1032
+ @alternatives_index = jsonInfo[:alternatives_index]
1033
+ @obsoletes_index = jsonInfo[:obsoletes_index]
1034
+ jsonInfo[:structureType] = jsonInfo[:structureType].to_sym unless jsonInfo[:structureType].nil?
1035
+ @structureType = jsonInfo[:structureType]
1036
+ @ics = jsonInfo[:ics]
1037
+ @meta = jsonInfo[:meta]
1038
+ @special_tags = jsonInfo[:special_tags]
1039
+ @max_freqs = jsonInfo[:max_freqs]
1040
+ @dicts = jsonInfo[:dicts]
1041
+ @profiles = jsonInfo[:profiles]
1042
+ @profilesDict = jsonInfo[:profilesDict]
1043
+ @items = jsonInfo[:items]
1044
+ @removable_terms = jsonInfo[:removable_terms]
1045
+ @term_paths = jsonInfo[:term_paths]
1046
+
1047
+ self.build_index() if build
1048
+ end
1049
+
1050
+
1051
+ # Check if a given ID is stored as term into this object
1052
+ # ===== Parameters
1053
+ # +id+:: to be checked
1054
+ # ===== Return
1055
+ # True if term is allowed or false in other cases
1056
+ def exists? id
1057
+ return stanzas[:terms].include?(id)
1058
+ end
1059
+
1060
+
1061
+ # This method assumes that a text given contains an allowed ID. And will try to obtain it splitting it
1062
+ # ===== Parameters
1063
+ # +text+:: to be checked
1064
+ # ===== Return
1065
+ # The correct ID if it can be found or nil in other cases
1066
+ def extract_id(text, splitBy: ' ')
1067
+ if self.exists?(text)
1068
+ return text
1069
+ else
1070
+ splittedText = text.to_s.split(splitBy).first.to_sym
1071
+ return self.exists?(splittedText) ? splittedText : nil
1072
+ end
1073
+ end
1074
+
1075
+
1076
  # Generate a bidirectional dictionary set using a specific tag and the terms stanzas set.
  # This function stores the calculated dictionary into the @dicts field.
  # This function stores the first value for multivalue tags.
  # This function does not handle synonyms for byValue dictionaries.
  # ===== Parameters
  # +tag+:: to be used to calculate dictionary
  # +select_regex+:: regex applied to each tag value before storing (first capture is kept)
  # +substitute_alternatives+:: flag used to indicate if alternatives must, or not, be replaced by its official ID
  # +store_tag+:: key used to store the dictionary into @dicts. If nil, the mandatory tag given will be used
  # +multiterm+:: if true, byValue allows multi-term linkage (array)
  # +self_type_references+:: if true, assumes references are between Ontology terms, so term IDs will be checked
  # ===== Return
  # void. Stores the calculated bidirectional dictionary into the @dicts container
  def calc_dictionary(tag, select_regex: nil, substitute_alternatives: true, store_tag: nil, multiterm: false, self_type_references: false)
    tag = tag.to_sym
    store_tag = tag if store_tag.nil?
    if @stanzas[:terms].empty?
      warn('Terms are not already loaded. Aborting dictionary calc')
    else
      byTerm = {}
      byValue = {}
      # Calc per term
      @stanzas[:terms].each do |term, tags|
        referenceTerm = term
        if @alternatives_index.include?(term) && substitute_alternatives # Special case
          # Use the official ID, unless it points to an obsolete term
          referenceTerm = @alternatives_index[term] if !@obsoletes_index.include?(@alternatives_index[term])
        end
        queryTag = tags[tag]
        if !queryTag.nil?
          # Pre-process: apply the selection regex when given
          if !select_regex.nil?
            if queryTag.kind_of?(Array)
              queryTag = queryTag.map{|value| value.scan(select_regex).first}
              queryTag.flatten!
            else
              queryTag = queryTag.scan(select_regex).first
            end
            queryTag.compact!
          end
          if queryTag.kind_of?(Array) # Store
            if !queryTag.empty?
              if byTerm.include?(referenceTerm)
                byTerm[referenceTerm] = (byTerm[referenceTerm] + queryTag).uniq
              else
                byTerm[referenceTerm] = queryTag
              end
              if multiterm
                queryTag.each do |value|
                  byValue[value] = [] if byValue[value].nil?
                  byValue[value] << referenceTerm
                end
              else
                queryTag.each{|value| byValue[value] = referenceTerm}
              end
            end
          else
            if byTerm.include?(referenceTerm)
              byTerm[referenceTerm] = (byTerm[referenceTerm] + [queryTag]).uniq
            else
              byTerm[referenceTerm] = [queryTag]
            end
            if multiterm
              byValue[queryTag] = [] if byValue[queryTag].nil?
              byValue[queryTag] << referenceTerm
            else
              byValue[queryTag] = referenceTerm
            end
          end
        end
      end

      # Check self-references: replace stored values with their canonical term IDs when possible
      if self_type_references
        byTerm.map do |term, references|
          corrected_references = references.map do |t|
            checked = self.extract_id(t)
            if checked.nil?
              t
            else
              byValue[checked] = byValue.delete(t) if checked != t && byValue[checked].nil? # Update in byValue
              checked
            end
          end
          byTerm[term] = corrected_references.uniq
        end
      end

      # Check order: keep the stanza's own (first) value at the front of each byTerm entry
      byTerm.map do |term,values|
        if self.exists?(term)
          referenceValue = @stanzas[:terms][term][tag]
          if !referenceValue.nil?
            if !select_regex.nil?
              if referenceValue.kind_of?(Array)
                referenceValue = referenceValue.map{|value| value.scan(select_regex).first}
                referenceValue.flatten!
              else
                referenceValue = referenceValue.scan(select_regex).first
              end
              referenceValue.compact!
            end
            if self_type_references
              if referenceValue.kind_of?(Array)
                aux = referenceValue.map{|t| self.extract_id(t)}
              else
                aux = self.extract_id(referenceValue)
              end
              aux.compact! unless aux.nil?
              referenceValue = aux unless aux.nil?
            end
            referenceValue = [referenceValue] if !referenceValue.kind_of?(Array)
            byTerm[term] = referenceValue + (values - referenceValue)
          end
        end
      end

      # Store
      @dicts[store_tag] = {byTerm: byTerm, byValue: byValue}
    end
  end
1196
+
1197
+
1198
+ # Calculates :is_a dictionary without alternatives substitution
1199
+ def calc_ancestors_dictionary
1200
+ self.calc_dictionary(:is_a, substitute_alternatives: false, self_type_references: true, multiterm: true)
1201
+ end
1202
+
1203
+
1204
+ # Translate a given value using an already calcualted dictionary
1205
+ # ===== Parameters
1206
+ # +toTranslate+:: value to be translated using dictiontionary
1207
+ # +tag+:: used to generate the dictionary
1208
+ # +byValue+:: boolean flag to indicate if dictionary must be used values as keys or terms as keys. Default: values as keys = true
1209
+ # ===== Return
1210
+ # translation
1211
+ def translate(toTranslate, tag, byValue: true)
1212
+ dict = byValue ? @dicts[tag][:byValue] : @dicts[tag][:byTerm]
1213
+ toTranslate = get_main_id(toTranslate) if !byValue
1214
+ return dict[toTranslate]
1215
+ end
1216
+
1217
+
1218
# Translates a given name into its term ID, trying official names first and synonyms second.
# ===== Parameters
# +name+:: name to be translated
# ===== Return
# translated term or nil if the name is not stored into this ontology
def translate_name(name)
  term = translate(name, :name)
  term = translate(name, :synonym) if term.nil?
  term
end
1228
+
1229
+
1230
# Translates several names, splitting them into translated and untranslatable sets.
# ===== Parameters
# +names+:: array of names to be translated
# ===== Return
# two arrays: translations, and names which couldn't be translated
def translate_names(names)
  translated = []
  rejected = []
  names.each do |name|
    translation = translate_name(name)
    translation.nil? ? rejected << name : translated << translation
  end
  return translated, rejected
end
1248
+
1249
+
1250
# Translates a given ID to its assigned (main) name.
# ===== Parameters
# +id+:: term ID to be translated
# ===== Return
# main name or nil if the ID is not included into this ontology
def translate_id(id)
  names = translate(id, :name, byValue: false)
  names&.first
end
1259
+
1260
+
1261
# Translates several IDs and returns translations and the list of IDs that could not be translated.
# ===== Parameters
# +ids+:: IDs to be translated
# ===== Return
# two arrays: translated names, and the input IDs which couldn't be translated
def translate_ids(ids)
  translated = []
  rejected = []
  ids.each do |term_id|
    tr = self.translate_id(term_id.to_sym)
    if !tr.nil?
      translated << tr
    else
      # BUG FIX: previously pushed `tr` (always nil here) instead of the failing ID,
      # so callers received an array of nils with no way to know which IDs failed.
      rejected << term_id
    end
  end
  return translated, rejected
end
1279
+
1280
+
1281
# ===== Returns
# the main ID assigned to a given ID. A non-alternative/non-obsolete ID is returned as itself.
# ===== Parameters
# +id+:: ID to be resolved
# ===== Return
# main ID related to the given ID; nil if the given ID is not an allowed term
def get_main_id(id)
  return nil if !@stanzas[:terms].include?(id)
  mainID = @alternatives_index[id]
  # IDIOM FIX: original used bitwise `&` on booleans; use boolean logic / guard instead.
  # Keep the original ID when there is no alternative mapping or the mapping is obsolete.
  return id if mainID.nil? || @obsoletes_index.include?(mainID)
  mainID
end
1294
+
1295
+
1296
# Checks a pool of IDs, returning which are official terms of this ontology.
# ===== Parameters
# +ids+:: IDs to be checked
# +substitute+:: when true, allowed IDs are normalized through get_main_id
# ===== Return
# two arrays with allowed and rejected IDs respectively
def check_ids(ids, substitute: true)
  allowed = []
  rejected = []
  ids.each do |id|
    if !@stanzas[:terms].include?(id)
      rejected << id
    elsif substitute
      allowed << get_main_id(id)
    else
      allowed << id
    end
  end
  return allowed, rejected
end
1317
+
1318
+
1319
# Stores a profile under a specific ID, replacing any profile already assigned to it.
# ===== Parameters
# +id+:: ID assigned to the profile (numeric IDs kept as-is, others symbolized)
# +terms+:: array of terms
# +substitute+:: substitute flag forwarded to check_ids
def add_profile(id, terms, substitute: true)
  warn("Profile assigned to ID (#{id}) is going to be replaced") if @profiles.include? id
  correct_terms, rejected_terms = check_ids(terms, substitute: substitute)
  warn('Given terms contains erroneus IDs. These IDs will be removed') unless rejected_terms.empty?
  profile_key = id.is_a?(Numeric) ? id : id.to_sym
  @profiles[profile_key] = correct_terms
end
1336
+
1337
+
1338
# Stores a pool of profiles.
# ===== Parameters
# +profiles+:: array/hash of profiles; arrays get sequential numeric IDs starting at 0
# +calc_metadata+:: if true, launch calc_profiles_dictionary afterwards
# +reset_stored+:: if true, remove already stored profiles first
# +substitute+:: substitute flag forwarded to check_ids via add_profile
def load_profiles(profiles, calc_metadata: true, reset_stored: false, substitute: false)
  reset_profiles if reset_stored
  if profiles.kind_of?(Array)
    profiles.each_with_index do |items, i|
      add_profile(i, items.map{|item| item.to_sym}, substitute: substitute)
    end
  else # Hash of id => profile
    clashing = profiles.keys.select{|id| @profiles.include?(id)}
    warn('Some profiles given are already stored. Stored version will be replaced') unless clashing.empty?
    profiles.each{|id, prof| add_profile(id, prof, substitute: substitute)}
  end
  # Refresh observed frequencies from the full profile set
  add_observed_terms_from_profiles(reset: true)
  calc_profiles_dictionary if calc_metadata
end
1364
+
1365
+
1366
# Internal method: drops all stored profiles and zeroes the observed frequencies.
def reset_profiles
  @profiles = {}
  @meta.each_value{|info| info[:observed_freq] = 0}
  @max_freqs[:observed_freq] = 0
end
1374
+
1375
+
1376
# ===== Returns
# the profile assigned to a given ID
# ===== Parameters
# +id+:: profile ID
# ===== Return
# the stored profile, or nil when the ID is unknown
def get_profile(id)
  @profiles[id]
end
1385
+
1386
+
1387
# ===== Returns
# the size of every stored profile
# ===== Return
# array of profile sizes
def get_profiles_sizes()
  @profiles.values.map(&:length)
end
1394
+
1395
+
1396
# ===== Returns
# mean size of stored profiles
# ===== Parameters
# +round_digits+:: number of digits to round the result. Default: 4
# ===== Returns
# mean profile size, rounded
def get_profiles_mean_size(round_digits: 4)
  total = get_profiles_sizes.sum
  total.fdiv(@profiles.length).round(round_digits)
end
1406
+
1407
+
1408
# Returns the profile size sitting at a given percentile of the size distribution.
# ===== Parameters
# +perc+:: percentile to be returned
# +increasing_sort+:: if true, sizes are ranked increasingly. Default: false
# ===== Returns
# profile length assigned to the requested percentile
def get_profile_length_at_percentile(perc=50, increasing_sort: false)
  lengths = get_profiles_sizes.sort
  lengths.reverse! unless increasing_sort
  n_profiles = lengths.length
  # Pick the length which does not overpass the selected percentile
  index = ((perc * (n_profiles - 1)).fdiv(100) - 0.5).round
  index = 0 if index < 0 # guard against negative index from the literal calc
  lengths[index]
end
1422
+
1423
+
1424
# Translates a profile into term names.
# ===== Parameters
# +prof+:: array of term IDs
# ===== Returns
# array of translated names (nil entries for IDs that are not allowed)
def profile_names(prof)
  prof.map{|term| translate_id(term)}
end
1432
+
1433
+
1434
# Translates a set of profiles into their term-name versions.
# ===== Parameters
# +profs+:: array of profiles (stored profiles used when empty)
# +asArray+:: true => array of name arrays; false => hash of id => names
# ===== Returns
# translated profiles
def translate_profiles_ids(profs = [], asArray: true)
  profs = @profiles if profs.empty?
  profs = profs.each_with_index.map{|terms, index| [index, terms]}.to_h if profs.kind_of?(Array)
  names_by_id = {}
  profs.each{|id, terms| names_by_id[id] = profile_names(terms)}
  asArray ? names_by_id.values : names_by_id
end
1446
+
1447
+
1448
# Registers every term of every stored profile as "observed".
# ===== Parameters
# +reset+:: if true, reset already stored observed freqs (to -1) before recalculating
def add_observed_terms_from_profiles(reset: false)
  @meta.each_value{|freqs| freqs[:observed_freq] = -1} if reset
  @profiles.each_value{|terms| add_observed_terms(terms: terms)}
end
1455
+
1456
+
1457
# Gets a term frequency from the metadata table.
# ===== Parameters
# +term+:: term to be checked
# +type+:: frequency kind. Allowed: [:struct_freq, :observed_freq]
# ===== Returns
# the requested frequency, or nil when the term is not allowed
def get_frequency(term, type: :struct_freq)
  meta_info = @meta[term]
  meta_info.nil? ? nil : meta_info[type]
end
1467
+
1468
+
1469
# Gets the structural frequency of a given term.
# ===== Parameters
# +term+:: term to be checked
# ===== Returns
# structural frequency, or nil when the term is not allowed
def get_structural_frequency(term)
  get_frequency(term, type: :struct_freq)
end
1477
+
1478
+
1479
# Gets the observed frequency of a given term.
# ===== Parameters
# +term+:: term to be checked
# ===== Returns
# observed frequency, or nil when the term is not allowed
def get_observed_frequency(term)
  get_frequency(term, type: :observed_freq)
end
1487
+
1488
+
1489
# Calculates frequencies of stored profiles terms
# ===== Parameters
# +ratio+:: if true, frequencies are returned as ratios (count / number of profiles)
# +literal+:: if true, counts profile terms verbatim; otherwise relies on @meta observed freqs (alternatives already translated)
# +asArray+:: if true, returns [term, freq] tuples sorted by decreasing frequency; otherwise a hash
# +translate+:: if true, term IDs are replaced by their names when a translation exists
# ===== Returns
# stored profiles terms frequencies
def get_profiles_terms_frequency(ratio: true, literal: true, asArray: true, translate: true)
  n_profiles = @profiles.length
  if literal
    # Count literal occurrences of each term across all profiles
    freqs = {}
    @profiles.each do |id, terms|
      terms.each do |literalTerm|
        if freqs.include?(literalTerm)
          freqs[literalTerm] += 1
        else
          freqs[literalTerm] = 1
        end
      end
    end
    if (ratio || translate)
      # Snapshot keys first: translation mutates the hash (delete + re-insert under name)
      aux_keys = freqs.keys
      aux_keys.each do |term|
        freqs[term] = freqs[term].fdiv(n_profiles) if ratio
        if translate
          tr = self.translate_id(term)
          freqs[tr] = freqs.delete(term) if !tr.nil?
        end
      end
    end
    if asArray
      freqs = freqs.map{|term, freq| [term, freq]}
      freqs.sort!{|h1, h2| h2[1] <=> h1[1]} # decreasing frequency
    end
  else # Freqs translating alternatives
    # Use metadata observed frequencies (only terms actually observed)
    freqs = @meta.select{|id, freqs| freqs[:observed_freq] > 0}.map{|id, freqs| [id, ratio ? freqs[:observed_freq].fdiv(n_profiles) : freqs[:observed_freq]]}
    # NOTE(review): this to_h is redundant — the branch below converts to a hash again
    # after the (array-producing) translate step; kept as-is to preserve behavior.
    freqs = freqs.to_h if !asArray
    if translate
      freqs = freqs.map do |term, freq|
        tr = self.translate_id(term)
        tr.nil? ? [term, freq] : [tr, freq]
      end
    end
    if asArray
      freqs = freqs.map{|term, freq| [term, freq]}
      freqs.sort!{|h1, h2| h2[1] <=> h1[1]} # decreasing frequency
    else
      freqs = freqs.to_h
    end
  end
  return freqs
end
1542
+
1543
+
1544
# Cleans a profile, returning the terms kept and the ancestor terms removed.
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays: the cleaned profile and the removed (redundant ancestor) terms
def remove_ancestors_from_profile(prof)
  all_ancestors = prof.flat_map{|term| get_ancestors(term)}.uniq
  redundant = prof.select{|term| all_ancestors.include?(term)}
  return prof - redundant, redundant
end
1554
+
1555
+
1556
# Removes alternative IDs whose official ID is also present in the profile.
# DOES NOT remove synonyms or alternative IDs of the same official ID.
# ===== Parameters
# +prof+:: array of terms to be checked
# ===== Returns
# two arrays: the cleaned profile and the removed terms
def remove_alternatives_from_profile(prof)
  redundant = prof.select do |term|
    @alternatives_index.include?(term) && prof.include?(@alternatives_index[term])
  end
  return prof - redundant, redundant
end
1566
+
1567
+
1568
# Removes redundant ancestors (and optionally alternatives whose official term is present) from a profile.
# ===== Parameters
# +profile+:: profile to be cleaned
# +remove_alternatives+:: if true, also drop alternative IDs whose official term is present
# ===== Returns
# cleaned profile
def clean_profile(profile, remove_alternatives: true)
  warn('Estructure is circular, behaviour could not be which is expected') if @structureType == :circular
  cleaned, _ = remove_ancestors_from_profile(profile)
  cleaned, _ = remove_alternatives_from_profile(cleaned) if remove_alternatives
  cleaned
end
1584
+
1585
# Aggressive profile cleaning: validates IDs, drops obsoletes, optionally keeps only
# descendants of options[:term_filter], then applies the standard clean_profile.
def clean_profile_hard(profile, options = {})
  profile, _ = check_ids(profile)
  profile = profile.reject{|t| is_obsolete?(t)}
  term_filter = options[:term_filter]
  profile.select!{|term| get_ancestors(term).include?(term_filter)} unless term_filter.nil?
  clean_profile(profile.uniq)
end
1594
+
1595
# Removes terms from a profile using hierarchical info and a score set: among related
# terms present in the profile, only the best-scored one is kept.
# ===== Parameters
# +profile+:: profile to be cleaned
# +scores+:: hash of term => numerical score
# +byMax+:: if true keep the maximum-scored term, otherwise the minimum
# +remove_without_score+:: if true, terms without score are removed. Default: true
# ===== Returns
# cleaned profile
def clean_profile_by_score(profile, scores, byMax: true, remove_without_score: true)
  ordered_scores = scores.sort_by{|_term, score| score}.to_h
  keep = profile.map do |term|
    unless ordered_scores.include?(term)
      next remove_without_score ? nil : term
    end
    relatives = [get_ancestors(term), get_descendants(term)].flatten
    in_profile = relatives.select{|relative| profile.include?(relative)}
    if in_profile.empty?
      term # no related terms in the profile: keep as-is
    else
      in_profile << term
      # ordered_scores is sorted ascending, so last = max score, first = min score
      candidates = ordered_scores.select{|t, _score| in_profile.include?(t)}.to_h
      byMax ? candidates.keys.last : candidates.keys.first
    end
  end
  keep.compact.uniq
end
1624
+
1625
+
1626
# Applies clean_profile to every stored profile.
# ===== Parameters
# +store+:: if true, cleaned profiles replace the stored ones
# +remove_alternatives+:: forwarded to clean_profile
# ===== Returns
# a hash with cleaned profiles
def clean_profiles(store: false, remove_alternatives: true)
  cleaned_profiles = {}
  @profiles.each do |id, terms|
    cleaned_profiles[id] = clean_profile(terms, remove_alternatives: remove_alternatives)
  end
  @profiles = cleaned_profiles if store
  cleaned_profiles
end
1638
+
1639
+
1640
# Calculates, per stored profile, how many redundant parental terms it contains.
# ===== Returns
# array with the parental count of each profile
def parentals_per_profile
  cleaned = clean_profiles(remove_alternatives: false)
  @profiles.map{|id, terms| terms.length - cleaned[id].length}
end
1648
+
1649
+
1650
# Pairs each profile size with its parental (redundancy) count, sorted by decreasing size.
# ===== Returns
# two parallel arrays: profile sizes and parental counts per profile
def get_profile_redundancy()
  profile_sizes = self.get_profiles_sizes
  parental_terms_per_profile = self.parentals_per_profile# clean_profiles
  # NOTE(review): parentals_per_profile yields plain Integers, so `item[0]` invokes
  # Integer#[] (bit 0 of the count) — this looks suspicious; confirm whether the raw
  # count (`item` itself) was intended here.
  parental_terms_per_profile = parental_terms_per_profile.map{|item| item[0]}
  profile_sizes, parental_terms_per_profile = profile_sizes.zip(parental_terms_per_profile).sort_by{|i| i.first}.reverse.transpose
  return profile_sizes, parental_terms_per_profile
end
1657
+
1658
# Builds, per profile, the childs table of its terms and measures which fraction of
# all profile terms have more specific childs.
# ===== Returns
# hash of id => childs table, and the ratio of terms having more specific childs
def compute_term_list_and_childs()
  suggested_childs = {}
  total_terms = 0
  terms_with_more_specific_childs = 0
  @profiles.each do |id, terms|
    total_terms += terms.length
    childs_table = get_childs_table(terms, true)
    # Exclude records whose descendants list is empty
    terms_with_more_specific_childs += childs_table.count{|record| !record.last.empty?}
    suggested_childs[id] = childs_table
  end
  return suggested_childs, terms_with_more_specific_childs.fdiv(total_terms)
end
1670
+
1671
# Calculates the mean IC of a given profile.
# ===== Parameters
# +prof+:: profile to be checked
# +ic_type+:: IC type to be used
# +zhou_k+:: special coefficient for the Zhou IC method
# ===== Returns
# mean IC of the profile
def get_profile_mean_IC(prof, ic_type: :resnik, zhou_k: 0.5)
  ics = prof.map{|term| get_IC(term, type: ic_type, zhou_k: zhou_k)}
  ics.inject(0){|acc, value| acc + value}.fdiv(prof.length)
end
1681
+
1682
+
1683
# Calculates resnik (ontology) and resnik_observed mean ICs for all stored profiles.
# ===== Returns
# two hashes (id => IC) for structural resnik and observed resnik respectively
def get_profiles_resnik_dual_ICs
  struct_ics = {}
  observ_ics = {}
  @profiles.each do |id, terms|
    struct_ics[id] = get_profile_mean_IC(terms, ic_type: :resnik)
    observ_ics[id] = get_profile_mean_IC(terms, ic_type: :resnik_observed)
  end
  return struct_ics.clone, observ_ics.clone
end
1695
+
1696
+
1697
# Calculates ontology structural levels for all ontology terms
# ===== Parameters
# +calc_paths+:: calculates term paths if it's not already calculated
# +shortest_path+:: if true, level is calculated with shortest path, largest path will be used in other cases
def calc_term_levels(calc_paths: false, shortest_path: true)
  if @term_paths.empty?
    if calc_paths
      self.calc_term_paths
    else
      warn('Term paths are not already loaded. Aborting dictionary calc')
    end
  end
  if !@term_paths.empty?
    byTerm = {}   # term => level
    byValue = {}  # level => [terms]
    # Calc per term
    @term_paths.each do |term, info|
      level = shortest_path ? info[:shortest_path] : info[:largest_path]
      if level.nil?
        level = -1 # missing path length is encoded as -1
      else
        level = level.round(0)
      end
      byTerm[term] = level
      queryLevels = byValue[level]
      if queryLevels.nil?
        byValue[level] = [term]
      else
        byValue[level] << term
      end
    end
    # The two maps are stored deliberately swapped (see note): :byTerm keys are levels.
    @dicts[:level] = {byTerm: byValue, byValue: byTerm} # Note: in this case, value has multiplicity and term is unique value
    # Update maximum depth
    @max_freqs[:max_depth] = byValue.keys.max
  end
end
1733
+
1734
+
1735
# Checks whether a given term is marked as obsolete.
def is_obsolete? term
  @obsoletes_index.include?(term)
end
1739
+
1740
# Checks whether a given term is marked as an alternative ID.
def is_alternative? term
  @alternatives_index.include?(term)
end
1744
+
1745
# Find paths of a term following it ancestors and stores all possible paths for it and it's parentals.
# Also calculates paths metadata and stores into @term_paths
# ===== Parameters
# +only_main_terms+:: when false, obsolete/alternative IDs are aliased to their main term's path data
def calc_term_paths(only_main_terms=false)
  self.calc_ancestors_dictionary if @dicts[:is_a].nil? # Calculate direct parentals dictionary if it's not already calculated
  visited_terms = {} # PEDRO: To keep track of visited data, hash accesions are fast than array includes. I don't understant why use this variable instead of check @term_paths to see if the data is calculated
  @term_paths = {}
  # Paths only make sense for DAG-like structures
  if [:hierarchical, :sparse].include? @structureType
    @stanzas[:terms].each do |term, t_attributes|
      if !only_main_terms && (self.is_obsolete?(term) || self.is_alternative?(term)) # Special case (obsoletes)
        special_term = term
        term = self.is_obsolete?(term) ? @obsoletes_index[term] : @alternatives_index[term]
        @term_paths[term] = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} if !@term_paths.include?(term)
        # Alias entry: shares the SAME hash object as the main term, so metadata stays in sync
        @term_paths[special_term] = @term_paths[term]
        visited_terms[special_term] = true
      end
      if !visited_terms.include?(term)
        # PEDRO: This code is very similar to expand_path method, but cannot be replaced by it (test fail). We must work to use this method here
        path_attr = @term_paths[term]
        if path_attr.nil?
          path_attr = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []} # create new path data
          @term_paths[term] = path_attr #save path data container
        end
        parentals = @dicts[:is_a][:byTerm][term]
        if parentals.nil?
          # Root term: its only path is itself
          path_attr[:paths] << [term]
        else
          # Prepend this term to every path of each direct parental
          parentals.each do |direct_parental|
            self.expand_path(direct_parental)
            new_paths = @term_paths[direct_parental][:paths]
            path_attr[:paths].concat(new_paths.map{|path| path.clone.unshift(term)})
          end
        end
        # NOTE(review): `anc` local is unused; the each is run for its side effect of marking ancestors visited
        anc = @ancestors_index[term].each{|anc| visited_terms[anc] = true} if @ancestors_index.include?(term)
        visited_terms[term] = true
      end
      # Update metadata
      path_attr = @term_paths[term]
      path_attr[:total_paths] = path_attr[:paths].length
      paths_sizes = path_attr[:paths].map{|path| path.length}
      path_attr[:largest_path] = paths_sizes.max
      path_attr[:shortest_path] = paths_sizes.min
    end
  else
    warn('Ontology structure must be hierarchical or sparse to calculate term levels. Aborting paths calculation')
  end
end
1791
+
1792
+
1793
# Recursive function whic finds paths of a term following it ancestors and stores all possible paths for it and it's parentals
# Results are memoized in @term_paths; terms already present there are skipped.
# ===== Parameters
# +curr_term+:: current visited term
def expand_path(curr_term)
  if !@term_paths.include?(curr_term)
    path_attr = {total_paths: 0, largest_path: 0, shortest_path: 0, paths: []}
    # Register BEFORE recursing so the entry acts as the memoization slot
    @term_paths[curr_term] = path_attr
    direct_parentals = @dicts[:is_a][:byTerm][curr_term]
    if direct_parentals.nil? # No parents :: End of recurrence
      path_attr[:paths] << [curr_term]
    else # Expand and concat
      direct_parentals.each do |ancestor|
        path_attr_parental = @term_paths[ancestor]
        if path_attr_parental.nil? # Calculate new paths
          self.expand_path(ancestor)
          new_paths = @term_paths[ancestor][:paths]
        else # Use direct_parental paths already calculated
          new_paths = path_attr_parental[:paths]
        end
        # Each parental path, prefixed with the current term, becomes a path of this term
        path_attr[:paths].concat(new_paths.map{|path| path.clone.unshift(curr_term)})
      end
    end
  end
end
1818
+
1819
+
1820
# Gets the calculated ontology levels.
# ===== Returns
# clone of the level dictionary (Key: level; Value: array of terms)
def get_ontology_levels
  @dicts[:level][:byTerm].clone # :byTerm here is Key::Level, Value::Terms
end
1826
+
1827
+
1828
# Gets the ontology level of a specific term.
# ===== Returns
# term level (nil when unknown)
def get_term_level(term)
  @dicts[:level][:byValue][term]
end
1834
+
1835
# Returns the parental path of a term: nil when the term is not found,
# [] when the term exists but has no paths, otherwise the chosen path
# without the term itself (optionally truncated down to a given level).
def get_parental_path(term, which_path = :shortest_path, level = 0)
  path_attr = @term_paths[term]
  return nil if path_attr.nil?
  all_paths = path_attr[:paths]
  return [] if all_paths.empty?
  target_length = path_attr[which_path]
  path = all_paths.find{|pt| pt.length == target_length}.clone
  # Keep the term and its ascendants only until the requested level
  path = path[0..(target_length - level)] if level > 0
  path.shift # Discard the term itself
  path
end
1855
+
1856
# Returns the ontology levels covered by the stored profiles' terms.
# Each term is repeated as many times as it occurs (once per term when uniq).
# ===== Returns
# hash of level => array of term IDs (levels with no profile terms are omitted)
def get_ontology_levels_from_profiles(uniq = true) # TODO: remove uniq and check dependencies
  profiles_terms = @profiles.values.flatten
  profiles_terms.uniq! if uniq
  term_counts = Hash.new(0)
  profiles_terms.each{|term| term_counts[term] += 1}
  levels_filtered = {}
  @dicts[:level][:byTerm].each do |level, terms|
    present = terms.flat_map{|t| profiles_terms.include?(t) ? Array.new(term_counts[t], t) : []}
    levels_filtered[level] = present unless present.empty?
  end
  levels_filtered
end
1874
+
1875
# Builds per-level distribution tables comparing the whole ontology against the cohort
# (stored profiles), both weighted and uniq.
# ===== Returns
# [level, ontology_terms, cohort_terms] rows, and
# [level, %ontology, %cohort, %uniq_cohort] rows, both sorted by level
def get_profile_ontology_distribution_tables
  cohort_levels = get_ontology_levels_from_profiles(false)
  uniq_cohort_levels = get_ontology_levels_from_profiles
  full_levels = get_ontology_levels
  total_ontology_terms = full_levels.values.flatten.length
  total_cohort_terms = cohort_levels.values.flatten.length
  total_uniq_cohort_terms = uniq_cohort_levels.values.flatten.length

  ontology_levels = []
  distribution_percentage = []
  full_levels.each do |level, terms|
    cohort_terms = cohort_levels[level]
    uniq_cohort_terms = uniq_cohort_levels[level]
    if cohort_terms.nil? || uniq_cohort_terms.nil?
      num = 0
      u_num = 0
    else
      num = cohort_terms.length
      u_num = uniq_cohort_terms.length
    end
    ontology_levels << [level, terms.length, num]
    distribution_percentage << [
      level,
      (terms.length.fdiv(total_ontology_terms) * 100).round(3),
      (num.fdiv(total_cohort_terms) * 100).round(3),
      (u_num.fdiv(total_uniq_cohort_terms) * 100).round(3)
    ]
  end
  ontology_levels.sort_by!(&:first)
  distribution_percentage.sort_by!(&:first)
  return ontology_levels, distribution_percentage
end
1907
+
1908
# Computes the dataset specificity index: the weighted contribution of levels deeper
# than the ontology's modal level divided by the contribution of shallower levels.
# ===== Parameters
# +mode+:: 'uniq' uses the uniq-cohort distribution column, 'weigthed' the weighted one
# ===== Returns
# the specificity index (0 when no level lies below the modal level)
def get_dataset_specifity_index(mode)
  ontology_levels, distribution_percentage = get_profile_ontology_distribution_tables
  # Column of distribution_percentage holding the observed cohort distribution
  if mode == 'uniq'
    observed_distribution = 3
  elsif mode == 'weigthed'
    observed_distribution = 2
  end
  # Find the level where the ontology itself concentrates most terms (modal level)
  max_terms = distribution_percentage.map{|row| row[1]}.max
  maxL = nil
  distribution_percentage.each do |level_info|
    maxL = level_info.first if level_info[1] == max_terms
  end
  # Per level: observed percentage minus ontology percentage; keep positive excesses only
  diffL = distribution_percentage.map{|l| [l[0], l[observed_distribution] - l[1]]}
  diffL.select!{|dL| dL.last > 0}
  lowSection = diffL.select{|dL| dL.first <= maxL}
  highSection = diffL.select{|dL| dL.first > maxL}
  return 0 if highSection.empty?
  # CLEANUP: removed unused locals (accumulated_weigth, accumulated_weigthed_diffL)
  hss = get_weigthed_level_contribution(highSection, maxL, ontology_levels.length - maxL)
  lss = get_weigthed_level_contribution(lowSection, maxL, maxL)
  hss.fdiv(lss)
end
1936
+
1937
# Averages the level differences of a section, weighting each level by its distance
# to the reference level maxL (levels at or above maxL get distance+1).
# ===== Parameters
# +section+:: array of [level, diff] pairs
# +maxL+:: reference (modal) level
# +nLevels+:: number of levels used to normalize the accumulated weight
# ===== Returns
# the weighted contribution of the section
def get_weigthed_level_contribution(section, maxL, nLevels)
  weighted_total = 0
  section.each do |level, diff|
    distance = maxL - level
    weight = distance >= 0 ? distance + 1 : distance.abs
    weighted_total += diff * weight
  end
  weighted_total.fdiv(nLevels)
end
1951
+
1952
+
1953
# Calculates the profiles dictionary (Key: term; Value: IDs of profiles containing it)
# and stores it into @profilesDict. Aborts with a warning when no profiles are loaded.
def calc_profiles_dictionary
  if @profiles.empty?
    warn('Profiles are not already loaded. Aborting dictionary calc')
  else
    byTerm = {} # term => [profile IDs]
    @profiles.each do |id, terms|
      terms.each do |term|
        (byTerm[term] ||= []) << id
      end
    end
    @profilesDict = byTerm
  end
end
1972
+
1973
+
1974
# Gets the calculated profiles dictionary.
# ===== Return
# clone of the term => profiles dictionary
def get_terms_linked_profiles
  @profilesDict.clone
end
1980
+
1981
+
1982
# Gets the profiles related to a given term.
# ===== Parameters
# +term+:: term to be checked
# ===== Returns
# IDs of the profiles which contain the given term
def get_term_linked_profiles(term)
  @profilesDict[term]
end
1990
+
1991
+
1992
# Builds a metainfo table for a set of terms.
# ===== Parameters
# +terms+:: IDs to be expanded
# +filter_alternatives+:: flag forwarded to get_descendants
# ===== Returns
# array of pairs [[TermID, TermName], [[ChildID, ChildName], ...]]
def get_childs_table(terms, filter_alternatives = false)
  terms.map do |t|
    childs = get_descendants(t, filter_alternatives).map{|child| [child, translate_id(child)]}
    [[t, translate_id(t)], childs]
  end
end
2005
+
2006
+
2007
# Store specific relations hash given into ITEMS structure
# ===== Parameters
# +relations+:: hash (term => items) to be stored
# +remove_old_relations+:: substitute ITEMS structure instead of merge new relations
# +expand+:: if true, already stored keys will be updated with the unique union of both sets
def load_item_relations_to_terms(relations, remove_old_relations = false, expand = false)
  @items = {} if remove_old_relations
  # Unknown terms are reported but stored anyway
  if !relations.select{|term, items| !@stanzas[:terms].include?(term)}.empty?
    warn('Some terms specified are not stored into this ontology. These not correct terms will be stored too')
  end
  if !remove_old_relations
    if !relations.select{|term, items| @items.include?(term)}.empty? && !expand
      warn('Some terms given are already stored. Stored version will be replaced')
    end
  end
  if expand
    # Type-aware merge (arrays/hashes united instead of overwritten)
    # CLEANUP: removed the large commented-out manual-merge block superseded by concatItems
    @items = self.concatItems(@items, relations)
  else
    @items.merge!(relations)
  end
end
2049
+
2050
# Internal helper that concatenates two items.
# Supported combinations (anything else yields nil):
#   Array + Array   -> unique union (Array)
#   Hash  + Hash    -> merge, concatenating values of shared keys recursively
#   other + Array   -> unique union with the single element prepended
#   other + other   -> unique pair (Array)
# ===== Parameters
# +itemA+:: item to be concatenated
# +itemB+:: item to be concatenated
# ===== Returns
# the concatenated object, or nil for unsupported combinations
def concatItems(itemA, itemB)
  if itemA.kind_of?(Array) && itemB.kind_of?(Array)
    (itemA + itemB).uniq
  elsif itemA.kind_of?(Hash) && itemB.kind_of?(Hash)
    itemA.merge(itemB){|_key, oldV, newV| self.concatItems(oldV, newV)}
  elsif itemB.kind_of?(Array)
    ([itemA] + itemB).uniq
  elsif ![Array, Hash].include?(itemB.class)
    [itemA, itemB].uniq
  end
end
2083
+
2084
+
2085
# Assign a dictionary already calculated as a items set.
# ===== Parameters
# +dictID+:: dictionary ID to be stored (:byTerm will be used)
# +remove_old_relations+:: if true, current ITEMS are discarded before loading
def set_items_from_dict(dictID, remove_old_relations = false)
  @items = {} if remove_old_relations
  if !@dicts[dictID].nil?
    # FIX: original called non-mutating `merge` and discarded the result,
    # so nothing was ever stored; `merge!` updates @items in place
    @items.merge!(@dicts[dictID][:byTerm])
  else
    warn('Specified ID is not calculated. Dict will not be added as a items set')
  end
end
2096
+
2097
+
2098
# This method computes child similarity and imputes items to their parentals. Item keys must be terms allowed by this ontology.
# Similarity is calculated by exact text match unless an ontology object is provided; in that case MICAs are used.
# ===== Parameters
# +ontology+:: (Optional) ontology object which the given items belong to
# +minimum_childs+:: minimum of childs needed to infer relations to parental. Default: 2
# +clean_profiles+:: if true, clean_profiles ontology method will be used over inferred profiles. Only if an ontology object is provided
# ===== Returns
# void and update items object
def expand_items_to_parentals(ontology: nil, minimum_childs: 2, clean_profiles: true)
  # Check item keys: only keys recognized by this ontology are processed
  if @items.empty?
    warn('Items have been not provided yet')
    return nil
  end
  targetKeys = @items.keys.select{|k| self.exists?(k)}
  if targetKeys.length == 0
    warn('Any item key is allowed')
    return nil
  elsif targetKeys.length < @items.keys.length
    warn('Some item keys are not allowed')
  end

  # Expand the working set with all ancestors of valid keys (alternatives filtered)
  targetKeys << targetKeys.map{|t| self.get_ancestors(t, true)}
  targetKeys.flatten!
  targetKeys.uniq!

  # Obtain levels, sorted deepest-first (go from leaves to roots)
  levels = targetKeys.map{|term| self.get_term_level(term)}
  levels.compact!
  levels.uniq!
  levels.sort!
  levels.reverse!
  levels.shift # Leaves are not expandable

  # Expand from leaves to roots, one level at a time
  levels.map do |lvl|
    curr_keys = targetKeys.select{|k| self.get_term_level(k) == lvl}
    curr_keys.map do |term_expand|
      to_infer = []
      # Obtain descendants that actually carry items
      childs = self.get_descendants(term_expand,true).select{|t| !@items[t].nil?}
      # Expand
      if childs.length > 0 && minimum_childs == 1 # Special case: any single child is enough, pool all items
        to_infer = childs.map{|c| @items[c]}.flatten.compact.uniq
      elsif childs.length >= minimum_childs
        # Vote counter: item => support score across child pairs
        to_infer = Hash.new(0)
        # Compare every pair of children once
        while childs.length > 1
          curr_term = childs.shift
          childs.each do |compare_term|
            pivot_items = @items[curr_term]
            compare_items = @items[compare_term]
            if ontology.nil? # Exact match: shared item gets +2 (one per side of the pair)
              pivot_items.map do |pitem|
                if compare_items.include?(pitem)
                  to_infer[pitem] += 2
                end
              end
            else # Find MICAs: best informative common ancestor in each direction
              local_infer = Hash.new(0)
              pivot_items.map do |pitem|
                micas = compare_items.map{|citem| ontology.get_MICA(pitem, citem)}
                maxmica = micas[0]
                micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
                local_infer[maxmica.first] += 1
              end
              compare_items.map do |citem|
                micas = pivot_items.map{|pitem| ontology.get_MICA(pitem, citem)}
                maxmica = micas[0]
                micas.each{|mica| maxmica = mica if mica.last > maxmica.last}
                local_infer[maxmica.first] += 1
              end
              # Only MICAs supported from both directions (freq >= 2) count
              local_infer.each{|t,freq| to_infer[t] += freq if freq >= 2}
            end
          end
        end
        # Filter infer: keep items supported by at least minimum_childs votes
        to_infer = to_infer.select{|k,v| v >= minimum_childs}
      end
      # Infer: attach the surviving items to the parent term
      if to_infer.length > 0
        @items[term_expand] = [] if @items[term_expand].nil?
        if to_infer.kind_of?(Array)
          @items[term_expand] = (@items[term_expand] + to_infer).uniq
        else
          @items[term_expand] = (@items[term_expand] + to_infer.keys).uniq
        end
        @items[term_expand] = ontology.clean_profile(@items[term_expand]) if clean_profiles && !ontology.nil?
      elsif !@items.include?(term_expand)
        # Nothing inferred and no own items: drop from the working set
        targetKeys.delete(term_expand)
      end
    end
  end
end
2193
+
2194
+
2195
# Return direct ancestors/descendants of a given term
# ===== Parameters
# +term+:: which are requested
# +relation+:: can be :ancestor or :descendant
# +remove_alternatives+:: if true, alternatives will be removed
# ===== Returns
# Direct ancestors/descendants of given term or nil if any error occurs
def get_direct_related(term, relation, remove_alternatives: false)
  if @dicts[:is_a].nil?
    warn("Hierarchy dictionary is not already calculated. Returning nil")
    return nil
  end
  # Map requested relation onto the direction of the is_a dictionary
  direction = { ancestor: :byTerm, descendant: :byValue }[relation]
  if direction.nil?
    warn('Relation type not allowed. Returning nil')
    return nil
  end
  related = @dicts[:is_a][direction][term]
  return related if related.nil?
  related, _ = remove_alternatives_from_profile(related) if remove_alternatives
  related
end
2222
+
2223
+
2224
# Return direct ancestors of a given term
# ===== Parameters
# +term+:: which ancestors are requested
# +remove_alternatives+:: if true, alternatives will be removed
# ===== Returns
# Direct ancestors of given term or nil if any error occurs
def get_direct_ancentors(term, remove_alternatives: false)
  return self.get_direct_related(term, :ancestor, remove_alternatives: remove_alternatives)
end
# Correctly spelled name; the misspelled `get_direct_ancentors` is kept for
# backward compatibility with existing callers.
alias get_direct_ancestors get_direct_ancentors
2233
+
2234
# Return direct descendants of a given term
# ===== Parameters
# +term+:: which descendants are requested
# +remove_alternatives+:: if true, alternatives will be removed
# ===== Returns
# Direct descendants of given term or nil if any error occurs
def get_direct_descendants(term, remove_alternatives: false)
  get_direct_related(term, :descendant, remove_alternatives: remove_alternatives)
end
2243
+
2244
# Iterate over every non-alternative term of the ontology.
# ===== Parameters
# +att+:: if true, yields both the term id and its tags; otherwise only the id
def each(att = false)
  @stanzas[:terms].each do |id, tags|
    next if @alternatives_index.include?(id) # skip alt_id/obsolete aliases
    att ? yield(id, tags) : yield(id)
  end
end
2254
+
2255
# Collect [id, name, level] triplets for every non-alternative term.
# ===== Returns
# Array of [term_code, translated_name, term_level]
def list_term_attributes
  attributes = []
  each do |code|
    attributes.push([code, translate_id(code), get_term_level(code)])
  end
  attributes
end
2262
+
2263
+ #============================================================================
2264
+ #============================================================================
2265
+
2266
# Experimental enrichment entry point (original author marked it as unchecked).
# Builds term/level structures from ITEMS and runs the selected enrichment mode.
# ===== Parameters
# +external_item_list+:: items to test against the stored ITEMS
# +total_items+:: total universe size for the fisher test
# +mode+:: :elim or :weight algorithm selector
# +thresold+:: p-value cutoff (used by :elim only)
# ===== Returns
# Array of [term, pval] pairs (empty for unknown modes)
def compute_relations_to_items(external_item_list, total_items, mode, thresold)
  terms_levels = list_terms_per_level_from_items
  connect_familiars!(terms_levels)
  item_list_with_transf_parental = get_item_list_parental(terms_levels)
  case mode
  when :elim
    compute_relations_elim(terms_levels, external_item_list, total_items, thresold, item_list_with_transf_parental)
  when :weight
    compute_relations_weight(terms_levels, external_item_list, total_items, item_list_with_transf_parental)
  else
    []
  end
end
2285
+
2286
# Propagate items upward: each term transfers its accumulated items to one
# chosen parent, level by level from the deepest level to the top.
# ===== Parameters
# +terms_levels+:: hash of level => [terms] (as built by list_terms_per_level_from_items)
# ===== Returns
# hash term => merged item list after the upward transfer
def get_item_list_parental(terms_levels)
  transfered_list = {}
  parent_dict = @dicts[:is_a][:byTerm]
  levels = terms_levels.keys.sort
  while levels.length > 1
    level = levels.pop # process deepest remaining level
    terms_levels[level].each do |term|
      parents = parent_dict[term]
      if parents.nil?
        next
      elsif parents.length == 1
        parent = parents.first
      else
        # NOTE(review): `|` (union) keeps parents' order, so `.first` is always
        # parents.first regardless of terms_levels — an intersection `&` (pick a
        # parent that lives in the upper level) looks intended; confirm before changing.
        parent = (parents | terms_levels[level - 1]).first
      end
      term_it = @items[term]
      parent_it = @items[parent]
      curr_it = transfered_list[term]
      # Parent receives its own items plus everything accumulated by this child
      parent_all_items = merge_groups([term_it, parent_it, curr_it])
      transfered_list[parent] = parent_all_items if !parent_all_items.empty?
      term_all_items = merge_groups([term_it, curr_it])
      transfered_list[term] = term_all_items if !term_all_items.empty?
    end
  end
  terms_levels[levels.first].each do |term| # Rescue lower level terms that not have children so they cannot receive items
    transfered_list[term] = @items[term] if transfered_list[term].nil?
  end
  return transfered_list
end
2315
+
2316
# Union several item groups into one, ignoring nil entries.
# ===== Parameters
# +groups+:: array of arrays (nils allowed)
# ===== Returns
# single array with the unique union of all groups
def merge_groups(groups)
  groups.compact.reduce([]) { |acc, group| acc | group }
end
2319
+
2320
# Group the ITEMS keys by their ontology level.
# ===== Returns
# hash of level => [terms at that level]
def list_terms_per_level_from_items
  @items.keys.group_by { |term| get_term_level(term) }
end
2333
+
2334
# Ensure every term has its shortest-path parent present in the level above,
# creating intermediate levels when missing. Mutates +terms_levels+ in place.
# ===== Parameters
# +terms_levels+:: hash of level => [terms], modified in place
def connect_familiars!(terms_levels)
  pending_levels = terms_levels.keys.sort
  while pending_levels.length > 1 # Process while the current level has a parental level
    level = pending_levels.pop
    parental_level = level - 1
    parental_terms = terms_levels[parental_level]
    if parental_terms.nil? # Parental level absent: create it and queue it for processing
      parental_terms = []
      terms_levels[parental_level] = parental_terms
      pending_levels << parental_level
    end
    terms_levels[level].each do |term|
      info = @term_paths[term]
      shortest = info[:shortest_path]
      best_path = info[:paths].find { |p| p.length == shortest }
      direct_parent = best_path[1] # index 0 is the term itself
      parental_terms << direct_parent unless parental_terms.include?(direct_parent)
    end
  end
end
2354
+
2355
# ELIM enrichment: test terms from the deepest level upward; when a term is
# significant, its items are penalized (removed) from all its ancestors' tests.
# ===== Parameters
# +terms_levels+:: hash of level => [terms]
# +external_item_list+:: items to test for enrichment
# +total_items+:: universe size for the fisher test
# +thresold+:: p-value cutoff that triggers ancestor penalization
# +item_list+:: term => items (typically from get_item_list_parental)
# ===== Returns
# array of [term, pval] pairs
def compute_relations_elim(terms_levels, external_item_list, total_items, thresold, item_list)
  results = []
  penalized_terms = {}
  levels = terms_levels.keys.sort
  levels.reverse_each do |level| # leaves first, roots last
    terms_levels[level].each do |term|
      associated_items = item_list[term]
      items_to_remove = penalized_terms[term]
      items_to_remove = [] if items_to_remove.nil?
      pval = get_fisher_exact_test(
        external_item_list - items_to_remove,
        associated_items - items_to_remove,
        #((associated_items | external_item_list) - items_to_remove).length
        total_items
      )
      if pval <= thresold
        parents = get_ancestors(term) # Save the items for each parent term to remove them later in the fisher test
        parents.each do |prnt|
          query = penalized_terms[prnt]
          if query.nil?
            penalized_terms[prnt] = item_list[term].clone # We need a new array to store the following iterations
          else
            query.concat(item_list[term])
          end
        end
      end
      results << [term, pval]
    end
  end
  return results
end
2386
+
2387
# WEIGHT enrichment: test terms from the deepest level upward, letting
# computeTermSig adjust per-item weights of children/ancestors.
# ===== Parameters
# +terms_levels+:: hash of level => [terms]
# +external_item_list+:: items to test for enrichment
# +total_items+:: universe size for the fisher test
# +item_list+:: term => items (typically from get_item_list_parental)
# ===== Returns
# array of [term, pval] pairs
def compute_relations_weight(terms_levels, external_item_list, total_items, item_list)
  pvals = {}
  # NOTE(review): this default block returns a fresh Hash.new(1) WITHOUT storing it
  # under the key, so reads on missing terms are never memoized — only terms passed
  # through add_items_to_weigthed_list persist; verify this is intentional.
  item_weigths_per_term = Hash.new { |hash, key| Hash.new(1) } #https://mensfeld.pl/2016/09/ruby-hash-default-value-be-cautious-when-you-use-it/
  levels = terms_levels.keys.sort
  levels.reverse_each do |level| # leaves first, roots last
    terms_levels[level].each do |term|
      associated_items = item_list[term]
      #initialize observed items in item_weigths_per_term list
      add_items_to_weigthed_list(term, associated_items, item_weigths_per_term) if !associated_items.nil?
      children = @dicts[:is_a][:byValue][term]
      if children.nil?
        children = []
      else
        children = children.select{|ch| item_weigths_per_term[ch].length > 0} # Only use children with items associated to them OR transfered to them
      end
      computeTermSig(term, children, external_item_list, total_items, pvals, item_weigths_per_term)
    end
  end
  return pvals.to_a
end
2407
+
2408
# Register the given items for a term in the weighted list with initial weight 1.
# ===== Parameters
# +term+:: term whose items are being registered
# +associated_items+:: items to register
# +weigthed_list+:: term => {item => weight} accumulator, updated in place
def add_items_to_weigthed_list(term, associated_items, weigthed_list)
  weights = weigthed_list[term]
  associated_items.each { |item| weights[item] = 1 }
  weigthed_list[term] = weights # explicit store: the hash default block does not memoize
end
2413
+
2414
# Recursive significance computation for the WEIGHT algorithm: compares a term's
# p-value against its children's and rescales item weights accordingly.
# ===== Parameters
# +term+:: term under test
# +children+:: children of +term+ that carry item weights
# +external_item_list+:: items to test for enrichment
# +total_items+:: universe size for the fisher test
# +pvals+:: term => pval accumulator, updated in place
# +item_weigths_per_term+:: term => {item => weight}, updated in place
def computeTermSig(term, children, external_item_list, total_items, pvals, item_weigths_per_term)
  #puts term.to_s.red
  #puts @term_paths[term].inspect
  #puts @dicts[:is_a][:byValue][term].inspect.light_blue
  associated_items = item_weigths_per_term[term].keys
  pval = get_fisher_exact_test(external_item_list, associated_items, total_items,
    'two_sided', item_weigths_per_term[term], true)
  pvals[term] = pval
  if children.length > 0
    rates = {}
    sig_child = 0
    # ratio >= 1 means the child is at least as significant as the parent
    children.each do |child|
      ratio = sigRatio(pvals[child], pval)
      rates[child] = ratio
      sig_child += 1 if ratio >= 1
    end
    if sig_child == 0 # CASE 1: parent beats all children -> downweight children and retest them
      children.each do |child|
        current_ratio = rates[child]
        query_child = item_weigths_per_term[child]
        query_child.transform_values!{|weight| weight * current_ratio}
        pvals[child] = get_fisher_exact_test(external_item_list, item_weigths_per_term[child].keys, total_items,
          'two_sided', item_weigths_per_term[child], true)
      end
    else
      ancs = get_ancestors(term, filter_alternatives = true) # positional arg; the local assignment is only self-documentation
      ancs << term
      rates.each do |ch, ratio|# CASE 2: some child beats the parent -> downweight the whole ancestry
        if ratio >= 1 # The child is better than parent
          ancs.each do |anc|
            query_anc = item_weigths_per_term[anc]
            associated_items.each do |item|
              query_anc[item] /= ratio # /= --> query_anc[item]/ratio
            end
          end
        end
      end
      # Recurse with the remaining (already rated) children removed
      computeTermSig(term, children - rates.keys, external_item_list, total_items, pvals, item_weigths_per_term)
    end
  end
end
2455
+
2456
# Significance ratio of two p-values: log(A) in base B, i.e. ln(A)/ln(B).
# Values >= 1 mean A is at least as significant as B.
def sigRatio(pval_a, pval_b)
  Math.log(pval_a) / Math.log(pval_b)
end
2459
+
2460
# Compute descriptive statistics over the sizes of the stored profiles.
# ===== Returns
# hash with :average, :variance, :standardDeviation, :max, :min, :count,
# :countNonZero, :q1, :median and :q3 of the profile sizes
def profile_stats
  stats = Hash.new(0)
  data = @profiles.values.map{|ont_ids| ont_ids.size}
  stats[:average] = data.sum().fdiv(data.size)
  # FIX: original read stats[:avg], which is always 0 via the Hash.new(0)
  # default, so the "variance" was actually the mean of squares
  sum_devs = data.sum{|element| (element - stats[:average]) ** 2}
  stats[:variance] = sum_devs.fdiv(data.size)
  stats[:standardDeviation] = stats[:variance] ** 0.5
  stats[:max] = data.max
  stats[:min] = data.min

  stats[:count] = data.size
  data.each do |value|
    stats[:countNonZero] += 1 if value != 0
  end

  # get_quantiles is an Array extension provided by the expcalc gem
  stats[:q1] = data.get_quantiles(0.25)
  stats[:median] = data.get_quantiles(0.5)
  stats[:q3] = data.get_quantiles(0.75)
  return stats
end
2481
+
2482
+ #============================================================================
2483
+ #============================================================================
2484
+
2485
# Check if a given ID is a removable (blacklist) term.
# +DEPRECATED+ use is_removable? instead
# ===== Parameters
# +id+:: to be checked
# ===== Returns
# true if given term is a removable (blacklist) term or false in other cases
def is_removable(id)
  warn "[DEPRECATION] `is_removable` is deprecated. Please use `is_removable?` instead."
  @removable_terms.include?(id.to_sym)
end
2495
+
2496
# Check if a given ID is a removable (blacklist) term
# ===== Parameters
# +id+:: to be checked
# ===== Returns
# true if given term is a removable (blacklist) term or false in other cases
def is_removable?(id)
  @removable_terms.include?(id.to_sym)
end
2504
+
2505
+ ############################################
2506
+ # SPECIAL METHODS
2507
+ #############################################
2508
+ def ==(other)
2509
+ self.header == other.header &&
2510
+ self.stanzas == other.stanzas &&
2511
+ self.ancestors_index == other.ancestors_index &&
2512
+ self.alternatives_index == other.alternatives_index &&
2513
+ self.obsoletes_index == other.obsoletes_index &&
2514
+ self.structureType == other.structureType &&
2515
+ self.ics == other.ics &&
2516
+ self.meta == other.meta &&
2517
+ self.dicts == other.dicts &&
2518
+ self.profiles == other.profiles &&
2519
+ self.profilesDict == other.profilesDict &&
2520
+ (self.items.keys - other.items.keys).empty? &&
2521
+ self.removable_terms == other.removable_terms &&
2522
+ self.special_tags == other.special_tags &&
2523
+ self.items == other.items &&
2524
+ self.term_paths == other.term_paths &&
2525
+ self.max_freqs == other.max_freqs
2008
2526
  end
2009
2527
 
2010
2528
 
2011
2529
# Build a shallow copy of this ontology: every top-level container is cloned,
# but the elements inside them are shared with the original.
# ===== Returns
# new Ontology instance mirroring this one's state
def clone
  duplicate = Ontology.new
  duplicate.header = @header.clone
  duplicate.stanzas[:terms] = @stanzas[:terms].clone
  duplicate.stanzas[:typedefs] = @stanzas[:typedefs].clone
  duplicate.stanzas[:instances] = @stanzas[:instances].clone
  duplicate.ancestors_index = @ancestors_index.clone
  duplicate.descendants_index = @descendants_index.clone
  duplicate.alternatives_index = @alternatives_index.clone
  duplicate.obsoletes_index = @obsoletes_index.clone
  duplicate.structureType = @structureType.clone
  duplicate.ics = @ics.clone
  duplicate.meta = @meta.clone
  duplicate.dicts = @dicts.clone
  duplicate.profiles = @profiles.clone
  duplicate.profilesDict = @profilesDict.clone
  duplicate.items = @items.clone
  duplicate.removable_terms = @removable_terms.clone
  duplicate.term_paths = @term_paths.clone
  duplicate.max_freqs = @max_freqs.clone
  duplicate
end
2551
+
2552
+
2553
+ #############################################
2554
+ # ACCESS CONTROL
2555
+ #############################################
2556
+
2557
+ attr_reader :file, :header, :stanzas, :ancestors_index, :descendants_index, :special_tags, :alternatives_index, :obsoletes_index, :structureType, :ics, :max_freqs, :meta, :dicts, :profiles, :profilesDict, :items, :removable_terms, :term_paths
2558
+ attr_writer :file, :header, :stanzas, :ancestors_index, :descendants_index, :special_tags, :alternatives_index, :obsoletes_index, :structureType, :ics, :max_freqs, :meta, :dicts, :profiles, :profilesDict, :items, :removable_terms, :term_paths
2041
2559
  end