semtools 0.1.8 → 0.1.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
class FileParser
  #############################################
  # FIELDS
  #############################################
  # Handled class variables
  # => @@basic_tags :: hash with main OBO structure tags (ancestors, obsolete and alternative)
  # => @@symbolizable_ids :: tags whose values are converted into symbols
  # => @@tags_with_trailing_modifiers :: tags which can include extra info after specific text modifiers
  # => @@multivalue_tags :: tags declared as multivalue

  @@basic_tags = {
    ancestors: %i[is_a],
    obsolete: :is_obsolete,
    alternative: %i[replaced_by consider alt_id]
  }
  @@tags_with_trailing_modifiers = %i[is_a union_of disjoint_from relationship subsetdef synonymtypedef property_value]
  @@multivalue_tags = %i[
    alt_id is_a subset synonym xref intersection_of union_of disjoint_from
    relationship replaced_by consider subsetdef synonymtypedef property_value remark
  ]
  # Tags with trailing modifiers hold IDs too, so they are symbolized as well
  @@symbolizable_ids = %i[id alt_id replaced_by consider] + @@tags_with_trailing_modifiers

  # Convert into symbols the values stored under every symbolizable tag.
  # Array values are converted in place; single values are replaced in the hash.
  # ===== Parameters
  # +item_hash+:: hash to be checked (it is modified)
  # ===== Returns
  # The list of symbolizable tags that has been iterated
  def self.symbolize_ids(item_hash)
    @@symbolizable_ids.each do |tag|
      current = item_hash[tag]
      next if current.nil?

      if current.kind_of?(Array)
        current.map!(&:to_sym)
      else
        item_hash[tag] = current.to_sym
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
class JsonParser < FileParser

  # Load into an ontology object the information stored in a JSON file.
  # ===== Parameters
  # +ontology+:: object that receives the loaded fields through its attribute writers
  # +file+:: path of the JSON file with the object info
  # +build+:: if true, ontology.precompute is called once the fields are stored. Default: true
  # ===== Return
  # Same value as JsonParser.read
  def self.load(ontology, file, build: true)
    # Forward the flag: it was previously dropped, so build: false had no effect
    read(ontology, file, build: build)
  end

  # Read a JSON file with an ontology object stored, restore the symbols that
  # JSON serialized as plain text and store every field into the ontology.
  # ===== Parameters
  # +ontology+:: object that receives the loaded fields through its attribute writers
  # +file+:: path of the JSON file with the object info
  # +build+:: if true, ontology.precompute is called once the fields are stored. Default: true
  # ===== Return
  # Result of ontology.precompute when build is true, nil in other cases
  def self.read(ontology, file, build: true)
    # Read file (File.read closes the handle once the content is loaded)
    jsonInfo = JSON.parse(File.read(file), :symbolize_names => true)
    # Pre-process (Symbolize some hashes values)
    jsonInfo[:terms].each_value { |info| symbolize_ids(info) } # STANZAS
    # Optional
    unless jsonInfo[:alternatives_index].nil?
      jsonInfo[:alternatives_index] = jsonInfo[:alternatives_index].map { |id, value| [id, value.to_sym] }.to_h
    end
    [:ancestors_index, :descendants_index].each do |index|
      next if jsonInfo[index].nil?
      jsonInfo[index].each_value { |family_arr| family_arr.map!(&:to_sym) }
    end
    # A file without dictionaries is tolerated (ontology.dicts stays nil)
    (jsonInfo[:dicts] || {}).each do |flag, dictionaries|
      next if dictionaries.nil?
      # Special case: byTerm
      dictionaries[:byTerm] = dictionaries[:byTerm].map do |term, value|
        if term.to_s.match?(/\A[-+]?[0-9]*\.?[0-9]+\Z/) # Numeric dictionary: key restored as integer
          [term.to_s.to_i, value.map { |v| v.to_sym }]
        elsif value.kind_of?(Array) && flag == :is_a
          [term.to_sym, value.map { |v| v.to_sym }]
        else # Numeric values and any other content are kept untouched
          [term.to_sym, value]
        end
      end.to_h
      # By value
      dictionaries[:byValue] = dictionaries[:byValue].map do |value, term|
        if value.is_a? Numeric # Numeric dictionary
          [value, term.to_sym]
        elsif term.is_a? Numeric # Numeric dictionary
          [value.to_s.to_sym, term]
        elsif flag == :is_a || term.kind_of?(Array)
          [value.to_sym, term.map { |t| t.to_sym }]
        else
          [value.to_s, term.to_sym]
        end
      end.to_h
    end
    unless jsonInfo[:profiles].nil?
      profiles = jsonInfo[:profiles]
      profiles.each_value { |terms| terms.map!(&:to_sym) }
      # Keys that look like numbers are restored as integers (keys snapshot taken before re-keying)
      profiles.keys.each do |id|
        profiles[id.to_s.to_i] = profiles.delete(id) if self.is_number?(id.to_s)
      end
    end
    jsonInfo[:removable_terms] = jsonInfo[:removable_terms].map { |term| term.to_sym } unless jsonInfo[:removable_terms].nil?
    jsonInfo[:items].each { |k, v| jsonInfo[:items][k] = v.map { |item| item.to_sym } } unless jsonInfo[:items].nil?
    unless jsonInfo[:term_paths].nil?
      jsonInfo[:term_paths].each do |term, info|
        jsonInfo[:term_paths][term][:paths] = info[:paths].map { |path| path.map { |t| t.to_sym } }
      end
    end

    # Store info
    ontology.terms = jsonInfo[:terms]
    ontology.ancestors_index = jsonInfo[:ancestors_index]
    ontology.descendants_index = jsonInfo[:descendants_index]
    ontology.alternatives_index = jsonInfo[:alternatives_index]
    jsonInfo[:structureType] = jsonInfo[:structureType].to_sym unless jsonInfo[:structureType].nil?
    ontology.structureType = jsonInfo[:structureType]
    ontology.ics = jsonInfo[:ics]
    ontology.meta = jsonInfo[:meta]
    ontology.max_freqs = jsonInfo[:max_freqs]
    ontology.dicts = jsonInfo[:dicts]
    ontology.profiles = jsonInfo[:profiles]
    ontology.items = jsonInfo[:items]
    ontology.term_paths = jsonInfo[:term_paths]

    ontology.precompute() if build
  end

  # Check if a text can be converted into a number by Kernel#Float
  # ===== Parameters
  # +string+:: text to be checked
  # ===== Return
  # true if the conversion is possible, false in other cases
  def self.is_number? string
    Float(string)
    true
  rescue ArgumentError, TypeError # Only the errors raised by a failed conversion
    false
  end

end
@@ -0,0 +1,511 @@
1
+ class OboParser < FileParser
2
+
3
#############################################
# FIELDS
#############################################
# Handled class variables
# => @@header :: file header (if is available)
# => @@stanzas :: OBO stanzas {:terms,:typedefs,:instances}
# => @@removable_terms :: array of terms to not be considered
# => @@alternatives_index :: hash of alternative IDs (include alt_id and obsoletes)
# => @@obsoletes :: hash with the IDs of the terms flagged as obsolete
# => @@structureType :: type of ontology structure depending on ancestors relationship. Allowed: {atomic, sparse, circular, hierarchical}
# => @@ancestors_index :: hash of ancestors per each term handled with any structure relationships
# => @@descendants_index :: hash of descendants per each term handled with any structure relationships
# => @@reroot :: flag handed to the structure detection and to the ontology object. Always false after a reset
# => @@dicts :: bidirectional dictionaries with three levels <key|value>: 1º) <tag|hash2>; 2º) <(:byTerm/:byValue)|hash3>; 3º) dictionary <k|v>

# Set every class variable to its initial (empty) state. It is the single
# definition of the initial state: it is used both when the class body is
# evaluated and before each load to avoid the mix of several OBO loads.
def self.reset
  @@header = nil
  @@stanzas = {terms: {}, typedefs: {}, instances: {}}
  @@removable_terms = []
  @@alternatives_index = {}
  @@obsoletes = {}
  @@structureType = nil
  @@ancestors_index = {}
  @@descendants_index = {}
  @@reroot = false
  @@dicts = {}
end

reset # Define the class variables as soon as the class body is evaluated
39
+
40
# Iterate over the stored terms. A warning is emitted when no term is stored.
# ===== Parameters
# +att+:: if true, the block receives the term ID and its tags; if false, only the ID
# +only_main+:: if true, IDs found in the alternatives or obsoletes indexes are skipped
def self.each(att = false, only_main = true)
  terms = @@stanzas[:terms]
  warn('stanzas terms empty') if terms.empty?
  terms.each do |id, tags|
    secondary = @@alternatives_index.key?(id) || @@obsoletes.key?(id)
    next if only_main && secondary

    att ? yield(id, tags) : yield(id)
  end
end
51
+
52
# Parse an OBO file into the class variables and, optionally, build the indexes
# and store them into the ontology object.
# ===== Parameters
# +ontology+:: object that receives the calculated fields (only used when build is true)
# +file+:: OBO file to be loaded
# +build+:: if true, build_index is executed. Default: true
# +black_list+:: term IDs to be removed from the loaded terms
# +extra_dicts+:: extra dictionaries handed to build_index
def self.load(ontology, file, build: true, black_list: [], extra_dicts: [])
  reset # Clean class variables to avoid the mix of several obo loads
  @@removable_terms = black_list
  loaded = self.load_obo(file) # [file info, header, stanzas]
  @@header = loaded[1]
  @@stanzas = loaded[2]
  self.remove_black_list_terms() unless @@removable_terms.empty?
  self.build_index(ontology, extra_dicts: extra_dicts) if build
end
61
+
62
# Class method to load an OBO format file (based on OBO 1.4 format). Specially focused on load
# the Header, the Terms, the Typedefs and the Instances.
# Each non-empty line is split by its first ':' into a <tag, value> pair and the
# pairs are handed to process_entity every time a stanza flag line is found.
# ===== Parameters
# +file+:: OBO file to be loaded
# ===== Returns
# Three values: hash with FILE info ({:file, :name}), HEADER and STANZAS
def self.load_obo(file)
  raise("File is not defined") if file.nil?
  # Data variables
  header = ''
  stanzas = {terms: {}, typedefs: {}, instances: {}}
  # Auxiliar variables
  infoType = 'Header'
  currInfo = []
  stanzas_flags = %w[[Term] [Typedef] [Instance]]
  # Read file (File.foreach closes the file once the block ends)
  File.foreach(file) do |line|
    line.chomp!
    next if line.empty?
    # Check if new instance is found
    if stanzas_flags.include?(line)
      header = self.process_entity(header, infoType, stanzas, currInfo)
      # Update info variables
      currInfo = []
      infoType = line.delete('[]') # '[Term]' => 'Term'
      next
    end
    # Concat info
    currInfo << line.split(':', 2)
  end
  # Store last loaded info
  header = self.process_entity(header, infoType, stanzas, currInfo) if !currInfo.empty?
  # Prepare to return
  finfo = {:file => file, :name => File.basename(file, File.extname(file))}
  return finfo, header, stanzas
end
99
+
100
# Convert the loaded <tag, value> pairs into a hash and store it into the container
# that matches the item type: the header itself or one of the stanzas sets.
# ===== Parameters
# +header+:: current header container
# +infoType+:: current ontology item type detected ('Header', 'Term', 'Typedef' or 'Instance')
# +stanzas+:: container with :terms, :typedefs and :instances sets
# +currInfo+:: info to be stored
# ===== Returns
# The new header when the item type is 'Header'; the header given in other cases
def self.process_entity(header, infoType, stanzas, currInfo)
  parsed = self.info2hash(currInfo)
  return parsed if infoType.eql?('Header')

  # Stanza items are indexed by their :id; any other item type is not stored
  container = {'Term' => :terms, 'Typedef' => :typedefs, 'Instance' => :instances}[infoType]
  stanzas[container][parsed[:id]] = parsed unless container.nil?
  header
end
126
+
127
# Class method to transform <tag, value> tuples into hash structure.
# Tags are stored as symbols. Tags declared as multivalue are stored as arrays.
# Tags with trailing modifiers only keep the selected field of their value.
# ===== Parameters
# +attributes+:: array tuples with info to be transformed into hash format
# +split_char+:: text used to split the value of tags with trailing modifiers
# +selected_field+:: position of the field to keep after splitting
# ===== Returns
# Attributes stored into hash structure
# ===== Raises
# EncodingError if a tuple lacks tag or value, or if a tag with trailing modifiers has no field to select.
# RuntimeError if a tag not declared as multivalue is found more than once.
def self.info2hash(attributes, split_char = " ! ", selected_field = 0)
  # Load info
  info_hash = {}
  # Only TERMS multivalue tags (future add Typedefs and Instance)
  attributes.each do |tag, value|
    # Check (before any use of the value, so malformed lines are reported as such)
    raise EncodingError, 'Info element incorrect format' if tag.nil? || value.nil?
    # Prepare
    tag = tag.lstrip.to_sym
    value = value.gsub(/{[\\\":A-Za-z0-9\/\.\-, =?&_]+} /, '') if tag == :is_a # To delete extra attributes (source, xref) in is_a tag of MONDO ontology
    value = value.lstrip
    if @@tags_with_trailing_modifiers.include?(tag)
      selected = value.split(split_char)[selected_field]
      raise EncodingError, "Info element incorrect format. Tag without value to select [#{tag}]" if selected.nil?
      value = selected.to_sym
    end

    # Store
    query = info_hash[tag]
    if query.nil? # New entry
      info_hash[tag] = @@multivalue_tags.include?(tag) ? [value] : value
    elsif query.kind_of?(Array) # Tag already exists and it is multivalue
      query << value
    else
      raise("Attempt to concatenate plain text with another. The tag is not declared as multivalue. [#{tag}](#{query})")
    end
  end
  self.symbolize_ids(info_hash)
  return info_hash
end
165
+
166
# Delete from the loaded terms every ID stored in the removable terms list
def self.remove_black_list_terms()
  loaded_terms = @@stanzas[:terms]
  @@removable_terms.each do |removableID|
    loaded_terms.delete(removableID)
  end
end
169
+
170
# Executes basic expansions of tags (alternatives, obsoletes and parentals) with default values,
# calculates the dictionaries and stores all the results into the ontology object
# ===== Parameters
# +ontology+:: object that receives the calculated fields through its attribute writers
# +extra_dicts+:: pairs <tag, options> of extra dictionaries; options are handed to calc_dictionary as keyword arguments
# ===== Returns
# The dictionaries container assigned to the ontology
def self.build_index(ontology, extra_dicts: [])
  self.get_index_obsoletes
  self.get_index_alternatives
  self.remove_obsoletes_in_terms
  self.get_index_child_parent_relations
  # Keep only references that can be resolved to stored terms
  @@alternatives_index.transform_values! { |alt| self.extract_id(alt) }
  @@alternatives_index.compact!
  [@@ancestors_index, @@descendants_index].each do |index|
    index.transform_values! { |relatives| relatives.map { |t| self.extract_id(t) }.compact }
  end
  # Dictionaries
  self.calc_dictionary(:name)
  self.calc_dictionary(:synonym, select_regex: /\"(.*)\"/)
  self.calc_ancestors_dictionary
  extra_dicts.each do |dict_tag, extra_parameters|
    self.calc_dictionary(dict_tag, **extra_parameters) # https://www.justinweiss.com/articles/fun-with-keyword-arguments/
  end
  # Store
  ontology.terms = @@stanzas[:terms]
  ontology.alternatives_index = @@alternatives_index
  ontology.obsoletes = @@obsoletes
  ontology.ancestors_index = @@ancestors_index
  ontology.descendants_index = @@descendants_index
  ontology.reroot = @@reroot
  ontology.structureType = @@structureType
  ontology.dicts = @@dicts
end
198
+
199
# Once alternative and obsolete indexes are loaded, use this to keep only working terms:
# every ID found in the obsoletes index is deleted from the loaded terms
def self.remove_obsoletes_in_terms()
  working_terms = @@stanzas[:terms]
  @@obsoletes.each_key { |obsolete_id| working_terms.delete(obsolete_id) }
end
205
+
206
+
207
# Expand obsoletes set and link info to their alternative IDs
# ===== Parameters
# +obs_tag+:: tag to be used to find obsoletes
# +alt_tags+:: tags to find alternative IDs (if are available)
# ===== Returns
# Result of the iteration over the stored terms
def self.get_index_obsoletes(obs_tag: @@basic_tags[:obsolete], alt_tags: @@basic_tags[:alternative])
  each(true) do |id, term_tags|
    next unless term_tags[obs_tag] == 'true' # Obsolete tag presence, must be checked as string

    # Check if alternative value is available
    replacements = alt_tags.map { |alt| term_tags[alt] }.compact
    @@alternatives_index[id] = replacements.first.first unless replacements.empty? # FIRST tag, FIRST id
    @@obsoletes[id] = true
  end
end
226
+
227
# Expand alternative IDs around all already stored terms: each alternative ID
# of a term is linked to that term into the alternatives index
# ===== Parameters
# +alt_tag+:: tag used to expand alternative IDs
# ===== Returns
# Result of the iteration over the stored terms
def self.get_index_alternatives(alt_tag: @@basic_tags[:alternative].last)
  each(true) do |id, tags|
    declared = tags[alt_tag]
    next if declared.nil?

    # Removable terms and the term itself are not valid alternatives
    (declared - @@removable_terms - [id]).each do |alt_term|
      @@alternatives_index[alt_term] = id
    end
  end
end
243
+
244
# Expand parentals set: stores the structure type observed and fills the
# ancestors and descendants indexes
# ===== Parameters
# +tag+:: tag used to expand parentals
# ===== Returns
# The hash with the expanded parentals
def self.get_index_child_parent_relations(tag: @@basic_tags[:ancestors][0])
  structType, parentals = self.get_related_ids_by_tag(
    terms: @@stanzas[:terms],
    target_tag: tag,
    reroot: @@reroot
  )
  raise('Error expanding parentals') if structType.nil? || parentals.nil?

  # Check structure: any type out of the allowed ones is handled as hierarchical
  structType = :hierarchical unless [:atomic, :sparse, :circular].include?(structType)
  @@structureType = structType

  parentals.each do |id, parents|
    kept_parents = parents - @@removable_terms
    @@ancestors_index[id] = kept_parents
    kept_parents.each { |anc_id| self.add2hash(@@descendants_index, anc_id, id) }
  end
end
266
+
267
+
268
# Expand terms using a specific tag and return all extended terms and the
# relationship structure observed.
# ===== Parameters
# +terms+:: set to be used to expand
# +target_tag+:: tag used to expand
# +reroot+:: if true, the structure is always reported as sparse
# ===== Returns
# A vector with the observed structure (:hierarchical, :circular, :atomic or :sparse)
# and the hash with extended terms
def self.get_related_ids_by_tag(terms:, target_tag:, reroot: false)
  related_ids = {}
  loop_found = false
  terms.each do |id, tags|
    next if tags[target_tag].nil?

    set_structure, _ = self.get_related_ids(id, terms, target_tag, related_ids)
    loop_found = true if set_structure == :circular # Check structure
  end

  structType = loop_found ? :circular : :hierarchical
  # Check special cases
  expanded = related_ids.length
  structType = :atomic if expanded <= 0 # No term has been expanded
  structType = :sparse if reroot || (expanded > 0 && (terms.length - expanded) >= 2) # Two or more terms without expansion
  return structType, related_ids
end
291
+
292
# Expand a (starting) term using a specific tag and return all extended terms into an array and
# the relationship structure observed (hierarchical or circular). When a loop back to the
# starting term is found, the starting term is removed from its own expansion.
# +Note+: we extremely recommend to use get_related_ids_by_tag function instead of it (directly)
# ===== Parameters
# +start_id+:: term where start to expand
# +terms+:: set to be used to expand
# +target_tag+:: tag used to expand
# +related_ids+:: already expanded info. It is updated with the new expansions
# ===== Returns
# A vector with the observed structure and the array with extended terms.
# The structure is :no_term if the starting term is not stored and :source if it lacks the tag.
def self.get_related_ids(start_id, terms, target_tag, related_ids = {})
  # Take start_id term available info and already accumulated info
  expansion = related_ids[start_id] || []
  start_tags = terms[start_id]
  return [:no_term, []] if start_tags.nil?
  direct_relations = start_tags[target_tag]
  return [:source, []] if direct_relations.nil?

  observed = :hierarchical

  # Study direct extensions
  direct_relations.each do |related|
    # Already included into this expansion: nothing to do.
    # (Old code flagged this case as circular, which gave circular status in real obo files)
    # TODO: CHECK CAREFULLY THIS METHOD.
    next if expansion.include?(related)

    expansion << related
    if related_ids.key?(related) # Related term already has been expanded: reuse it
      expansion = expansion | related_ids[related]
      if expansion.include?(start_id) # Check circular case
        observed = :circular
        expansion = expansion - [related, start_id]
      end
    else # Expand
      related_ids[start_id] = expansion # Partial result visible for the nested expansion
      nested_struct, nested_ids = self.get_related_ids(related, terms, target_tag, related_ids)
      expansion = expansion | nested_ids
      observed = :circular if nested_struct == :circular # Check struct
      if expansion.include?(start_id) # Check circular case
        observed = :circular
        expansion.delete(start_id)
      end
    end
  end
  related_ids[start_id] = expansion

  return observed, expansion
end
344
+
345
# Calculates :is_a dictionary: references are handled as ontology term IDs and
# each value can be linked to several terms
def self.calc_ancestors_dictionary
  ancestors_options = {multiterm: true, self_type_references: true}
  self.calc_dictionary(:is_a, **ancestors_options)
end
349
+
350
# Apply the selection regex to a tag content.
# ===== Parameters
# +raw+:: tag content (single value or array of values)
# +select_regex+:: regex used to select the text to keep. If nil, content is returned untouched
# ===== Return
# The content given if there is no regex. In other cases, a flat array with the
# first match of each value (values without match are discarded)
def self.select_dictionary_values(raw, select_regex)
  return raw if select_regex.nil?

  first_matches = (raw.kind_of?(Array) ? raw : [raw]).map { |value| value.scan(select_regex).first }
  first_matches.flatten.compact
end

# Generate a bidirectional dictionary set using a specific tag and terms stanzas set
# This function stores calculated dictionary into the dictionaries main container (@@dicts).
# All stored terms are used (alternative and obsolete IDs are not skipped).
# For byValue dictionaries without multiterm, a value shared by several terms keeps the last term found
# ===== Parameters
# +tag+:: to be used to calculate dictionary
# +select_regex+:: gives a regex that can be used to modify value to be stored
# +store_tag+:: flag used to store dictionary. If nil, mandatory tag given will be used
# +multiterm+:: if true, byValue will allows multi-term linkage (array)
# +self_type_references+:: if true, program assumes that references will be between Ontology terms, and it term IDs will be checked
# ===== Return
# hash with dict data ({byTerm:, byValue:}). And stores calculated bidirectional dictionary into dictionaries main container
def self.calc_dictionary(tag, select_regex: nil, store_tag: nil, multiterm: false, self_type_references: false)
  tag = tag.to_sym
  store_tag = tag if store_tag.nil?

  byTerm = {}
  byValue = {}
  # Calc per term
  each(true, false) do |term, tags|
    queryTag = tags[tag]
    next if queryTag.nil?

    # Pre-process
    queryTag = self.select_dictionary_values(queryTag, select_regex)
    values = queryTag.kind_of?(Array) ? queryTag : [queryTag]
    next if values.empty?

    # Store
    byTerm[term] = byTerm.include?(term) ? (byTerm[term] + values).uniq : values
    values.each do |value|
      if multiterm
        (byValue[value] ||= []) << term
      else
        byValue[value] = term
      end
    end
  end

  # Check self-references
  if self_type_references
    byTerm.each do |term, references|
      corrected_references = references.map do |t|
        checked = self.extract_id(t)
        next t if checked.nil? # Not resolved references are kept as they are

        byValue[checked] = byValue.delete(t) if checked != t && byValue[checked].nil? # Update in byValue
        checked
      end
      byTerm[term] = corrected_references.uniq
    end
  end

  # Check order: values declared in the term stanza go first
  byTerm.each do |term, values|
    next unless self.exists?(term)
    referenceValue = @@stanzas[:terms][term][tag]
    next if referenceValue.nil?

    referenceValue = self.select_dictionary_values(referenceValue, select_regex)
    if self_type_references
      if referenceValue.kind_of?(Array)
        referenceValue = referenceValue.map { |t| self.extract_id(t) }.compact
      else
        resolved = self.extract_id(referenceValue)
        referenceValue = resolved unless resolved.nil?
      end
    end
    referenceValue = [referenceValue] if !referenceValue.kind_of?(Array)
    byTerm[term] = referenceValue + (values - referenceValue)
  end

  # Store
  dict = {byTerm: byTerm, byValue: byValue}
  @@dicts[store_tag] = dict
  return dict
end
465
+
466
# Check if a given ID is stored as term into the loaded stanzas
# ===== Parameters
# +id+:: to be checked
# ===== Return
# True if the ID is a key of the stored terms or false in other cases
def self.exists?(id)
  stored_terms = @@stanzas[:terms]
  stored_terms.key?(id)
end
474
+
475
# Check if a term given is marked as obsolete (it is a key of the obsoletes index)
def self.is_obsolete?(term)
  @@obsoletes.key?(term)
end
479
+
480
# Check if a term given is marked as alternative (it is a key of the alternatives index)
def self.is_alternative?(term)
  @@alternatives_index.key?(term)
end
484
+
485
# This method assumes that a text given contains an allowed ID. And will try to obtain it splitting it
# and checking its first piece
# ===== Parameters
# +text+:: to be checked
# +splitBy+:: separator used to split the text
# ===== Return
# The correct ID if it can be found or nil in other cases (including empty or nil text)
def self.extract_id(text, splitBy: ' ')
  return text if self.exists?(text)

  first_piece = text.to_s.split(splitBy).first
  return nil if first_piece.nil? # Nothing to check: empty text or only separators

  candidate = first_piece.to_sym
  self.exists?(candidate) ? candidate : nil
end
498
+
499
+
500
+ private
501
+
502
# Append a value to the array stored under a key, creating the array if the key has no content.
# NOTE: as it is defined with `def self.`, the bare `private` keyword does not restrict this method.
# ===== Parameters
# +hash+:: container to be updated
# +key+:: where the value is added
# +val+:: value to be added
# ===== Return
# The array stored under the key
def self.add2hash(hash, key, val)
  stored = hash[key]
  return (hash[key] = [val]) if stored.nil?

  stored << val
end
510
+
511
+ end
@@ -1,3 +1,3 @@
1
1
  module Semtools
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.91"
3
3
  end
data/lib/semtools.rb CHANGED
@@ -1,6 +1,9 @@
1
1
  require "semtools/version"
2
2
  require "semtools/sim_handler"
3
3
  require "semtools/ontology"
4
+ require "semtools/parsers/file_parser"
5
+ require "semtools/parsers/json_parser"
6
+ require "semtools/parsers/oboparser"
4
7
 
5
8
  module Semtools
6
9
  # Your code goes here...
data/semtools.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
  spec.email = ["seoanezonjic@hotmail.com", "fmjabato@gmail.com"]
11
11
 
12
12
  spec.summary = %q{Gem to handle semantic based calculations in text and defined ontologies as GO or HPO.}
13
- spec.description = %q{This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}
13
+ spec.description = %q{DEPRECATED PROJECT. MIGRATED TO PYTHON: https://github.com/seoanezonjic/py_semtools. This gem allows to perform ontology based operations and calculation of Semantic similarity and information coefficient using different implementations.}
14
14
  spec.homepage = "https://github.com/seoanezonjic/semtools"
15
15
  spec.license = "MIT"
16
16