RubyGems - pets - Versions diffs - 0.2.4 → 0.2.5 - Mend

pets 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

checksums.yaml +4 -4
data/bin/coPatReporter.rb +5 -0
data/bin/evidence_profiler.rb +79 -14
data/bin/get_gen_features.rb +146 -0
data/bin/install_deps.rb +3 -2
data/bin/profiles2phenopacket.rb +1 -25
data/external_code/install_R_dependencies.R +6 -1
data/lib/pets/coPatReporterMethods.rb +50 -4
data/lib/pets/cohort.rb +10 -8
data/lib/pets/common_optparse.rb +30 -0
data/lib/pets/constants.rb +2 -1
data/lib/pets/generalMethods.rb +21 -2
data/lib/pets/genomic_features.rb +106 -10
data/lib/pets/io.rb +32 -8
data/lib/pets/parsers/cohort_parser.rb +8 -3
data/lib/pets/parsers/reference_parser.rb +39 -0
data/lib/pets/version.rb +1 -1
data/lib/pets.rb +2 -1
data/pets.gemspec +6 -3
data/templates/cluster_report.erb +25 -5
data/templates/evidence_profile.erb +20 -4
metadata +65 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7dad87a5083408e6049bd3edeac19e8bc106d93304453e9030841a620c2d7c3a
-  data.tar.gz: 2176d3f49726443447c0d7e9d8f031bed39c998db02ddf36ef2cb3d28d41f2c3
+  metadata.gz: 0f1d5c3ad0cb57b26b2c67e02b38a282139965472ded083acf0d1fcae48c0fec
+  data.tar.gz: 8b34f2440afe74f0b9c0e6024c2a05daee4a7be0efd0c6a3d80aef49673c7c7a
 SHA512:
-  metadata.gz: 0755027e17a0a986895ef6bf2ed5da7ff38c2de66486a8f41e42a8c5d189337381a167c85ec2ab4e5f3905af608ba4e5648a91b5b26737b483e94acaae8928f7
-  data.tar.gz: f106584e515da71e1224f6f453c0e39694bccdf9b76d172d2b28e08eea5bc8929871070f7645833e145b9ddcfe5e6b52b4ba4b18c339141cef1150b48eb8a7ba
+  metadata.gz: d3e9bc8559bb3f3e0c9a7ce1e0658645f54afc83fc49e7415cc3177576af975e4f7268a1f37e017d83fd88042197f102a4290cc29d7b0a16e12ec4964feea39d
+  data.tar.gz: a2aa8fe161b52d2f3e86e0d04f2a1a762298de95582098e553287ad905ff7b97eae6f96893de8bd78676c01b412a16973b1b53cf6ebbd9cb48e49c929c7f1d74

data/bin/coPatReporter.rb CHANGED Viewed

@@ -42,6 +42,11 @@ OptionParser.new do |opts|
     options[:id_col] = data
   end
+  options[:detailed_clusters] = false
+  opts.on("-D", "--detailed_clusters", "Show detiled cluster comparation using heatmaps. Default false") do
+    options[:detailed_clusters] = true
+  end
   options[:excluded_hpo] = nil
   opts.on("-E", "--excluded_hpo PATH", "List of HPO phenotypes to exclude (low informative)") do |excluded_hpo|
     options[:excluded_hpo] = excluded_hpo

data/bin/evidence_profiler.rb CHANGED Viewed

@@ -12,13 +12,24 @@ require 'pets'
 #############################################################################################
 ## METHODS
 ############################################################################################
+def load_pathogenic_scores(path)
+	scores = {}
+	File.open(path).each do |line|
+		feature, score = line.split("\t")
+		scores[feature] = score.to_f
+	end
+	return scores
+end
 def get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
+	coords = nil
 	all_coordinates = genomic_coordinates[entity]
-	coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)}
+	coords = all_coordinates.select{|id, coordinates| candidates_ids.include?(id.to_sym)} if !all_coordinates.nil?
 	return coords
 end
-def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, output)
+def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs,
+							evidences, prof_vars, hotspots_with_pat_vars, template, output)
 	var_ids, var_coors = format_variants4report(prof_vars)
 	container = {
 		profile_id: profile_id,
@@ -27,7 +38,8 @@ def make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_
 		similarity_matrixs: similarity_matrixs,
 		evidences: evidences,
 		var_ids: var_ids,
-		var_coordinates: var_coors
+		var_coordinates: var_coors,
+		hotspot_table: hotspots_with_pat_vars
 	}
 	report = Report_html.new(container, 'Evidence profile report')
 	report.build(template)
@@ -51,12 +63,39 @@ def format_variants4report(var_data)
 	return var_ids, var_coors
 end
-def get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
-	regions = Genomic_Feature.new(all_genomic_coordinates.values.map{|g| g[0..2]})
-	candidates_by_window, genome_windows = regions.generate_cluster_regions(:reg_overlap, 'A', 1)
-	# TODO: COMPLETE UNTIL FULL PREDICTOR
+def generate_prediction(similarity_matrixs, all_genomic_coordinates, prof_vars)
+	hotspots_with_pat_vars = []
+	if !prof_vars.nil?
+		phen_regions = Genomic_Feature.hash2genomic_feature(all_genomic_coordinates){|k, v| v[0..2].concat([k])}
+		phen_candidates_by_hotspot, phen_genome_hotspots = phen_regions.generate_cluster_regions(:reg_overlap, 'A', 0, true)
+		genome_matches = phen_genome_hotspots.match(prof_vars)
+		hotspot_with_phen_candidates = invert_hash(phen_candidates_by_hotspot)
+		genome_matches.each do |hotspot_id, pat_vars|
+			reg = phen_genome_hotspots.region_by_to(hotspot_id)
+			coords = [reg[:chr], reg[:start], reg[:stop]]
+			hotspots_with_pat_vars << [hotspot_id, coords, hotspot_with_phen_candidates[hotspot_id], pat_vars]
+		end
+		# TODO: see to use original similarities without use top candidates in similarity_matrixs
+		# TODO: COMPLETE UNTIL FULL PREDICTOR
+	end
+	return hotspots_with_pat_vars
 end
+def invert_hash(h)
+	new_h = {}
+	h.each do |k, vals|
+		vals.each do |v|
+			query = new_h[v]
+			if query.nil?
+				new_h[v] = [k]
+			else
+				query << k
+			end
+		end
+	end
+	return new_h
+end
 #############################################################################################
 ## OPTPARSE
@@ -96,6 +135,11 @@ OptionParser.new do |opts|
     options[:variant_data] = item
   end
+  options[:pathogenic_scores] = nil # TODO: Generalize to a folder with a table per patient
+  opts.on("-P", "--pathogenic_scores PATH", 'File with genome features an their pathogenic scores') do |item|
+    options[:pathogenic_scores] = item
+  end
  opts.on_tail("-h", "--help", "Show this message") do
     puts opts
     exit
@@ -108,12 +152,12 @@ end.parse!
 ############################################################################################
 hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
-hpo = Ontology.new
-hpo.read(hpo_file)
+hpo = Ontology.new(file: hpo_file, load_file: true)
 profiles = load_profiles(options[:profiles_file], hpo)
 profile_variants = options[:variant_data].nil? ? {} : load_variants(options[:variant_data])
 evidences, genomic_coordinates = load_evidences(options[:evidences], hpo)
+pathogenic_scores = options[:pathogenic_scores].nil? ? {} : load_pathogenic_scores(options[:pathogenic_scores])
 hpo.load_profiles(profiles)
 evidences_similarity = {}
@@ -122,7 +166,8 @@ evidences.each do |pair, data|
 	if profile_type == 'HP'
 		evidence_profiles = data[:prof]
 		evidence_profiles.transform_keys!{|prof_id, terms| prof_id.to_sym}
-		evidences_similarity[pair] = hpo.compare_profiles(external_profiles: evidence_profiles, sim_type: :lin, bidirectional: false)
+		similarities = hpo.compare_profiles(external_profiles: evidence_profiles, sim_type: :lin, bidirectional: false)
+		evidences_similarity[pair] = similarities if !similarities.empty?
 	end
 end
@@ -136,13 +181,33 @@ profiles.each do |profile_id, reference_prof|
 		entity = pair.split('_').first
 		similarities = ev_profiles_similarity[profile_id.to_sym]
 		candidate_sim_matrix, candidates, candidates_ids = get_similarity_matrix(reference_prof, similarities, evidences[pair][:prof], hpo, 40, 40)
-		candidate_sim_matrix.unshift(['HP'] + candidates_ids)
+		coords = get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
+		candidate_sim_matrix.unshift(['HP'] + candidates_ids)
+		if !pathogenic_scores.empty? # prioritize by pathogenic scores
+			candidate_sim_matrix_patho, candidates_patho, candidates_ids_patho = get_similarity_matrix(
+				reference_prof, similarities,
+				evidences[pair][:prof], hpo, 40, 40,
+				other_scores = pathogenic_scores, id2label = evidences[pair][:id2lab])
+			if !candidate_sim_matrix_patho.empty?
+				candidate_sim_matrix_patho.unshift(['HP'] + candidates_ids_patho)
+				similarity_matrixs[pair + '_path_vars'] = candidate_sim_matrix_patho
+				evidences[pair + '_path_vars'] = evidences[pair]
+			end
+		end
+		next if coords.nil?
 		all_candidates.concat(candidates)
 		similarity_matrixs[pair] = candidate_sim_matrix
-		coords = get_evidence_coordinates(entity, genomic_coordinates, candidates_ids)
 		all_genomic_coordinates.merge!(coords)
 	end
-	get_genome_hotspots(similarity_matrixs, all_genomic_coordinates)
 	prof_vars = profile_variants[profile_id]
-	make_report(profile_id, all_candidates, all_genomic_coordinates, similarity_matrixs, evidences, prof_vars, template, options[:output_folder])
+	hotspots_with_pat_vars = generate_prediction(similarity_matrixs, all_genomic_coordinates, prof_vars)
+	make_report(
+		profile_id,
+		all_candidates,
+		all_genomic_coordinates,
+		similarity_matrixs,
+		evidences, prof_vars,
+		hotspots_with_pat_vars,
+		template, options[:output_folder]
+	)
 end

data/bin/get_gen_features.rb ADDED Viewed

@@ -0,0 +1,146 @@
+#! /usr/bin/env ruby
+ROOT_PATH = File.dirname(__FILE__)
+$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
+require 'optparse'
+require 'pets'
+##################################
+## METHODS
+##################################
+def get_data(options)
+	fields2extract = get_fields2extract(options)
+	field_numbers = fields2extract.values
+	records = read_records(options, fields2extract, field_numbers)
+end
+def read_records(options, fields2extract, field_numbers) # Modified from cohort_parser
+	records = []
+	count = 0
+	File.open(options[:input_file]).each do |line|
+		line.chomp!
+		if options[:header] && count == 0
+			line.gsub!(/#\s*/,'') # correct comment-like headers
+			field_names = line.split("\t")
+			get_field_numbers2extract(field_names, fields2extract)
+			field_numbers = fields2extract.values
+		else
+			fields = line.split("\t")
+			record = field_numbers.map{|n| fields[n]}
+			if fields2extract[:id_col].nil?
+				id = "rec_#{count}" #generate ids
+			else
+				id = record.shift
+			end
+			record[1] = record[1].to_i
+			record[2] = record[2].to_i
+			record << id
+			records << record
+		end
+		count +=1
+	end
+	return records
+end
+def get_fields2extract(options)
+	fields2extract = {}
+	[:id_col, :chromosome_col, :start_col, :end_col].each do |field|
+		col = options[field]
+		if !col.nil?
+			col = col.to_i if !options[:header]
+			fields2extract[field] = col
+		end
+	end
+	return fields2extract
+end
+def get_field_numbers2extract(field_names, fields2extract)
+	fields2extract.each do |field, name|
+		fields2extract[field] = field_names.index(name)
+	end
+end
+##########################
+#OPT-PARSER
+##########################
+options = {}
+OptionParser.new do |opts|
+  opts.banner = "Usage: #{__FILE__} [options]"
+  options[:chromosome_col] = nil
+  opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
+    options[:chromosome_col] = data
+  end
+  options[:id_col] = nil
+  opts.on("-d", "--id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the id") do |data|
+    options[:id_col] = data
+  end
+  options[:end_col] = nil
+  opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
+    options[:end_col] = data
+  end
+  options[:header] = true
+  #chr\tstart\tstop
+  opts.on("-H", "--header", "Set if the file has a line header. Default true") do
+    options[:header] = false
+  end
+  options[:input_file] = nil
+  opts.on("-i", "--input_file PATH", "Input file path") do |data|
+    options[:input_file] = data
+  end
+  options[:reference_file] = nil
+  opts.on("-r", "--reference_file PATH", "Reference file with genome annotation") do |data|
+    options[:reference_file] = data
+  end
+  options[:output_file] = nil
+  opts.on("-o", "--output_file PATH", "Output file with patient data") do |data|
+    options[:output_file] = data
+  end
+  options[:start_col] = nil
+  opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
+  	options[:start_col] = data
+  end
+  options[:feature_type] = nil
+  opts.on("-t", "--feature_type STRING", "Keep features from reference whose are tagged with this feature type") do |data|
+  	options[:feature_type] = data
+  end
+  options[:feature_name] = nil
+  opts.on("-n", "--feature_name STRING", "Use this feature id that is present in attributes/annotation field of reference") do |data|
+  	options[:feature_name] = data
+  end
+  opts.on_tail("-h", "--help", "Show this message") do
+    puts opts
+    exit
+  end
+end.parse!
+regions = Genomic_Feature.new(get_data(options))
+Genomic_Feature.add_reference(
+	Reference_parser.load(
+		options[:reference_file],
+		feature_type: options[:feature_type]
+	)
+)
+gene_features = regions.get_features(attr_type: options[:feature_name])
+File.open(options[:output_file], 'w') do |f|
+	gene_features.each do |id, feat_ids|
+		feat_ids.each do |ft_id|
+			f.puts "#{id}\t#{ft_id}"
+		end
+	end
+end

data/bin/install_deps.rb CHANGED Viewed

@@ -1,7 +1,8 @@
 #! /usr/bin/env ruby
 ROOT_PATH = File.dirname(__FILE__)
-EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code')
+EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))
 $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
+require 'pets'
-system_call(EXTERNAL_CODE, 'install_R_dependencies.R')
+system_call(EXTERNAL_CODE, 'install_R_dependencies.R', '')

data/bin/profiles2phenopacket.rb CHANGED Viewed

@@ -27,31 +27,7 @@ options = {}
 OptionParser.new do |opts|
   opts.banner = "Usage: #{__FILE__} [options]"
-  options[:chromosome_col] = nil
-  opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
-    options[:chromosome_col] = data
-  end
-  options[:id_col] = nil
-  opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
-    options[:id_col] = data
-  end
-  options[:end_col] = nil
-  opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
-    options[:end_col] = data
-  end
-  options[:genome_assembly] = 'hg38'
-  opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
-    options[:genome_assembly] = data
-  end
-  options[:header] = true
-  #chr\tstart\tstop
-  opts.on("-H", "--header", "Set if the file has a line header. Default true") do
-    options[:header] = false
-  end
+  eval(File.open(COMMON_OPTPARSE).read)
   options[:input_file] = nil
   opts.on("-i", "--input_file PATH", "Input file with patient data") do |data|

data/external_code/install_R_dependencies.R CHANGED Viewed

@@ -7,5 +7,10 @@ print("Installing libraries from CRAN")
 packages_list <-c("optparse","RcppCNPy","ggplot2","fastcluster","dplyr","gplots","RColorBrewer","tidyr","data.table","gridExtra", "dynamicTreeCut", "ggExtra", "ontologyIndex", "magrittr")
 installed <- library()$results[,1]
 packages_list <- setdiff(packages_list, installed)
-install.packages(packages_list, repos='https://cloud.r-project.org')
+if(length(packages_list) == 0){
+	print('All needed packages are installed')
+}else{
+	install.packages(packages_list, repos='https://cloud.r-project.org')
+}

data/lib/pets/coPatReporterMethods.rb CHANGED Viewed

@@ -62,7 +62,8 @@ def get_profile_ic(hpo_names, phenotype_ic)
   profile_length = 0
   hpo_names.each do |hpo_id|
     hpo_ic = phenotype_ic[hpo_id]
-    ic += hpo_ic if !hpo_ic.nil?
+    raise("The term #{hpo_id} not exists in the given ic table") if hpo_ic.nil?
+    ic += hpo_ic
     profile_length += 1
   end
   profile_length = 1 if profile_length == 0
@@ -187,7 +188,18 @@ def get_semantic_similarity_clustering(options, patient_data, temp_folder)
     profiles_similarity_filename = File.join(temp_folder, ['profiles_similarity', method_name].join('_').concat('.txt'))
     clusters_distribution_filename = File.join(temp_folder, ['clusters_distribution', method_name].join('_').concat('.txt'))
     if !File.exists?(matrix_filename)
-      profiles_similarity = patient_data.compare_profiles(sim_type: method_name.to_sym, external_profiles: reference_profiles)
+      if reference_profiles.nil?
+        profiles_similarity = patient_data.compare_profiles(sim_type: method_name.to_sym, external_profiles: reference_profiles)
+      else # As reference profiles are constant, the semantic comparison will be A => B (A reference). So, we have to invert the elements to perform the comparison
+        ont = Cohort.get_ontology(:hpo)
+        pat_profiles = ont.profiles
+        ont.load_profiles(reference_profiles, reset_stored: true)
+        profiles_similarity = ont.compare_profiles(sim_type: method_name.to_sym,
+          external_profiles: pat_profiles,
+          bidirectional: false)
+        ont.load_profiles(pat_profiles, reset_stored: true)
+        profiles_similarity = invert_nested_hash(profiles_similarity)
+      end
       remove_nested_entries(profiles_similarity){|id, sim| sim >= options[:sim_thr] } if !options[:sim_thr].nil?
       write_profile_pairs(profiles_similarity, profiles_similarity_filename)
       if reference_profiles.nil?
@@ -219,13 +231,30 @@ def get_semantic_similarity_clustering(options, patient_data, temp_folder)
     write_patient_hpo_stat(get_cluster_metadata(clusters_info), clusters_distribution_filename)
     out_file = File.join(temp_folder, ['clusters_distribution', method_name].join('_'))
     system_call(EXTERNAL_CODE, 'xyplot_graph.R', "-d #{clusters_distribution_filename} -o #{out_file} -x PatientsNumber -y HPOAverage") if !File.exists?(out_file)
+    sim_mat4cluster = {}
+    if options[:detailed_clusters]
+      clusters_codes.each do |cluster|
+        cluster_cohort = Cohort.new
+        clID, patient_number, patient_ids, hpo_codes = cluster
+        patient_ids.each_with_index {|patID, i| cluster_cohort.add_record([patID, hpo_codes[i], []])}
+        cluster_profiles = cluster_cohort.profiles
+        ref_profile = cluster_cohort.get_general_profile
+        hpo.load_profiles({ref: ref_profile}, reset_stored: true)
+        similarities = hpo.compare_profiles(external_profiles: cluster_profiles, sim_type: :lin, bidirectional: false)
+        candidate_sim_matrix, candidates, candidates_ids = get_similarity_matrix(ref_profile, similarities[:ref], cluster_profiles, hpo, 100, 100)
+        candidate_sim_matrix.unshift(['HP'] + candidates_ids)
+        sim_mat4cluster[clID] = candidate_sim_matrix
+      end
+    end
     clusters = translate_codes(clusters_codes, hpo)
     container = {
       :temp_folder => temp_folder,
       :cluster_name => method_name,
       :clusters => clusters,
-      :hpo => hpo
+      :hpo => hpo,
+      :sim_mat4cluster => sim_mat4cluster
      }
     report = Report_html.new(container, 'Patient clusters report')
@@ -235,6 +264,23 @@ def get_semantic_similarity_clustering(options, patient_data, temp_folder)
   end
 end
+def invert_nested_hash(h)
+  new_h = {}
+  h.each do |k1, vals1|
+    vals1.each do |v1|
+      vals1.each do |k2, vals2|
+        query = new_h[k2]
+        if query.nil?
+          new_h[k2] = {k1 => vals2}
+        else
+          query[k1] = vals2
+        end
+      end
+    end
+  end
+  return new_h
+end
 def get_cluster_metadata(clusters_info)
   average_hp_per_pat_distribution = []
   clusters_info.each do |cl_id, pat_info|

data/lib/pets/cohort.rb CHANGED Viewed

@@ -24,11 +24,6 @@ class Cohort
 		else
 			ont = Ontology.new
 			ont.read(ont_file)
-			if !excluded_terms_file.nil?
-				ont.add_removable_terms(read_excluded_ont_file(excluded_terms_file))
-				ont.remove_removable()
-				ont.build_index()
-			end
 		end
 		@@ont[ont_name] = ont
 	end
@@ -44,12 +39,14 @@ class Cohort
 	def initialize()
 		@profiles = {}
 		@vars = {}
+		@extra_attr = {}
 		@var_idx = Genomic_Feature.new([])
 	end
-	def add_record(rec) #[id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]]
+	def add_record(rec, extra_attr = nil) #[id, [profile], [[chr1, start1, stop1],[chr1, start1, stop1]]]
 		id, profile, vars = rec
 		@profiles[id] = profile.map{|t| t.to_sym} if !profile.nil?
+		@extra_attr[id] = extra_attr if !extra_attr.nil?
 		add_gen_feat(id, vars) if !vars.nil?
 	end
@@ -111,7 +108,7 @@ class Cohort
 		term_count = Hash.new(0)
 		each_profile do |id, prof|
 			prof.each do |term|
-				general_profile[prof] += 1
+				term_count[term] += 1
 			end
 		end
 		records = @profiles.length
@@ -271,7 +268,12 @@ class Cohort
 		@profiles.each do |id, terms|
 			phenopacket = {metaData: metaData}
-			phenopacket[:subject] = {id: id}
+			query_sex = @extra_attr.dig(id, :sex)
+			sex = query_sex.nil? ? 'UNKNOWN_SEX' : query_sex
+			phenopacket[:subject] = {
+				id: id,
+				sex: sex
+			}
 			phenotypicFeatures = []
 			terms.each do |term|
 				term_name = ont.translate_id(term)

data/lib/pets/common_optparse.rb ADDED Viewed

@@ -0,0 +1,30 @@
+options[:chromosome_col] = nil
+opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
+  options[:chromosome_col] = data
+end
+options[:id_col] = nil
+opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
+  options[:id_col] = data
+end
+options[:end_col] = nil
+opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
+  options[:end_col] = data
+end
+options[:genome_assembly] = 'hg38'
+opts.on("-G", "--genome_assembly STRING", "Genome assembly version. Please choose between hg18, hg19 and hg38. Default hg38") do |data|
+  options[:genome_assembly] = data
+end
+options[:header] = true
+#chr\tstart\tstop
+opts.on("-H", "--header", "Set if the file has a line header. Default true") do
+  options[:header] = false
+end
+options[:sex_col] = nil
+opts.on("-x", "--sex_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient sex") do |data|
+  options[:sex_col] = data
+end

data/lib/pets/constants.rb CHANGED Viewed

@@ -1,4 +1,5 @@
-# Needs define ROOT_PATH constant in file requiring this file
+# Needs define ROOT_PATH constant in file requiring this file
+COMMON_OPTPARSE = File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets', 'common_optparse.rb'))
 REPORT_FOLDER = File.expand_path(File.join(ROOT_PATH, '..', 'templates'))
 EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
 EXTERNAL_CODE = File.expand_path(File.join(ROOT_PATH, '..', 'external_code'))

data/lib/pets/generalMethods.rb CHANGED Viewed

@@ -243,8 +243,27 @@ def get_detailed_similarity(profile, candidates, evidences, hpo)
 	return matrix
 end
-def get_similarity_matrix(reference_prof, similarities, evidence_profiles, hpo, term_limit, candidate_limit)
-		candidates = similarities.to_a.sort{|s1, s2| s2.last <=> s1.last}.first(candidate_limit)
+def get_similarity_matrix(reference_prof, similarities, evidence_profiles, hpo, term_limit, candidate_limit, other_scores = {}, id2label = {})
+		candidates = similarities.to_a
+		if other_scores.empty?
+			candidates.sort!{|s1, s2| s2.last <=> s1.last}
+			candidates = candidates.first(candidate_limit)
+		else # Prioritize first by the external list of scores, select the candidates and then prioritize by similarities
+			selected_candidates = []
+			candidates.each do |cand|
+				cand_id = cand[0]
+				cand_lab = id2label[cand_id.to_s]
+				next if cand_lab.nil?
+				other_score = other_scores[cand_lab]
+				next if other_score.nil?
+				cand << other_score
+				selected_candidates << cand
+			end
+			selected_candidates.sort!{|e1, e2| e2[2] <=> e1[2]}
+			candidates = selected_candidates.first(candidate_limit)
+			candidates.sort!{|e1, e2| e2[1] <=> e1[1]}
+			candidates.each{|c| c.pop}
+		end
 		candidates_ids = candidates.map{|c| c.first}
 		candidate_similarity_matrix = get_detailed_similarity(reference_prof, candidates, evidence_profiles, hpo)
 		candidate_similarity_matrix.each_with_index do |row, i|

data/lib/pets/genomic_features.rb CHANGED Viewed

@@ -1,23 +1,59 @@
 class Genomic_Feature
+	@@ref = nil
+	def self.array2genomic_feature(arr)
+		new(arr.map{|r| yield(r)})
+	end
+	def self.hash2genomic_feature(h)
+		vars = []
+		h.each do |h, v|
+			vars << yield(h, v)
+		end
+		new(vars)
+	end
+	def self.add_reference(genomic_regions)
+		@@ref = genomic_regions
+	end
 	#If any method use gen_fet as name is a Genomic_Feature object
-	def initialize(feat_array) # [[chr1, start1, stop1],[chr1, start1, stop1]]
+	def initialize(feat_array, annotations: nil) # [[chr1, start1, stop1],[chr1, start1, stop1]]
 		@regions = {}
+		@reg_by_to = {}
 		@reg_id = -1
 		load_features(feat_array)
+		load_annotations(annotations) if !annotations.nil?
 	end
 	def load_features(feat_array)
-		feat_array.each do |chr, start, stop|
+		feat_array.each do |chr, start, stop, to|
 			chr = chr.to_sym
-			region = {start: start, stop: stop, to: @reg_id +=1 }
+			@reg_id +=1
+			id = to.nil? ? @reg_id : to
+			region = {chr: chr, start: start, stop: stop, to: id }
+			@reg_by_to[id] = region
 			add_record(@regions, chr, region)
 		end
 	end
+	def load_annotations(annotations)
+		each do |chr, reg|
+			annot = annotations[reg[:to]]
+			reg[:attrs] = annot if !annot.nil?
+		end
+	end
 	def length
 		return @regions.length
 	end
+	def each_chr()
+		@regions.each do |chr, regs|
+			yield(chr, regs)
+		end
+	end
 	def each()
 		@regions.each do |chr, regs|
 			regs.each do |region|
@@ -30,6 +66,14 @@ class Genomic_Feature
 		return @regions.keys
 	end
+	def get_chr_regs(chr)
+		return @regions[chr]
+	end
+	def region_by_to(to)
+		return @reg_by_to[to]
+	end
 	def get_sizes
 		sizes = []
 		each do |chr, region|
@@ -39,6 +83,35 @@ class Genomic_Feature
 		return sizes
 	end
+	def get_features(attr_type: nil)
+		features = match(@@ref)
+		if !attr_type.nil?
+			features.each do |reg_id, feat_ids|
+				new_feat_ids = feat_ids.map{|fi| @@ref.region_by_to(fi).dig(:attrs, attr_type)}
+				features[reg_id] = new_feat_ids.compact.uniq
+			end
+		end
+		return features
+	end
+	def match(other_gen_feat)
+		all_matches = {}
+		each_chr do |chr, regs|
+			other_regs = other_gen_feat.get_chr_regs(chr)
+			next if other_regs.nil?
+			regs.each do |reg|
+				local_matches = []
+				start = reg[:start]
+				stop = reg[:stop]
+				other_regs.each do |other_reg|
+					local_matches << other_reg[:to] if coor_overlap?(start, stop, other_reg)
+				end
+				all_matches[reg[:to]] = local_matches if !local_matches.empty?
+			end
+		end
+		return all_matches
+	end
 	def get_summary_sizes
 		sizes = Hash.new(0)
 		each do |chr, region|
@@ -60,21 +133,21 @@ class Genomic_Feature
 		reference.each do |start, stop|
 			reg_ids = []
 			genomic_ranges.each do |reg|
-				reg_ids << reg[:to] if coor_overlap?(start, stop, reg)
+				overlap = coor_overlap?(start, stop, reg)
+				reg_ids << reg[:to] if overlap
 			end
 			overlaps << reg_ids.uniq
 		end
 		return overlaps
 	end
-	def generate_cluster_regions(meth, tag, ids_per_reg = 1)
+	def generate_cluster_regions(meth, tag, ids_per_reg = 1, obj = false)
 		compute_windows(meth) # Get putative genome windows
-		patients_out_of_cluster = 0
 		ids_by_cluster = {}
 		annotated_full_ref = [] # All reference windows wit uniq id and chr tagged
 		@regions.each do |chr, regs|
 			reference = @windows[chr]
-			overlaps = get_reference_overlaps(regs, reference) # See what patient has match with a overlap region
+			overlaps = get_reference_overlaps(regs, reference)
 			clust_numb = 0
 			reference.each_with_index do |ref, i|
 				current_ids = overlaps[i]
@@ -87,6 +160,7 @@ class Genomic_Feature
 				end
 			end
 		end
+		annotated_full_ref = Genomic_Feature.array2genomic_feature(annotated_full_ref){|r| [r[2], r[0], r[1], r[3]]} if obj
 		return ids_by_cluster, annotated_full_ref
 	end
@@ -116,15 +190,37 @@ class Genomic_Feature
 	def compute_region_overlap_windows(genomic_ranges)
 		reference = []
-		reference.concat(genomic_ranges.map{|gr| gr[:start]})# get start
-		reference.concat(genomic_ranges.map{|gr| gr[:stop]})# get stop
+		single_nt = []
+		genomic_ranges.each do |gr|
+			start = gr[:start]
+			stop = gr[:stop]
+			if stop - start > 0
+				reference << start # get start
+				reference << stop # get stop
+			else # Build a window of at least one nt for snv
+				single_nt << start
+			end
+		end
 		reference.uniq!
+		single_nt.each do |snt| # add start stop for snv
+			reference << snt
+			reference << snt
+		end
 		reference.sort!
 		#Define overlap ranges
 		final_reference = []
+		last_len = 1
 		reference.each_with_index do |coord,i|
 			next_coord = reference[i + 1]
-			final_reference << [coord, next_coord] if !next_coord.nil?
+			if !next_coord.nil?
+				current_len = next_coord - coord
+				coord = coord + 1 if last_len == 0 # Separate SNV window from others
+				if current_len == 0 && last_len > 0 && !final_reference.empty?
+					final_reference.last[1] -= 1 # Separate SNV window from others
+				end
+				final_reference << [coord, next_coord]
+				last_len = current_len
+			end
 		end
 		return final_reference
 	end

data/lib/pets/io.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require 'csv'
+require 'bio-vcf'
 def load_hpo_ontology(hpo_file, excluded_hpo_file)
   hpo = nil
@@ -201,20 +202,39 @@ end
 def load_variants(variant_folder)
   variants = {}
-  Dir.glob(File.join(variant_folder, '*.tab')).each do |path|
-    profile_id = File.basename(path, '.tab')
-    vars = []
-    File.open(path).each do |line|
-      fields = line.chomp.split("\t")
-      chr = fields[0]
-      start = fields[1].to_i
-      vars << [chr, start, start]
+  Dir.glob(File.join(variant_folder, '*.{tab,vcf,vcf.gz}')).each do |path|
+    profile_id, ext = File.basename(path).split(".", 2)
+    if ext == 'tab' || ext == 'txt'
+      vars = load_tabular_vars(path)
+    elsif ext == 'vcf' || ext == 'vcf.gz'
+      vars = load_vcf(path, ext)
     end
     variants[profile_id] = Genomic_Feature.new(vars)
   end
   return variants
 end
+def load_tabular_vars(path)
+  vars = []
+  File.open(path).each do |line|
+    fields = line.chomp.split("\t")
+    chr = fields[0].gsub('chr','')
+    start = fields[1].to_i
+    vars << [chr, start, start]
+  end
+  return vars
+end
+def load_vcf(path, ext) # Some compressed files are fragmented internally. If so, VCFfile only reads first fragment
+  vars = []             # Use zcat original.vcf.gz | gzip > new.vcf.gz to obtain a contiguous file
+  vcf  = BioVcf::VCFfile.new(file: path, is_gz: ext == 'vcf.gz' ? true : false )
+  vcf.each do |var|
+    vars << [var.chrom.gsub('chr',''), var.pos, var.pos]
+  end
+  puts vars.length
+  return vars
+end
 def load_evidences(evidences_path, hpo)
   genomic_coordinates = {}
   coord_files = Dir.glob(File.join(evidences_path, '*.coords'))
@@ -242,6 +262,10 @@ def load_coordinates(file_path)
       header = false
     else
       entity, chr, strand, start, stop = fields
+      if chr == 'NA'
+        STDERR.puts "Warning: Record #{fields.inspect} is undefined"
+        next
+      end
       coordinates[entity] = [chr, start.to_i, stop.to_i, strand]
     end
   end

data/lib/pets/parsers/cohort_parser.rb CHANGED Viewed

@@ -3,6 +3,7 @@ class Cohort_Parser
 		fields2extract = get_fields2extract(options)
 		field_numbers = fields2extract.values
 		records = read_records(options, fields2extract, field_numbers)
+		options[:extracted_fields] = fields2extract.keys
 		cohort, rejected_terms, rejected_recs = create_cohort(records, options)
 		return cohort, rejected_terms, rejected_recs
 	end
@@ -46,7 +47,7 @@ class Cohort_Parser
 	def self.get_fields2extract(options)
 		fields2extract = {}
-		[:id_col, :ont_col, :chromosome_col, :start_col, :end_col].each do |field|
+		[:id_col, :ont_col, :chromosome_col, :start_col, :end_col, :sex_col].each do |field|
 			col = options[field]
 			if !col.nil?
 				col = col.to_i if !options[:header]
@@ -70,7 +71,7 @@ class Cohort_Parser
 		records.each do |id, record|
 			rec = record.first
 			terms = rec.first
-			if options[:names]
+			if options[:names] # Translate hpo names 2 codes
 				init_term_number = terms.length
 				terms, rec_rejected_terms = ont.translate_names(terms)
 				if !rec_rejected_terms.empty?
@@ -87,7 +88,11 @@ class Cohort_Parser
 			else
 				variants = [] # Not exists genomic region attributes so we create a empty array
 			end
-			cohort.add_record([id, terms, check_variants(variants)])
+			other_attr = {}
+			if options[:extracted_fields].include?(:sex_col) # Check for additional attributes. -1 is applied to ignore :id in extracted fields
+				other_attr[:sex] = record.first[options[:extracted_fields].index(:sex_col) -1]
+			end
+			cohort.add_record([id, terms, check_variants(variants)], other_attr)
 		end
 		return cohort, rejected_terms.uniq, rejected_recs
 	end

data/lib/pets/parsers/reference_parser.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'genomic_features'
+# Reads reference annotation files and turns them into Genomic_Feature objects.
+# GTF is the only format that is parsed.
+class Reference_parser
+	# Load a reference file.
+	#
+	# @param file_path [String] path to the reference file
+	# @param file_format [String, nil] format name; when nil it is taken from the file extension
+	# @param feature_type [String, nil] when given, only records whose feature column equals it are kept
+	# @return [Genomic_Feature] built from the parsed regions, with the per-id attributes as annotations
+	def self.load(file_path, file_format: nil, feature_type: nil)
+		# Use the text after the LAST dot of the file name. Splitting the whole path at its first dot
+		# failed on names such as "Homo_sapiens.GRCh38.109.gtf" and on paths such as "./refs/genes.gtf".
+		file_format = File.extname(file_path).sub(/\A\./, '') if file_format.nil?
+		if file_format == 'gtf'
+			regions, all_attrs = parse_gtf(file_path, feature_type: feature_type)
+		end
+		# NOTE(review): for any other format regions and all_attrs are nil and are passed on as nil -
+		# confirm Genomic_Feature accepts that, or raise an error here.
+		return Genomic_Feature.new(regions, annotations: all_attrs)
+	end
+	# Parse a GTF file. Column layout: https://www.ensembl.org/info/website/upload/gff.html
+	#
+	# @param file_path [String] path to the GTF file
+	# @param feature_type [String, nil] keep only records with this value in the feature column; nil keeps all
+	# @return [Array] two values: the features as [seqname without 'chr', start, stop, gene_id] arrays, and
+	#   a hash gene_id => attributes (a later record with the same gene_id replaces the earlier entry)
+	def self.parse_gtf(file_path, feature_type: nil)
+		features = []
+		all_attrs = {}
+		File.foreach(file_path) do |line| # foreach closes the file once every line has been read
+			next if line.start_with?('#')
+			seqname, source, feature, start, stop, _score, _strand, _frame, attribute = line.chomp.split("\t")
+			next if attribute.nil? # blank or truncated record: fewer than the nine GTF columns
+			if feature_type.nil? || feature_type == feature
+				attrs = process_attrs(attribute, ';', ' ')
+				attrs['source'] = source
+				attrs['feature'] = feature
+				id = attrs['gene_id']
+				features << [seqname.gsub('chr',''), start.to_i, stop.to_i, id]
+				all_attrs[id] = attrs
+			end
+		end
+		return features, all_attrs
+	end
+	# Turn an attribute string such as 'gene_id "G1"; gene_name "ABC";' into a hash.
+	# Internal helper; it stays publicly callable because a bare `private` does not apply to
+	# `def self.` methods, and hiding it now could break existing callers.
+	#
+	# @param attributes [String] raw attribute column
+	# @param tuple_sep [String] separator between key/value pairs
+	# @param field_sep [String] separator between a key and its value
+	# @return [Hash{String => String}] values with every double quote removed; a key without value maps to ''
+	def self.process_attrs(attributes, tuple_sep, field_sep)
+		pairs = attributes.split(tuple_sep).map do |attr_pair|
+			key, value = attr_pair.strip.split(field_sep, 2)
+			next if key.nil? # empty chunk, e.g. whitespace after the final separator
+			[key, value.to_s.delete('"')]
+		end
+		return pairs.compact.to_h
+	end
+end

data/lib/pets/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Pets
-  VERSION = "0.2.4"
+  VERSION = "0.2.5"
 end

data/lib/pets.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require "pets/version"
 require "pets/constants"
-require "parsers/cohort_parser"
+require "pets/parsers/cohort_parser"
+require "pets/parsers/reference_parser"
 require "pets/coPatReporterMethods"
 require "pets/generalMethods"
 require "pets/io"

data/pets.gemspec CHANGED Viewed

@@ -38,15 +38,18 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler", "~> 2.0"
   spec.add_development_dependency "rake", "~> 13.0.3"
-  spec.add_development_dependency "rspec", "~> 3.10.0"
+  spec.add_development_dependency "rspec", "~> 3.11.0"
   spec.add_dependency "statistics2"
   spec.add_dependency "terminal-table"
   spec.add_dependency "semtools", "~> 0.1.0"
+  spec.add_dependency "NetAnalyzer"
   spec.add_dependency "report_html"
   spec.add_dependency "numo-narray"
   spec.add_dependency "npy"
   spec.add_dependency "expcalc"
-  spec.add_dependency "parallel", "~> 1.20.1"
+  spec.add_dependency "bio-vcf"
+  spec.add_dependency "parallel", "~> 1.20.1"
+  spec.add_runtime_dependency 'net-ftp'
+  spec.add_runtime_dependency 'net-http'
 end

data/templates/cluster_report.erb CHANGED Viewed

@@ -25,14 +25,34 @@
 <div style="width: 90%; background-color:#ecf0f1; margin: 0 auto;">
 	<h1 style="text-align: center; background-color:#d6eaf8">Patient HPO profiles by cluster.</h1>
 		<%= table(id: :clusters, header: true, border: 2, row_names: false, text: true,
-			cell_align: %w( center )) do |data|
-				data.each do |element|
+			cell_align: %w( center ), styled: 'dt', attrib: {'class' => 'table'}) do |data|
+				patient_list = []
+				data.each do |element| # Cluster
+					clID, patient_number, patient_ids, hpo_codes, hpo_names = element
+					# TODO: show one record per patient
 					#STDERR.puts element.inspect
-					element[2] = element[2].map{|patID| "<p>#{patID}</p>" }.join("\n")
-					element[3] = element[3].map{|profile| "<p>#{profile.map{|hpo_code| get_hpo_link(hpo_code)}.join(', ')}</p>" }.join("\n")
-					element[4] = element[4].map{|transl_profile| "<p>#{transl_profile.join(', ')}</p>"}.join("\n")
+					patient_ids.each_with_index do |patID, i|
+						patient_record = [clID, patient_number]
+						patient_record << patID
+						patient_record << hpo_codes[i].map{|hpo_code| get_hpo_link(hpo_code)}.join(', ')
+						patient_record << hpo_names[i].join(', ')
+						patient_list << patient_record
+					end
 				end
+				data.clear
+				data.concat(patient_list)
 				data.unshift(["Cluster ID","Patients in Cluster","Patient IDs", "HPO codes", "Phenotypes"])
 			end
 		%>
 </div>
+<div style="width: 90%; background-color:#ecf0f1; margin: 0 auto;">
+	<h1 style="text-align: center; background-color:#d6eaf8"> Cluster detailed view.</h1>
+	<%
+		@hash_vars[:sim_mat4cluster].each do |clID, sim_matrix|
+			@hash_vars[:sim_matrix] = sim_matrix %>
+			<%= heatmap(id: :sim_matrix, header: true, row_names: true, title: "Cluster #{clID}" )%>
+		<% end
+	%>
+</div>

data/templates/evidence_profile.erb CHANGED Viewed

@@ -4,7 +4,7 @@
 	<% @hash_vars[:similarity_matrixs].each do |pair, similarity_matrix| %>
 		<%
 			matrix_name = pair + '_sim_matrix'
-			if pair == 'gene_HP'
+			if pair.include?('gene_HP')
 				dict = @hash_vars[:evidences][pair][:id2lab]
 				header = similarity_matrix.first
 				header.map! do |item|
@@ -27,8 +27,24 @@
 	<% end %>
 	<%=circular_genome(id: :candidates, header: false, row_names: true, transpose: false,
 		 genomic_coordinates: @hash_vars[:genomic_coordinates] )%>
-	<% if !@hash_vars[:var_ids].nil? %>
-		<%=circular_genome(id: :var_ids, header: false, row_names: true, transpose: false,
-		 genomic_coordinates: @hash_vars[:var_coordinates] )%>
+	<% if !@hash_vars[:var_ids].nil?
+		if @hash_vars[:var_ids].length > 200 %>
+			<p> Too much variant records</p>
+		<% else %>
+			<%=circular_genome(id: :var_ids, header: false,
+				row_names: true, transpose: false,
+		 		genomic_coordinates: @hash_vars[:var_coordinates] )%>
+		<% end %>
 	<% end %>
+	<h1 style="text-align: center; background-color:#d6eaf8">Candidate regions</h1>
+	<%=  table(id: :hotspot_table) do |data|
+			data.each do |row|
+				row[1] = row[1].join(" ")
+				row[2] = row[2].join(",")
+				row[3] = row[3].join(",")
+			end
+		end
+	%>
 </div>

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pets
 version: !ruby/object:Gem::Version
-  version: 0.2.4
+  version: 0.2.5
 platform: ruby
 authors:
 - Elena Rojano, Pedro Seoane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-19 00:00:00.000000000 Z
+date: 2023-08-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 3.10.0
+        version: 3.11.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 3.10.0
+        version: 3.11.0
 - !ruby/object:Gem::Dependency
   name: statistics2
   requirement: !ruby/object:Gem::Requirement
@@ -94,6 +94,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.1.0
+- !ruby/object:Gem::Dependency
+  name: NetAnalyzer
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: report_html
   requirement: !ruby/object:Gem::Requirement
@@ -150,6 +164,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: bio-vcf
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: parallel
   requirement: !ruby/object:Gem::Requirement
@@ -164,6 +192,34 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.20.1
+- !ruby/object:Gem::Dependency
+  name: net-ftp
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: net-http
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: PETS suite includes three different tools. CohortAnalyzer performs the
   calculation of several statistics that gives an overview of a cohort of patients
   to analyse. Reg2Phen uses associations between pathological phenotypes and regions
@@ -185,6 +241,7 @@ executables:
 - fmeasure_index.rb
 - generate_HPO_IC_table.rb
 - get_PR_values.rb
+- get_gen_features.rb
 - get_network_nodes.rb
 - get_sorted_profs.rb
 - install_deps.rb
@@ -218,6 +275,7 @@ files:
 - bin/fmeasure_index.rb
 - bin/generate_HPO_IC_table.rb
 - bin/get_PR_values.rb
+- bin/get_gen_features.rb
 - bin/get_network_nodes.rb
 - bin/get_sorted_profs.rb
 - bin/install_deps.rb
@@ -269,11 +327,13 @@ files:
 - lib/pets.rb
 - lib/pets/coPatReporterMethods.rb
 - lib/pets/cohort.rb
+- lib/pets/common_optparse.rb
 - lib/pets/constants.rb
 - lib/pets/generalMethods.rb
 - lib/pets/genomic_features.rb
 - lib/pets/io.rb
 - lib/pets/parsers/cohort_parser.rb
+- lib/pets/parsers/reference_parser.rb
 - lib/pets/phen2reg_methods.rb
 - lib/pets/reg2phen_methods.rb
 - lib/pets/version.rb
@@ -303,7 +363,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.15
+rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Suite with predictive tools.