pets 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +6 -0
- data/bin/area_under_curve_pr.rb +118 -0
- data/bin/association_metrics_average.rb +94 -0
- data/bin/coPatReporter.rb +531 -0
- data/bin/console +14 -0
- data/bin/fmeasure_index.rb +72 -0
- data/bin/get_PR_values.rb +90 -0
- data/bin/get_clusters.R +18 -0
- data/bin/get_network_nodes.rb +197 -0
- data/bin/lines.R +77 -0
- data/bin/merge_by_cluster.rb +62 -0
- data/bin/merge_pairs.rb +138 -0
- data/bin/paco_translator.rb +102 -0
- data/bin/phen2reg.rb +385 -0
- data/bin/phen2reg_predictor_check.rb +297 -0
- data/bin/plot_area.R +71 -0
- data/bin/plot_boxplot.R +21 -0
- data/bin/plot_density.R +46 -0
- data/bin/plot_scatterplot.R +25 -0
- data/bin/reg2phen.rb +116 -0
- data/bin/region_to_patients_generator.rb +84 -0
- data/bin/relate_CI_to_association_value.rb +90 -0
- data/bin/setup +8 -0
- data/bin/standardize_scores.R +40 -0
- data/bin/xyplot_graph.R +60 -0
- data/external_data/biosystems_gene.gz +0 -0
- data/external_data/bsid2info.gz +0 -0
- data/external_data/chromosome_sizes_hg19.txt +24 -0
- data/external_data/gene_data.gz +0 -0
- data/external_data/gene_data_with_pathways.gz +0 -0
- data/external_data/gene_location.gz +0 -0
- data/external_data/hp.obo +146363 -0
- data/external_data/remove +0 -0
- data/lib/pets.rb +6 -0
- data/lib/pets/coPatReporterMethods.rb +77 -0
- data/lib/pets/generalMethods.rb +556 -0
- data/lib/pets/phen2reg_methods.rb +432 -0
- data/lib/pets/version.rb +3 -0
- data/pets.gemspec +47 -0
- data/templates/cohort_report.erb +93 -0
- data/templates/patient_report.erb +209 -0
- metadata +183 -0
@@ -0,0 +1,297 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
##########################
|
4
|
+
#LIBRARIES
|
5
|
+
##########################
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
##########################
|
9
|
+
#METHODS
|
10
|
+
##########################
|
11
|
+
# Script-wide accumulators shared by the methods below (they live on the
# top-level `main` object, so every top-level method sees them).
@success_percentage_distribution = [] # one success value appended per processed batch of regions
@prediction_vector = []               # true/false per prediction, drained by compute_rankings
@rankings = { in: [], out: [] }       # percent ranks of predictions that did / did not hit the control region
@genome_fraction_predicted = 0        # All positive cases
@good_predicted_subregions = 0        # True positive cases
|
16
|
+
|
17
|
+
# Converts the pending @prediction_vector into percent ranks and files each
# rank under @rankings[:in] or @rankings[:out] depending on the flag stored
# for that position. The vector is emptied afterwards; nothing happens when
# it is already empty.
def compute_rankings
  return if @prediction_vector.empty?

  # One percent of the number of predictions, so (position / unit) is a 0-100 rank.
  percent_unit = @prediction_vector.length.fdiv(100)
  @prediction_vector.each_with_index do |hit, position|
    bucket = hit ? :in : :out
    @rankings[bucket] << (position + 1).fdiv(percent_unit)
  end
  @prediction_vector = []
end
|
31
|
+
|
32
|
+
# Reads a tab-separated prediction file.
#
# Column 0 holds the profile id (an optional 'ProfID:' prefix is stripped and
# the rest is converted with to_i). When column 1 is the literal
# 'Results not found' the profile is recorded as a one-element array.
# Otherwise columns 1-3 are chromosome, start and stop, column 4 is a
# comma-separated list whose length is kept, and column 6 is the score.
# Blank lines are skipped.
#
# Returns an array of
#   [profile_index, pred_chr, pred_start, pred_stop, score, predicted_hpos_number]
# or [profile_index] for profiles without results.
def load_prediction_file(input_file)
  predicted_regions = []
  # File.foreach closes the file when iteration finishes; File.open(...).each
  # left the handle open until garbage collection.
  File.foreach(input_file) do |line|
    predicted_info = line.chomp.split("\t")
    next if predicted_info.empty? # blank line: there is no profile id to read

    profile_index = predicted_info[0].gsub('ProfID:', '').to_i
    if predicted_info[1] != 'Results not found'
      predicted_hpos_number = predicted_info[4].split(',').length
      predicted_regions << [profile_index, predicted_info[1], predicted_info[2].to_i, predicted_info[3].to_i, predicted_info[6].to_f, predicted_hpos_number]
    else
      predicted_regions << [profile_index]
    end
  end
  return predicted_regions
end
|
47
|
+
|
48
|
+
# Computes the score used for imputation as the length-weighted mean score
# of the first regions in score order.
#
# Only regions whose score (index 4) is a Float take part; their length is
# stop - start (index 3 minus index 2). Regions are sorted by ascending
# score, or descending when integration_method is 'fisher'. They are then
# accumulated until the inspected length reaches total_length / 1000
# (integer division). At least one region is always inspected when any
# exists.
#
# Returns a Float. When there is no scored region, or the inspected length
# is zero, 0.0 is returned (previously the former raised NoMethodError on
# nil and the latter produced NaN).
def get_imputation_scores(predicted_regions, integration_method)
  scored_regions = predicted_regions.select { |r| r[4].is_a?(Float) }
  score_length_pairs = scored_regions.map { |r| [r[4], r[3] - r[2]] } # [combined score, region length]
  score_length_pairs.sort! { |p1, p2| p1.first <=> p2.first }
  score_length_pairs.reverse! if integration_method == 'fisher'
  # Seeded inject: an empty list yields 0 instead of nil.
  total_region_length = score_length_pairs.inject(0) { |sum, pair| sum + pair.last }

  length2inspect = total_region_length / 1000
  acumulated_score = 0
  inspected_length = 0
  score_length_pairs.each do |score, length|
    acumulated_score += score * length
    inspected_length += length
    break if inspected_length >= length2inspect
  end
  return 0.0 if inspected_length.zero? # nothing to average over

  return acumulated_score.fdiv(inspected_length)
end
|
67
|
+
|
68
|
+
# Returns imputation_score shifted up or down (chosen at random) by a random
# offset of at most imputation_score * desv.
def generate_random_imp_score(imputation_score, desv)
  offset = imputation_score * desv * rand()
  subtract = [true, false].sample
  subtract ? imputation_score - offset : imputation_score + offset
end
|
77
|
+
|
78
|
+
# Reads the control (patient) file: one line per patient with two
# tab-separated fields, 'chr:start:stop' and a '|'-separated phenotype list.
#
# Returns an array of [chr, start, stop, number_of_phenotypes] in file order;
# start and stop are converted with to_i.
def load_patient_data(input_data_file)
  patient_data = []
  # File.foreach closes the file when iteration finishes; File.open(...).each
  # left the handle open until garbage collection.
  File.foreach(input_data_file) do |line|
    mutation_coords, hpo_profile = line.chomp.split("\t")
    number_of_phenotypes = hpo_profile.split('|').length
    chr, start_pos, stop_pos = mutation_coords.split(':')
    patient_data << [chr, start_pos.to_i, stop_pos.to_i, number_of_phenotypes]
  end
  return patient_data
end
|
89
|
+
|
90
|
+
# Builds the [group, score] table by walking the predictions in order and
# flushing the accumulated in/out regions each time the profile index
# differs from the last accepted one.
#
# ctrl_regions      - array indexed by profile index; each entry is
#                     [chr, start, stop, number_of_phenotypes].
# predicted_regions - rows as [profile_index, chr, start, stop, score,
#                     predicted_hpos_number]; rows with only a profile index
#                     carry no prediction.
# scale, imputation_score, apply_imputation - passed through to
#                     process_in_out_regions.
# hpo_min_recovery  - a prediction is used only when
#                     predicted_hpos_number / number_of_phenotypes * 100 is
#                     strictly greater than this value.
#
# Side effects: resets @genome_fraction_predicted and
# @good_predicted_subregions after each in-loop flush, and calls
# compute_rankings after every flush.
#
# Returns the concatenated rows produced by process_in_out_regions, or an
# empty array when no control region was ever looked up.
def get_perfomance_table(ctrl_regions, predicted_regions, scale, imputation_score, hpo_min_recovery, apply_imputation)
  table = []
  last_profile_id = ctrl_chr = ctrl_start = ctrl_stop = number_of_phenotypes = nil
  in_out_regions = []
  predicted_regions.each do |profile_index, pred_chr, pred_start, pred_stop, score, predicted_hpos_number|
    if last_profile_id != profile_index && !last_profile_id.nil?
      # ctrl_start/ctrl_stop still hold the values looked up on the previous row.
      table.concat(process_in_out_regions(ctrl_start, ctrl_stop, scale, in_out_regions, imputation_score, apply_imputation))
      @genome_fraction_predicted = 0
      @good_predicted_subregions = 0
      in_out_regions = []
      compute_rankings
    end
    ctrl_chr, ctrl_start, ctrl_stop, number_of_phenotypes = ctrl_regions[profile_index] # get position in array, for each prediction
    next if predicted_hpos_number.nil? || number_of_phenotypes.nil?

    hpo_recovery_percentage = (predicted_hpos_number / number_of_phenotypes.to_f) * 100
    next unless hpo_recovery_percentage > hpo_min_recovery

    in_out_regions.concat(get_in_out_regions(ctrl_chr, ctrl_start, ctrl_stop, pred_chr, pred_start, pred_stop, score))
    # Only rows that pass the recovery filter advance last_profile_id.
    last_profile_id = profile_index
  end
  # Nothing to flush when no control region was found (e.g. empty input);
  # process_in_out_regions would fail on nil coordinates.
  return table if ctrl_start.nil? || ctrl_stop.nil?

  table.concat(process_in_out_regions(ctrl_start, ctrl_stop, scale, in_out_regions, imputation_score, apply_imputation))
  # Rank the predictions of the final profile too; previously they were left
  # in @prediction_vector and never reached @rankings.
  compute_rankings
  return table
end
|
115
|
+
|
116
|
+
# Turns the in/out regions gathered for one control region into table rows.
#
# First records get_sucess_percentage(in_out_regions) in
# @success_percentage_distribution. If part of the control region was not
# covered by a prediction (control length minus @good_predicted_subregions
# is positive) and apply_imputation is set, an extra "in" region with a
# randomised imputation score is appended to in_out_regions (the caller's
# array is modified). Each region then contributes
# ceil(region_length / (@genome_fraction_predicted + control length) * scale)
# copies of [group, score].
#
# Returns the array of [group, score] rows.
def process_in_out_regions(ctrl_start, ctrl_stop, scale, in_out_regions, imputation_score, apply_imputation)
  @success_percentage_distribution << get_sucess_percentage(in_out_regions)

  ctrl_length = ctrl_stop - ctrl_start
  uncovered_length = ctrl_length - @good_predicted_subregions
  if uncovered_length > 0 && apply_imputation
    in_out_regions << ["in", generate_random_imp_score(imputation_score, 0.35), uncovered_length]
  end

  evaluated_genome_fraction = @genome_fraction_predicted + ctrl_length
  in_out_regions.each_with_object([]) do |(group, score, region_length), rows|
    copies = (region_length.fdiv(evaluated_genome_fraction) * scale).ceil
    rows.concat(Array.new(copies, [group, score]))
  end
end
|
140
|
+
|
141
|
+
# Splits one predicted region into the part lying inside the control region
# ("in") and the part(s) lying outside it ("out").
#
# Every "in" length is added to @good_predicted_subregions and every "out"
# length to @genome_fraction_predicted. One boolean is pushed onto
# @prediction_vector: true when the prediction produced an "in" piece.
# A nil pred_chr (with a non-nil ctrl_chr) yields no regions.
#
# Returns an array of [group, score, length] with group "in" or "out".
def get_in_out_regions(ctrl_chr, ctrl_start, ctrl_stop, pred_chr, pred_start, pred_stop, score)
  in_out_regions = []
  inside = lambda do |length|
    in_out_regions << ["in", score, length]
    @good_predicted_subregions += length
  end
  outside = lambda do |length|
    in_out_regions << ["out", score, length]
    @genome_fraction_predicted += length
  end

  if ctrl_chr == pred_chr
    if pred_start < ctrl_start && pred_stop > ctrl_stop # predicted region larger than ctrl region
      inside.call(ctrl_stop - ctrl_start)
      outside.call(ctrl_start - pred_start)
      outside.call(pred_stop - ctrl_stop)
    elsif pred_start >= ctrl_start && pred_stop <= ctrl_stop # within ctrl region
      inside.call(pred_stop - pred_start)
    elsif ctrl_start < pred_stop && ctrl_stop >= pred_stop # upstream region out of ctrl region
      inside.call(pred_stop - ctrl_start)
      outside.call(ctrl_start - pred_start)
    elsif ctrl_start <= pred_start && ctrl_stop > pred_start # downstream region out of ctrl region
      inside.call(ctrl_stop - pred_start)
      outside.call(pred_stop - ctrl_stop)
    else # in same chr but not in ctrl region
      outside.call(pred_stop - pred_start)
    end
  elsif !pred_chr.nil? # in different chr
    outside.call(pred_stop - pred_start)
  end

  @prediction_vector << in_out_regions.any? { |group, _score, _length| group == 'in' }
  in_out_regions
end
|
189
|
+
|
190
|
+
# Fraction of the total region length that belongs to "in" regions.
#
# in_out_regions - array of [group, score, length].
#
# Returns the Integer 0 for an empty list, otherwise in_length / total_length
# as a Float (a 0-1 fraction, despite the name).
def get_sucess_percentage(in_out_regions)
  return 0 if in_out_regions.empty?

  total_length = in_out_regions.inject(0) { |acc, (_group, _score, reg_length)| acc + reg_length }
  in_length = in_out_regions.inject(0) do |acc, (group, _score, reg_length)|
    group == 'in' ? acc + reg_length : acc
  end
  in_length.fdiv(total_length)
end
|
203
|
+
|
204
|
+
|
205
|
+
##########################
|
206
|
+
#OPT-PARSE
|
207
|
+
##########################
|
208
|
+
|
209
|
+
# Command-line options with their defaults; the parser below overwrites the
# entries for which a flag is given.
options = {
  input_prediction: nil,
  meth: nil,
  output_file: 'final_values_for_pr_curve.txt',
  hpo_recovery: 0,
  input_regions: nil,
  success_percentage: 'success_percentage',
  apply_imputation: false,
  scale_size: 100
}

option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{__FILE__} [options]"

  opts.on("-i", "--input_prediction PATH", "Input prediction file for checking") do |path|
    options[:input_prediction] = path
  end

  opts.on("-m", "--meth STRING", "Method used in score integration calculation, affects to the imputation algorithm (if used)") do |name|
    options[:meth] = name
  end

  opts.on("-o", "--output_file PATH", "Output results for PR curve") do |path|
    options[:output_file] = path
  end

  # The value is converted with to_f and must be strictly positive when given.
  opts.on("-p", "--hpo_recovery INTEGER", "Minimum percentage of HPO terms to consider predictions") do |value|
    options[:hpo_recovery] = value.to_f
    abort("Please, choose a recovery value higher than 0") if options[:hpo_recovery] <= 0
  end

  opts.on("-r", "--input_regions PATH", "Input patients true affected regions (ctrl file)") do |path|
    options[:input_regions] = path
  end

  opts.on("-s", "--success_percentage PATH", "Output results with success percentage for each prediction") do |path|
    options[:success_percentage] = path
  end

  opts.on("-y", "--apply_imputation", "Activates imputation") do
    options[:apply_imputation] = true
  end

  opts.on("-z", "--scale_size INTEGER", "Scale region size to avoid long range regions") do |value|
    options[:scale_size] = value.to_i
    abort("Please, choose a scale value higher than 0") if options[:scale_size] <= 0
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end
option_parser.parse!
|
261
|
+
|
262
|
+
##########################
|
263
|
+
#MAIN
|
264
|
+
##########################
|
265
|
+
|
266
|
+
# Load the inputs, build the performance table and write the three reports.
predicted_regions = load_prediction_file(options[:input_prediction])
imputation_score = get_imputation_scores(predicted_regions, options[:meth])
patient_data = load_patient_data(options[:input_regions])

table = get_perfomance_table(patient_data, predicted_regions, options[:scale_size], imputation_score, options[:hpo_recovery], options[:apply_imputation])

# group/score pairs used to draw the PR curve
File.open(options[:output_file], 'w') do |f|
  f.puts "group\tscore"
  table.each do |output, score|
    if score.nil?
      next unless options[:apply_imputation] # when no imputation
      score = generate_random_imp_score(imputation_score, 0.35)
    end
    f.puts "#{output}\t#{score}"
  end
end

# one success value per processed batch of regions
File.open(options[:success_percentage], 'w') do |f|
  f.puts 'perc'
  @success_percentage_distribution.each { |pg| f.puts pg }
end

# percent ranks, always written to a file named 'ranking' in the working directory
File.open('ranking', 'w') do |f|
  f.puts "reg\tranking"
  @rankings.each do |reg, ranks|
    ranks.each { |rank| f.puts "#{reg}\t#{rank}" }
  end
end
|
297
|
+
|
data/bin/plot_area.R
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
#! /usr/bin/env Rscript
# x,y graph: area plot of V3 against V2, one facet per value of V1.

library(ggplot2)
library(optparse)

################################################################
# OPTPARSE
################################################################
option_list <- list(
  make_option(c("-d", "--data_file"), type="character",
    help="Tabulated file with information about each sample"),
  make_option(c("-o", "--output"), type="character", default="results",
    help="Output figure file"),
  make_option(c("-x", "--x_values"), type="character",
    help="Name of column with values to be plotted"),
  make_option(c("-y", "--y_values"), type="character",
    help="Name of column with values to be plotted"),
  make_option(c("-f", "--density_values"), type="character",
    help="Name of column to be used as density values"),
  make_option(c("-H", "--header"), action="store_false", default=TRUE,
    help="The input table not have header line"),
  make_option(c("-X", "--x_title"), type="character",
    help="Title of x axis"),
  make_option(c("-Y", "--y_title"), type="character",
    help="Title of y axis"),
  make_option(c("-F", "--output_format"), type="character", default="pdf",
    help="pdf or jpeg file output format"),
  make_option(c("-m", "--maxs_file"), type="character", default="",
    help="Tabulated file maximum of each sample"),
  make_option(c("-t", "--graph_title"), type="character", default="",
    help="Title of the graph")
)
opt <- parse_args(OptionParser(option_list=option_list))

################################################################
## MAIN
################################################################

# NOTE(review): the plot below reads columns V1, V2 and V3 by name and does
# not use --x_values/--y_values/--density_values, so the table presumably
# needs -H (no header) or a header that uses those names -- confirm.
data <- read.table(opt$data_file, sep="\t", header=opt$header)

# Open the graphics device; the extension is appended to --output.
if (opt$output_format == "pdf"){
  pdf(paste(opt$output, '.pdf', sep=""))
}else if(opt$output_format == "jpeg"){
  jpeg(paste(opt$output, '.jpeg', sep=""))
}else{
  # Previously no device was opened here and the figure went to the default device.
  stop(paste("Unsupported output format:", opt$output_format, "(use pdf or jpeg)"))
}

# Fix the facet order to chromosomes 1-22, X, Y.
goodChrOrder <- c(1:22,"X","Y")
data$V1 <- factor(data$V1,levels=goodChrOrder)

# Optional table of positions, drawn as vertical lines at its V2 column.
maxs <- c()
if(opt$maxs_file != ""){
  maxs <- read.table(opt$maxs_file, sep="\t", header=FALSE)
}

obj <- ggplot(data=data, aes(x=V2, y=V3))
obj <- obj + geom_area(aes(fill=V1))
obj <- obj + facet_wrap(~ V1, ncol=2, strip.position = "right")
if(length(maxs) > 0){
  obj <- obj + geom_vline(data = maxs, aes(xintercept = V2))
}
obj <- obj + xlab(opt$x_title)
obj <- obj + ylab(opt$y_title)
obj <- obj + theme(axis.text.x = element_text(angle = 45, hjust = 1))
obj <- obj + guides(fill=FALSE)
obj <- obj + ggtitle(label = opt$graph_title)
# Explicit print so the figure is drawn even if this code is sourced or wrapped.
print(obj)
dev.off()
|
data/bin/plot_boxplot.R
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#! /usr/bin/env Rscript

library(ggplot2)

# Positional arguments:
#   1 input table (with header)   2 output directory
#   3 column for the x axis       4 column for the y axis
#   5 x axis label                6 y axis label
args <- commandArgs(trailingOnly = TRUE)
data <- read.table(args[1], header=TRUE)
output <- args[2]
x_axis <- args[3]
y_axis <- args[4]
x_tag <- args[5]
y_tag <- args[6]

# Keep the categories in their order of first appearance in the table.
x_order <- unique(data[[x_axis]])
data[[x_axis]] <- factor(data[[x_axis]], levels = x_order)

pdf(file.path(output, 'boxplot.pdf'))
boxplot_obj <- ggplot(data, aes(x=data[[x_axis]], y=data[[y_axis]])) +
  geom_boxplot() +
  xlab(x_tag) +
  ylab(y_tag) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(boxplot_obj)
dev.off()
|
data/bin/plot_density.R
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#! /usr/bin/env Rscript

library(ggplot2)

# Positional arguments:
#   1 input table (tab separated, with header)
#   2 column holding the values
#   3 upper x axis limit (optional)
#   4 column used to group and fill the densities (optional)
#   5 lower x axis limit (optional, 0 when absent)
args <- commandArgs(trailingOnly = TRUE)
file <- args[1]
values <- args[2]
x_axis_limit <- as.numeric(args[3])
groups <- args[4]
x_axis_limit_min <- as.numeric(args[5])

data <- read.table(file, header = TRUE, sep = "\t")

# The figure is always written to out.pdf in the working directory.
pdf('out.pdf')
if (is.na(groups)) {
  obj <- ggplot(data, aes(x=data[[values]])) + geom_density()
} else {
  obj <- ggplot(data, aes(x=data[[values]], fill=data[[groups]])) + geom_density(alpha=.3)
}
obj <- obj + theme(legend.title=element_blank())

# Limits are applied only when an upper limit was given.
if (!is.na(x_axis_limit)) {
  xmin <- if (is.na(x_axis_limit_min)) 0 else x_axis_limit_min
  obj <- obj + xlim(xmin, x_axis_limit)
}
print(obj)
dev.off()
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#! /usr/bin/env Rscript

library(ggplot2)

# Positional arguments:
#   1 input table (with header)   2 output directory
#   3 column for the x axis       4 column for the y axis
#   5 column mapped to point size
#   6 x axis label   7 y axis label   8 size legend label
args <- commandArgs(trailingOnly = TRUE)
data <- read.table(args[1], header=TRUE)
output <- args[2]
x_axis <- args[3]
y_axis <- args[4]
density <- args[5]
x_tag <- args[6]
y_tag <- args[7]
size_tag <- args[8]

# Keep the categories in their order of first appearance in the table.
x_order <- unique(data[[x_axis]])
data[[x_axis]] <- factor(data[[x_axis]], levels = x_order)

pdf(file.path(output, 'scatterplot.pdf'))
scatter_obj <- ggplot(data, aes(x=data[[x_axis]], y=data[[y_axis]])) +
  geom_point(aes(size=data[[density]])) +
  xlab(x_tag) +
  ylab(y_tag) +
  labs(size = size_tag) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(scatter_obj)
dev.off()
|
25
|
+
|