@sjcrh/proteinpaint-rust 2.167.0 → 2.169.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/aichatbot.rs +131 -111
- package/src/manhattan_plot.rs +2 -0
- package/src/test_ai.rs +23 -7
package/package.json
CHANGED
package/src/aichatbot.rs
CHANGED
|
@@ -30,21 +30,51 @@ pub struct AiJsonFormat {
|
|
|
30
30
|
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
31
31
|
enum Charts {
|
|
32
32
|
// More chart types will be added here later
|
|
33
|
-
Summary(
|
|
34
|
-
DE(
|
|
33
|
+
Summary(TrainTestDataSummary),
|
|
34
|
+
DE(TrainTestDataDE),
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
38
|
-
struct
|
|
38
|
+
struct TrainTestDataSummary {
|
|
39
39
|
SystemPrompt: String,
|
|
40
|
-
TrainingData: Vec<
|
|
41
|
-
TestData: Vec<
|
|
40
|
+
TrainingData: Vec<QuestionAnswerSummary>,
|
|
41
|
+
TestData: Vec<QuestionAnswerSummary>,
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
45
|
-
struct
|
|
45
|
+
struct QuestionAnswerSummary {
|
|
46
46
|
question: String,
|
|
47
|
-
answer:
|
|
47
|
+
answer: SummaryType,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
51
|
+
struct TrainTestDataDE {
|
|
52
|
+
SystemPrompt: String,
|
|
53
|
+
TrainingData: Vec<QuestionAnswerDE>,
|
|
54
|
+
TestData: Vec<QuestionAnswerDE>,
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
58
|
+
struct QuestionAnswerDE {
|
|
59
|
+
question: String,
|
|
60
|
+
answer: DEType,
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
64
|
+
struct DEType {
|
|
65
|
+
action: String,
|
|
66
|
+
DE_output: DETerms,
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
70
|
+
struct DETerms {
|
|
71
|
+
group1: GroupType,
|
|
72
|
+
group2: GroupType,
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
76
|
+
struct GroupType {
|
|
77
|
+
name: String,
|
|
48
78
|
}
|
|
49
79
|
|
|
50
80
|
#[allow(non_camel_case_types)]
|
|
@@ -77,6 +107,27 @@ async fn main() -> Result<()> {
|
|
|
77
107
|
None => panic!("user_input field is missing in input json"),
|
|
78
108
|
}
|
|
79
109
|
|
|
110
|
+
let dataset_db_json: &JsonValue = &json_string["dataset_db"];
|
|
111
|
+
let dataset_db_str: &str;
|
|
112
|
+
match dataset_db_json.as_str() {
|
|
113
|
+
Some(inp) => dataset_db_str = inp,
|
|
114
|
+
None => panic!("dataset_db field is missing in input json"),
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
let genedb_json: &JsonValue = &json_string["genedb"];
|
|
118
|
+
let genedb_str: &str;
|
|
119
|
+
match genedb_json.as_str() {
|
|
120
|
+
Some(inp) => genedb_str = inp,
|
|
121
|
+
None => panic!("genedb field is missing in input json"),
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
let aiRoute_json: &JsonValue = &json_string["aiRoute"];
|
|
125
|
+
let aiRoute_str: &str;
|
|
126
|
+
match aiRoute_json.as_str() {
|
|
127
|
+
Some(inp) => aiRoute_str = inp,
|
|
128
|
+
None => panic!("aiRoute field is missing in input json"),
|
|
129
|
+
}
|
|
130
|
+
|
|
80
131
|
if user_input.len() == 0 {
|
|
81
132
|
panic!("The user input is empty");
|
|
82
133
|
}
|
|
@@ -124,8 +175,9 @@ async fn main() -> Result<()> {
|
|
|
124
175
|
let ai_json: AiJsonFormat =
|
|
125
176
|
serde_json::from_str(&ai_data).expect("AI JSON file does not have the correct format");
|
|
126
177
|
|
|
127
|
-
let genedb = String::from(tpmasterdir) + &"/" + &
|
|
128
|
-
let dataset_db = String::from(tpmasterdir) + &"/" + &
|
|
178
|
+
let genedb = String::from(tpmasterdir) + &"/" + &genedb_str;
|
|
179
|
+
let dataset_db = String::from(tpmasterdir) + &"/" + &dataset_db_str;
|
|
180
|
+
let airoute = String::from(binpath) + &"/../../" + &aiRoute_str;
|
|
129
181
|
|
|
130
182
|
let apilink_json: &JsonValue = &json_string["apilink"];
|
|
131
183
|
let apilink: &str;
|
|
@@ -185,6 +237,7 @@ async fn main() -> Result<()> {
|
|
|
185
237
|
&dataset_db,
|
|
186
238
|
&genedb,
|
|
187
239
|
&ai_json,
|
|
240
|
+
&airoute,
|
|
188
241
|
testing,
|
|
189
242
|
)
|
|
190
243
|
.await;
|
|
@@ -208,6 +261,7 @@ async fn main() -> Result<()> {
|
|
|
208
261
|
&dataset_db,
|
|
209
262
|
&genedb,
|
|
210
263
|
&ai_json,
|
|
264
|
+
&airoute,
|
|
211
265
|
testing,
|
|
212
266
|
)
|
|
213
267
|
.await;
|
|
@@ -241,6 +295,7 @@ pub async fn run_pipeline(
|
|
|
241
295
|
dataset_db: &str,
|
|
242
296
|
genedb: &str,
|
|
243
297
|
ai_json: &AiJsonFormat,
|
|
298
|
+
ai_route: &str,
|
|
244
299
|
testing: bool,
|
|
245
300
|
) -> Option<String> {
|
|
246
301
|
let mut classification: String = classify_query_by_dataset_type(
|
|
@@ -251,6 +306,7 @@ pub async fn run_pipeline(
|
|
|
251
306
|
temperature,
|
|
252
307
|
max_new_tokens,
|
|
253
308
|
top_p,
|
|
309
|
+
ai_route,
|
|
254
310
|
)
|
|
255
311
|
.await;
|
|
256
312
|
classification = classification.replace("\"", "");
|
|
@@ -376,101 +432,33 @@ pub async fn run_pipeline(
|
|
|
376
432
|
async fn classify_query_by_dataset_type(
|
|
377
433
|
user_input: &str,
|
|
378
434
|
comp_model: impl rig::completion::CompletionModel + 'static,
|
|
379
|
-
|
|
435
|
+
_embedding_model: impl rig::embeddings::EmbeddingModel + 'static,
|
|
380
436
|
llm_backend_type: &llm_backend,
|
|
381
437
|
temperature: f64,
|
|
382
438
|
max_new_tokens: usize,
|
|
383
439
|
top_p: f32,
|
|
440
|
+
ai_route: &str,
|
|
384
441
|
) -> String {
|
|
385
|
-
//
|
|
386
|
-
let
|
|
387
|
-
|
|
388
|
-
If a ProteinPaint dataset contains SNV/Indel/SV data then return JSON with single key, 'snv_indel'.
|
|
389
|
-
|
|
390
|
-
---
|
|
391
|
-
|
|
392
|
-
Copy number variation (CNV) is a phenomenon in which sections of the genome are repeated and the number of repeats in the genome varies between individuals.[1] Copy number variation is a special type of structural variation: specifically, it is a type of duplication or deletion event that affects a considerable number of base pairs.
|
|
393
|
-
|
|
394
|
-
If a ProteinPaint dataset contains copy number variation data then return JSON with single key, 'cnv'.
|
|
395
|
-
|
|
396
|
-
---
|
|
397
|
-
|
|
398
|
-
Structural variants/fusions (SV) are genomic mutations when eith a DNA region is translocated or copied to an entirely different genomic locus. In case of transcriptomic data, when RNA is fused from two different genes its called a gene fusion.
|
|
399
|
-
|
|
400
|
-
If a ProteinPaint dataset contains structural variation or gene fusion data then return JSON with single key, 'sv_fusion'.
|
|
401
|
-
---
|
|
402
|
-
|
|
403
|
-
Hierarchical clustering of gene expression is an unsupervised learning technique where several number of relevant genes and the samples are clustered so as to determine (previously unknown) cohorts of samples (or patients) or structure in data. It is very commonly used to determine subtypes of a particular disease based on RNA sequencing data.
|
|
404
|
-
|
|
405
|
-
If a ProteinPaint dataset contains hierarchical data then return JSON with single key, 'hierarchical'.
|
|
406
|
-
|
|
407
|
-
---
|
|
408
|
-
|
|
409
|
-
Differential Gene Expression (DGE or DE) is a technique where the most upregulated (or highest) and downregulated (or lowest) genes between two cohorts of samples (or patients) are determined from a pool of THOUSANDS of genes. Differential gene expression CANNOT be computed for a SINGLE gene. A volcano plot is shown with fold-change in the x-axis and adjusted p-value on the y-axis. So, the upregulated and downregulared genes are on opposite sides of the graph and the most significant genes (based on adjusted p-value) is on the top of the graph. Following differential gene expression generally GeneSet Enrichment Analysis (GSEA) is carried out where based on the genes and their corresponding fold changes the upregulation/downregulation of genesets (or pathways) is determined.
|
|
410
|
-
|
|
411
|
-
Sample Query1: \"Which gene has the highest expression between the two genders\"
|
|
412
|
-
Sample Answer1: { \"answer\": \"dge\" }
|
|
413
|
-
|
|
414
|
-
Sample Query2: \"Which gene has the lowest expression between the two races\"
|
|
415
|
-
Sample Answer2: { \"answer\": \"dge\" }
|
|
416
|
-
|
|
417
|
-
Sample Query1: \"Which genes are the most upregulated genes between group A and group B\"
|
|
418
|
-
Sample Answer1: { \"answer\": \"dge\" }
|
|
419
|
-
|
|
420
|
-
Sample Query3: \"Which gene are overexpressed between male and female\"
|
|
421
|
-
Sample Answer3: { \"answer\": \"dge\" }
|
|
442
|
+
// Read the file
|
|
443
|
+
let ai_route_data = fs::read_to_string(ai_route).unwrap();
|
|
422
444
|
|
|
423
|
-
|
|
424
|
-
|
|
445
|
+
// Parse the JSON data
|
|
446
|
+
let ai_json: Value = serde_json::from_str(&ai_route_data).expect("AI JSON file does not have the correct format");
|
|
425
447
|
|
|
448
|
+
// Create a string to hold the file contents
|
|
449
|
+
let mut contents = String::from("");
|
|
426
450
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
There are two main methods of survival analysis:
|
|
434
|
-
|
|
435
|
-
1) Kaplan-Meier (HM) analysis is a univariate test that only takes into account a single categorical variable.
|
|
436
|
-
2) Cox proportional hazards model (coxph) is a multivariate test that can take into account multiple variables.
|
|
437
|
-
|
|
438
|
-
The hazard ratio (HR) is an indicator of the effect of the stimulus (e.g. drug dose, treatment) between two cohorts of patients.
|
|
439
|
-
HR = 1: No effect
|
|
440
|
-
HR < 1: Reduction in the hazard
|
|
441
|
-
HR > 1: Increase in Hazard
|
|
442
|
-
|
|
443
|
-
Sample Query1: \"Compare survival rates between group A and B\"
|
|
444
|
-
Sample Answer1: { \"answer\": \"survival\" }
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
If a ProteinPaint dataset contains survival data then return JSON with single key, 'survival'.
|
|
448
|
-
|
|
449
|
-
---
|
|
450
|
-
|
|
451
|
-
Next generation sequencing reads (NGS) are mapped to a human genome using alignment algorithm such as burrows-wheelers alignment algorithm. Then these reads are called using variant calling algorithms such as GATK (Genome Analysis Toolkit). However this type of analysis is too compute intensive and beyond the scope of visualization software such as ProteinPaint.
|
|
452
|
-
|
|
453
|
-
If a user query asks about variant calling or mapping reads then JSON with single key, 'variant_calling'.
|
|
454
|
-
|
|
455
|
-
---
|
|
456
|
-
|
|
457
|
-
Summary plot in ProteinPaint shows the various facets of the datasets. Show expression of a SINGLE gene or compare the expression of a SINGLE gene across two different cohorts defined by the user. It may show all the samples according to their respective diagnosis or subtypes of cancer. It is also useful for comparing and correlating different clinical variables. It can show all possible distributions, frequency of a category, overlay, correlate or cross-tabulate with another variable on top of it. If a user query asks about a SINGLE gene expression or correlating clinical variables then return JSON with single key, 'summary'.
|
|
458
|
-
|
|
459
|
-
Sample Query1: \"Show all fusions for patients with age less than 30\"
|
|
460
|
-
Sample Answer1: { \"answer\": \"summary\" }
|
|
461
|
-
|
|
462
|
-
Sample Query2: \"List all molecular subtypes of leukemia\"
|
|
463
|
-
Sample Answer2: { \"answer\": \"summary\" }
|
|
464
|
-
|
|
465
|
-
Sample Query3: \"is tp53 expression higher in men than women ?\"
|
|
466
|
-
Sample Answer3: { \"answer\": \"summary\" }
|
|
467
|
-
|
|
468
|
-
Sample Query4: \"Compare ATM expression between races for women greater than 80yrs\"
|
|
469
|
-
Sample Answer4: { \"answer\": \"summary\" }
|
|
470
|
-
|
|
451
|
+
if let Some(object) = ai_json.as_object() {
|
|
452
|
+
for (_key, value) in object {
|
|
453
|
+
contents += &value.as_str().unwrap();
|
|
454
|
+
contents += "---"; // Adding delimiter
|
|
455
|
+
}
|
|
456
|
+
}
|
|
471
457
|
|
|
472
|
-
|
|
473
|
-
|
|
458
|
+
// Removing the last "---" characters
|
|
459
|
+
contents.pop();
|
|
460
|
+
contents.pop();
|
|
461
|
+
contents.pop();
|
|
474
462
|
|
|
475
463
|
// Split the contents by the delimiter "---"
|
|
476
464
|
let parts: Vec<&str> = contents.split("---").collect();
|
|
@@ -501,18 +489,18 @@ If a query does not match any of the fields described above, then return JSON wi
|
|
|
501
489
|
rag_docs.push(part.trim().to_string())
|
|
502
490
|
}
|
|
503
491
|
|
|
504
|
-
//let top_k: usize = 3;
|
|
492
|
+
//let top_k: usize = 3; // Embedding model not used currently
|
|
505
493
|
// Create embeddings and add to vector store
|
|
506
|
-
let embeddings = EmbeddingsBuilder::new(embedding_model.clone())
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
494
|
+
//let embeddings = EmbeddingsBuilder::new(embedding_model.clone())
|
|
495
|
+
// .documents(rag_docs)
|
|
496
|
+
// .expect("Reason1")
|
|
497
|
+
// .build()
|
|
498
|
+
// .await
|
|
499
|
+
// .unwrap();
|
|
512
500
|
|
|
513
|
-
|
|
514
|
-
let mut vector_store = InMemoryVectorStore::<String>::default();
|
|
515
|
-
InMemoryVectorStore::add_documents(&mut vector_store, embeddings);
|
|
501
|
+
//// Create vector store
|
|
502
|
+
//let mut vector_store = InMemoryVectorStore::<String>::default();
|
|
503
|
+
//InMemoryVectorStore::add_documents(&mut vector_store, embeddings);
|
|
516
504
|
|
|
517
505
|
// Create RAG agent
|
|
518
506
|
let agent = AgentBuilder::new(comp_model).preamble(&(String::from("Generate classification for the user query into summary, dge, hierarchical, snv_indel, cnv, variant_calling, sv_fusion and none categories. Return output in JSON with ALWAYS a single word answer { \"answer\": \"dge\" }, that is 'summary' for summary plot, 'dge' for differential gene expression, 'hierarchical' for hierarchical clustering, 'snv_indel' for SNV/Indel, 'cnv' for CNV and 'sv_fusion' for SV/fusion, 'variant_calling' for variant calling, 'surivial' for survival data, 'none' for none of the previously described categories. The summary plot list and summarizes the cohort of patients according to the user query. The answer should always be in lower case\n The options are as follows:\n") + &contents + "\nQuestion= {question} \nanswer")).temperature(temperature).additional_params(additional).build();
|
|
@@ -909,7 +897,7 @@ async fn extract_summary_information(
|
|
|
909
897
|
.filter(|x| user_words2.contains(&x.to_lowercase()))
|
|
910
898
|
.collect();
|
|
911
899
|
|
|
912
|
-
let mut summary_data_check: Option<
|
|
900
|
+
let mut summary_data_check: Option<TrainTestDataSummary> = None;
|
|
913
901
|
for chart in ai_json.charts.clone() {
|
|
914
902
|
if let Charts::Summary(traindata) = chart {
|
|
915
903
|
summary_data_check = Some(traindata);
|
|
@@ -922,6 +910,7 @@ async fn extract_summary_information(
|
|
|
922
910
|
let mut training_data: String = String::from("");
|
|
923
911
|
let mut train_iter = 0;
|
|
924
912
|
for ques_ans in summary_data.TrainingData {
|
|
913
|
+
let summary_answer: SummaryType = ques_ans.answer;
|
|
925
914
|
train_iter += 1;
|
|
926
915
|
training_data += "Example question";
|
|
927
916
|
training_data += &train_iter.to_string();
|
|
@@ -931,7 +920,7 @@ async fn extract_summary_information(
|
|
|
931
920
|
training_data += "Example answer";
|
|
932
921
|
training_data += &train_iter.to_string();
|
|
933
922
|
training_data += &":";
|
|
934
|
-
training_data += &
|
|
923
|
+
training_data += &serde_json::to_string(&summary_answer).unwrap();
|
|
935
924
|
training_data += &"\n";
|
|
936
925
|
}
|
|
937
926
|
|
|
@@ -1014,7 +1003,7 @@ struct SummaryType {
|
|
|
1014
1003
|
|
|
1015
1004
|
impl SummaryType {
|
|
1016
1005
|
#[allow(dead_code)]
|
|
1017
|
-
pub fn sort_summarytype_struct(
|
|
1006
|
+
pub fn sort_summarytype_struct(mut self) -> SummaryType {
|
|
1018
1007
|
// This function is necessary for testing (test_ai.rs) to see if two variables of type "SummaryType" are equal or not. Without this a vector of two Summarytype holding the same values but in different order will be classified separately.
|
|
1019
1008
|
self.summaryterms.sort();
|
|
1020
1009
|
|
|
@@ -1022,6 +1011,7 @@ impl SummaryType {
|
|
|
1022
1011
|
Some(ref mut filterterms) => filterterms.sort(),
|
|
1023
1012
|
None => {}
|
|
1024
1013
|
}
|
|
1014
|
+
self.clone()
|
|
1025
1015
|
}
|
|
1026
1016
|
}
|
|
1027
1017
|
|
|
@@ -1039,7 +1029,7 @@ impl PartialOrd for SummaryTerms {
|
|
|
1039
1029
|
(SummaryTerms::clinical(_), SummaryTerms::clinical(_)) => Some(std::cmp::Ordering::Equal),
|
|
1040
1030
|
(SummaryTerms::geneExpression(_), SummaryTerms::geneExpression(_)) => Some(std::cmp::Ordering::Equal),
|
|
1041
1031
|
(SummaryTerms::clinical(_), SummaryTerms::geneExpression(_)) => Some(std::cmp::Ordering::Greater),
|
|
1042
|
-
(SummaryTerms::geneExpression(_), SummaryTerms::clinical(_)) => Some(std::cmp::Ordering::
|
|
1032
|
+
(SummaryTerms::geneExpression(_), SummaryTerms::clinical(_)) => Some(std::cmp::Ordering::Less),
|
|
1043
1033
|
}
|
|
1044
1034
|
}
|
|
1045
1035
|
}
|
|
@@ -1313,10 +1303,40 @@ fn validate_summary_output(
|
|
|
1313
1303
|
+ &categorical_filter.value
|
|
1314
1304
|
+ &"\"},";
|
|
1315
1305
|
validated_filter_terms_PP += &string_json;
|
|
1316
|
-
filter_hits += 1; // Once numeric term is also implemented, this statement will go outside the match block
|
|
1317
1306
|
}
|
|
1318
|
-
FilterTerm::Numeric(
|
|
1307
|
+
FilterTerm::Numeric(numeric_filter) => {
|
|
1308
|
+
let string_json;
|
|
1309
|
+
if numeric_filter.greaterThan.is_some() && numeric_filter.lessThan.is_none() {
|
|
1310
|
+
string_json = "{\"term\":\"".to_string()
|
|
1311
|
+
+ &numeric_filter.term
|
|
1312
|
+
+ &"\", \"gt\":\""
|
|
1313
|
+
+ &numeric_filter.greaterThan.unwrap().to_string()
|
|
1314
|
+
+ &"\"},";
|
|
1315
|
+
} else if numeric_filter.greaterThan.is_none() && numeric_filter.lessThan.is_some() {
|
|
1316
|
+
string_json = "{\"term\":\"".to_string()
|
|
1317
|
+
+ &numeric_filter.term
|
|
1318
|
+
+ &"\", \"lt\":\""
|
|
1319
|
+
+ &numeric_filter.lessThan.unwrap().to_string()
|
|
1320
|
+
+ &"\"},";
|
|
1321
|
+
} else if numeric_filter.greaterThan.is_some() && numeric_filter.lessThan.is_some() {
|
|
1322
|
+
string_json = "{\"term\":\"".to_string()
|
|
1323
|
+
+ &numeric_filter.term
|
|
1324
|
+
+ &"\", \"lt\":\""
|
|
1325
|
+
+ &numeric_filter.lessThan.unwrap().to_string()
|
|
1326
|
+
+ &"\", \"gt\":\""
|
|
1327
|
+
+ &numeric_filter.greaterThan.unwrap().to_string()
|
|
1328
|
+
+ &"\"},";
|
|
1329
|
+
} else {
|
|
1330
|
+
// When both greater and less than are none
|
|
1331
|
+
panic!(
|
|
1332
|
+
"Numeric filter term {} is missing both greater than and less than values. One of them must be defined",
|
|
1333
|
+
&numeric_filter.term
|
|
1334
|
+
);
|
|
1335
|
+
}
|
|
1336
|
+
validated_filter_terms_PP += &string_json;
|
|
1337
|
+
}
|
|
1319
1338
|
};
|
|
1339
|
+
filter_hits += 1;
|
|
1320
1340
|
}
|
|
1321
1341
|
println!("validated_filter_terms_PP:{}", validated_filter_terms_PP);
|
|
1322
1342
|
if filter_hits > 0 {
|
package/src/manhattan_plot.rs
CHANGED
|
@@ -217,8 +217,10 @@ fn grin2_file_read(
|
|
|
217
217
|
Some(q) => q,
|
|
218
218
|
None => continue,
|
|
219
219
|
};
|
|
220
|
+
|
|
220
221
|
let q_val: f64 = match q_val_str.parse() {
|
|
221
222
|
Ok(v) if v > 0.0 => v,
|
|
223
|
+
Ok(v) if v == 0.0 => 1e-300, // Treat exact 0 as ~1e-300 so we can still show q-values that are 0 and not filter them out
|
|
222
224
|
_ => continue,
|
|
223
225
|
};
|
|
224
226
|
let neg_log10_q = -q_val.log10();
|
package/src/test_ai.rs
CHANGED
|
@@ -20,6 +20,7 @@ mod tests {
|
|
|
20
20
|
ollama_comp_model_name: String,
|
|
21
21
|
ollama_embedding_model_name: String,
|
|
22
22
|
genomes: Vec<Genomes>,
|
|
23
|
+
aiRoute: String,
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
|
|
@@ -49,7 +50,7 @@ mod tests {
|
|
|
49
50
|
|
|
50
51
|
// Parse the JSON data
|
|
51
52
|
let serverconfig: ServerConfig = serde_json::from_str(&data).expect("JSON not in serverconfig.json format");
|
|
52
|
-
|
|
53
|
+
let airoute = String::from("../../") + &serverconfig.aiRoute;
|
|
53
54
|
for genome in &serverconfig.genomes {
|
|
54
55
|
for dataset in &genome.datasets {
|
|
55
56
|
match &dataset.aifiles {
|
|
@@ -100,14 +101,16 @@ mod tests {
|
|
|
100
101
|
&dataset_db,
|
|
101
102
|
&genedb,
|
|
102
103
|
&ai_json,
|
|
104
|
+
&airoute,
|
|
103
105
|
testing,
|
|
104
106
|
)
|
|
105
107
|
.await;
|
|
106
|
-
let
|
|
107
|
-
let
|
|
108
|
+
let llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
|
|
109
|
+
let sum: super::super::SummaryType = ques_ans.answer;
|
|
110
|
+
//println!("expected answer:{:?}", &sum);
|
|
108
111
|
assert_eq!(
|
|
109
112
|
llm_json_value.sort_summarytype_struct(),
|
|
110
|
-
|
|
113
|
+
sum.sort_summarytype_struct()
|
|
111
114
|
);
|
|
112
115
|
}
|
|
113
116
|
}
|
|
@@ -143,14 +146,27 @@ mod tests {
|
|
|
143
146
|
&dataset_db,
|
|
144
147
|
&genedb,
|
|
145
148
|
&ai_json,
|
|
149
|
+
&airoute,
|
|
146
150
|
testing,
|
|
147
151
|
)
|
|
148
152
|
.await;
|
|
149
|
-
|
|
150
|
-
|
|
153
|
+
//println!("user_input:{}", user_input);
|
|
154
|
+
//println!("llm_answer:{:?}", llm_output);
|
|
155
|
+
//println!("expected answer:{:?}", &ques_ans.answer);
|
|
156
|
+
let llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
|
|
157
|
+
//println!(
|
|
158
|
+
// "llm_answer:{:?}",
|
|
159
|
+
// llm_json_value.clone().sort_summarytype_struct()
|
|
160
|
+
//);
|
|
161
|
+
//println!(
|
|
162
|
+
// "expected answer:{:?}",
|
|
163
|
+
// &expected_json_value.clone().sort_summarytype_struct()
|
|
164
|
+
//);
|
|
165
|
+
let sum: super::super::SummaryType = ques_ans.answer;
|
|
166
|
+
//println!("expected answer:{:?}", &sum);
|
|
151
167
|
assert_eq!(
|
|
152
168
|
llm_json_value.sort_summarytype_struct(),
|
|
153
|
-
|
|
169
|
+
sum.sort_summarytype_struct()
|
|
154
170
|
);
|
|
155
171
|
} else {
|
|
156
172
|
panic!("The user input is empty");
|