npm - @sjcrh/proteinpaint-rust - Versions diffs - 2.166.0 → 2.167.0 - Mend

@sjcrh/proteinpaint-rust 2.166.0 → 2.167.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/package.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-	"version": "2.166.0",
+	"version": "2.167.0",
 	"name": "@sjcrh/proteinpaint-rust",
 	"type": "module",
 	"description": "Rust-based utilities for proteinpaint",

package/src/aichatbot.rs CHANGED Viewed

@@ -160,7 +160,7 @@ async fn main() -> Result<()> {
                     let temperature: f64 = 0.01;
                     let max_new_tokens: usize = 512;
                     let top_p: f32 = 0.95;
+                    let testing = false; // This variable is always false in production, this is true in test_ai.rs for testing code
                     if llm_backend_name != "ollama" && llm_backend_name != "SJ" {
                         panic!(
                             "This code currently supports only Ollama and SJ provider. llm_backend_name must be \"ollama\" or \"SJ\""
@@ -185,6 +185,7 @@ async fn main() -> Result<()> {
                             &dataset_db,
                             &genedb,
                             &ai_json,
+                            testing,
                         )
                         .await;
                     } else if llm_backend_name == "SJ".to_string() {
@@ -207,6 +208,7 @@ async fn main() -> Result<()> {
                             &dataset_db,
                             &genedb,
                             &ai_json,
+                            testing,
                         )
                         .await;
                     }
@@ -239,6 +241,7 @@ pub async fn run_pipeline(
     dataset_db: &str,
     genedb: &str,
     ai_json: &AiJsonFormat,
+    testing: bool,
 ) -> Option<String> {
     let mut classification: String = classify_query_by_dataset_type(
         user_input,
@@ -263,13 +266,20 @@ pub async fn run_pipeline(
             top_p,
         )
         .await;
-        final_output = format!(
-            "{{\"{}\":\"{}\",\"{}\":[{}}}",
-            "action",
-            "dge",
-            "DE_output",
-            de_result + &"]"
-        );
+        if testing == true {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":[{}}}",
+                "action",
+                "dge",
+                "DE_output",
+                de_result + &"]"
+            );
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "DE agent not implemented yet"
+            );
+        }
     } else if classification == "summary".to_string() {
         final_output = extract_summary_information(
             user_input,
@@ -282,30 +292,83 @@ pub async fn run_pipeline(
             dataset_db,
             genedb,
             ai_json,
+            testing,
         )
         .await;
     } else if classification == "hierarchical".to_string() {
         // Not implemented yet
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "hierarchical");
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "hierarchical");
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "hierarchical clustering agent not implemented yet"
+            );
+        }
     } else if classification == "snv_indel".to_string() {
         // Not implemented yet
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "snv_indel");
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "snv_indel");
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "snv_indel agent not implemented yet"
+            );
+        }
     } else if classification == "cnv".to_string() {
         // Not implemented yet
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "cnv");
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "cnv");
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "cnv agent not implemented yet"
+            );
+        }
     } else if classification == "variant_calling".to_string() {
         // Not implemented yet and will never be supported. Need a separate messages for this
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "variant_calling");
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "variant_calling");
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "variant_calling agent not implemented yet"
+            );
+        }
     } else if classification == "survival".to_string() {
         // Not implemented yet
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "surivial");
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "surivial");
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "survival agent not implemented yet"
+            );
+        }
     } else if classification == "none".to_string() {
-        final_output = format!(
-            "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
-            "action", "none", "message", "The input query did not match any known features in Proteinpaint"
-        );
+        if testing == true {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "action", "none", "message", "The input query did not match any known features in Proteinpaint"
+            );
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type", "html", "html", "The input query did not match any known features in Proteinpaint"
+            );
+        }
     } else {
-        final_output = format!("{{\"{}\":\"{}\"}}", "action", "unknown:".to_string() + &classification);
+        if testing == true {
+            final_output = format!("{{\"{}\":\"{}\"}}", "action", "unknown:".to_string() + &classification);
+        } else {
+            final_output = format!(
+                "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
+                "type",
+                "html",
+                "html",
+                "unknown:".to_string() + &classification
+            );
+        }
     }
     Some(final_output)
 }
@@ -801,6 +864,7 @@ async fn extract_summary_information(
     dataset_db: &str,
     genedb: &str,
     ai_json: &AiJsonFormat,
+    testing: bool,
 ) -> String {
     let (rag_docs, db_vec) = parse_dataset_db(dataset_db).await;
     let additional;
@@ -919,7 +983,8 @@ async fn extract_summary_information(
                 }
             }
             //println!("final_llm_json:{}", final_llm_json);
-            let final_validated_json = validate_summary_output(final_llm_json.clone(), db_vec, common_genes, ai_json);
+            let final_validated_json =
+                validate_summary_output(final_llm_json.clone(), db_vec, common_genes, ai_json, testing);
             final_validated_json
         }
         None => {
@@ -1063,6 +1128,7 @@ fn validate_summary_output(
     db_vec: Vec<DbRows>,
     common_genes: Vec<String>,
     ai_json: &AiJsonFormat,
+    testing: bool,
 ) -> String {
     let json_value: SummaryType =
        serde_json::from_str(&raw_llm_json).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
@@ -1094,7 +1160,7 @@ fn validate_summary_output(
                     match term_verification.correct_field {
                         Some(tm) => validated_summary_terms.push(SummaryTerms::clinical(tm)),
                         None => {
-                            message = message + &"\"" + &clin + &"\"" + &" not found in db.";
+                            message = message + &"'" + &clin + &"'" + &" not found in db.";
                         }
                     }
                 } else if Some(term_verification.correct_field.clone()).is_some()
@@ -1122,7 +1188,7 @@ fn validate_summary_output(
                         if num_gene_verification == 0 || common_genes.len() == 0 {
                             if message.to_lowercase().contains(&gene.to_lowercase()) { // Check if the LLM has already added the message, if not then add it
                             } else {
-                                message = message + &"\"" + &gene + &"\"" + &" not found in genedb.";
+                                message = message + &"'" + &gene + &"'" + &" not found in genedb.";
                             }
                         }
                     }
@@ -1138,6 +1204,8 @@ fn validate_summary_output(
         }
     }
+    let mut pp_plot_json: Value; // The PP compliant plot JSON
+    pp_plot_json = serde_json::from_str(&"{\"chartType\":\"summary\"}").expect("Not a valid JSON");
     match &json_value.filter {
         Some(filter_terms_array) => {
             let mut validated_filter_terms = Vec::<FilterTerm>::new();
@@ -1168,21 +1236,21 @@ fn validate_summary_output(
                             validated_filter_terms.push(categorical_filter_term);
                         }
                         if term_verification.correct_field.is_none() {
-                            message = message + &"\"" + &categorical.term + &"\" filter term not found in db";
+                            message = message + &"'" + &categorical.term + &"' filter term not found in db";
                         }
                         if value_verification.is_none() {
                             message = message
-                                + &"\""
+                                + &"'"
                                 + &categorical.value
-                                + &"\" filter value not found for filter field \""
+                                + &"' filter value not found for filter field '"
                                 + &categorical.term
-                                + "\" in db";
+                                + "' in db";
                         }
                     }
                     FilterTerm::Numeric(numeric) => {
                         let term_verification = verify_json_field(&numeric.term, &db_vec);
                         if term_verification.correct_field.is_none() {
-                            message = message + &"\"" + &numeric.term + &"\" filter term not found in db";
+                            message = message + &"'" + &numeric.term + &"' filter term not found in db";
                         } else {
                             let numeric_filter_term: FilterTerm = FilterTerm::Numeric(numeric.clone());
                             validated_filter_terms.push(numeric_filter_term);
@@ -1229,8 +1297,38 @@ fn validate_summary_output(
             }
             if validated_filter_terms.len() > 0 {
-                if let Some(obj) = new_json.as_object_mut() {
-                    obj.insert(String::from("filter"), serde_json::json!(validated_filter_terms));
+                if testing == true {
+                    if let Some(obj) = new_json.as_object_mut() {
+                        obj.insert(String::from("filter"), serde_json::json!(validated_filter_terms));
+                    }
+                } else {
+                    let mut validated_filter_terms_PP: String = "[".to_string();
+                    let mut filter_hits = 0;
+                    for validated_term in validated_filter_terms {
+                        match validated_term {
+                            FilterTerm::Categorical(categorical_filter) => {
+                                let string_json = "{\"term\":\"".to_string()
+                                    + &categorical_filter.term
+                                    + &"\", \"category\":\""
+                                    + &categorical_filter.value
+                                    + &"\"},";
+                                validated_filter_terms_PP += &string_json;
+                                filter_hits += 1; // Once numeric term is also implemented, this statement will go outside the match block
+                            }
+                            FilterTerm::Numeric(_numeric_term) => {} // To be implemented later
+                        };
+                    }
+                    println!("validated_filter_terms_PP:{}", validated_filter_terms_PP);
+                    if filter_hits > 0 {
+                        validated_filter_terms_PP.pop();
+                        validated_filter_terms_PP += &"]";
+                        if let Some(obj) = pp_plot_json.as_object_mut() {
+                            obj.insert(
+                                String::from("simpleFilter"),
+                                serde_json::from_str(&validated_filter_terms_PP).expect("Not a valid JSON"),
+                            );
+                        }
+                    }
                 }
             }
         }
@@ -1240,6 +1338,10 @@ fn validate_summary_output(
     // Removing terms that are found both in filter term as well summary
     let mut validated_summary_terms_final = Vec::<SummaryTerms>::new();
+    let mut sum_iter = 0;
+    let mut pp_json: Value; // New JSON value that will contain items of the final PP compliant JSON
+    pp_json = serde_json::from_str(&"{\"type\":\"plot\"}").expect("Not a valid JSON");
     for summary_term in &validated_summary_terms {
         let mut hit = 0;
         match summary_term {
@@ -1276,9 +1378,53 @@ fn validate_summary_output(
                 }
             }
         }
         if hit == 0 {
+            let mut termidpp: Option<TermIDPP> = None;
+            let mut geneexp: Option<GeneExpressionPP> = None;
+            match summary_term {
+                SummaryTerms::clinical(clinical_term) => {
+                    termidpp = Some(TermIDPP {
+                        id: clinical_term.to_string(),
+                    });
+                }
+                SummaryTerms::geneExpression(gene) => {
+                    geneexp = Some(GeneExpressionPP {
+                        gene: gene.to_string(),
+                        r#type: "geneExpression".to_string(),
+                    });
+                }
+            }
+            if sum_iter == 0 {
+                if termidpp.is_some() {
+                    if let Some(obj) = pp_plot_json.as_object_mut() {
+                        obj.insert(String::from("term"), serde_json::json!(Some(termidpp)));
+                    }
+                }
+                if geneexp.is_some() {
+                    let gene_term = GeneTerm { term: geneexp.unwrap() };
+                    if let Some(obj) = pp_plot_json.as_object_mut() {
+                        obj.insert(String::from("term"), serde_json::json!(gene_term));
+                    }
+                }
+            } else if sum_iter == 1 {
+                if termidpp.is_some() {
+                    if let Some(obj) = pp_plot_json.as_object_mut() {
+                        obj.insert(String::from("term2"), serde_json::json!(Some(termidpp)));
+                    }
+                }
+                if geneexp.is_some() {
+                    let gene_term = GeneTerm { term: geneexp.unwrap() };
+                    if let Some(obj) = pp_plot_json.as_object_mut() {
+                        obj.insert(String::from("term2"), serde_json::json!(gene_term));
+                    }
+                }
+            }
             validated_summary_terms_final.push(summary_term.clone())
         }
+        sum_iter += 1
     }
     if let Some(obj) = new_json.as_object_mut() {
@@ -1288,14 +1434,61 @@ fn validate_summary_output(
         );
     }
+    if let Some(obj) = pp_json.as_object_mut() {
+        // The `if let` ensures we only proceed if the top-level JSON is an object.
+        // Append a new string field.
+        obj.insert(String::from("plot"), serde_json::json!(pp_plot_json));
+    }
+    let mut err_json: Value; // Error JSON containing the error message (if present)
     if message.len() > 0 {
-        if let Some(obj) = new_json.as_object_mut() {
-            // The `if let` ensures we only proceed if the top-level JSON is an object.
-            // Append a new string field.
-            obj.insert(String::from("message"), serde_json::json!(message));
+        if testing == false {
+            err_json = serde_json::from_str(&"{\"type\":\"html\"}").expect("Not a valid JSON");
+            if let Some(obj) = err_json.as_object_mut() {
+                // The `if let` ensures we only proceed if the top-level JSON is an object.
+                // Append a new string field.
+                obj.insert(String::from("html"), serde_json::json!(message));
+            };
+            serde_json::to_string(&err_json).unwrap()
+        } else {
+            if let Some(obj) = new_json.as_object_mut() {
+                // The `if let` ensures we only proceed if the top-level JSON is an object.
+                // Append a new string field.
+                obj.insert(String::from("message"), serde_json::json!(message));
+            };
+            serde_json::to_string(&new_json).unwrap()
+        }
+    } else {
+        if testing == true {
+            // When testing script output native LLM JSON
+            serde_json::to_string(&new_json).unwrap()
+        } else {
+            // When in production output PP compliant JSON
+            serde_json::to_string(&pp_json).unwrap()
         }
     }
-    serde_json::to_string(&new_json).unwrap()
+}
+fn getGeneExpression() -> String { // Default-value provider named by `#[serde(default = "getGeneExpression")]` on `GeneExpressionPP::type`
+    "geneExpression".to_string() // Same literal that `validate_summary_output` writes into `r#type` when it builds a `GeneExpressionPP`
+}
+#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+struct TermIDPP { // Clinical-term reference that `validate_summary_output` inserts under `term` / `term2` of the plot JSON
+    id: String, // Filled from the clinical summary term via `clinical_term.to_string()`
+}
+#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+struct GeneTerm { // Wrapper that nests a gene-expression descriptor under a `term` key before insertion into the plot JSON
+    term: GeneExpressionPP, // The wrapped gene-expression descriptor
+}
+#[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+struct GeneExpressionPP { // Gene-expression descriptor built for `SummaryTerms::geneExpression` summary terms
+    gene: String, // Value carried by the `SummaryTerms::geneExpression` variant
+    // Serde uses this for deserialization.
+    #[serde(default = "getGeneExpression")] // If the field is missing from the input, serde calls `getGeneExpression` to fill it
+    r#type: String, // Raw identifier because `type` is a Rust keyword; set to "geneExpression" where the struct is constructed
 }
 #[derive(Debug, Clone)]

package/src/manhattan_plot.rs CHANGED Viewed

@@ -57,6 +57,7 @@ struct InteractiveData {
     x_buffer: i64,
     y_min: f64,
     y_max: f64,
+    device_pixel_ratio: f64,
 }
 #[derive(Serialize)]
@@ -335,12 +336,8 @@ fn plot_grin2_manhattan(
     let png_width = plot_width + 2 * png_dot_radius;
     let png_height = plot_height + 2 * png_dot_radius;
-    let w: u32 = (png_width * device_pixel_ratio as u64)
-        .try_into()
-        .expect("PNG width too large for u32");
-    let h: u32 = (png_height * device_pixel_ratio as u64)
-        .try_into()
-        .expect("PNG height too large for u32");
+    let w: u32 = ((png_width as f64) * dpr) as u32;
+    let h: u32 = ((png_height as f64) * dpr) as u32;
     // Create RGB buffer for Plotters
     let mut buffer = vec![0u8; w as usize * h as usize * 3];
@@ -402,8 +399,8 @@ fn plot_grin2_manhattan(
         for (i, p) in point_details.iter_mut().enumerate() {
             let (px, py) = pixel_positions[*&sig_indices[i]];
-            p.pixel_x = px;
-            p.pixel_y = py;
+            p.pixel_x = px / dpr;
+            p.pixel_y = py / dpr;
         }
         // flush root drawing area
@@ -469,6 +466,7 @@ fn plot_grin2_manhattan(
         x_buffer,
         y_min,
         y_max,
+        device_pixel_ratio: dpr,
     };
     Ok((png_data, interactive_data))
 }

package/src/test_ai.rs CHANGED Viewed

@@ -42,6 +42,7 @@ mod tests {
         let top_p: f32 = 0.95;
         let serverconfig_file_path = Path::new("../../serverconfig.json");
         let absolute_path = serverconfig_file_path.canonicalize().unwrap();
+        let testing = true; // This causes the JSON being output from run_pipeline() to be in LLM JSON format
         // Read the file
         let data = fs::read_to_string(absolute_path).unwrap();
@@ -83,7 +84,6 @@ mod tests {
                                 .expect("Ollama server not found");
                             let embedding_model = ollama_client.embedding_model(ollama_embedding_model_name);
                             let comp_model = ollama_client.completion_model(ollama_comp_model_name);
                             for chart in ai_json.charts.clone() {
                                 match chart {
                                     super::super::Charts::Summary(testdata) => {
@@ -100,6 +100,7 @@ mod tests {
                                                 &dataset_db,
                                                 &genedb,
                                                 &ai_json,
+                                                testing,
                                             )
                                             .await;
                                             let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
@@ -142,6 +143,7 @@ mod tests {
                                                     &dataset_db,
                                                     &genedb,
                                                     &ai_json,
+                                                    testing,
                                                 )
                                                 .await;
                                                 let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");