@sjcrh/proteinpaint-rust 2.157.0 → 2.167.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,19 +5,17 @@ This directory holds the source code for rust-compiled utilities.
5
5
 
6
6
  ## Rust version
7
7
 
8
- Current rust version is 1.89.0. TODO introduce `rust-toolchain` file, and pin the rust version there.
8
+ The current rust version is defined in `rust/rust-toolchain.toml`. When building the rust docker image, the `container/rust/build.sh` script parses the rust version from `rust-toolchain.toml` into `container/rust/Dockerfile` at runtime. This ensures consistency between local PP builds and the docker container used in CI and production.
9
9
 
10
- Currently the version is hardcoded in:
10
+ The GitHub Actions workflow files `.github/workflows/CD-rust-build.yml` and `.github/workflows/CI-unit.yml` also parse the rust version from `rust-toolchain.toml` to ensure the correct rust version is used for compiling the current rust code.
11
11
 
12
- the Github Actions workflow file `.github/workflows/CD-rust-build.yml`.
12
+ When bumping the rust version, publish the new rust build env image using:
13
13
 
14
- The Github Actions workflow file `.github/workflows/CI-unit.yml`.
15
-
16
- The rust build docker file `container/rust/Dockerfile`.
14
+ https://github.com/stjude/proteinpaint/actions/workflows/CD-publish-rust-bookworm-env-image.yml
17
15
 
18
- When bumping the rust version, please update these files accordingly, and publish the new rust build env image using:
16
+ For publishing updated rust binaries, use this workflow:
19
17
 
20
- https://github.com/stjude/proteinpaint/actions/workflows/CD-publish-rust-bookworm-env-image.yml
18
+ https://github.com/stjude/proteinpaint/actions/workflows/CD-publish-rust-binaries.yml
21
19
 
22
20
  ## Code layout
23
21
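The README now points at `rust/rust-toolchain.toml` as the single source of truth for the toolchain version. The build script and workflows that read it are shell/YAML, so the snippet below is only a minimal Rust sketch of the same parsing idea, assuming the standard `[toolchain]` table with a `channel` key and the path given in the README:

```rust
// Illustrative only: extract the pinned toolchain channel from rust-toolchain.toml
// with plain string handling, so it runs on the standard library alone.
use std::fs;

fn main() -> std::io::Result<()> {
    // Path assumed from the README; adjust to where the file lives in your checkout.
    let toml = fs::read_to_string("rust/rust-toolchain.toml")?;
    let channel = toml
        .lines()
        .map(str::trim)
        .find(|l| l.starts_with("channel"))            // e.g. channel = "1.89.0"
        .and_then(|l| l.split('=').nth(1))
        .map(|v| v.trim().trim_matches('"').to_string());
    match channel {
        Some(version) => println!("pinned rust version: {version}"),
        None => eprintln!("no channel entry found in rust-toolchain.toml"),
    }
    Ok(())
}
```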
 
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.157.0",
2
+ "version": "2.167.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/aichatbot.rs CHANGED
@@ -160,7 +160,7 @@ async fn main() -> Result<()> {
160
160
  let temperature: f64 = 0.01;
161
161
  let max_new_tokens: usize = 512;
162
162
  let top_p: f32 = 0.95;
163
-
163
+ let testing = false; // This variable is always false in production; it is set to true in test_ai.rs to test the code
164
164
  if llm_backend_name != "ollama" && llm_backend_name != "SJ" {
165
165
  panic!(
166
166
  "This code currently supports only Ollama and SJ provider. llm_backend_name must be \"ollama\" or \"SJ\""
@@ -185,6 +185,7 @@ async fn main() -> Result<()> {
185
185
  &dataset_db,
186
186
  &genedb,
187
187
  &ai_json,
188
+ testing,
188
189
  )
189
190
  .await;
190
191
  } else if llm_backend_name == "SJ".to_string() {
@@ -207,6 +208,7 @@ async fn main() -> Result<()> {
207
208
  &dataset_db,
208
209
  &genedb,
209
210
  &ai_json,
211
+ testing,
210
212
  )
211
213
  .await;
212
214
  }
@@ -239,6 +241,7 @@ pub async fn run_pipeline(
239
241
  dataset_db: &str,
240
242
  genedb: &str,
241
243
  ai_json: &AiJsonFormat,
244
+ testing: bool,
242
245
  ) -> Option<String> {
243
246
  let mut classification: String = classify_query_by_dataset_type(
244
247
  user_input,
@@ -263,13 +266,20 @@ pub async fn run_pipeline(
263
266
  top_p,
264
267
  )
265
268
  .await;
266
- final_output = format!(
267
- "{{\"{}\":\"{}\",\"{}\":[{}}}",
268
- "action",
269
- "dge",
270
- "DE_output",
271
- de_result + &"]"
272
- );
269
+ if testing == true {
270
+ final_output = format!(
271
+ "{{\"{}\":\"{}\",\"{}\":[{}}}",
272
+ "action",
273
+ "dge",
274
+ "DE_output",
275
+ de_result + &"]"
276
+ );
277
+ } else {
278
+ final_output = format!(
279
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
280
+ "type", "html", "html", "DE agent not implemented yet"
281
+ );
282
+ }
273
283
  } else if classification == "summary".to_string() {
274
284
  final_output = extract_summary_information(
275
285
  user_input,
@@ -282,30 +292,83 @@ pub async fn run_pipeline(
282
292
  dataset_db,
283
293
  genedb,
284
294
  ai_json,
295
+ testing,
285
296
  )
286
297
  .await;
287
298
  } else if classification == "hierarchical".to_string() {
288
299
  // Not implemented yet
289
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "hierarchical");
300
+ if testing == true {
301
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "hierarchical");
302
+ } else {
303
+ final_output = format!(
304
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
305
+ "type", "html", "html", "hierarchical clustering agent not implemented yet"
306
+ );
307
+ }
290
308
  } else if classification == "snv_indel".to_string() {
291
309
  // Not implemented yet
292
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "snv_indel");
310
+ if testing == true {
311
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "snv_indel");
312
+ } else {
313
+ final_output = format!(
314
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
315
+ "type", "html", "html", "snv_indel agent not implemented yet"
316
+ );
317
+ }
293
318
  } else if classification == "cnv".to_string() {
294
319
  // Not implemented yet
295
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "cnv");
320
+ if testing == true {
321
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "cnv");
322
+ } else {
323
+ final_output = format!(
324
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
325
+ "type", "html", "html", "cnv agent not implemented yet"
326
+ );
327
+ }
296
328
  } else if classification == "variant_calling".to_string() {
297
329
  // Not implemented yet and will never be supported. Need a separate message for this
298
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "variant_calling");
330
+ if testing == true {
331
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "variant_calling");
332
+ } else {
333
+ final_output = format!(
334
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
335
+ "type", "html", "html", "variant_calling agent not implemented yet"
336
+ );
337
+ }
299
338
  } else if classification == "survival".to_string() {
300
339
  // Not implemented yet
301
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "surivial");
340
+ if testing == true {
341
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "surivial");
342
+ } else {
343
+ final_output = format!(
344
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
345
+ "type", "html", "html", "survival agent not implemented yet"
346
+ );
347
+ }
302
348
  } else if classification == "none".to_string() {
303
- final_output = format!(
304
- "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
305
- "action", "none", "message", "The input query did not match any known features in Proteinpaint"
306
- );
349
+ if testing == true {
350
+ final_output = format!(
351
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
352
+ "action", "none", "message", "The input query did not match any known features in Proteinpaint"
353
+ );
354
+ } else {
355
+ final_output = format!(
356
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
357
+ "type", "html", "html", "The input query did not match any known features in Proteinpaint"
358
+ );
359
+ }
307
360
  } else {
308
- final_output = format!("{{\"{}\":\"{}\"}}", "action", "unknown:".to_string() + &classification);
361
+ if testing == true {
362
+ final_output = format!("{{\"{}\":\"{}\"}}", "action", "unknown:".to_string() + &classification);
363
+ } else {
364
+ final_output = format!(
365
+ "{{\"{}\":\"{}\",\"{}\":\"{}\"}}",
366
+ "type",
367
+ "html",
368
+ "html",
369
+ "unknown:".to_string() + &classification
370
+ );
371
+ }
309
372
  }
310
373
  Some(final_output)
311
374
  }
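In the hunks above, every not-yet-implemented classification now branches on `testing`: the raw `{"action": ...}` JSON is kept for the unit tests, while production emits a `{"type": "html", "html": ...}` message instead. A minimal sketch of those two shapes using `serde_json::json!` rather than the escaped `format!` strings; the helper name is hypothetical:

```rust
use serde_json::{json, Value};

/// Hypothetical helper mirroring the branching above: raw action JSON when
/// testing, a ProteinPaint "html" payload otherwise.
fn not_implemented_payload(classification: &str, testing: bool) -> Value {
    if testing {
        // Shape asserted by the unit tests in test_ai.rs
        json!({ "action": classification })
    } else {
        // Shape rendered by the client as an HTML message
        json!({
            "type": "html",
            "html": format!("{classification} agent not implemented yet")
        })
    }
}

fn main() {
    println!("{}", not_implemented_payload("cnv", false));
    // {"html":"cnv agent not implemented yet","type":"html"}
}
```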
@@ -801,6 +864,7 @@ async fn extract_summary_information(
801
864
  dataset_db: &str,
802
865
  genedb: &str,
803
866
  ai_json: &AiJsonFormat,
867
+ testing: bool,
804
868
  ) -> String {
805
869
  let (rag_docs, db_vec) = parse_dataset_db(dataset_db).await;
806
870
  let additional;
@@ -919,7 +983,8 @@ async fn extract_summary_information(
919
983
  }
920
984
  }
921
985
  //println!("final_llm_json:{}", final_llm_json);
922
- let final_validated_json = validate_summary_output(final_llm_json.clone(), db_vec, common_genes, ai_json);
986
+ let final_validated_json =
987
+ validate_summary_output(final_llm_json.clone(), db_vec, common_genes, ai_json, testing);
923
988
  final_validated_json
924
989
  }
925
990
  None => {
@@ -1063,6 +1128,7 @@ fn validate_summary_output(
1063
1128
  db_vec: Vec<DbRows>,
1064
1129
  common_genes: Vec<String>,
1065
1130
  ai_json: &AiJsonFormat,
1131
+ testing: bool,
1066
1132
  ) -> String {
1067
1133
  let json_value: SummaryType =
1068
1134
  serde_json::from_str(&raw_llm_json).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
@@ -1094,7 +1160,7 @@ fn validate_summary_output(
1094
1160
  match term_verification.correct_field {
1095
1161
  Some(tm) => validated_summary_terms.push(SummaryTerms::clinical(tm)),
1096
1162
  None => {
1097
- message = message + &"\"" + &clin + &"\"" + &" not found in db.";
1163
+ message = message + &"'" + &clin + &"'" + &" not found in db.";
1098
1164
  }
1099
1165
  }
1100
1166
  } else if Some(term_verification.correct_field.clone()).is_some()
@@ -1122,7 +1188,7 @@ fn validate_summary_output(
1122
1188
  if num_gene_verification == 0 || common_genes.len() == 0 {
1123
1189
  if message.to_lowercase().contains(&gene.to_lowercase()) { // Check if the LLM has already added the message, if not then add it
1124
1190
  } else {
1125
- message = message + &"\"" + &gene + &"\"" + &" not found in genedb.";
1191
+ message = message + &"'" + &gene + &"'" + &" not found in genedb.";
1126
1192
  }
1127
1193
  }
1128
1194
  }
@@ -1138,6 +1204,8 @@ fn validate_summary_output(
1138
1204
  }
1139
1205
  }
1140
1206
 
1207
+ let mut pp_plot_json: Value; // The PP compliant plot JSON
1208
+ pp_plot_json = serde_json::from_str(&"{\"chartType\":\"summary\"}").expect("Not a valid JSON");
1141
1209
  match &json_value.filter {
1142
1210
  Some(filter_terms_array) => {
1143
1211
  let mut validated_filter_terms = Vec::<FilterTerm>::new();
@@ -1168,21 +1236,21 @@ fn validate_summary_output(
1168
1236
  validated_filter_terms.push(categorical_filter_term);
1169
1237
  }
1170
1238
  if term_verification.correct_field.is_none() {
1171
- message = message + &"\"" + &categorical.term + &"\" filter term not found in db";
1239
+ message = message + &"'" + &categorical.term + &"' filter term not found in db";
1172
1240
  }
1173
1241
  if value_verification.is_none() {
1174
1242
  message = message
1175
- + &"\""
1243
+ + &"'"
1176
1244
  + &categorical.value
1177
- + &"\" filter value not found for filter field \""
1245
+ + &"' filter value not found for filter field '"
1178
1246
  + &categorical.term
1179
- + "\" in db";
1247
+ + "' in db";
1180
1248
  }
1181
1249
  }
1182
1250
  FilterTerm::Numeric(numeric) => {
1183
1251
  let term_verification = verify_json_field(&numeric.term, &db_vec);
1184
1252
  if term_verification.correct_field.is_none() {
1185
- message = message + &"\"" + &numeric.term + &"\" filter term not found in db";
1253
+ message = message + &"'" + &numeric.term + &"' filter term not found in db";
1186
1254
  } else {
1187
1255
  let numeric_filter_term: FilterTerm = FilterTerm::Numeric(numeric.clone());
1188
1256
  validated_filter_terms.push(numeric_filter_term);
@@ -1229,8 +1297,38 @@ fn validate_summary_output(
1229
1297
  }
1230
1298
 
1231
1299
  if validated_filter_terms.len() > 0 {
1232
- if let Some(obj) = new_json.as_object_mut() {
1233
- obj.insert(String::from("filter"), serde_json::json!(validated_filter_terms));
1300
+ if testing == true {
1301
+ if let Some(obj) = new_json.as_object_mut() {
1302
+ obj.insert(String::from("filter"), serde_json::json!(validated_filter_terms));
1303
+ }
1304
+ } else {
1305
+ let mut validated_filter_terms_PP: String = "[".to_string();
1306
+ let mut filter_hits = 0;
1307
+ for validated_term in validated_filter_terms {
1308
+ match validated_term {
1309
+ FilterTerm::Categorical(categorical_filter) => {
1310
+ let string_json = "{\"term\":\"".to_string()
1311
+ + &categorical_filter.term
1312
+ + &"\", \"category\":\""
1313
+ + &categorical_filter.value
1314
+ + &"\"},";
1315
+ validated_filter_terms_PP += &string_json;
1316
+ filter_hits += 1; // Once numeric term is also implemented, this statement will go outside the match block
1317
+ }
1318
+ FilterTerm::Numeric(_numeric_term) => {} // To be implemented later
1319
+ };
1320
+ }
1321
+ println!("validated_filter_terms_PP:{}", validated_filter_terms_PP);
1322
+ if filter_hits > 0 {
1323
+ validated_filter_terms_PP.pop();
1324
+ validated_filter_terms_PP += &"]";
1325
+ if let Some(obj) = pp_plot_json.as_object_mut() {
1326
+ obj.insert(
1327
+ String::from("simpleFilter"),
1328
+ serde_json::from_str(&validated_filter_terms_PP).expect("Not a valid JSON"),
1329
+ );
1330
+ }
1331
+ }
1234
1332
  }
1235
1333
  }
1236
1334
  }
@@ -1240,6 +1338,10 @@ fn validate_summary_output(
1240
1338
  // Removing terms that are found both in filter term as well summary
1241
1339
  let mut validated_summary_terms_final = Vec::<SummaryTerms>::new();
1242
1340
 
1341
+ let mut sum_iter = 0;
1342
+ let mut pp_json: Value; // New JSON value that will contain items of the final PP compliant JSON
1343
+ pp_json = serde_json::from_str(&"{\"type\":\"plot\"}").expect("Not a valid JSON");
1344
+
1243
1345
  for summary_term in &validated_summary_terms {
1244
1346
  let mut hit = 0;
1245
1347
  match summary_term {
@@ -1276,9 +1378,53 @@ fn validate_summary_output(
1276
1378
  }
1277
1379
  }
1278
1380
  }
1381
+
1279
1382
  if hit == 0 {
1383
+ let mut termidpp: Option<TermIDPP> = None;
1384
+ let mut geneexp: Option<GeneExpressionPP> = None;
1385
+ match summary_term {
1386
+ SummaryTerms::clinical(clinical_term) => {
1387
+ termidpp = Some(TermIDPP {
1388
+ id: clinical_term.to_string(),
1389
+ });
1390
+ }
1391
+ SummaryTerms::geneExpression(gene) => {
1392
+ geneexp = Some(GeneExpressionPP {
1393
+ gene: gene.to_string(),
1394
+ r#type: "geneExpression".to_string(),
1395
+ });
1396
+ }
1397
+ }
1398
+ if sum_iter == 0 {
1399
+ if termidpp.is_some() {
1400
+ if let Some(obj) = pp_plot_json.as_object_mut() {
1401
+ obj.insert(String::from("term"), serde_json::json!(Some(termidpp)));
1402
+ }
1403
+ }
1404
+
1405
+ if geneexp.is_some() {
1406
+ let gene_term = GeneTerm { term: geneexp.unwrap() };
1407
+ if let Some(obj) = pp_plot_json.as_object_mut() {
1408
+ obj.insert(String::from("term"), serde_json::json!(gene_term));
1409
+ }
1410
+ }
1411
+ } else if sum_iter == 1 {
1412
+ if termidpp.is_some() {
1413
+ if let Some(obj) = pp_plot_json.as_object_mut() {
1414
+ obj.insert(String::from("term2"), serde_json::json!(Some(termidpp)));
1415
+ }
1416
+ }
1417
+
1418
+ if geneexp.is_some() {
1419
+ let gene_term = GeneTerm { term: geneexp.unwrap() };
1420
+ if let Some(obj) = pp_plot_json.as_object_mut() {
1421
+ obj.insert(String::from("term2"), serde_json::json!(gene_term));
1422
+ }
1423
+ }
1424
+ }
1280
1425
  validated_summary_terms_final.push(summary_term.clone())
1281
1426
  }
1427
+ sum_iter += 1
1282
1428
  }
1283
1429
 
1284
1430
  if let Some(obj) = new_json.as_object_mut() {
@@ -1288,14 +1434,61 @@ fn validate_summary_output(
1288
1434
  );
1289
1435
  }
1290
1436
 
1437
+ if let Some(obj) = pp_json.as_object_mut() {
1438
+ // The `if let` ensures we only proceed if the top-level JSON is an object.
1439
+ // Append a new string field.
1440
+ obj.insert(String::from("plot"), serde_json::json!(pp_plot_json));
1441
+ }
1442
+
1443
+ let mut err_json: Value; // Error JSON containing the error message (if present)
1291
1444
  if message.len() > 0 {
1292
- if let Some(obj) = new_json.as_object_mut() {
1293
- // The `if let` ensures we only proceed if the top-level JSON is an object.
1294
- // Append a new string field.
1295
- obj.insert(String::from("message"), serde_json::json!(message));
1445
+ if testing == false {
1446
+ err_json = serde_json::from_str(&"{\"type\":\"html\"}").expect("Not a valid JSON");
1447
+ if let Some(obj) = err_json.as_object_mut() {
1448
+ // The `if let` ensures we only proceed if the top-level JSON is an object.
1449
+ // Append a new string field.
1450
+ obj.insert(String::from("html"), serde_json::json!(message));
1451
+ };
1452
+ serde_json::to_string(&err_json).unwrap()
1453
+ } else {
1454
+ if let Some(obj) = new_json.as_object_mut() {
1455
+ // The `if let` ensures we only proceed if the top-level JSON is an object.
1456
+ // Append a new string field.
1457
+ obj.insert(String::from("message"), serde_json::json!(message));
1458
+ };
1459
+ serde_json::to_string(&new_json).unwrap()
1460
+ }
1461
+ } else {
1462
+ if testing == true {
1463
+ // When testing, the script outputs the native LLM JSON
1464
+ serde_json::to_string(&new_json).unwrap()
1465
+ } else {
1466
+ // When in production, output the PP-compliant JSON
1467
+ serde_json::to_string(&pp_json).unwrap()
1296
1468
  }
1297
1469
  }
1298
- serde_json::to_string(&new_json).unwrap()
1470
+ }
1471
+
1472
+ fn getGeneExpression() -> String {
1473
+ "geneExpression".to_string()
1474
+ }
1475
+
1476
+ #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
1477
+ struct TermIDPP {
1478
+ id: String,
1479
+ }
1480
+
1481
+ #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
1482
+ struct GeneTerm {
1483
+ term: GeneExpressionPP,
1484
+ }
1485
+
1486
+ #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
1487
+ struct GeneExpressionPP {
1488
+ gene: String,
1489
+ // Serde uses this for deserialization.
1490
+ #[serde(default = "getGeneExpression")]
1491
+ r#type: String,
1299
1492
  }
1300
1493
 
1301
1494
  #[derive(Debug, Clone)]
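Taken together, the new `testing == false` path of `validate_summary_output` assembles a ProteinPaint-compliant plot payload instead of echoing the LLM JSON: `pp_json` wraps `pp_plot_json`, the first validated summary term becomes `term`, the second becomes `term2` (gene-expression terms nested as `{term: {gene, type}}`), and categorical filters land in `simpleFilter`. A minimal sketch of that final shape, with made-up term, gene, and filter names:

```rust
use serde_json::json;

fn main() {
    // Hypothetical validated inputs
    let clinical_term = "diagnosis_group";
    let gene = "TP53";
    let (filter_term, filter_value) = ("sex", "Female");

    // Mirrors the production branch: first summary term -> "term",
    // second -> "term2", categorical filters -> "simpleFilter".
    let pp_json = json!({
        "type": "plot",
        "plot": {
            "chartType": "summary",
            "term": { "id": clinical_term },
            "term2": { "term": { "gene": gene, "type": "geneExpression" } },
            "simpleFilter": [ { "term": filter_term, "category": filter_value } ]
        }
    });
    println!("{}", serde_json::to_string_pretty(&pp_json).unwrap());
}
```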
package/src/bigwig.rs CHANGED
@@ -158,19 +158,11 @@ fn main() {
158
158
  }
159
159
 
160
160
  fn determine_max(n1: f64, n2: f64) -> f64 {
161
- if n1 >= n2 {
162
- n1
163
- } else {
164
- n2
165
- }
161
+ if n1 >= n2 { n1 } else { n2 }
166
162
  }
167
163
 
168
164
  fn determine_min(n1: f64, n2: f64) -> f64 {
169
- if n1 < n2 {
170
- n1
171
- } else {
172
- n2
173
- }
165
+ if n1 < n2 { n1 } else { n2 }
174
166
  }
175
167
 
176
168
  #[allow(dead_code)]
@@ -179,12 +171,8 @@ fn calculate_appropriate_zoom_level(zoom_headers: Vec<ZoomHeader>, difference: f
179
171
  let mut closest_level = Option::<u32>::None; // Zoom level will be none at base-pair resolution
180
172
  let mut unity_added = false;
181
173
  let max_entries_parsed_limit = 100000; // Maximum number of entries that should be parsed from bigwig file. A very high number will lead to better accuracy as this will lead to selection of a lower zoom level. In contrast, a lower value will decrease run time at the cost of accuracy.
182
- // Parsing out various zoom levels from bigwig file
183
- for reduction_level in zoom_headers
184
- .into_iter()
185
- .map(|entry| (entry.reduction_level))
186
- .rev()
187
- {
174
+ // Parsing out various zoom levels from bigwig file
175
+ for reduction_level in zoom_headers.into_iter().map(|entry| entry.reduction_level).rev() {
188
176
  reduction_levels.push(reduction_level as u32);
189
177
  }
190
178
  if reduction_levels.contains(&1) == false {
@@ -206,21 +194,14 @@ fn calculate_appropriate_zoom_level(zoom_headers: Vec<ZoomHeader>, difference: f
206
194
  closest_level
207
195
  }
208
196
 
209
- fn calculate_appropriate_zoom_level_ucsc(
210
- zoom_headers: Vec<ZoomHeader>,
211
- exact_offset: f64,
212
- ) -> Option<u32> {
197
+ fn calculate_appropriate_zoom_level_ucsc(zoom_headers: Vec<ZoomHeader>, exact_offset: f64) -> Option<u32> {
213
198
  let mut reduction_levels = Vec::<u32>::new();
214
199
  let mut closest_level = Option::<u32>::None; // Zoom level will be none at base-pair resolution
215
200
  let desired_reduction: u32 = ((exact_offset as f64) / 2.0).floor() as u32;
216
201
  let mut unity_added = false;
217
202
  if desired_reduction > 1 {
218
203
  // Parsing out various zoom levels from bigwig file
219
- for reduction_level in zoom_headers
220
- .into_iter()
221
- .map(|entry| (entry.reduction_level))
222
- .rev()
223
- {
204
+ for reduction_level in zoom_headers.into_iter().map(|entry| entry.reduction_level).rev() {
224
205
  reduction_levels.push(reduction_level as u32);
225
206
  }
226
207
  if reduction_levels.contains(&1) == false {
@@ -304,11 +285,9 @@ fn calculate_datapoints<
304
285
  continue;
305
286
  } else {
306
287
  if (v.start as f64 <= start_region && end_region < v.end as f64)
307
- || (v.start as f64 >= start_region
308
- && (v.start as f64) < end_region)
288
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
309
289
  || (v.end as f64 >= start_region && (v.end as f64) < end_region)
310
- || (start_region >= v.start as f64
311
- && (v.end as f64) < end_region)
290
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region)
312
291
  {
313
292
  // Calculate sum and number for this region
314
293
  //println!("i:{}", i);
@@ -316,16 +295,10 @@ fn calculate_datapoints<
316
295
  //println!("v.end:{}", v.end);
317
296
  //println!("start_region:{}", start_region);
318
297
  //println!("end_region:{}", end_region);
319
- let start_entry_within_region =
320
- determine_max(v.start as f64, start_region);
321
- let stop_entry_within_region =
322
- determine_min(v.end as f64, end_region);
323
- datapoints_num[i] += (stop_entry_within_region
324
- - start_entry_within_region)
325
- as f64;
326
- datapoints_sum[i] += (stop_entry_within_region
327
- - start_entry_within_region)
328
- as f64
298
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
299
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
300
+ datapoints_num[i] += (stop_entry_within_region - start_entry_within_region) as f64;
301
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region) as f64
329
302
  * ((v.summary.sum as f64) / v.summary.bases_covered as f64);
330
303
  //println!(
331
304
  // "start_entry_within_region:{}",
@@ -346,30 +319,21 @@ fn calculate_datapoints<
346
319
  //println!("v.end:{}", v.end);
347
320
  //println!("start_region:{}", start_region);
348
321
  //println!("end_region:{}", end_region);
349
- if ((v.start as f64 <= start_region
350
- && end_region < v.end as f64)
351
- || (v.start as f64 >= start_region
352
- && (v.start as f64) < end_region)
353
- || (v.end as f64 >= start_region
354
- && (v.end as f64) < end_region)
355
- || (start_region >= v.start as f64
356
- && (v.end as f64) < end_region))
322
+ if ((v.start as f64 <= start_region && end_region < v.end as f64)
323
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
324
+ || (v.end as f64 >= start_region && (v.end as f64) < end_region)
325
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region))
357
326
  && iter > 1
358
327
  {
359
328
  // Calculate sum and number for this region
360
329
  //println!("Hello");
361
- let start_entry_within_region =
362
- determine_max(v.start as f64, start_region);
363
- let stop_entry_within_region =
364
- determine_min(v.end as f64, end_region);
365
- datapoints_num[i] += (stop_entry_within_region
366
- - start_entry_within_region)
367
- as f64;
368
- datapoints_sum[i] += (stop_entry_within_region
369
- - start_entry_within_region)
330
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
331
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
332
+ datapoints_num[i] +=
333
+ (stop_entry_within_region - start_entry_within_region) as f64;
334
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region)
370
335
  as f64
371
- * ((v.summary.sum as f64)
372
- / v.summary.bases_covered as f64);
336
+ * ((v.summary.sum as f64) / v.summary.bases_covered as f64);
373
337
  //println!(
374
338
  // "start_entry_within_region inside:{}",
375
339
  // start_entry_within_region
@@ -401,9 +365,7 @@ fn calculate_datapoints<
401
365
  }
402
366
  None => {
403
367
  // To be used in nucleotide resolution
404
- let bigwig_output = reader
405
- .get_interval(&chrom, start_pos as u32, stop_pos as u32)
406
- .unwrap();
368
+ let bigwig_output = reader.get_interval(&chrom, start_pos as u32, stop_pos as u32).unwrap();
407
369
  let mut i = 0;
408
370
  let mut start_region = datapoints_list[i];
409
371
  let mut end_region = datapoints_list[i + 1];
@@ -415,11 +377,9 @@ fn calculate_datapoints<
415
377
  continue;
416
378
  } else {
417
379
  if (v.start as f64 <= start_region && end_region < v.end as f64)
418
- || (v.start as f64 >= start_region
419
- && (v.start as f64) < end_region)
380
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
420
381
  || (v.end as f64 >= start_region && (v.end as f64) < end_region)
421
- || (start_region >= v.start as f64
422
- && (v.end as f64) < end_region)
382
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region)
423
383
  {
424
384
  // Calculate sum and number for this region
425
385
  //println!("i:{}", i);
@@ -427,17 +387,11 @@ fn calculate_datapoints<
427
387
  //println!("v.end:{}", v.end);
428
388
  //println!("start_region:{}", start_region);
429
389
  //println!("end_region:{}", end_region);
430
- let start_entry_within_region =
431
- determine_max(v.start as f64, start_region);
432
- let stop_entry_within_region =
433
- determine_min(v.end as f64, end_region);
434
- datapoints_num[i] += (stop_entry_within_region
435
- - start_entry_within_region)
436
- as f64;
437
- datapoints_sum[i] += (stop_entry_within_region
438
- - start_entry_within_region)
439
- as f64
440
- * v.value as f64;
390
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
391
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
392
+ datapoints_num[i] += (stop_entry_within_region - start_entry_within_region) as f64;
393
+ datapoints_sum[i] +=
394
+ (stop_entry_within_region - start_entry_within_region) as f64 * v.value as f64;
441
395
  //println!(
442
396
  // "start_entry_within_region:{}",
443
397
  // start_entry_within_region
@@ -457,27 +411,19 @@ fn calculate_datapoints<
457
411
  //println!("v.end:{}", v.end);
458
412
  //println!("start_region:{}", start_region);
459
413
  //println!("end_region:{}", end_region);
460
- if ((v.start as f64 <= start_region
461
- && end_region < v.end as f64)
462
- || (v.start as f64 >= start_region
463
- && (v.start as f64) < end_region)
464
- || (v.end as f64 >= start_region
465
- && (v.end as f64) < end_region)
466
- || (start_region >= v.start as f64
467
- && (v.end as f64) < end_region))
414
+ if ((v.start as f64 <= start_region && end_region < v.end as f64)
415
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
416
+ || (v.end as f64 >= start_region && (v.end as f64) < end_region)
417
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region))
468
418
  && iter > 1
469
419
  {
470
420
  // Calculate sum and number for this region
471
421
  //println!("Hello");
472
- let start_entry_within_region =
473
- determine_max(v.start as f64, start_region);
474
- let stop_entry_within_region =
475
- determine_min(v.end as f64, end_region);
476
- datapoints_num[i] += (stop_entry_within_region
477
- - start_entry_within_region)
478
- as f64;
479
- datapoints_sum[i] += (stop_entry_within_region
480
- - start_entry_within_region)
422
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
423
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
424
+ datapoints_num[i] +=
425
+ (stop_entry_within_region - start_entry_within_region) as f64;
426
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region)
481
427
  as f64
482
428
  * v.value as f64;
483
429
  //println!(
@@ -536,11 +482,9 @@ fn calculate_datapoints<
536
482
  continue;
537
483
  } else {
538
484
  if (v.start as f64 <= start_region && end_region < v.end as f64)
539
- || (v.start as f64 >= start_region
540
- && (v.start as f64) < end_region)
485
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
541
486
  || (v.end as f64 >= start_region && (v.end as f64) < end_region)
542
- || (start_region >= v.start as f64
543
- && (v.end as f64) < end_region)
487
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region)
544
488
  {
545
489
  // Calculate sum and number for this region
546
490
  //println!("i:{}", i);
@@ -548,16 +492,10 @@ fn calculate_datapoints<
548
492
  //println!("v.end:{}", v.end);
549
493
  //println!("start_region:{}", start_region);
550
494
  //println!("end_region:{}", end_region);
551
- let start_entry_within_region =
552
- determine_max(v.start as f64, start_region);
553
- let stop_entry_within_region =
554
- determine_min(v.end as f64, end_region);
555
- datapoints_num[i] += (stop_entry_within_region
556
- - start_entry_within_region)
557
- as f64;
558
- datapoints_sum[i] += (stop_entry_within_region
559
- - start_entry_within_region)
560
- as f64
495
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
496
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
497
+ datapoints_num[i] += (stop_entry_within_region - start_entry_within_region) as f64;
498
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region) as f64
561
499
  * ((v.summary.sum as f64) / v.summary.bases_covered as f64);
562
500
  //println!(
563
501
  // "start_entry_within_region:{}",
@@ -578,29 +516,20 @@ fn calculate_datapoints<
578
516
  //println!("v.end:{}", v.end);
579
517
  //println!("start_region:{}", start_region);
580
518
  //println!("end_region:{}", end_region);
581
- if ((v.start as f64 <= start_region
582
- && end_region < v.end as f64)
583
- || (v.start as f64 >= start_region
584
- && (v.start as f64) < end_region)
585
- || (v.end as f64 >= start_region
586
- && (v.end as f64) < end_region)
587
- || (start_region >= v.start as f64
588
- && (v.end as f64) < end_region))
519
+ if ((v.start as f64 <= start_region && end_region < v.end as f64)
520
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
521
+ || (v.end as f64 >= start_region && (v.end as f64) < end_region)
522
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region))
589
523
  && iter > 1
590
524
  {
591
525
  // Calculate sum and number for this region
592
- let start_entry_within_region =
593
- determine_max(v.start as f64, start_region);
594
- let stop_entry_within_region =
595
- determine_min(v.end as f64, end_region);
596
- datapoints_num[i] += (stop_entry_within_region
597
- - start_entry_within_region)
598
- as f64;
599
- datapoints_sum[i] += (stop_entry_within_region
600
- - start_entry_within_region)
526
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
527
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
528
+ datapoints_num[i] +=
529
+ (stop_entry_within_region - start_entry_within_region) as f64;
530
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region)
601
531
  as f64
602
- * ((v.summary.sum as f64)
603
- / v.summary.bases_covered as f64);
532
+ * ((v.summary.sum as f64) / v.summary.bases_covered as f64);
604
533
  //println!(
605
534
  // "start_entry_within_region inside:{}",
606
535
  // start_entry_within_region
@@ -634,9 +563,7 @@ fn calculate_datapoints<
634
563
  }
635
564
  None => {
636
565
  // To be used in nucleotide resolution
637
- let bigwig_output = reader
638
- .get_interval(&chrom, start_pos as u32, stop_pos as u32)
639
- .unwrap();
566
+ let bigwig_output = reader.get_interval(&chrom, start_pos as u32, stop_pos as u32).unwrap();
640
567
  let mut i = 0;
641
568
  let mut start_region = datapoints_list[i];
642
569
  let mut end_region = datapoints_list[i + 1];
@@ -648,11 +575,9 @@ fn calculate_datapoints<
648
575
  continue;
649
576
  } else {
650
577
  if (v.start as f64 <= start_region && end_region < v.end as f64)
651
- || (v.start as f64 >= start_region
652
- && (v.start as f64) < end_region)
578
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
653
579
  || (v.end as f64 >= start_region && (v.end as f64) < end_region)
654
- || (start_region >= v.start as f64
655
- && (v.end as f64) < end_region)
580
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region)
656
581
  {
657
582
  // Calculate sum and number for this region
658
583
  //println!("i:{}", i);
@@ -660,17 +585,11 @@ fn calculate_datapoints<
660
585
  //println!("v.end:{}", v.end);
661
586
  //println!("start_region:{}", start_region);
662
587
  //println!("end_region:{}", end_region);
663
- let start_entry_within_region =
664
- determine_max(v.start as f64, start_region);
665
- let stop_entry_within_region =
666
- determine_min(v.end as f64, end_region);
667
- datapoints_num[i] += (stop_entry_within_region
668
- - start_entry_within_region)
669
- as f64;
670
- datapoints_sum[i] += (stop_entry_within_region
671
- - start_entry_within_region)
672
- as f64
673
- * v.value as f64;
588
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
589
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
590
+ datapoints_num[i] += (stop_entry_within_region - start_entry_within_region) as f64;
591
+ datapoints_sum[i] +=
592
+ (stop_entry_within_region - start_entry_within_region) as f64 * v.value as f64;
674
593
  //println!(
675
594
  // "start_entry_within_region:{}",
676
595
  // start_entry_within_region
@@ -690,27 +609,19 @@ fn calculate_datapoints<
690
609
  //println!("v.end:{}", v.end);
691
610
  //println!("start_region:{}", start_region);
692
611
  //println!("end_region:{}", end_region);
693
- if ((v.start as f64 <= start_region
694
- && end_region < v.end as f64)
695
- || (v.start as f64 >= start_region
696
- && (v.start as f64) < end_region)
697
- || (v.end as f64 >= start_region
698
- && (v.end as f64) < end_region)
699
- || (start_region >= v.start as f64
700
- && (v.end as f64) < end_region))
612
+ if ((v.start as f64 <= start_region && end_region < v.end as f64)
613
+ || (v.start as f64 >= start_region && (v.start as f64) < end_region)
614
+ || (v.end as f64 >= start_region && (v.end as f64) < end_region)
615
+ || (start_region >= v.start as f64 && (v.end as f64) < end_region))
701
616
  && iter > 1
702
617
  {
703
618
  // Calculate sum and number for this region
704
619
  //println!("Hello");
705
- let start_entry_within_region =
706
- determine_max(v.start as f64, start_region);
707
- let stop_entry_within_region =
708
- determine_min(v.end as f64, end_region);
709
- datapoints_num[i] += (stop_entry_within_region
710
- - start_entry_within_region)
711
- as f64;
712
- datapoints_sum[i] += (stop_entry_within_region
713
- - start_entry_within_region)
620
+ let start_entry_within_region = determine_max(v.start as f64, start_region);
621
+ let stop_entry_within_region = determine_min(v.end as f64, end_region);
622
+ datapoints_num[i] +=
623
+ (stop_entry_within_region - start_entry_within_region) as f64;
624
+ datapoints_sum[i] += (stop_entry_within_region - start_entry_within_region)
714
625
  as f64
715
626
  * v.value as f64;
716
627
  //println!(
@@ -762,4 +673,4 @@ fn calculate_datapoints<
762
673
  }
763
674
  output_vec.pop();
764
675
  println!("{}", output_vec);
765
- }
676
+ }
@@ -57,6 +57,7 @@ struct InteractiveData {
57
57
  x_buffer: i64,
58
58
  y_min: f64,
59
59
  y_max: f64,
60
+ device_pixel_ratio: f64,
60
61
  }
61
62
 
62
63
  #[derive(Serialize)]
@@ -335,12 +336,8 @@ fn plot_grin2_manhattan(
335
336
  let png_width = plot_width + 2 * png_dot_radius;
336
337
  let png_height = plot_height + 2 * png_dot_radius;
337
338
 
338
- let w: u32 = (png_width * device_pixel_ratio as u64)
339
- .try_into()
340
- .expect("PNG width too large for u32");
341
- let h: u32 = (png_height * device_pixel_ratio as u64)
342
- .try_into()
343
- .expect("PNG height too large for u32");
339
+ let w: u32 = ((png_width as f64) * dpr) as u32;
340
+ let h: u32 = ((png_height as f64) * dpr) as u32;
344
341
 
345
342
  // Create RGB buffer for Plotters
346
343
  let mut buffer = vec![0u8; w as usize * h as usize * 3];
@@ -402,8 +399,8 @@ fn plot_grin2_manhattan(
402
399
 
403
400
  for (i, p) in point_details.iter_mut().enumerate() {
404
401
  let (px, py) = pixel_positions[*&sig_indices[i]];
405
- p.pixel_x = px;
406
- p.pixel_y = py;
402
+ p.pixel_x = px / dpr;
403
+ p.pixel_y = py / dpr;
407
404
  }
408
405
 
409
406
  // flush root drawing area
@@ -469,6 +466,7 @@ fn plot_grin2_manhattan(
469
466
  x_buffer,
470
467
  y_min,
471
468
  y_max,
469
+ device_pixel_ratio: dpr,
472
470
  };
473
471
  Ok((png_data, interactive_data))
474
472
  }
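The `device_pixel_ratio` change above scales the Manhattan-plot raster up by `dpr` and divides interactive point coordinates back down, so the PNG stays sharp on high-DPI displays while hit coordinates are reported in CSS pixels. A minimal sketch of that round trip, with illustrative numbers:

```rust
// Illustrative round trip: render at a dpr-scaled resolution, report CSS-pixel coordinates.
fn main() {
    let (plot_width, plot_height, dot_radius): (u64, u64, u64) = (800, 400, 4);
    let dpr: f64 = 2.0; // device_pixel_ratio sent by the client

    // Physical raster size, as in plot_grin2_manhattan
    let png_width = plot_width + 2 * dot_radius;
    let png_height = plot_height + 2 * dot_radius;
    let w = (png_width as f64 * dpr) as u32;
    let h = (png_height as f64 * dpr) as u32;

    // A dot drawn at physical pixel (1616, 404) maps back to CSS pixels for hover/click hit testing
    let (px, py) = (1616.0_f64, 404.0_f64);
    let (css_x, css_y) = (px / dpr, py / dpr);

    println!("raster {w}x{h}, dot at css ({css_x}, {css_y})");
}
```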
package/src/ollama.rs CHANGED
@@ -663,6 +663,7 @@ impl From<rig::completion::ToolDefinition> for ToolDefinition {
663
663
  }
664
664
 
665
665
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
666
+ #[allow(dead_code)]
666
667
  pub struct ToolCall {
667
668
  // pub id: String,
668
669
  #[serde(default, rename = "type")]
@@ -872,6 +873,7 @@ impl From<rig::message::ToolCall> for ToolCall {
872
873
  }
873
874
 
874
875
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
876
+ #[allow(dead_code)]
875
877
  pub struct SystemContent {
876
878
  #[serde(default)]
877
879
  r#type: SystemContentType,
@@ -880,6 +882,7 @@ pub struct SystemContent {
880
882
 
881
883
  #[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
882
884
  #[serde(rename_all = "lowercase")]
885
+ #[allow(dead_code)]
883
886
  pub enum SystemContentType {
884
887
  #[default]
885
888
  Text,
@@ -905,6 +908,7 @@ impl FromStr for SystemContent {
905
908
  }
906
909
 
907
910
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
911
+ #[allow(dead_code)]
908
912
  pub struct AssistantContent {
909
913
  pub text: String,
910
914
  }
@@ -918,6 +922,7 @@ impl FromStr for AssistantContent {
918
922
 
919
923
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
920
924
  #[serde(tag = "type", rename_all = "lowercase")]
925
+ #[allow(dead_code)]
921
926
  pub enum UserContent {
922
927
  Text { text: String },
923
928
  Image { image_url: ImageUrl },
@@ -932,6 +937,7 @@ impl FromStr for UserContent {
932
937
  }
933
938
 
934
939
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
940
+ #[allow(dead_code)]
935
941
  pub struct ImageUrl {
936
942
  pub url: String,
937
943
  #[serde(default)]
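The only change in `ollama.rs` (and in `sjprovider.rs` below) is adding `#[allow(dead_code)]` to serde message types, presumably because their fields and variants are exercised only through (de)serialization and would otherwise trip rustc's dead-code lint. A tiny standalone illustration of the attribute; the struct here is made up:

```rust
// Without the attribute, rustc warns that `url` and `detail` are never read,
// since the only "use" here is construction via derived Deserialize.
#[allow(dead_code)]
#[derive(Debug, serde::Deserialize)]
struct ImageRef {
    url: String,
    detail: Option<String>,
}

fn main() {
    let img: ImageRef = serde_json::from_str(r#"{"url":"https://example.org/x.png"}"#).unwrap();
    println!("{img:?}");
}
```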
package/src/sjprovider.rs CHANGED
@@ -729,6 +729,7 @@ impl From<rig::completion::ToolDefinition> for ToolDefinition {
729
729
  }
730
730
 
731
731
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
732
+ #[allow(dead_code)]
732
733
  pub struct ToolCall {
733
734
  // pub id: String,
734
735
  #[serde(default, rename = "type")]
@@ -737,11 +738,13 @@ pub struct ToolCall {
737
738
  }
738
739
  #[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
739
740
  #[serde(rename_all = "lowercase")]
741
+ #[allow(dead_code)]
740
742
  pub enum ToolType {
741
743
  #[default]
742
744
  Function,
743
745
  }
744
746
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
747
+ #[allow(dead_code)]
745
748
  pub struct Function {
746
749
  pub name: String,
747
750
  pub arguments: Value,
@@ -938,6 +941,7 @@ impl From<rig::message::ToolCall> for ToolCall {
938
941
  }
939
942
 
940
943
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
944
+ #[allow(dead_code)]
941
945
  pub struct SystemContent {
942
946
  #[serde(default)]
943
947
  r#type: SystemContentType,
@@ -971,6 +975,7 @@ impl FromStr for SystemContent {
971
975
  }
972
976
 
973
977
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
978
+ #[allow(dead_code)]
974
979
  pub struct AssistantContent {
975
980
  pub text: String,
976
981
  }
@@ -984,6 +989,7 @@ impl FromStr for AssistantContent {
984
989
 
985
990
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
986
991
  #[serde(tag = "type", rename_all = "lowercase")]
992
+ #[allow(dead_code)]
987
993
  pub enum UserContent {
988
994
  Text { text: String },
989
995
  Image { image_url: ImageUrl },
@@ -998,6 +1004,7 @@ impl FromStr for UserContent {
998
1004
  }
999
1005
 
1000
1006
  #[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
1007
+ #[allow(dead_code)]
1001
1008
  pub struct ImageUrl {
1002
1009
  pub url: String,
1003
1010
  #[serde(default)]
package/src/test_ai.rs CHANGED
@@ -42,6 +42,7 @@ mod tests {
42
42
  let top_p: f32 = 0.95;
43
43
  let serverconfig_file_path = Path::new("../../serverconfig.json");
44
44
  let absolute_path = serverconfig_file_path.canonicalize().unwrap();
45
+ let testing = true; // This makes run_pipeline() output its JSON in the LLM JSON format
45
46
 
46
47
  // Read the file
47
48
  let data = fs::read_to_string(absolute_path).unwrap();
@@ -83,7 +84,6 @@ mod tests {
83
84
  .expect("Ollama server not found");
84
85
  let embedding_model = ollama_client.embedding_model(ollama_embedding_model_name);
85
86
  let comp_model = ollama_client.completion_model(ollama_comp_model_name);
86
-
87
87
  for chart in ai_json.charts.clone() {
88
88
  match chart {
89
89
  super::super::Charts::Summary(testdata) => {
@@ -100,6 +100,7 @@ mod tests {
100
100
  &dataset_db,
101
101
  &genedb,
102
102
  &ai_json,
103
+ testing,
103
104
  )
104
105
  .await;
105
106
  let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
@@ -142,6 +143,7 @@ mod tests {
142
143
  &dataset_db,
143
144
  &genedb,
144
145
  &ai_json,
146
+ testing,
145
147
  )
146
148
  .await;
147
149
  let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");