@sjcrh/proteinpaint-rust 2.148.1 → 2.150.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/sjprovider.rs CHANGED
@@ -73,6 +73,15 @@ impl Client {
     pub fn builder() -> ClientBuilder<'static> {
         ClientBuilder::new()
     }
+
+    pub fn completion_model(&self, model: &str) -> CompletionModel {
+        CompletionModel::new(self.clone(), model)
+    }
+
+    pub fn embedding_model(&self, model: &str) -> EmbeddingModel {
+        EmbeddingModel::new(self.clone(), model, 0, self.base_url.to_string())
+    }
+
     pub fn new() -> Self {
         Self::builder().build().expect("Myprovider client should build")
     }
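
The two constructors added here give the SJ client the same surface as the Ollama provider, so callers can obtain model handles directly from a configured client. A minimal usage sketch, assuming a built sjprovider::Client; the base URL and model names are placeholders, not values from the package:

    // Hypothetical usage; the URL and model names are placeholders.
    let client = sjprovider::Client::builder()
        .base_url("http://localhost:8000")
        .build()
        .expect("SJ server not found");
    // Both helpers clone the client, so each model handle is self-contained.
    let comp_model = client.completion_model("my-completion-model");
    let embedding_model = client.embedding_model("my-embedding-model");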
@@ -129,11 +138,16 @@ impl EmbeddingsClient for Client {
 
 impl VerifyClient for Client {
     async fn verify(&self) -> Result<(), VerifyError> {
-        let response = self.get("api/tags").expect("Failed to build request").send().await?;
+        let response = self
+            .get("api/tags")
+            .expect("Failed to build request")
+            .send()
+            .await
+            .unwrap();
         match response.status() {
             reqwest::StatusCode::OK => Ok(()),
             _ => {
-                response.error_for_status()?;
+                response.error_for_status().unwrap();
                 Ok(())
             }
         }
@@ -262,7 +276,7 @@ impl embeddings::EmbeddingModel for EmbeddingModel {
 
         if response.status().is_success() {
             //println!("response.json:{:?}", response.text().await?);
-            let json_data: Value = serde_json::from_str(&response.text().await?)?;
+            let json_data: Value = serde_json::from_str(&response.text().await.unwrap())?;
             let emb = json_data["outputs"].as_array().unwrap();
             //.unwrap_or(&vec![serde_json::Value::String(
             //    "No embeddings found in json output".to_string(),
@@ -481,12 +495,39 @@ impl CompletionModel {
             panic!("max_new_tokens and top_p not found!");
         };
 
+        let mut user_query = "";
+        let mut system_prompt = "";
+        for message in &full_history {
+            match message {
+                self::Message::User {
+                    content: text,
+                    images: _,
+                    name: _,
+                } => {
+                    //println!("User:{:?}", text);
+                    user_query = text;
+                }
+                self::Message::System {
+                    content: text,
+                    images: _,
+                    name: _,
+                } => {
+                    system_prompt = text;
+                    //println!("System:{:?}", text);
+                }
+                self::Message::Assistant { content: _, id: _ } => {}
+                self::Message::ToolResult { content: _, name: _ } => {}
+            }
+        }
+        let final_text = system_prompt.replace(&"{question}", &user_query);
+
+        //println!("final_text:{:?}", final_text);
         let mut request_payload = json!({
             "inputs":[
                 {
                     "model_name": self.model,
                     "inputs": {
-                        "text": full_history,
+                        "text": final_text,
                         "max_new_tokens": max_new_tokens,
                         "temperature": completion_request.temperature,
                         "top_p": top_p
@@ -612,7 +653,7 @@ impl completion::CompletionModel for CompletionModel {
             let chunk = match chunk_result {
                 Ok(c) => c,
                 Err(e) => {
-                    yield Err(CompletionError::from(e));
+                    yield Err(CompletionError::RequestError(e.into()));
                     break;
                 }
             };
@@ -797,7 +838,7 @@ impl ConvertMessage for Message {
                     images.push(data)
                 }
                 rig::message::UserContent::Document(rig::message::Document { data, .. }) => {
-                    texts.push(data)
+                    texts.push(data.to_string())
                 }
                 _ => {} // Audio not supported by Ollama
             }
@@ -993,7 +1034,7 @@ mod tests {
     #[tokio::test]
     #[ignore]
 
-    async fn test_myprovider_implementation() {
+    async fn test_sjprovider_implementation() {
         let user_input = "Generate DE plot for men with weight greater than 30lbs vs women less than 20lbs";
         let serverconfig_file_path = Path::new("../../serverconfig.json");
         let absolute_path = serverconfig_file_path.canonicalize().unwrap();
@@ -1118,17 +1159,17 @@ If a query does not match any of the fields described above, then return JSON wi
         });
 
         // Create RAG agent
-        let agent = AgentBuilder::new(comp_model).preamble("Generate classification for the user query into summary, dge, hierarchial, snv_indel, cnv, variant_calling, sv_fusion and none categories. Return output in JSON with ALWAYS a single word answer { \"answer\": \"dge\" }, that is 'summary' for summary plot, 'dge' for differential gene expression, 'hierarchial' for hierarchial clustering, 'snv_indel' for SNV/Indel, 'cnv' for CNV and 'sv_fusion' for SV/fusion, 'variant_calling' for variant calling, 'surivial' for survival data, 'none' for none of the previously described categories. The answer should always be in lower case").dynamic_context(top_k, vector_store.index(embedding_model)).additional_params(additional).temperature(temperature).build();
+        let agent = AgentBuilder::new(comp_model).preamble("Generate classification for the user query into summary, dge, hierarchial, snv_indel, cnv, variant_calling, sv_fusion and none categories. Return output in JSON with ALWAYS a single word answer { \"answer\": \"dge\" }, that is 'summary' for summary plot, 'dge' for differential gene expression, 'hierarchial' for hierarchial clustering, 'snv_indel' for SNV/Indel, 'cnv' for CNV and 'sv_fusion' for SV/fusion, 'variant_calling' for variant calling, 'surivial' for survival data, 'none' for none of the previously described categories. The answer should always be in lower case. \nQuestion= {question} \nanswer").dynamic_context(top_k, vector_store.index(embedding_model)).additional_params(additional).temperature(temperature).build();
 
         let response = agent.prompt(user_input).await.expect("Failed to prompt myprovider");
 
         //println!("Myprovider: {}", response);
         let result = response.replace("json", "").replace("```", "");
         //println!("result:{}", result);
-        let json_value: Value = serde_json::from_str(&result).expect("REASON");
-        let json_value2: Value = serde_json::from_str(&json_value[0]["generated_text"].to_string()).expect("REASON2");
+        let json_value: Value = serde_json::from_str(&result).expect("REASON2");
+        let json_value2: Value = serde_json::from_str(&json_value[0]["generated_text"].to_string()).expect("REASON3");
         //println!("json_value2:{}", json_value2.as_str().unwrap());
-        let json_value3: Value = serde_json::from_str(&json_value2.as_str().unwrap()).expect("REASON2");
+        let json_value3: Value = serde_json::from_str(&json_value2.as_str().unwrap()).expect("REASON4");
         assert_eq!(json_value3["answer"].to_string().replace("\"", ""), "dge");
     }
 }
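
The tail of the test parses the answer out of three nested layers: the response body is a JSON array, its generated_text field is a Value that gets re-serialized and re-parsed to recover the embedded string, and that string is itself the final JSON object. A self-contained sketch of the same unwrapping with a hand-built response literal (the payload shape is inferred from the test, not documented by the provider):

    use serde_json::Value;

    fn main() {
        // Hand-built stand-in for the provider response; generated_text
        // carries the final answer as an embedded JSON string.
        let result = r#"[{"generated_text": "{\"answer\": \"dge\"}"}]"#;
        let json_value: Value = serde_json::from_str(result).unwrap(); // outer array
        // .to_string() re-serializes the Value; parsing it back recovers the raw string.
        let json_value2: Value =
            serde_json::from_str(&json_value[0]["generated_text"].to_string()).unwrap();
        // The recovered string is itself JSON: the {"answer": ...} object.
        let json_value3: Value = serde_json::from_str(json_value2.as_str().unwrap()).unwrap();
        assert_eq!(json_value3["answer"], "dge");
    }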
package/src/test_ai.rs ADDED
@@ -0,0 +1,168 @@
+// For capturing output from a test, run: cd .. && cargo test -- --nocapture
+// Ignored tests: cd .. && export RUST_BACKTRACE=full && time cargo test -- --ignored --nocapture
+#[allow(dead_code)]
+fn main() {}
+
+#[cfg(test)]
+mod tests {
+    use serde_json;
+    use std::fs::{self};
+    use std::path::Path;
+
+    #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+    struct ServerConfig {
+        tpmasterdir: String,
+        llm_backend: String,
+        sj_apilink: String,
+        sj_comp_model_name: String,
+        sj_embedding_model_name: String,
+        ollama_apilink: String,
+        ollama_comp_model_name: String,
+        ollama_embedding_model_name: String,
+        genomes: Vec<Genomes>,
+    }
+
+    #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+    struct Genomes {
+        name: String,
+        datasets: Vec<Dataset>,
+    }
+
+    #[derive(PartialEq, Debug, Clone, schemars::JsonSchema, serde::Serialize, serde::Deserialize)]
+    struct Dataset {
+        name: String,
+        aifiles: Option<String>, // For now aifiles are defined only for certain datasets
+    }
+
+    #[tokio::test]
+    #[ignore]
+    async fn user_prompts() {
+        let temperature: f64 = 0.01;
+        let max_new_tokens: usize = 512;
+        let top_p: f32 = 0.95;
+        let serverconfig_file_path = Path::new("../../serverconfig.json");
+        let absolute_path = serverconfig_file_path.canonicalize().unwrap();
+
+        // Read the file
+        let data = fs::read_to_string(absolute_path).unwrap();
+
+        // Parse the JSON data
+        let serverconfig: ServerConfig = serde_json::from_str(&data).expect("JSON not in serverconfig.json format");
+
+        for genome in &serverconfig.genomes {
+            for dataset in &genome.datasets {
+                match &dataset.aifiles {
+                    Some(ai_json_file) => {
+                        println!("Testing dataset:{}", dataset.name);
+                        let ai_json_file_path = String::from("../../") + ai_json_file;
+                        let ai_json_file = Path::new(&ai_json_file_path);
+
+                        // Read the file
+                        let ai_data = fs::read_to_string(ai_json_file).unwrap();
+                        // Parse the JSON data
+                        let ai_json: super::super::AiJsonFormat =
+                            serde_json::from_str(&ai_data).expect("AI JSON file does not have the correct format");
+                        //println!("ai_json:{:?}", ai_json);
+                        let genedb = String::from(&serverconfig.tpmasterdir) + &"/" + &ai_json.genedb;
+                        let dataset_db = String::from(&serverconfig.tpmasterdir) + &"/" + &ai_json.db;
+                        let llm_backend_name = &serverconfig.llm_backend;
+                        let llm_backend_type: super::super::llm_backend;
+
+                        if llm_backend_name != "ollama" && llm_backend_name != "SJ" {
+                            panic!(
+                                "This code currently supports only Ollama and SJ provider. llm_backend_name must be \"ollama\" or \"SJ\""
+                            );
+                        } else if *llm_backend_name == "ollama".to_string() {
+                            let ollama_host = &serverconfig.ollama_apilink;
+                            let ollama_embedding_model_name = &serverconfig.ollama_embedding_model_name;
+                            let ollama_comp_model_name = &serverconfig.ollama_comp_model_name;
+                            llm_backend_type = super::super::llm_backend::Ollama();
+                            let ollama_client = super::super::ollama::Client::builder()
+                                .base_url(ollama_host)
+                                .build()
+                                .expect("Ollama server not found");
+                            let embedding_model = ollama_client.embedding_model(ollama_embedding_model_name);
+                            let comp_model = ollama_client.completion_model(ollama_comp_model_name);
+
+                            for chart in ai_json.charts.clone() {
+                                match chart {
+                                    super::super::Charts::Summary(testdata) => {
+                                        for ques_ans in testdata.TestData {
+                                            let user_input = ques_ans.question;
+                                            let llm_output = super::super::run_pipeline(
+                                                &user_input,
+                                                comp_model.clone(),
+                                                embedding_model.clone(),
+                                                llm_backend_type.clone(),
+                                                temperature,
+                                                max_new_tokens,
+                                                top_p,
+                                                &dataset_db,
+                                                &genedb,
+                                                &ai_json,
+                                            )
+                                            .await;
+                                            let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
+                                            let mut expected_json_value: super::super::SummaryType = serde_json::from_str(&ques_ans.answer).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
+                                            assert_eq!(
+                                                llm_json_value.sort_summarytype_struct(),
+                                                expected_json_value.sort_summarytype_struct()
+                                            );
+                                        }
+                                    }
+                                    super::super::Charts::DE(_testdata) => {} // To do
+                                }
+                            }
+                        } else if *llm_backend_name == "SJ".to_string() {
+                            let sjprovider_host = &serverconfig.sj_apilink;
+                            let sj_embedding_model_name = &serverconfig.sj_embedding_model_name;
+                            let sj_comp_model_name = &serverconfig.sj_comp_model_name;
+                            llm_backend_type = super::super::llm_backend::Sj();
+                            let sj_client = super::super::sjprovider::Client::builder()
+                                .base_url(sjprovider_host)
+                                .build()
+                                .expect("SJ server not found");
+                            let embedding_model = sj_client.embedding_model(sj_embedding_model_name);
+                            let comp_model = sj_client.completion_model(sj_comp_model_name);
+
+                            for chart in ai_json.charts.clone() {
+                                match chart {
+                                    super::super::Charts::Summary(testdata) => {
+                                        for ques_ans in testdata.TestData {
+                                            let user_input = ques_ans.question;
+                                            if user_input.len() > 0 {
+                                                let llm_output = super::super::run_pipeline(
+                                                    &user_input,
+                                                    comp_model.clone(),
+                                                    embedding_model.clone(),
+                                                    llm_backend_type.clone(),
+                                                    temperature,
+                                                    max_new_tokens,
+                                                    top_p,
+                                                    &dataset_db,
+                                                    &genedb,
+                                                    &ai_json,
+                                                )
+                                                .await;
+                                                let mut llm_json_value: super::super::SummaryType = serde_json::from_str(&llm_output.unwrap()).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
+                                                let mut expected_json_value: super::super::SummaryType = serde_json::from_str(&ques_ans.answer).expect("Did not get a valid JSON of type {action: summary, summaryterms:[{clinical: term1}, {geneExpression: gene}], filter:[{term: term1, value: value1}]} from the LLM");
+                                                assert_eq!(
+                                                    llm_json_value.sort_summarytype_struct(),
+                                                    expected_json_value.sort_summarytype_struct()
+                                                );
+                                            } else {
+                                                panic!("The user input is empty");
+                                            }
+                                        }
+                                    }
+                                    super::super::Charts::DE(_testdata) => {} // To do
+                                }
+                            }
+                        }
+                    }
+                    None => {}
+                }
+            }
+        }
+    }
+}
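
For orientation, a serverconfig.json that satisfies the ServerConfig struct above would look roughly like the sample below. Every value is a placeholder; the real file carries additional server settings, which serde tolerates because unknown fields are ignored by default:

    // Sketch of the minimal config shape user_prompts() deserializes.
    // All values are placeholders, not real endpoints or model names.
    let sample = r#"{
        "tpmasterdir": "/path/to/tp",
        "llm_backend": "ollama",
        "sj_apilink": "http://sj-host/api",
        "sj_comp_model_name": "sj-comp",
        "sj_embedding_model_name": "sj-embed",
        "ollama_apilink": "http://localhost:11434",
        "ollama_comp_model_name": "llama3",
        "ollama_embedding_model_name": "nomic-embed-text",
        "genomes": [
            { "name": "hg38", "datasets": [ { "name": "demo", "aifiles": "path/to/ai.json" } ] }
        ]
    }"#;
    let parsed: ServerConfig = serde_json::from_str(sample).expect("sample matches ServerConfig");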