@sjcrh/proteinpaint-server 2.175.0 → 2.176.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sjcrh/proteinpaint-server",
3
- "version": "2.175.0",
3
+ "version": "2.176.0",
4
4
  "type": "module",
5
5
  "description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
6
6
  "main": "src/app.js",
@@ -66,7 +66,7 @@
66
66
  "@sjcrh/proteinpaint-r": "2.152.1-0",
67
67
  "@sjcrh/proteinpaint-rust": "2.175.0",
68
68
  "@sjcrh/proteinpaint-shared": "2.175.0",
69
- "@sjcrh/proteinpaint-types": "2.175.0",
69
+ "@sjcrh/proteinpaint-types": "2.176.0",
70
70
  "@types/express": "^5.0.0",
71
71
  "@types/express-session": "^1.18.1",
72
72
  "better-sqlite3": "^12.4.1",
@@ -30,7 +30,10 @@ function init({ genomes }) {
30
30
  if (q.overlayTw) terms.push(q.overlayTw);
31
31
  if (q.divideTw) terms.push(q.divideTw);
32
32
  try {
33
- const data = await getData({ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__ }, ds);
33
+ const data = await getData(
34
+ { filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
35
+ ds
36
+ );
34
37
  if (data.error) throw new Error(data.error);
35
38
  const { absMin, absMax, bins, charts, uncomputableValues, descrStats, outlierMin, outlierMax } = await processData(data, q);
36
39
  const returnData = {
@@ -42,7 +42,8 @@ async function trigger_getcategories(q, res, tdb, ds) {
42
42
  // optional, from mds3 mayAddGetCategoryArgs()
43
43
  rglst: q.rglst,
44
44
  // optional, from mds3 mayAddGetCategoryArgs()
45
- __protected__: q.__protected__
45
+ __protected__: q.__protected__,
46
+ __abortSignal: q.__abortSignal
46
47
  };
47
48
  const data = await getData(arg, ds);
48
49
  if (data.error) throw data.error;
@@ -188,6 +188,18 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
188
188
  testing
189
189
  );
190
190
  mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
191
+ } else if (classResult == "sampleScatter") {
192
+ const time12 = (/* @__PURE__ */ new Date()).valueOf();
193
+ ai_output_json = await extract_samplescatter_terms_from_query(
194
+ user_prompt,
195
+ llm,
196
+ dataset_db_output,
197
+ dataset_json,
198
+ genes_list,
199
+ ds,
200
+ testing
201
+ );
202
+ mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
191
203
  } else {
192
204
  ai_output_json = { type: "html", html: "Unknown classification value" };
193
205
  }
@@ -701,6 +713,118 @@ function validate_matrix_response(response, common_genes, dataset_json, ds) {
701
713
  return { type: "plot", plot: pp_plot_json };
702
714
  }
703
715
  }
716
+ async function extract_samplescatter_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
717
+ if (!dataset_json.prebuiltPlots || dataset_json.prebuiltPlots.length == 0) {
718
+ return { type: "html", html: "No pre-built scatter plots (t-SNE/UMAP) are available for this dataset" };
719
+ }
720
+ const Schema = {
721
+ $schema: "http://json-schema.org/draft-07/schema#",
722
+ $ref: "#/definitions/SampleScatterType",
723
+ definitions: {
724
+ SampleScatterType: {
725
+ type: "object",
726
+ properties: {
727
+ plotName: {
728
+ type: "string",
729
+ description: "Name of the pre-built scatter plot to display"
730
+ },
731
+ colorTW: {
732
+ type: ["string", "null"],
733
+ description: "Term name or gene name to overlay as color on the scatter plot. Set to null to remove the color overlay."
734
+ },
735
+ shapeTW: {
736
+ type: ["string", "null"],
737
+ description: "Term name or gene name to overlay as shape on the scatter plot. Set to null to remove the shape overlay."
738
+ },
739
+ term0: {
740
+ type: ["string", "null"],
741
+ description: "Term name to use for Z/Divide which splits the plot into panels. Set to null to remove the divide overlay."
742
+ },
743
+ simpleFilter: {
744
+ type: "array",
745
+ items: { $ref: "#/definitions/FilterTerm" },
746
+ description: "Optional simple filter terms to restrict the sample set"
747
+ }
748
+ },
749
+ required: ["plotName"],
750
+ additionalProperties: false
751
+ },
752
+ ...FILTER_TERM_DEFINITIONS
753
+ }
754
+ };
755
+ const common_genes = extractGenesFromPrompt(prompt, genes_list);
756
+ const scatter_ds = dataset_json.charts.find((chart) => chart.type == "sampleScatter");
757
+ if (!scatter_ds) throw "sampleScatter information is not present in the dataset file.";
758
+ if (scatter_ds.TrainingData.length == 0) throw "No training data is provided for the sampleScatter agent.";
759
+ const training_data = formatTrainingExamples(scatter_ds.TrainingData);
760
+ const plotNames = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");
761
+ let system_prompt = "I am an assistant that extracts overlay parameters for pre-built scatter plots (t-SNE/UMAP). The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " + JSON.stringify(Schema) + " The available pre-built plots are: " + plotNames + '. The "plotName" field must match one of these exactly. The "colorTW", "shapeTW", and "term0" fields should contain names of clinical fields from the sqlite db OR gene names. To remove an overlay, set the corresponding field to null explicitly. If the user does not mention a particular overlay, do NOT include that field in the output (omit it entirely). Only include "colorTW", "shapeTW", or "term0" if the user explicitly mentions coloring, shaping, or dividing. ' + FILTER_DESCRIPTION + checkField(dataset_json.DatasetPrompt) + checkField(scatter_ds.SystemPrompt) + "\n The DB content is as follows: " + dataset_db_output.rag_docs.join(",") + " training data is as follows:" + training_data;
762
+ if (dataset_json.hasGeneExpression && common_genes.length > 0) {
763
+ system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
764
+ }
765
+ system_prompt += " Question: {" + prompt + "} answer:";
766
+ const response = await route_to_appropriate_llm_provider(system_prompt, llm);
767
+ if (testing) {
768
+ return { action: "sampleScatter", response: JSON.parse(response) };
769
+ } else {
770
+ return validate_samplescatter_response(response, common_genes, dataset_json, ds);
771
+ }
772
+ }
773
+ function validate_samplescatter_response(response, common_genes, dataset_json, ds) {
774
+ const response_type = JSON.parse(response);
775
+ let html = "";
776
+ if (response_type.html) html = response_type.html;
777
+ if (!response_type.plotName) {
778
+ html += "plotName is required for sample scatter output";
779
+ } else {
780
+ const matchedPlot = dataset_json.prebuiltPlots.find(
781
+ (p) => p.name.toLowerCase() == response_type.plotName.toLowerCase()
782
+ );
783
+ if (!matchedPlot) {
784
+ const availablePlots = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");
785
+ html += "Unknown plot name: " + response_type.plotName + ". Available plots are: " + availablePlots;
786
+ }
787
+ }
788
+ const pp_plot_json = {
789
+ chartType: "sampleScatter",
790
+ name: response_type.plotName
791
+ };
792
+ const validateOverlayTerm = (termName, fieldKey) => {
793
+ if (termName === null) {
794
+ pp_plot_json[fieldKey] = null;
795
+ return;
796
+ }
797
+ if (termName === void 0) {
798
+ return;
799
+ }
800
+ const termValidation = validate_term(termName, common_genes, dataset_json, ds);
801
+ if (termValidation.html.length > 0) {
802
+ html += termValidation.html;
803
+ } else {
804
+ const tw = { ...termValidation.term_type };
805
+ if (termValidation.category == "float" || termValidation.category == "integer") {
806
+ tw.q = { mode: "continuous" };
807
+ }
808
+ pp_plot_json[fieldKey] = tw;
809
+ }
810
+ };
811
+ validateOverlayTerm(response_type.colorTW, "colorTW");
812
+ validateOverlayTerm(response_type.shapeTW, "shapeTW");
813
+ validateOverlayTerm(response_type.term0, "term0");
814
+ if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
815
+ const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
816
+ if (validated_filters.html.length > 0) {
817
+ html += validated_filters.html;
818
+ } else {
819
+ pp_plot_json.filter = validated_filters.simplefilter;
820
+ }
821
+ }
822
+ if (html.length > 0) {
823
+ return { type: "html", html };
824
+ } else {
825
+ return { type: "plot", plot: pp_plot_json };
826
+ }
827
+ }
704
828
  function validate_term(response_term, common_genes, dataset_json, ds) {
705
829
  let html = "";
706
830
  let term_type;
@@ -62,6 +62,7 @@ async function getResult(q, ds) {
62
62
  if (q.dataType == TermTypes.GENE_EXPRESSION) {
63
63
  _q = JSON.parse(JSON.stringify(q));
64
64
  _q.forClusteringAnalysis = true;
65
+ _q.__abortSignal = q.__abortSignal;
65
66
  }
66
67
  let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
67
68
  if (q.dataType == NUMERIC_DICTIONARY_TERM) {
@@ -80,7 +80,6 @@ function make(q, req, res, ds, genome) {
80
80
  if (ds.cohort.boxplots) c.boxplots = ds.cohort.boxplots;
81
81
  if (tdb.maxGeneVariantGeneSetSize) c.maxGeneVariantGeneSetSize = tdb.maxGeneVariantGeneSetSize;
82
82
  addRestrictAncestries(c, tdb);
83
- addScatterplots(c, ds);
84
83
  addMatrixplots(c, ds);
85
84
  addMutationSignatureplots(c, ds);
86
85
  addNonDictionaryQueries(c, ds, genome);
@@ -89,6 +88,7 @@ function make(q, req, res, ds, genome) {
89
88
  c.clientAuthResult = info?.clientAuthResult || {};
90
89
  if (tdb.displaySampleIds) c.displaySampleIds = tdb.displaySampleIds(c.clientAuthResult);
91
90
  c.authFilter = req.query.filter;
91
+ addScatterplots(c, ds, info);
92
92
  res.send({ termdbConfig: c });
93
93
  }
94
94
  function addRestrictAncestries(c, tdb) {
@@ -97,8 +97,12 @@ function addRestrictAncestries(c, tdb) {
97
97
  return { name: i.name, tvs: i.tvs, PCcount: i.PCcount };
98
98
  });
99
99
  }
100
- function addScatterplots(c, ds) {
100
+ function addScatterplots(c, ds, info) {
101
101
  if (!ds.cohort.scatterplots) return;
102
+ if (ds.cohort.scatterplots.get) {
103
+ c.scatterplots = ds.cohort.scatterplots.get(info?.clientAuthResult);
104
+ return;
105
+ }
102
106
  c.scatterplots = ds.cohort.scatterplots.plots.map((p) => {
103
107
  return {
104
108
  name: p.name,
@@ -37,14 +37,20 @@ function init({ genomes }) {
37
37
  }
38
38
  async function trigger_getDescrStats(q, ds) {
39
39
  const data = await getData(
40
- { filter: q.filter, filter0: q.filter0, terms: [q.tw], __protected__: q.__protected__ },
40
+ {
41
+ filter: q.filter,
42
+ filter0: q.filter0,
43
+ terms: [q.tw],
44
+ __protected__: q.__protected__,
45
+ __abortSignal: q.__abortSignal
46
+ },
41
47
  ds
42
48
  );
43
49
  if (data.error) throw data.error;
44
50
  const values = [];
45
51
  for (const key in data.samples) {
46
52
  const sample = data.samples[key];
47
- const v = sample[q.tw.$id];
53
+ const v = sample[q.tw.$id || ""];
48
54
  if (!v && v !== 0) {
49
55
  continue;
50
56
  }
@@ -40,14 +40,14 @@ function init({ genomes }) {
40
40
  if (q.scaleDotTW) terms.push(q.scaleDotTW);
41
41
  if (q.coordTWs) for (const tw of q.coordTWs) terms.push(tw);
42
42
  const data = await getData(
43
- { filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__ },
43
+ { filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
44
44
  ds,
45
45
  true
46
46
  // FIXME 3rd arg hardcoded to true
47
47
  );
48
48
  if (data.error) throw new Error(data.error);
49
49
  let result;
50
- if (q.coordTWs.length > 0) {
50
+ if (q.coordTWs && q.coordTWs.length > 0) {
51
51
  const tmp = await getSampleCoordinatesByTerms(req, q, ds, data);
52
52
  cohortSamples = tmp[0];
53
53
  } else {
@@ -465,6 +465,8 @@ async function loadFile(p, ds) {
465
465
  }
466
466
  async function mayInitiateScatterplots(ds) {
467
467
  if (!ds.cohort.scatterplots) return;
468
+ if (typeof ds.cohort.scatterplots.get == "function") {
469
+ }
468
470
  if (!Array.isArray(ds.cohort.scatterplots.plots)) throw new Error("cohort.scatterplots.plots is not array");
469
471
  for (const p of ds.cohort.scatterplots.plots) {
470
472
  if (!p.name) throw new Error(".name missing from one of scatterplots.plots[]");
@@ -52,7 +52,8 @@ async function getViolin(q, ds) {
52
52
  filter: q.filter,
53
53
  filter0: q.filter0,
54
54
  currentGeneNames: q.currentGeneNames,
55
- __protected__: q.__protected__
55
+ __protected__: q.__protected__,
56
+ __abortSignal: q.__abortSignal
56
57
  },
57
58
  ds
58
59
  );
File without changes