@sjcrh/proteinpaint-server 2.175.0 → 2.176.1-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/routes/termdb.boxplot.js +4 -1
- package/routes/termdb.categories.js +2 -1
- package/routes/termdb.chat.js +124 -0
- package/routes/termdb.cluster.js +1 -0
- package/routes/termdb.config.js +7 -2
- package/routes/termdb.descrstats.js +8 -2
- package/routes/termdb.sampleScatter.js +4 -2
- package/routes/termdb.violin.js +2 -1
- package/routes/types.js +0 -0
- package/src/app.js +1317 -993
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjcrh/proteinpaint-server",
|
|
3
|
-
"version": "2.175.0",
|
|
3
|
+
"version": "2.176.1-0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "a genomics visualization tool for exploring a cohort's genotype and phenotype data",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -65,8 +65,8 @@
|
|
|
65
65
|
"@sjcrh/proteinpaint-python": "2.174.0",
|
|
66
66
|
"@sjcrh/proteinpaint-r": "2.152.1-0",
|
|
67
67
|
"@sjcrh/proteinpaint-rust": "2.175.0",
|
|
68
|
-
"@sjcrh/proteinpaint-shared": "2.
|
|
69
|
-
"@sjcrh/proteinpaint-types": "2.
|
|
68
|
+
"@sjcrh/proteinpaint-shared": "2.176.1-0",
|
|
69
|
+
"@sjcrh/proteinpaint-types": "2.176.1-0",
|
|
70
70
|
"@types/express": "^5.0.0",
|
|
71
71
|
"@types/express-session": "^1.18.1",
|
|
72
72
|
"better-sqlite3": "^12.4.1",
|
package/routes/termdb.boxplot.js
CHANGED
|
@@ -30,7 +30,10 @@ function init({ genomes }) {
|
|
|
30
30
|
if (q.overlayTw) terms.push(q.overlayTw);
|
|
31
31
|
if (q.divideTw) terms.push(q.divideTw);
|
|
32
32
|
try {
|
|
33
|
-
const data = await getData(
|
|
33
|
+
const data = await getData(
|
|
34
|
+
{ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
|
|
35
|
+
ds
|
|
36
|
+
);
|
|
34
37
|
if (data.error) throw new Error(data.error);
|
|
35
38
|
const { absMin, absMax, bins, charts, uncomputableValues, descrStats, outlierMin, outlierMax } = await processData(data, q);
|
|
36
39
|
const returnData = {
|
|
@@ -42,7 +42,8 @@ async function trigger_getcategories(q, res, tdb, ds) {
|
|
|
42
42
|
// optional, from mds3 mayAddGetCategoryArgs()
|
|
43
43
|
rglst: q.rglst,
|
|
44
44
|
// optional, from mds3 mayAddGetCategoryArgs()
|
|
45
|
-
__protected__: q.__protected__
|
|
45
|
+
__protected__: q.__protected__,
|
|
46
|
+
__abortSignal: q.__abortSignal
|
|
46
47
|
};
|
|
47
48
|
const data = await getData(arg, ds);
|
|
48
49
|
if (data.error) throw data.error;
|
package/routes/termdb.chat.js
CHANGED
|
@@ -188,6 +188,18 @@ async function run_chat_pipeline(user_prompt, llm, aiRoute, dataset_json, testin
|
|
|
188
188
|
testing
|
|
189
189
|
);
|
|
190
190
|
mayLog("Time taken for matrix agent:", formatElapsedTime(Date.now() - time12));
|
|
191
|
+
} else if (classResult == "sampleScatter") {
|
|
192
|
+
const time12 = (/* @__PURE__ */ new Date()).valueOf();
|
|
193
|
+
ai_output_json = await extract_samplescatter_terms_from_query(
|
|
194
|
+
user_prompt,
|
|
195
|
+
llm,
|
|
196
|
+
dataset_db_output,
|
|
197
|
+
dataset_json,
|
|
198
|
+
genes_list,
|
|
199
|
+
ds,
|
|
200
|
+
testing
|
|
201
|
+
);
|
|
202
|
+
mayLog("Time taken for sampleScatter agent:", formatElapsedTime(Date.now() - time12));
|
|
191
203
|
} else {
|
|
192
204
|
ai_output_json = { type: "html", html: "Unknown classification value" };
|
|
193
205
|
}
|
|
@@ -701,6 +713,118 @@ function validate_matrix_response(response, common_genes, dataset_json, ds) {
|
|
|
701
713
|
return { type: "plot", plot: pp_plot_json };
|
|
702
714
|
}
|
|
703
715
|
}
|
|
716
|
+
/**
 * Ask the LLM to turn a natural-language request into the parameters of a
 * pre-built sample scatter plot: the plot name, optional color / shape /
 * divide overlays, and an optional simple filter.
 *
 * @param {string} prompt - the user's question; also scanned for gene names
 * @param {*} llm - forwarded unchanged to route_to_appropriate_llm_provider()
 * @param {object} dataset_db_output - must expose `rag_docs`, an array that is
 *   comma-joined into the system prompt
 * @param {object} dataset_json - dataset agent configuration; this function
 *   reads `prebuiltPlots`, `charts`, `DatasetPrompt` and `hasGeneExpression`
 * @param {*} genes_list - forwarded unchanged to extractGenesFromPrompt()
 * @param {object} ds - dataset object, forwarded to the response validator
 * @param {boolean} testing - when truthy, the parsed LLM answer is returned
 *   without validation
 * @returns {Promise<object>} `{ type: "html", html }` when no pre-built plot is
 *   configured, `{ action: "sampleScatter", response }` in testing mode,
 *   otherwise whatever validate_samplescatter_response() returns
 * @throws {string} when the dataset file has no sampleScatter chart entry or
 *   that entry carries no training data (strings are thrown, not Error
 *   objects, to keep the existing contract with callers)
 */
async function extract_samplescatter_terms_from_query(prompt, llm, dataset_db_output, dataset_json, genes_list, ds, testing) {
  const prebuiltPlots = dataset_json.prebuiltPlots;
  if (!prebuiltPlots || prebuiltPlots.length === 0) {
    return { type: "html", html: "No pre-built scatter plots (t-SNE/UMAP) are available for this dataset" };
  }

  // Check the dataset configuration before doing any other work, and report a
  // missing `charts` / `TrainingData` with the intended message instead of
  // crashing with an unrelated TypeError.
  const scatter_ds = dataset_json.charts?.find((chart) => chart.type === "sampleScatter");
  if (!scatter_ds) throw "sampleScatter information is not present in the dataset file.";
  if (!scatter_ds.TrainingData || scatter_ds.TrainingData.length === 0) {
    throw "No training data is provided for the sampleScatter agent.";
  }

  // JSON schema the LLM answer has to follow; it is embedded verbatim in the prompt.
  const Schema = {
    $schema: "http://json-schema.org/draft-07/schema#",
    $ref: "#/definitions/SampleScatterType",
    definitions: {
      SampleScatterType: {
        type: "object",
        properties: {
          plotName: {
            type: "string",
            description: "Name of the pre-built scatter plot to display"
          },
          colorTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as color on the scatter plot. Set to null to remove the color overlay."
          },
          shapeTW: {
            type: ["string", "null"],
            description: "Term name or gene name to overlay as shape on the scatter plot. Set to null to remove the shape overlay."
          },
          term0: {
            type: ["string", "null"],
            description: "Term name to use for Z/Divide which splits the plot into panels. Set to null to remove the divide overlay."
          },
          simpleFilter: {
            type: "array",
            items: { $ref: "#/definitions/FilterTerm" },
            description: "Optional simple filter terms to restrict the sample set"
          }
        },
        required: ["plotName"],
        additionalProperties: false
      },
      ...FILTER_TERM_DEFINITIONS
    }
  };

  const common_genes = extractGenesFromPrompt(prompt, genes_list);
  const training_data = formatTrainingExamples(scatter_ds.TrainingData);
  const plotNames = prebuiltPlots.map((p) => p.name).join(", ");

  // The prompt text is part of the agent's behavior: keep every literal as is.
  let system_prompt =
    "I am an assistant that extracts overlay parameters for pre-built scatter plots (t-SNE/UMAP). The final output must be in the following JSON format with NO extra comments. The JSON schema is as follows: " +
    JSON.stringify(Schema) +
    " The available pre-built plots are: " +
    plotNames +
    '. The "plotName" field must match one of these exactly. The "colorTW", "shapeTW", and "term0" fields should contain names of clinical fields from the sqlite db OR gene names. To remove an overlay, set the corresponding field to null explicitly. If the user does not mention a particular overlay, do NOT include that field in the output (omit it entirely). Only include "colorTW", "shapeTW", or "term0" if the user explicitly mentions coloring, shaping, or dividing. ' +
    FILTER_DESCRIPTION +
    checkField(dataset_json.DatasetPrompt) +
    checkField(scatter_ds.SystemPrompt) +
    "\n The DB content is as follows: " +
    dataset_db_output.rag_docs.join(",") +
    " training data is as follows:" +
    training_data;
  if (dataset_json.hasGeneExpression && common_genes.length > 0) {
    system_prompt += "\n List of relevant genes are as follows (separated by comma(,)):" + common_genes.join(",");
  }
  system_prompt += " Question: {" + prompt + "} answer:";

  const response = await route_to_appropriate_llm_provider(system_prompt, llm);
  if (testing) {
    return { action: "sampleScatter", response: JSON.parse(response) };
  }
  return validate_samplescatter_response(response, common_genes, dataset_json, ds);
}
|
|
773
|
+
/**
 * Validate the LLM answer for a sample scatter request and convert it into a
 * plot state.
 *
 * @param {string} response - JSON text returned by the LLM
 * @param {*} common_genes - forwarded unchanged to validate_term()
 * @param {object} dataset_json - dataset agent configuration; `prebuiltPlots`
 *   (array of objects with a `name`) is read here
 * @param {object} ds - dataset object, forwarded to validate_term() and
 *   validate_filter()
 * @returns {{type: "html", html: string} | {type: "plot", plot: object}}
 *   `html` carries every accumulated validation message; `plot` is returned
 *   only when no message was produced
 * @throws {SyntaxError} when `response` is not valid JSON
 */
function validate_samplescatter_response(response, common_genes, dataset_json, ds) {
  const response_type = JSON.parse(response);
  let html = "";
  if (response_type.html) html = response_type.html;

  // Name that ends up in the plot state. The lookup below is case-insensitive,
  // so replace the LLM's spelling with the configured one when a plot matches.
  let plotName = response_type.plotName;
  if (typeof plotName !== "string" || plotName.length === 0) {
    // Also covers non-string values, which used to crash on .toLowerCase().
    html += "plotName is required for sample scatter output";
  } else {
    const wanted = plotName.toLowerCase();
    const matchedPlot = dataset_json.prebuiltPlots.find((p) => p.name.toLowerCase() === wanted);
    if (matchedPlot) {
      plotName = matchedPlot.name;
    } else {
      const availablePlots = dataset_json.prebuiltPlots.map((p) => p.name).join(", ");
      html += "Unknown plot name: " + plotName + ". Available plots are: " + availablePlots;
    }
  }

  const pp_plot_json = {
    chartType: "sampleScatter",
    name: plotName
  };

  // null      -> the overlay is explicitly removed
  // undefined -> the overlay was not mentioned, leave the key out
  // otherwise -> the name must resolve through validate_term()
  const validateOverlayTerm = (termName, fieldKey) => {
    if (termName === null) {
      pp_plot_json[fieldKey] = null;
      return;
    }
    if (termName === undefined) return;

    const termValidation = validate_term(termName, common_genes, dataset_json, ds);
    if (termValidation.html.length > 0) {
      html += termValidation.html;
      return;
    }
    const tw = { ...termValidation.term_type };
    if (termValidation.category === "float" || termValidation.category === "integer") {
      // numeric terms are overlaid in continuous mode
      tw.q = { mode: "continuous" };
    }
    pp_plot_json[fieldKey] = tw;
  };
  validateOverlayTerm(response_type.colorTW, "colorTW");
  validateOverlayTerm(response_type.shapeTW, "shapeTW");
  validateOverlayTerm(response_type.term0, "term0");

  if (response_type.simpleFilter && response_type.simpleFilter.length > 0) {
    const validated_filters = validate_filter(response_type.simpleFilter, ds, "");
    if (validated_filters.html.length > 0) {
      html += validated_filters.html;
    } else {
      pp_plot_json.filter = validated_filters.simplefilter;
    }
  }

  if (html.length > 0) return { type: "html", html };
  return { type: "plot", plot: pp_plot_json };
}
|
|
704
828
|
function validate_term(response_term, common_genes, dataset_json, ds) {
|
|
705
829
|
let html = "";
|
|
706
830
|
let term_type;
|
package/routes/termdb.cluster.js
CHANGED
|
@@ -62,6 +62,7 @@ async function getResult(q, ds) {
|
|
|
62
62
|
if (q.dataType == TermTypes.GENE_EXPRESSION) {
|
|
63
63
|
_q = JSON.parse(JSON.stringify(q));
|
|
64
64
|
_q.forClusteringAnalysis = true;
|
|
65
|
+
_q.__abortSignal = q.__abortSignal;
|
|
65
66
|
}
|
|
66
67
|
let term2sample2value, byTermId, bySampleId, skippedSexChrGenes;
|
|
67
68
|
if (q.dataType == NUMERIC_DICTIONARY_TERM) {
|
package/routes/termdb.config.js
CHANGED
|
@@ -79,8 +79,8 @@ function make(q, req, res, ds, genome) {
|
|
|
79
79
|
if (ds.cohort.correlationVolcano) c.correlationVolcano = ds.cohort.correlationVolcano;
|
|
80
80
|
if (ds.cohort.boxplots) c.boxplots = ds.cohort.boxplots;
|
|
81
81
|
if (tdb.maxGeneVariantGeneSetSize) c.maxGeneVariantGeneSetSize = tdb.maxGeneVariantGeneSetSize;
|
|
82
|
+
if (tdb.maxAnnoTermsPerClientRequest) c.maxAnnoTermsPerClientRequest = tdb.maxAnnoTermsPerClientRequest;
|
|
82
83
|
addRestrictAncestries(c, tdb);
|
|
83
|
-
addScatterplots(c, ds);
|
|
84
84
|
addMatrixplots(c, ds);
|
|
85
85
|
addMutationSignatureplots(c, ds);
|
|
86
86
|
addNonDictionaryQueries(c, ds, genome);
|
|
@@ -89,6 +89,7 @@ function make(q, req, res, ds, genome) {
|
|
|
89
89
|
c.clientAuthResult = info?.clientAuthResult || {};
|
|
90
90
|
if (tdb.displaySampleIds) c.displaySampleIds = tdb.displaySampleIds(c.clientAuthResult);
|
|
91
91
|
c.authFilter = req.query.filter;
|
|
92
|
+
addScatterplots(c, ds, info);
|
|
92
93
|
res.send({ termdbConfig: c });
|
|
93
94
|
}
|
|
94
95
|
function addRestrictAncestries(c, tdb) {
|
|
@@ -97,8 +98,12 @@ function addRestrictAncestries(c, tdb) {
|
|
|
97
98
|
return { name: i.name, tvs: i.tvs, PCcount: i.PCcount };
|
|
98
99
|
});
|
|
99
100
|
}
|
|
100
|
-
function addScatterplots(c, ds) {
|
|
101
|
+
function addScatterplots(c, ds, info) {
|
|
101
102
|
if (!ds.cohort.scatterplots) return;
|
|
103
|
+
if (ds.cohort.scatterplots.get) {
|
|
104
|
+
c.scatterplots = ds.cohort.scatterplots.get(info?.clientAuthResult);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
102
107
|
c.scatterplots = ds.cohort.scatterplots.plots.map((p) => {
|
|
103
108
|
return {
|
|
104
109
|
name: p.name,
|
|
@@ -37,14 +37,20 @@ function init({ genomes }) {
|
|
|
37
37
|
}
|
|
38
38
|
async function trigger_getDescrStats(q, ds) {
|
|
39
39
|
const data = await getData(
|
|
40
|
-
{
|
|
40
|
+
{
|
|
41
|
+
filter: q.filter,
|
|
42
|
+
filter0: q.filter0,
|
|
43
|
+
terms: [q.tw],
|
|
44
|
+
__protected__: q.__protected__,
|
|
45
|
+
__abortSignal: q.__abortSignal
|
|
46
|
+
},
|
|
41
47
|
ds
|
|
42
48
|
);
|
|
43
49
|
if (data.error) throw data.error;
|
|
44
50
|
const values = [];
|
|
45
51
|
for (const key in data.samples) {
|
|
46
52
|
const sample = data.samples[key];
|
|
47
|
-
const v = sample[q.tw.$id];
|
|
53
|
+
const v = sample[q.tw.$id || ""];
|
|
48
54
|
if (!v && v !== 0) {
|
|
49
55
|
continue;
|
|
50
56
|
}
|
|
@@ -40,14 +40,14 @@ function init({ genomes }) {
|
|
|
40
40
|
if (q.scaleDotTW) terms.push(q.scaleDotTW);
|
|
41
41
|
if (q.coordTWs) for (const tw of q.coordTWs) terms.push(tw);
|
|
42
42
|
const data = await getData(
|
|
43
|
-
{ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__ },
|
|
43
|
+
{ filter: q.filter, filter0: q.filter0, terms, __protected__: q.__protected__, __abortSignal: q.__abortSignal },
|
|
44
44
|
ds,
|
|
45
45
|
true
|
|
46
46
|
// FIXME 3rd arg hardcoded to true
|
|
47
47
|
);
|
|
48
48
|
if (data.error) throw new Error(data.error);
|
|
49
49
|
let result;
|
|
50
|
-
if (q.coordTWs.length > 0) {
|
|
50
|
+
if (q.coordTWs && q.coordTWs.length > 0) {
|
|
51
51
|
const tmp = await getSampleCoordinatesByTerms(req, q, ds, data);
|
|
52
52
|
cohortSamples = tmp[0];
|
|
53
53
|
} else {
|
|
@@ -465,6 +465,8 @@ async function loadFile(p, ds) {
|
|
|
465
465
|
}
|
|
466
466
|
async function mayInitiateScatterplots(ds) {
|
|
467
467
|
if (!ds.cohort.scatterplots) return;
|
|
468
|
+
if (typeof ds.cohort.scatterplots.get == "function") {
|
|
469
|
+
}
|
|
468
470
|
if (!Array.isArray(ds.cohort.scatterplots.plots)) throw new Error("cohort.scatterplots.plots is not array");
|
|
469
471
|
for (const p of ds.cohort.scatterplots.plots) {
|
|
470
472
|
if (!p.name) throw new Error(".name missing from one of scatterplots.plots[]");
|
package/routes/termdb.violin.js
CHANGED
package/routes/types.js
ADDED
|
File without changes
|