@orkg/scidquest 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
1
// NOTE(review): these three helpers look like a bundler-emitted shim for
// class-field initialisation; the short names are kept because other code in
// this module refers to them.
const g = Object.defineProperty;

/**
 * Sets `target[key]` to `value`.
 *
 * When `key` is already reachable on `target` (own or inherited), an own data
 * property is defined instead of assigning, so inherited accessors or
 * read-only properties are shadowed rather than invoked.
 *
 * @param {object} target - Object receiving the property.
 * @param {string|symbol} key - Property key.
 * @param {*} value - Value to store.
 * @returns {*} `target` when the property was defined, otherwise `value`.
 */
function p(target, key, value) {
  if (key in target) {
    return g(target, key, {
      enumerable: true,
      configurable: true,
      writable: true,
      value,
    });
  }
  target[key] = value;
  return value;
}

/**
 * Same as `p`, but first coerces any non-symbol key to a string.
 *
 * @param {object} target - Object receiving the property.
 * @param {*} key - Property key; symbols are passed through untouched.
 * @param {*} value - Value to store.
 * @returns {*} Whatever `p` returns for the normalised key.
 */
function u(target, key, value) {
  const normalisedKey = typeof key === "symbol" ? key : key + "";
  return p(target, normalisedKey, value);
}
4
+ import { pipeline as k } from "@huggingface/transformers";
5
/**
 * Splits page text into overlapping chunks and ranks them against a question
 * by cosine similarity of sentence embeddings.
 *
 * The embedding model is created lazily through the `pipeline` factory
 * (imported in this module as `k`) and is shared by all calls on the instance.
 */
class f {
  constructor() {
    // Loaded feature-extraction pipeline; stays null until initialize() succeeds.
    this.model = null;
    // Promise of the load in progress, so concurrent callers share one load.
    this.modelLoading = null;
  }

  /**
   * Loads the embedding model once. Concurrent calls share the same load, and
   * a failed load is forgotten so that a later call can retry.
   *
   * @returns {Promise<void>} Resolves when the model is ready.
   * @throws Rethrows whatever error the pipeline factory failed with.
   */
  async initialize() {
    if (this.model) {
      return;
    }
    if (this.modelLoading) {
      return this.modelLoading;
    }

    console.log("[FrontendSemanticChunker] Initializing model...");
    const loading = (async () => {
      try {
        this.model = await k(
          "feature-extraction",
          "Xenova/all-MiniLM-L6-v2",
          { dtype: "q8" }
        );
        console.log("[FrontendSemanticChunker] Model loaded successfully");
      } catch (error) {
        console.error("[FrontendSemanticChunker] Model initialization failed:", error);
        throw error;
      }
    })();
    this.modelLoading = loading;

    try {
      await loading;
    } catch (error) {
      // Reset only after the assignment above. Resetting inside the loader is
      // overwritten when the factory throws synchronously, which would cache
      // the rejected promise and make every retry fail.
      if (this.modelLoading === loading) {
        this.modelLoading = null;
      }
      throw error;
    }
  }

  /**
   * Cuts each page into chunks of roughly 2000 characters on sentence
   * boundaries, carrying the last 80 words of a chunk into the next one.
   *
   * A chunk can exceed the target when a single sentence, or the carried
   * overlap plus the next sentence, is longer than the target.
   *
   * @param {Iterable<{text: string, pageNumber: *}>} e - Pages to split.
   *   Entries that are null, have non-string text, or are blank are skipped.
   * @returns {Array<{text: string, pageNumber: *, startIndex: number,
   *   endIndex: number, tokenEstimate: number}>} Chunks in page order.
   *   `startIndex`/`endIndex` are offsets into the page's sentences joined by
   *   single spaces; they are exact when the page text has no other
   *   whitespace runs and approximate otherwise. `tokenEstimate` is
   *   `ceil(chars / 4)`.
   */
  splitIntoChunks(e) {
    const maxChunkChars = 2000;
    const overlapChars = 400;
    // The overlap is taken in whole words, assuming about 5 characters per word.
    const overlapWords = Math.ceil(overlapChars / 5);
    // Zero-width split point: right after ., ! or ? and right before whitespace.
    const sentenceBoundary = /(?<=[.!?])(?=\s)/;

    const chunks = [];
    const makeChunk = (text, pageNumber, startIndex) => ({
      text,
      pageNumber,
      startIndex,
      endIndex: startIndex + text.length,
      tokenEstimate: Math.ceil(text.length / 4),
    });

    for (const page of e) {
      if (!page || typeof page.text !== "string" || page.text.trim().length === 0) {
        continue;
      }

      // `current` is built only from trimmed sentences joined by one space,
      // so it never has leading or trailing whitespace.
      let current = "";
      let start = 0;

      for (const rawSentence of page.text.split(sentenceBoundary)) {
        const sentence = rawSentence.trim();
        if (!sentence) {
          continue;
        }

        const wouldOverflow = current.length + sentence.length + 1 > maxChunkChars;
        if (current && wouldOverflow) {
          chunks.push(makeChunk(current, page.pageNumber, start));
          const overlap = current.split(/\s+/).slice(-overlapWords).join(" ");
          // The next chunk begins where the carried-over words begin in the
          // chunk just emitted. `overlap` is never longer than `current`, so
          // the offset cannot decrease or go negative.
          start += current.length - overlap.length;
          current = `${overlap} ${sentence}`;
        } else {
          current = current ? `${current} ${sentence}` : sentence;
        }
      }

      if (current) {
        chunks.push(makeChunk(current, page.pageNumber, start));
      }
    }

    console.log(
      `[FrontendSemanticChunker] Total chunks: ${chunks.length}, Target size: ${maxChunkChars} chars`
    );
    return chunks;
  }

  /**
   * Embeds a text with the model, loading the model first if necessary.
   *
   * @param {string} e - Text to embed.
   * @returns {Promise<number[]>} The model output's `data` copied into a plain array.
   */
  async embed(e) {
    await this.initialize();
    const output = await this.model(e, { pooling: "mean", normalize: true });
    return Array.from(output.data);
  }

  /**
   * Cosine similarity of two vectors of equal length.
   *
   * @param {ArrayLike<number>} e - First vector; its length drives the loop.
   * @param {ArrayLike<number>} n - Second vector.
   * @returns {number} Similarity, or 0 when either vector has zero norm, so
   *   callers never receive NaN from a degenerate embedding.
   */
  cosineSimilarity(e, n) {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < e.length; i++) {
      dot += e[i] * n[i];
      normA += e[i] * e[i];
      normB += n[i] * n[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dot / denominator;
  }

  /**
   * Picks the chunks most similar to a question within a token budget.
   *
   * Chunks are considered in descending similarity and taken greedily while
   * they fit. A chunk that does not fit is skipped, and later, smaller chunks
   * may still be taken.
   *
   * @param {string} e - The question.
   * @param {Array<{text: string, pageNumber: *}>} n - Pages to search.
   * @param {number} [c=4000] - Maximum total of `tokenEstimate` to select.
   * @returns {Promise<Array<object>>} Selected chunks, each with an added
   *   `similarity`, ordered by page number and then by `startIndex`.
   */
  async findRelevantChunks(e, n, c = 4e3) {
    console.log(
      `[FrontendSemanticChunker] Finding relevant chunks for question (max ${c} tokens)`
    );
    await this.initialize();

    const chunks = this.splitIntoChunks(n);
    console.log(
      `[FrontendSemanticChunker] Created ${chunks.length} chunks from ${n.length} pages`
    );
    console.log("[FrontendSemanticChunker] Generating embeddings...");

    const chunkEmbeddings = await Promise.all(
      chunks.map((chunk) => this.embed(chunk.text))
    );
    const questionEmbedding = await this.embed(e);

    const ranked = chunks.map((chunk, index) => ({
      ...chunk,
      similarity: this.cosineSimilarity(questionEmbedding, chunkEmbeddings[index]),
    }));
    ranked.sort((a, b) => b.similarity - a.similarity);
    console.log(
      "[FrontendSemanticChunker] Top 5 similarity scores:",
      ranked.slice(0, 5).map((chunk) => `page ${chunk.pageNumber}: ${chunk.similarity.toFixed(4)}`)
    );

    const selected = [];
    let usedTokens = 0;
    for (const chunk of ranked) {
      if (usedTokens + chunk.tokenEstimate <= c) {
        selected.push(chunk);
        usedTokens += chunk.tokenEstimate;
      }
    }
    console.log(
      `[FrontendSemanticChunker] Selected ${selected.length} chunks (${usedTokens} tokens)`
    );

    // Restore reading order: by page, then by position within the page.
    selected.sort(
      (a, b) => a.pageNumber - b.pageNumber || a.startIndex - b.startIndex
    );
    const pages = Array.from(new Set(selected.map((chunk) => chunk.pageNumber)));
    console.log(
      "[FrontendSemanticChunker] Chunks span pages:",
      pages.join(", ")
    );
    return selected;
  }

  /**
   * @returns {boolean} True once the model has been loaded.
   */
  isInitialized() {
    return this.model !== null;
  }
}
99
/**
 * Module-wide chunker instance, exported as `frontendSemanticChunker`, so that
 * every importer works with the same object and therefore the same `model`.
 */
const y = new f();

export { y as frontendSemanticChunker };
package/package.json ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "name": "@orkg/scidquest",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "description": "Standalone research paper contribution and analysis tool",
6
+ "main": "./dist/scidquest.es.js",
7
+ "module": "./dist/scidquest.es.js",
8
+ "types": "./dist/index.d.ts",
9
+ "files": [
10
+ "dist"
11
+ ],
12
+ "exports": {
13
+ ".": {
14
+ "types": "./dist/index.d.ts",
15
+ "import": "./dist/scidquest.es.js",
16
+ "default": "./dist/scidquest.es.js"
17
+ },
18
+ "./dist/contribute-standalone.css": "./dist/contribute-standalone.css"
19
+ },
20
+ "scripts": {
21
+ "dev": "vite",
22
+ "build": "tsc && vite build",
23
+ "preview": "vite preview",
24
+ "prepublishOnly": "npm run build"
25
+ },
26
+ "peerDependencies": {
27
+ "@emotion/react": "^11.14.0",
28
+ "@emotion/styled": "^11.14.0",
29
+ "@mui/material": "^6.4.11",
30
+ "react": "^18.3.1",
31
+ "react-dom": "^18.3.1"
32
+ },
33
+ "dependencies": {
34
+ "@ai-sdk/groq": "^1.2.9",
35
+ "@ai-sdk/mistral": "^2.0.23",
36
+ "@ai-sdk/openai": "^2.0.0-alpha.3",
37
+ "@emotion/react": "^11.14.0",
38
+ "@emotion/styled": "^11.14.0",
39
+ "@huggingface/transformers": "^4.2.0",
40
+ "@mui/icons-material": "^6.4.10",
41
+ "@mui/material": "^6.4.11",
42
+ "@reduxjs/toolkit": "^2.8.2",
43
+ "ai": "^5.0.86",
44
+ "pdfjs-dist": "^5.4.296",
45
+ "react": "^18.3.1",
46
+ "react-dom": "^18.3.1",
47
+ "react-pdf": "^10.3.0",
48
+ "react-redux": "^9.2.0",
49
+ "react-router-dom": "^7.5.0"
50
+ },
51
+ "devDependencies": {
52
+ "@types/node": "^25.1.0",
53
+ "@types/react": "^18.3.18",
54
+ "@types/react-dom": "^18.3.5",
55
+ "@vitejs/plugin-react": "^4.3.4",
56
+ "typescript": "~5.6.2",
57
+ "vite": "^6.0.5",
58
+ "vite-plugin-static-copy": "^4.1.0"
59
+ },
60
+ "publishConfig": {
61
+ "access": "public"
62
+ }
63
+ }