@sjovanovic/recall.js 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +92 -89
- package/package.json +7 -7
- package/recall.js +351 -375
- package/utils/sanitize.js +34 -0
package/README.md
CHANGED
|
@@ -4,15 +4,11 @@
|
|
|
4
4
|
<img alt="Recall.js is long term memory for AI apps!" src="logo.svg" />
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
|
-
Recall.js
|
|
7
|
+
Recall.js provides long‑term memory for AI applications. It is a JavaScript library and command‑line tool for building Retrieval‑Augmented Generation (RAG) systems, with a focus on speed, ease of use, and embeddability.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Beyond RAG, recall.js can be used for generic semantic search, as expert memory for your AI app, or as a recommendation system. It supports multilingual embeddings out of the box, allowing you to add data in one language and query it in another.
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
Recall.js supports multilingual embeddings out of the box so you can add data in one language and then query it in another.
|
|
14
|
-
|
|
15
|
-
Under the hood, recall.js uses sentence vector embeddings and a vector database to index and query your data. It is a light wrapper around local language models such as [MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) and [CozoDB](https://www.cozodb.org/) vector database.
|
|
11
|
+
Under the hood, recall.js uses [Transformers.js](https://huggingface.co/docs/transformers.js/index) for feature extraction and a vector database (powered by [CozoDB](https://www.cozodb.org/)) for indexing and querying. It is a lightweight wrapper around local language models such as [Multilingual-MiniLM-L12-v2](https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2).
|
|
16
12
|
|
|
17
13
|
## Install
|
|
18
14
|
|
|
@@ -20,60 +16,57 @@ Under the hood, recall.js uses sentence vector embeddings and a vector database
|
|
|
20
16
|
|
|
21
17
|
## Usage
|
|
22
18
|
|
|
23
|
-
|
|
19
|
+
### Command Line
|
|
24
20
|
|
|
25
21
|
```console
|
|
26
22
|
recall --add 'The quick brown fox jumps over the lazy dog|Fox|{"foo":"bar"}'
|
|
27
23
|
recall --query "Un animal saute par-dessus un autre animal" --limit 1
|
|
28
24
|
```
|
|
29
|
-
**
|
|
25
|
+
> **Note:** When the library is used for the first time, it will download a local language model (Multilingual-MiniLM-L12-v2). This may take a while depending on your internet connection. Please be patient.
|
|
30
26
|
|
|
31
|
-
|
|
27
|
+
### JavaScript
|
|
32
28
|
|
|
33
29
|
```javascript
|
|
34
30
|
|
|
35
|
-
import
|
|
31
|
+
import Recall from '@sjovanovic/recall.js'
|
|
36
32
|
|
|
37
33
|
const testRecall = async () => {
|
|
38
|
-
await RECALL.addBatch([
|
|
39
|
-
{
|
|
40
|
-
input: "The quick brown fox jumps over the lazy dog",
|
|
41
|
-
result: "Fox and dog",
|
|
42
|
-
data: { foo: "bar" }
|
|
43
|
-
}
|
|
44
|
-
])
|
|
45
|
-
|
|
46
|
-
// Semantic search query in different language (French) "Animal jumps over another animal"
|
|
47
|
-
let response = await RECALL.searchText("Un animal saute par-dessus un autre animal", 1)
|
|
48
|
-
console.log(response)
|
|
49
|
-
}
|
|
50
|
-
testRecall()
|
|
51
34
|
|
|
52
|
-
|
|
35
|
+
let config = {
|
|
36
|
+
SHOW_PROGRESS: true
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
let recall = new Recall(config)
|
|
53
40
|
|
|
54
|
-
|
|
41
|
+
await recall.addBatch([
|
|
42
|
+
{
|
|
43
|
+
input: "The quick brown fox jumps over the lazy dog",
|
|
44
|
+
result: "Fox and dog",
|
|
45
|
+
data: { foo: "bar" }
|
|
46
|
+
}
|
|
47
|
+
])
|
|
55
48
|
|
|
49
|
+
// Semantic search query in different language (French) "Animal jumps over another animal"
|
|
50
|
+
let response = await recall.searchText("Un animal saute par-dessus un autre animal", "", 1)
|
|
51
|
+
console.log(response)
|
|
52
|
+
}
|
|
53
|
+
testRecall()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Example response:**
|
|
57
|
+
```json
|
|
56
58
|
{
|
|
57
|
-
"headers": [
|
|
58
|
-
"dist",
|
|
59
|
-
"result",
|
|
60
|
-
"id",
|
|
61
|
-
"data"
|
|
62
|
-
],
|
|
59
|
+
"headers": ["dist", "result", "id", "data", "category"],
|
|
63
60
|
"rows": [
|
|
64
61
|
[
|
|
65
|
-
0.
|
|
62
|
+
0.6840495824813843,
|
|
66
63
|
"Fox and dog",
|
|
67
64
|
"08840189191373282",
|
|
68
|
-
{
|
|
69
|
-
|
|
70
|
-
}
|
|
65
|
+
{ "foo": "bar" },
|
|
66
|
+
""
|
|
71
67
|
]
|
|
72
68
|
]
|
|
73
69
|
}
|
|
74
|
-
|
|
75
|
-
*/
|
|
76
|
-
|
|
77
70
|
```
|
|
78
71
|
|
|
79
72
|
## Options
|
|
@@ -87,17 +80,17 @@ Usage:
|
|
|
87
80
|
recall --query "Foo Bar"
|
|
88
81
|
|
|
89
82
|
Options:
|
|
90
|
-
--query "SEARCH_STRING"
|
|
91
|
-
--limit
|
|
92
|
-
--add 'input|result|{"foo":"bar"}'
|
|
93
|
-
--remove 'id'
|
|
94
|
-
--nuke
|
|
95
|
-
--
|
|
96
|
-
--
|
|
97
|
-
--
|
|
98
|
-
--
|
|
99
|
-
--
|
|
100
|
-
--
|
|
83
|
+
--query "SEARCH_STRING" - Search the database
|
|
84
|
+
--limit N - Limit number of results (used with --query).
|
|
85
|
+
--add 'input|result|{"foo":"bar"}|categ' - Add a data entry.
|
|
86
|
+
--remove 'id' - Remove data by ID.
|
|
87
|
+
--nuke - Destroy the database.
|
|
88
|
+
--db "FILE_NAME" - Specify database file (SQLite).
|
|
89
|
+
--import "file.csv | file.tsv" - Import from CSV or TSV with columns: input, result, additional data.
|
|
90
|
+
--input-header "foo" - When used with --import, designate a specific header column as input.
|
|
91
|
+
--result-header "bar" - When used with --import, designate a specific header column as result.
|
|
92
|
+
--json "FILE_NAME" - Import from a file with one JSON object per line: {"input":"", "result":"", "data":{}}.
|
|
93
|
+
--category "CATEGORY" - Specify category when adding data and filter by it when querying (defaults to empty string).
|
|
101
94
|
```
|
|
102
95
|
|
|
103
96
|
**Note:** When adding data, recall generates a unique ID automatically. To set a custom ID, add it as a string property named "id" in the data object (e.g. `{"id":"customID"}`).
|
|
@@ -105,69 +98,79 @@ Options:
|
|
|
105
98
|
|
|
106
99
|
## JavaScript API Reference
|
|
107
100
|
|
|
108
|
-
###
|
|
101
|
+
### Configuration
|
|
109
102
|
|
|
110
|
-
|
|
103
|
+
The default configuration object is exported as config:
|
|
111
104
|
|
|
112
105
|
```javascript
|
|
113
106
|
export const config = {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
107
|
+
VECTOR_SIZE: 384, // Number of dimensions (must match the model's output)
|
|
108
|
+
MODEL_NAME: 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', // Model name for Transformers.js
|
|
109
|
+
SHOW_ERRORS: true, // Show error messages
|
|
110
|
+
DB_FILE: join(PATH, 'vector.db'), // Path to the SQLite database file (used by CozoDB)
|
|
111
|
+
PATH: PATH, // Directory of recall.js
|
|
112
|
+
DEVICE: undefined, // Transformers.js device
|
|
113
|
+
DTYPE: undefined, // Transformers.js dtype
|
|
114
|
+
PROGRESS_CALLBACK: undefined // Transformers.js progress callback
|
|
119
115
|
}
|
|
120
116
|
```
|
|
121
117
|
|
|
122
|
-
###
|
|
118
|
+
### Methods
|
|
119
|
+
|
|
120
|
+
**getDb()**
|
|
123
121
|
|
|
124
|
-
Returns reference to the CozoDB instance.
|
|
122
|
+
Returns reference to the underlying CozoDB instance.
|
|
125
123
|
|
|
126
|
-
|
|
124
|
+
**getEmbeddings(text) -> Promise<Array>**
|
|
127
125
|
|
|
128
126
|
Calculates the embedding vector for the given text.
|
|
129
127
|
|
|
130
|
-
|
|
128
|
+
**add(input, result, data={}, category="") -> Promise<Object>**
|
|
131
129
|
|
|
132
|
-
|
|
130
|
+
Adds a data entry.
|
|
133
131
|
|
|
134
|
-
|
|
132
|
+
- input – The sentence to generate embeddings from.
|
|
133
|
+
- result – The string to display in search results.
|
|
134
|
+
- data – Arbitrary object for additional information and references. If it contains an id property, that value will be used as the record’s unique ID.
|
|
135
|
+
- category – Optional category string.
|
|
135
136
|
|
|
136
|
-
|
|
137
|
-
`batch` is an Array that looks like this:
|
|
138
|
-
```
|
|
139
|
-
let batch = [{input:"", result:"", data:{}}]
|
|
140
|
-
```
|
|
137
|
+
**addBatch(batch) -> Promise<Object>**
|
|
141
138
|
|
|
142
|
-
|
|
139
|
+
Adds multiple entries in a batch (more efficient than repeated add calls).
|
|
140
|
+
batch is an array of objects with the same structure as add:
|
|
141
|
+
```javascript
|
|
142
|
+
let batch = [
|
|
143
|
+
{ input: "", result: "", data: {}, category: "" }
|
|
144
|
+
]
|
|
145
|
+
```
|
|
143
146
|
|
|
144
|
-
|
|
147
|
+
**remove(id) -> Promise<Object>**
|
|
145
148
|
|
|
146
|
-
|
|
149
|
+
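Removes the record with the specified ID (string). The implementation also accepts a second `category` argument and returns without removing anything when the ID or the category is empty.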
|
|
147
150
|
|
|
148
|
-
|
|
151
|
+
**searchText(text, category="", numResults = 5, includeInput=false) -> Promise<Object>**
|
|
149
152
|
|
|
150
|
-
|
|
153
|
+
Queries the vector database.
|
|
151
154
|
|
|
152
|
-
|
|
155
|
+
- text – The query text.
|
|
156
|
+
- category – Optional category filter.
|
|
157
|
+
- numResults – Number of results to return.
|
|
158
|
+
- includeInput – If true, the original input text is included in the response.
|
|
153
159
|
|
|
154
|
-
|
|
160
|
+
**nuke()**
|
|
155
161
|
|
|
156
|
-
|
|
157
|
-
```
|
|
158
|
-
{input:"one", result:"one result", data:{"id":"123"}}
|
|
159
|
-
{input:"", result:"", data:{}}
|
|
160
|
-
...
|
|
161
|
-
```
|
|
162
|
-
This is the most efficient way to import data.
|
|
162
|
+
Deletes the entire database.
|
|
163
163
|
|
|
164
|
-
|
|
164
|
+
**importFromJSONStream(fileName) -> Promise<Object>**
|
|
165
165
|
|
|
166
|
-
Imports from
|
|
167
|
-
|
|
168
|
-
|
|
166
|
+
Imports data from a readable stream or file containing one JSON object per line (JSONL). Example line format:
|
|
167
|
+
```json
|
|
168
|
+
{"input":"one", "result":"one result", "data":{"id":"123"}, "category":""}
|
|
169
|
+
```
|
|
170
|
+
This is the most efficient import method.
|
|
169
171
|
|
|
170
|
-
|
|
172
|
+
**importFromCSVorTSV(fileName, inputHeader=null, resultHeader=null) -> Promise<Object>**
|
|
171
173
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
+
Imports data from a CSV or TSV file. By default, the first column is used as input, the second as result, and the remaining columns are merged into the data object.
|
|
175
|
+
If `inputHeader` is specified, the function looks for a column with that name and uses it as input.
|
|
176
|
+
If `resultHeader` is specified, it looks for a column with that name and uses it as result.
|
package/package.json
CHANGED
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjovanovic/recall.js",
|
|
3
|
-
"version": "1.0.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.0.5",
|
|
4
|
+
"description": "Easy RAG with semantic search and long term memory",
|
|
5
5
|
"main": "recall.js",
|
|
6
6
|
"bin": {
|
|
7
7
|
"recall": "recall.js"
|
|
8
8
|
},
|
|
9
9
|
"type": "module",
|
|
10
10
|
"scripts": {
|
|
11
|
-
"
|
|
11
|
+
"start": "node recall.js",
|
|
12
|
+
"test": "node recall.js --test",
|
|
12
13
|
"query": "node recall.js --query "
|
|
13
14
|
},
|
|
14
15
|
"author": "Slobodan Jovanovic",
|
|
15
16
|
"license": "ISC",
|
|
16
17
|
"dependencies": {
|
|
17
|
-
"@
|
|
18
|
-
"@
|
|
19
|
-
"@xenova/transformers": "^2.17.2",
|
|
18
|
+
"@huggingface/transformers": "^4.2.0",
|
|
19
|
+
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
20
20
|
"cozo-node": "^0.7.6",
|
|
21
21
|
"csv-parser": "^3.2.0",
|
|
22
|
-
"zod": "^3.
|
|
22
|
+
"zod": "^4.3.6"
|
|
23
23
|
}
|
|
24
24
|
}
|
package/recall.js
CHANGED
|
@@ -1,429 +1,409 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {CozoDb} from 'cozo-node'
|
|
3
|
-
import
|
|
3
|
+
import { pipeline } from "@huggingface/transformers";
|
|
4
4
|
import csv from 'csv-parser'
|
|
5
5
|
import fs from 'fs'
|
|
6
6
|
import { resolve, join, dirname, sep } from 'path'
|
|
7
7
|
import { fileURLToPath } from 'url'
|
|
8
8
|
|
|
9
|
-
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
10
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
11
|
-
import { z } from "zod";
|
|
12
|
-
|
|
13
9
|
const pathToThisFile = resolve(fileURLToPath(import.meta.url))
|
|
14
10
|
const pathPassedToNode = resolve(process.argv[1])
|
|
15
11
|
const isThisFileBeingRunViaCLI = pathToThisFile.includes(pathPassedToNode) || pathPassedToNode.includes('.npm-global')
|
|
16
12
|
const PATH = dirname(pathToThisFile)
|
|
17
13
|
|
|
18
14
|
export const config = {
|
|
19
|
-
VECTOR_SIZE: 384, // number of dimensions
|
|
20
|
-
MODEL_NAME: 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', // model to use
|
|
15
|
+
VECTOR_SIZE: 384, // number of dimensions (must match the model's output)
|
|
16
|
+
MODEL_NAME: 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', // model to use (passed to Transformers.js)
|
|
21
17
|
SHOW_ERRORS: true, // Show errors
|
|
18
|
+
SHOW_PROGRESS: false, // Show model loading progress in the console
|
|
22
19
|
DB_FILE: join(PATH, 'vector.db'), // Path to the database file (SQLite file used by CozoDB)
|
|
23
|
-
PATH: PATH // directory of recall.js
|
|
20
|
+
PATH: PATH, // directory of recall.js
|
|
21
|
+
DEVICE: undefined, // Transformers.js device
|
|
22
|
+
DTYPE: undefined, // Transformers.js dtype
|
|
23
|
+
PROGRESS_CALLBACK: undefined // Transformers.js progress_callback
|
|
24
24
|
}
|
|
25
|
+
var recal_instance = null
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
27
|
+
export class Recall {
|
|
28
|
+
constructor(opts = {}){
|
|
29
|
+
this.opts = {
|
|
30
|
+
...config,
|
|
31
|
+
...opts
|
|
32
|
+
}
|
|
33
|
+
this.initDone = false
|
|
34
|
+
this.db = new CozoDb('sqlite', this.opts.DB_FILE)
|
|
35
|
+
}
|
|
36
|
+
async printQuery(query, params = {}) {
|
|
37
|
+
try{
|
|
38
|
+
if(!this.initDone) {
|
|
39
|
+
this.initDone = true
|
|
40
|
+
await this.createTable()
|
|
41
|
+
}
|
|
42
|
+
}catch(err) {}
|
|
43
|
+
try {
|
|
44
|
+
let data = this.db.run(query, params)
|
|
45
|
+
return data
|
|
46
|
+
}catch(err){
|
|
47
|
+
if(this.opts.SHOW_ERRORS) console.error(err.display || err.message)
|
|
48
|
+
}
|
|
31
49
|
}
|
|
32
|
-
return db
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
async function printQuery(query, params = {}) {
|
|
36
50
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
await
|
|
51
|
+
async getEmbeddings(text){
|
|
52
|
+
let pipe = this.opts._pipe
|
|
53
|
+
if(!pipe) {
|
|
54
|
+
this.opts._pipe = await pipeline("feature-extraction", this.opts.MODEL_NAME, {
|
|
55
|
+
progress_callback:(progress) => {
|
|
56
|
+
if(this.opts.PROGRESS_CALLBACK) return this.opts.PROGRESS_CALLBACK(progress);
|
|
57
|
+
if(this.opts.SHOW_PROGRESS && progress.status === "progress_total"){
|
|
58
|
+
process.stdout.write(`\r\x1b[K✅ Loaded ${ Math.round(progress.progress)}% ${progress.name || "model"}`)
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
device: this.opts.DEVICE,
|
|
62
|
+
dtype: this.opts.DTYPE
|
|
63
|
+
});
|
|
64
|
+
pipe = this.opts._pipe
|
|
41
65
|
}
|
|
42
|
-
|
|
43
|
-
|
|
66
|
+
const embedding = await pipe(text, { pooling: "mean", normalize: true });
|
|
67
|
+
return Array.from(embedding.data)
|
|
44
68
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if(
|
|
69
|
+
|
|
70
|
+
async createTable() {
|
|
71
|
+
// create table
|
|
72
|
+
let tableCreated = await this.printQuery(`:create embeddings {id: String, category: String => v: <F32; ${this.opts.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
73
|
+
if(tableCreated){
|
|
74
|
+
// create vector index
|
|
75
|
+
let indexCreated = await printQuery(`::hnsw create embeddings:index_name {
|
|
76
|
+
dim: ${this.opts.VECTOR_SIZE},
|
|
77
|
+
m: 50,
|
|
78
|
+
dtype: F32,
|
|
79
|
+
fields: [v],
|
|
80
|
+
distance: L2, # Cosine, IP
|
|
81
|
+
ef_construction:50, # number of nearest neighbors
|
|
82
|
+
extend_candidates: false, # include nearest neighbors of the nearest neighbors
|
|
83
|
+
keep_pruned_connections: false,
|
|
84
|
+
}`)
|
|
85
|
+
return tableCreated && indexCreated
|
|
86
|
+
}
|
|
87
|
+
return false
|
|
50
88
|
}
|
|
51
|
-
}
|
|
52
89
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
}
|
|
90
|
+
async add(input, result, data={}, category="") {
|
|
91
|
+
if(!input || !result) return
|
|
92
|
+
input = this.sanitizeString(input)
|
|
93
|
+
result = this.sanitizeString(result)
|
|
94
|
+
const embedding = await this.getEmbeddings(input)
|
|
95
|
+
let id = data.id || Math.random().toString().substring(2)
|
|
96
|
+
return await printQuery(`?[id, v, input, result, data, category] <- [["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input.replaceAll('"', "'"))}, ${JSON.stringify(result.replaceAll('"', "'"))}, ${JSON.stringify(data)}, ${JSON.stringify(category.replaceAll('"', "'"))} ]]
|
|
97
|
+
:put embeddings {id, category => v, input, result, data}
|
|
98
|
+
`)
|
|
99
|
+
}
|
|
61
100
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
let tableCreated = await printQuery(`:create embeddings {id: String, category: String => v: <F32; ${config.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
65
|
-
if(tableCreated){
|
|
66
|
-
// create index
|
|
67
|
-
let indexCreated = await printQuery(`::hnsw create embeddings:index_name {
|
|
68
|
-
dim: ${config.VECTOR_SIZE},
|
|
69
|
-
m: 50,
|
|
70
|
-
dtype: F32,
|
|
71
|
-
fields: [v],
|
|
72
|
-
distance: L2, # Cosine, IP
|
|
73
|
-
ef_construction:50, # number of nearest neighbors
|
|
74
|
-
extend_candidates: false, # include nearest neighbors of the nearest neighbors
|
|
75
|
-
keep_pruned_connections: false,
|
|
76
|
-
}`)
|
|
77
|
-
return tableCreated && indexCreated
|
|
101
|
+
sanitizeString(str){
|
|
102
|
+
return str.replace(/[\/#$%\^&\*{}=_`~()\"]/g," ").replace(/\s{2,}/g, " ").trim()
|
|
78
103
|
}
|
|
79
|
-
return false
|
|
80
|
-
}
|
|
81
104
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
105
|
+
/**
|
|
106
|
+
*
|
|
107
|
+
* Batch array:
|
|
108
|
+
* [{input:"", result:"", data:{}}]
|
|
109
|
+
*
|
|
110
|
+
* @param {Array} batch
|
|
111
|
+
* @returns
|
|
112
|
+
*/
|
|
113
|
+
async addBatch(batch, opts={onProgress:null}) {
|
|
114
|
+
if(!batch || !Array.isArray(batch)) return
|
|
115
|
+
let vectorBatch = []
|
|
116
|
+
for(let i=0;i<batch.length; i++){
|
|
117
|
+
let {input, result, data, category} = batch[i]
|
|
118
|
+
|
|
119
|
+
if(!input || !result) continue
|
|
120
|
+
if(!data) data = {}
|
|
121
|
+
if(!category) category = ''
|
|
122
|
+
const embedding = await this.getEmbeddings(input)
|
|
123
|
+
batch[i].embedding = embedding
|
|
124
|
+
let item = ''
|
|
125
|
+
if(i == 0) {
|
|
126
|
+
item += `?[id, v, input, result, data, category] <- [`
|
|
127
|
+
}
|
|
93
128
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
* Batch array:
|
|
97
|
-
* [{input:"", result:"", data:{}}]
|
|
98
|
-
*
|
|
99
|
-
* @param {Array} batch
|
|
100
|
-
* @returns
|
|
101
|
-
*/
|
|
102
|
-
export const addBatch = async (batch) => {
|
|
103
|
-
if(!batch || !Array.isArray(batch)) return
|
|
104
|
-
let vectorBatch = []
|
|
105
|
-
for(let i=0;i<batch.length; i++){
|
|
106
|
-
let {input, result, data, category} = batch[i]
|
|
107
|
-
|
|
108
|
-
if(!input || !result) continue
|
|
109
|
-
if(!data) data = {}
|
|
110
|
-
if(!category) category = ''
|
|
111
|
-
const embedding = await getEmbeddings(input)
|
|
112
|
-
batch[i].embedding = embedding
|
|
113
|
-
let item = ''
|
|
114
|
-
if(i == 0) {
|
|
115
|
-
item += `?[id, v, input, result, data, category] <- [`
|
|
116
|
-
}
|
|
129
|
+
input = this.sanitizeString(input)
|
|
130
|
+
result = this.sanitizeString(result)
|
|
117
131
|
|
|
118
|
-
|
|
119
|
-
|
|
132
|
+
let id = data?.id ? data.id : Math.random().toString().substring(2)
|
|
133
|
+
item += `["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input)}, ${JSON.stringify(result)}, ${JSON.stringify(data)}, ${JSON.stringify(category)} ],`
|
|
134
|
+
if(i == batch.length-1) {
|
|
135
|
+
item += `]
|
|
136
|
+
:put embeddings {id, category => v, input, result, data}`
|
|
137
|
+
}
|
|
138
|
+
vectorBatch.push(item)
|
|
120
139
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
item += `]
|
|
125
|
-
:put embeddings {id, category => v, input, result, data}`
|
|
140
|
+
if(opts.onProgress && typeof opts.onProgress == 'function') {
|
|
141
|
+
await opts.onProgress({index: i+1, total:batch.length, item: batch[i], embedding, percent: Math.round((i+1) / batch.length * 100)})
|
|
142
|
+
}
|
|
126
143
|
}
|
|
127
|
-
vectorBatch.
|
|
144
|
+
return await this.printQuery(vectorBatch.join("\n"))
|
|
128
145
|
}
|
|
129
|
-
return await printQuery(vectorBatch.join("\n"))
|
|
130
|
-
}
|
|
131
146
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}, q = vec(${JSON.stringify(embedding)})
|
|
156
|
-
:sort -dist`)
|
|
157
|
-
return results
|
|
158
|
-
}
|
|
159
|
-
|
|
160
|
-
export const vectorSearch = async (query, category='', numResults=5) => {
|
|
161
|
-
let result = undefined
|
|
162
|
-
try{
|
|
163
|
-
result = await searchText(query, category, numResults)
|
|
164
|
-
}catch(err){
|
|
165
|
-
if(config.SHOW_ERRORS) console.error(err.display || err.message)
|
|
147
|
+
async remove(id, category="") {
|
|
148
|
+
if(!id || typeof id != 'string') return
|
|
149
|
+
id = id.replace(/[^a-zA-Z0-9]/g, '')
|
|
150
|
+
category = this.sanitizeString(category)
|
|
151
|
+
if(!id || !category) return
|
|
152
|
+
let results = await this.printQuery(
|
|
153
|
+
`?[id, category] <- [['${id}', '${category}']]
|
|
154
|
+
::rm embeddings {id, category}`)
|
|
155
|
+
return results
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
async removeAllByCategory(category=""){
|
|
159
|
+
category = this.sanitizeString(category)
|
|
160
|
+
if(!category) return
|
|
161
|
+
let results
|
|
162
|
+
try {
|
|
163
|
+
results = await this.printQuery(
|
|
164
|
+
`?[id, category] := *embeddings{id, category}, category = "${category}"
|
|
165
|
+
:rm embeddings {id, category}`)
|
|
166
|
+
}catch(err){
|
|
167
|
+
console.error(err)
|
|
168
|
+
}
|
|
169
|
+
return results
|
|
166
170
|
}
|
|
167
|
-
return result
|
|
168
|
-
}
|
|
169
171
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
172
|
+
async searchText(text, category="", numResults = 5, includeInput=false) {
|
|
173
|
+
const embedding = await this.getEmbeddings(text)
|
|
174
|
+
let results = await this.printQuery(`?[dist, result, id, data, category${includeInput? ', input' : ''}] := ~embeddings:index_name { id, v, input, result, data, category${includeInput? ', input' : ''} |
|
|
175
|
+
query: q,
|
|
176
|
+
k: ${numResults}, # number of results
|
|
177
|
+
ef: 50, # number of neighbours to consider
|
|
178
|
+
bind_distance: dist,
|
|
179
|
+
filter: category==${JSON.stringify(category)},
|
|
180
|
+
radius: 10.0
|
|
181
|
+
}, q = vec(${JSON.stringify(embedding)})
|
|
182
|
+
:sort dist`)
|
|
183
|
+
return results
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
async vectorSearch(query, category='', numResults=5) {
|
|
187
|
+
let result = undefined
|
|
188
|
+
try{
|
|
189
|
+
result = await this.searchText(query, category, numResults)
|
|
190
|
+
}catch(err){
|
|
191
|
+
if(config.SHOW_ERRORS) console.error(err.display || err.message)
|
|
181
192
|
}
|
|
193
|
+
return result
|
|
182
194
|
}
|
|
183
|
-
args._cmd = process.argv[1].split(sep).pop()
|
|
184
|
-
return args
|
|
185
|
-
}
|
|
186
195
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
}
|
|
196
|
+
nuke() {
|
|
197
|
+
return fs.unlinkSync(this.opts.DB_FILE)
|
|
198
|
+
}
|
|
190
199
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
//console.error(err)
|
|
200
|
+
async importFromJSONStream(fileName) {
|
|
201
|
+
async function jsonStream(readable, callback = async function(){}) {
|
|
202
|
+
readable.setEncoding('utf8');
|
|
203
|
+
let data = '';
|
|
204
|
+
for await (const chunk of readable) {
|
|
205
|
+
if(chunk.indexOf("\n")) {
|
|
206
|
+
pts = chunk.split("\n")
|
|
207
|
+
for(let i=0;i<pts.length; i++){
|
|
208
|
+
data += pts[i]
|
|
209
|
+
try {
|
|
210
|
+
let json = JSON.parse(data)
|
|
211
|
+
await callback(json)
|
|
212
|
+
json = null
|
|
213
|
+
data = ''
|
|
214
|
+
}catch(err) {}
|
|
207
215
|
}
|
|
216
|
+
}else{
|
|
217
|
+
data += chunk;
|
|
208
218
|
}
|
|
209
|
-
}else{
|
|
210
|
-
data += chunk;
|
|
211
219
|
}
|
|
212
220
|
}
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
221
|
+
let batchSize = 40, batch = [], i=0, currentBatch = 0
|
|
222
|
+
let stream = typeof fileName == 'string' ? fs.createReadStream(fileName) : fileName
|
|
223
|
+
await jsonStream(stream, async (json) => {
|
|
224
|
+
if(json.input && json.result){
|
|
225
|
+
if(!json.data) json.data = {}
|
|
226
|
+
if(i % batchSize === 0){
|
|
227
|
+
if(batch.length) {
|
|
228
|
+
currentBatch = currentBatch + 1
|
|
229
|
+
console.log(`Adding batch ${currentBatch} (${batch.length} items)`)
|
|
230
|
+
await this.addBatch(batch)
|
|
231
|
+
batch = []
|
|
232
|
+
}
|
|
225
233
|
}
|
|
234
|
+
batch.push(json)
|
|
235
|
+
i=i+1
|
|
226
236
|
}
|
|
227
|
-
|
|
228
|
-
|
|
237
|
+
})
|
|
238
|
+
if(batch.length) {
|
|
239
|
+
console.log(`Adding batch ${currentBatch + 1} (${batch.length} items)`)
|
|
240
|
+
await this.addBatch(batch)
|
|
229
241
|
}
|
|
230
|
-
})
|
|
231
|
-
if(batch.length) {
|
|
232
|
-
console.log(`Adding batch ${currentBatch + 1} (${batch.length} items)`)
|
|
233
|
-
await addBatch(batch)
|
|
234
242
|
}
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
243
|
+
|
|
244
|
+
async importFromCSVorTSV(fileName, inputHeader, resultHeader) {
|
|
245
|
+
if(!fileName || !fileName.includes('.')) return
|
|
246
|
+
let ext = fileName.split('.').pop()
|
|
247
|
+
ext = ext.toLowerCase()
|
|
248
|
+
if(ext != 'csv' && ext != 'tsv') return console.log('File must have csv or tsv extension')
|
|
249
|
+
let parseOpts = {
|
|
250
|
+
separator: ext == 'tsv' ? '\t' : ',',
|
|
251
|
+
mapHeaders: ({ header, index }) => {
|
|
252
|
+
if(inputHeader) {
|
|
253
|
+
if(inputHeader == header){
|
|
254
|
+
return 'input'
|
|
255
|
+
}
|
|
256
|
+
}else if(index === 0){
|
|
247
257
|
return 'input'
|
|
248
258
|
}
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
if(
|
|
259
|
+
if(resultHeader){
|
|
260
|
+
if(resultHeader == header){
|
|
261
|
+
return 'result'
|
|
262
|
+
}
|
|
263
|
+
}else if(index === 1){
|
|
254
264
|
return 'result'
|
|
255
265
|
}
|
|
256
|
-
|
|
257
|
-
return 'result'
|
|
266
|
+
return header.replaceAll(/\W/gi, '_').replaceAll(/[^a-zA-Z0-9\_]/g, '').toLowerCase()
|
|
258
267
|
}
|
|
259
|
-
return header.replaceAll(/\W/gi, '_').replaceAll(/[^a-zA-Z0-9\_]/g, '').toLowerCase()
|
|
260
268
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
+
let fetchFromFile = async (fileName) => {
|
|
270
|
+
return new Promise(async (resolve, reject)=>{
|
|
271
|
+
let results = []
|
|
272
|
+
fs.createReadStream(fileName)
|
|
273
|
+
.pipe(csv(parseOpts))
|
|
274
|
+
.on('data', async (data) => {
|
|
275
|
+
results.push(data)
|
|
276
|
+
})
|
|
277
|
+
.on('end', () => {
|
|
278
|
+
console.log(`${fileName} loaded.`);
|
|
279
|
+
resolve(results)
|
|
280
|
+
}).on('error', (err) => {
|
|
281
|
+
console.error(err);
|
|
282
|
+
})
|
|
269
283
|
})
|
|
270
|
-
|
|
271
|
-
console.log(`${fileName} loaded.`);
|
|
272
|
-
resolve(results)
|
|
273
|
-
}).on('error', (err) => {
|
|
274
|
-
console.error(err);
|
|
275
|
-
})
|
|
276
|
-
})
|
|
277
|
-
}
|
|
278
|
-
|
|
284
|
+
}
|
|
279
285
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
286
|
+
let results = await fetchFromFile(fileName)
|
|
287
|
+
|
|
288
|
+
let batchSize = 40, batch = [], currentBatch = 0, totalBatches = Math.ceil(results.length / batchSize), dataHeaders = Object.keys(results[results.length-1]).filter(k => k != 'input' && k != 'result'), data
|
|
289
|
+
for(let i=0; i<results.length; i++){
|
|
290
|
+
if(i % batchSize === 0){
|
|
291
|
+
if(batch.length) {
|
|
292
|
+
currentBatch = currentBatch + 1
|
|
293
|
+
console.log(`Adding batch ${currentBatch} of ${totalBatches} (${batch.length} items)`)
|
|
294
|
+
await this.addBatch(batch)
|
|
295
|
+
batch = []
|
|
296
|
+
}
|
|
290
297
|
}
|
|
298
|
+
data = {}
|
|
299
|
+
dataHeaders.forEach(k => k && results[i][k] ? data[k] = results[i][k] : null)
|
|
300
|
+
batch.push({
|
|
301
|
+
input: results[i].input,
|
|
302
|
+
result: results[i].result,
|
|
303
|
+
data
|
|
304
|
+
})
|
|
305
|
+
}
|
|
306
|
+
if(batch.length) {
|
|
307
|
+
console.log(`Adding batch ${currentBatch + 1} of ${totalBatches} (${batch.length} items)`)
|
|
308
|
+
await this.addBatch(batch)
|
|
291
309
|
}
|
|
292
|
-
data = {}
|
|
293
|
-
dataHeaders.forEach(k => k && results[i][k] ? data[k] = results[i][k] : null)
|
|
294
|
-
batch.push({
|
|
295
|
-
input: results[i].input,
|
|
296
|
-
result: results[i].result,
|
|
297
|
-
data
|
|
298
|
-
})
|
|
299
|
-
}
|
|
300
|
-
if(batch.length) {
|
|
301
|
-
console.log(`Adding batch ${currentBatch + 1} of ${totalBatches} (${batch.length} items)`)
|
|
302
|
-
await addBatch(batch)
|
|
303
310
|
}
|
|
311
|
+
|
|
304
312
|
}
|
|
305
313
|
|
|
306
|
-
const
|
|
314
|
+
export const getDb = () => {
|
|
315
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
316
|
+
return recal_instance.db
|
|
317
|
+
}
|
|
307
318
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
// });
|
|
313
|
-
|
|
314
|
-
// // Add an addition tool
|
|
315
|
-
// server.tool("add",
|
|
316
|
-
// { a: z.number(), b: z.number() },
|
|
317
|
-
// async ({ a, b }) => ({
|
|
318
|
-
// content: [{ type: "text", text: String(a + b) }]
|
|
319
|
-
// })
|
|
320
|
-
// );
|
|
321
|
-
|
|
322
|
-
// // Add a dynamic greeting resource
|
|
323
|
-
// server.resource(
|
|
324
|
-
// "greeting",
|
|
325
|
-
// new ResourceTemplate("greeting://{name}", { list: undefined }),
|
|
326
|
-
// async (uri, { name }) => ({
|
|
327
|
-
// contents: [{
|
|
328
|
-
// uri: uri.href,
|
|
329
|
-
// text: `Hello, ${name}!`
|
|
330
|
-
// }]
|
|
331
|
-
// })
|
|
332
|
-
// );
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
const server = new McpServer({
|
|
336
|
-
name: "Recall",
|
|
337
|
-
description: "Recall provides semantic search on the local vector database.",
|
|
338
|
-
version: "1.0.0"
|
|
339
|
-
});
|
|
340
|
-
|
|
341
|
-
// server.resource(
|
|
342
|
-
// "echo",
|
|
343
|
-
// new ResourceTemplate("echo://{message}", { list: undefined }),
|
|
344
|
-
// async (uri, { message }) => ({
|
|
345
|
-
// contents: [{
|
|
346
|
-
// uri: uri.href,
|
|
347
|
-
// text: `Resource echo: ${message}`
|
|
348
|
-
// }]
|
|
349
|
-
// })
|
|
350
|
-
// );
|
|
351
|
-
|
|
352
|
-
server.tool(
|
|
353
|
-
"search",
|
|
354
|
-
{
|
|
355
|
-
text: z.string(),
|
|
356
|
-
//numberOfResults: z.number()
|
|
357
|
-
},
|
|
358
|
-
async ({ text, numberOfResults }) => {
|
|
359
|
-
if(numberOfResults && numberOfResults > 50) numberOfResults = 50
|
|
360
|
-
|
|
361
|
-
let startTime = performance.now()
|
|
362
|
-
let results = await searchText(text, numberOfResults)
|
|
363
|
-
var timeDiff = ((performance.now() - startTime) / 1000).toFixed(2)
|
|
364
|
-
let content = [
|
|
365
|
-
{
|
|
366
|
-
type: "text",
|
|
367
|
-
text: `Sorry. Recal search didn't find anything.`
|
|
368
|
-
}
|
|
369
|
-
]
|
|
370
|
-
if(results && results.rows && results.rows.length) {
|
|
371
|
-
// content = results.rows.map(r => {
|
|
372
|
-
// return {
|
|
373
|
-
// type: "text",
|
|
374
|
-
// text: r[1]
|
|
375
|
-
// }
|
|
376
|
-
// })
|
|
377
|
-
content = [{
|
|
378
|
-
type: "text",
|
|
379
|
-
text: `Recal search found the following results in ${timeDiff}s:`
|
|
380
|
-
}]
|
|
381
|
-
for(let i=0; i<results.rows.length; i++){
|
|
382
|
-
let row = results.rows[i]
|
|
383
|
-
content.push({
|
|
384
|
-
type: "text",
|
|
385
|
-
text: row[1]
|
|
386
|
-
})
|
|
387
|
-
// if(results.rows[2] && Object.keys(results.rows[2])){
|
|
388
|
-
// content.push({
|
|
389
|
-
// type: "json",
|
|
390
|
-
// text: row[2]
|
|
391
|
-
// })
|
|
392
|
-
// }
|
|
393
|
-
}
|
|
394
|
-
}
|
|
319
|
+
/**
 * Forwards a query to `printQuery` on the shared Recall instance, creating
 * the instance on first use.
 *
 * @param {*} query - Passed through unchanged to the instance method.
 * @param {object} [params={}] - Passed through unchanged to the instance method.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
async function printQuery(query, params = {}) {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.printQuery(query, params);
    return outcome;
}
|
|
395
323
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
await
|
|
324
|
+
/**
 * Delegates to `getEmbeddings` on the shared Recall instance, creating the
 * instance on first use.
 *
 * @param {*} text - Passed through unchanged to the instance method.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const getEmbeddings = async (text) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const embeddings = await recal_instance.getEmbeddings(text);
    return embeddings;
};
|
|
328
|
+
|
|
329
|
+
/**
 * Delegates to `createTable` on the shared Recall instance, creating the
 * instance on first use.
 *
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const createTable = async () => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.createTable();
    return outcome;
};
|
|
333
|
+
|
|
334
|
+
/**
 * Delegates to `add` on the shared Recall instance, creating the instance
 * on first use. All arguments are forwarded unchanged.
 *
 * @param {*} input - First argument for the instance method.
 * @param {*} result - Second argument for the instance method.
 * @param {object} [data={}] - Third argument for the instance method.
 * @param {string} [category=""] - Fourth argument for the instance method.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const add = async (input, result, data = {}, category = "") => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.add(input, result, data, category);
    return outcome;
};
|
|
338
|
+
|
|
339
|
+
/**
 * Delegates to `addBatch` on the shared Recall instance, creating the
 * instance on first use.
 *
 * @param {*} batch - Passed through unchanged to the instance method.
 * @param {object} [opts={onProgress:null}] - Passed through unchanged.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const addBatch = async (batch, opts = { onProgress: null }) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.addBatch(batch, opts);
    return outcome;
};
|
|
343
|
+
|
|
344
|
+
/**
 * Delegates to `remove` on the shared Recall instance, creating the
 * instance on first use.
 *
 * @param {*} id - Passed through unchanged to the instance method.
 * @param {string} [category=""] - Passed through unchanged.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const remove = async (id, category = "") => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.remove(id, category);
    return outcome;
};
|
|
348
|
+
|
|
349
|
+
/**
 * Delegates to `removeAllByCategory` on the shared Recall instance,
 * creating the instance on first use.
 *
 * @param {string} [category=""] - Passed through unchanged.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const removeAllByCategory = async (category = "") => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.removeAllByCategory(category);
    return outcome;
};
|
|
353
|
+
|
|
354
|
+
/**
 * Delegates to `searchText` on the shared Recall instance, creating the
 * instance on first use. All arguments are forwarded unchanged.
 *
 * @param {*} text - First argument for the instance method.
 * @param {string} [category=""] - Second argument for the instance method.
 * @param {number} [numResults=5] - Third argument for the instance method.
 * @param {boolean} [includeInput=false] - Fourth argument for the instance method.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const searchText = async (text, category = "", numResults = 5, includeInput = false) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const matches = await recal_instance.searchText(text, category, numResults, includeInput);
    return matches;
};
|
|
420
358
|
|
|
421
|
-
const
|
|
422
|
-
|
|
359
|
+
/**
 * Delegates to `vectorSearch` on the shared Recall instance, creating the
 * instance on first use.
 *
 * @param {*} query - Passed through unchanged to the instance method.
 * @param {string} [category=''] - Passed through unchanged.
 * @param {number} [numResults=5] - Passed through unchanged.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const vectorSearch = async (query, category = '', numResults = 5) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const matches = await recal_instance.vectorSearch(query, category, numResults);
    return matches;
};
|
|
363
|
+
|
|
364
|
+
/**
 * Calls `nuke` on the shared Recall instance, creating the instance first
 * when none exists yet. The call is not awaited here; its return value is
 * handed back to the caller as-is.
 *
 * @returns {*} Whatever the instance's `nuke` method returns.
 */
export const nuke = () => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = recal_instance.nuke();
    return outcome;
};
|
|
368
|
+
|
|
369
|
+
/**
 * Delegates to `importFromJSONStream` on the shared Recall instance,
 * creating the instance on first use.
 *
 * @param {*} fileName - Passed through unchanged to the instance method.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const importFromJSONStream = async (fileName) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.importFromJSONStream(fileName);
    return outcome;
};
|
|
373
|
+
|
|
374
|
+
/**
 * Delegates to `importFromCSVorTSV` on the shared Recall instance,
 * creating the instance on first use.
 *
 * @param {*} fileName - Passed through unchanged to the instance method.
 * @param {*} inputHeader - Passed through unchanged.
 * @param {*} resultHeader - Passed through unchanged.
 * @returns {Promise<*>} Resolves with whatever the instance method resolves to.
 */
export const importFromCSVorTSV = async (fileName, inputHeader, resultHeader) => {
    if (!recal_instance) {
        recal_instance = new Recall();
    }
    const outcome = await recal_instance.importFromCSVorTSV(fileName, inputHeader, resultHeader);
    return outcome;
};
|
|
378
|
+
|
|
379
|
+
/**
 * Smoke test run by the CLI: builds a fresh Recall instance, calls `nuke()`
 * on it, adds two sample entries, runs one vector search with a French
 * query, and returns the response serialized as JSON.
 *
 * @returns {Promise<string>} JSON string of the `vectorSearch` response.
 */
async function test() {
    const instance = new Recall();
    // As in the original flow, the nuke() call is intentionally not awaited.
    instance.nuke();

    const samples = [
        ['The quick brown fox jumps over the lazy dog', 'Fox jumps over dog', {foo:"bar"}],
        ['History of Serbia бегинс with emperor Heraclius', 'Serbia and Roman empire', {foo:"baz"}],
    ];
    // Entries are added one after another, never in parallel.
    for (const [input, result, data] of samples) {
        await instance.add(input, result, data);
    }

    const response = await instance.vectorSearch('Un animal saute par-dessus un autre animal');
    return JSON.stringify(response);
}
|
|
387
|
+
|
|
388
|
+
/**
 * Parses `process.argv` into a flag → value map.
 *
 * Every entry of `process.argv` is scanned in order. When an entry is one of
 * the recognised flags it is recorded with an empty-string value; the next
 * entry that is not itself a flag becomes that flag's value. Entries that
 * follow no pending flag are ignored.
 *
 * @param {string[]} [list=[]] - The recognised flag names (e.g. `'--query'`).
 * @returns {Object<string, string>} Map of each flag seen to its value
 *   (`''` when no value followed), plus `_cmd`: the last path segment of
 *   `process.argv[1]`, or `''` when there is no script path.
 */
const cmdArgs = (list = []) => {
    const knownFlags = new Set(list);
    const args = {};
    let current = null;

    for (const val of process.argv) {
        if (knownFlags.has(val)) {
            // A flag starts out with an empty value so that boolean-style
            // flags (no value following) are still reported as present.
            current = val;
            args[current] = '';
        } else if (current) {
            args[current] = val;
            current = null;
        }
    }

    // process.argv[1] is absent when there is no script path (e.g. `node -e`
    // or the REPL); fall back to '' rather than throwing on undefined.split().
    const scriptPath = process.argv[1];
    args._cmd = typeof scriptPath === 'string' ? scriptPath.split(sep).pop() : '';
    return args;
};
|
|
424
404
|
|
|
425
405
|
const runCLI = async () => {
|
|
426
|
-
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--
|
|
406
|
+
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--nuke', '--input-header', '--result-header', '--test', '--limit', '--category'])
|
|
427
407
|
let query = args['--query'] || args['-q']
|
|
428
408
|
if(args['--db']){
|
|
429
409
|
config.DB_FILE = args['--db']
|
|
@@ -468,27 +448,23 @@ const runCLI = async () => {
|
|
|
468
448
|
}else if(args['--json']){
|
|
469
449
|
await importFromJSONStream(args['--json'])
|
|
470
450
|
console.log('Imported.')
|
|
471
|
-
}else if(args['--mcp'] != undefined){
|
|
472
|
-
await mcp()
|
|
473
|
-
console.log('MCP server running.')
|
|
474
451
|
}else if(args['--test'] != undefined){
|
|
475
452
|
console.log('Test: ', await test())
|
|
476
453
|
}else{
|
|
477
454
|
console.log('Usage:')
|
|
478
455
|
console.log(args._cmd + ' --query "Foo Bar"')
|
|
479
456
|
console.log("\n" + 'Options:')
|
|
480
|
-
console.log('--query "SEARCH_STRING" -
|
|
481
|
-
console.log('--limit
|
|
482
|
-
console.log(`--add 'input|result|{"foo":"bar"}|categ' -
|
|
483
|
-
console.log(`--remove 'id' -
|
|
484
|
-
console.log(`--nuke -
|
|
485
|
-
console.log(`--
|
|
486
|
-
console.log(`--
|
|
487
|
-
console.log(
|
|
488
|
-
console.log('--
|
|
489
|
-
console.log(
|
|
490
|
-
console.log(`--
|
|
491
|
-
console.log(`--category "CATEGORY" - specify category when adding data and to filter by when querying (defaults to empty string)`)
|
|
457
|
+
console.log('--query "SEARCH_STRING" - Search the database')
|
|
458
|
+
console.log('--limit N - Limit number of results (used with --query).')
|
|
459
|
+
console.log(`--add 'input|result|{"foo":"bar"}|categ' - Add a data entry.`)
|
|
460
|
+
console.log(`--remove 'id' - Remove data by ID.`)
|
|
461
|
+
console.log(`--nuke - Destroy the database.`)
|
|
462
|
+
console.log(`--db "FILE_NAME" - Specify database file (SQLite).`)
|
|
463
|
+
console.log(`--import "file.csv | file.tsv" - Import from CSV or TSV with columns: input, result, additional data.`)
|
|
464
|
+
console.log('--input-header "foo" - When used with --import, designate a specific header column as input.')
|
|
465
|
+
console.log('--result-header "bar" - When used with --import, designate a specific header column as result.')
|
|
466
|
+
console.log(`--json "FILE_NAME" - Import from a file with one JSON object per line: {input:"", result:"", data:{}}.`)
|
|
467
|
+
console.log(`--category "CATEGORY" - Specify category when adding data and filter by it when querying (defaults to empty string).`)
|
|
492
468
|
}
|
|
493
469
|
}
|
|
494
470
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
 * Validates and normalizes a string value.
 *
 * The value is NFC-normalized and trimmed, then rejected if it is empty,
 * longer than `maxChars` UTF-16 code units, or contains control/invisible
 * formatting characters, private-use or special code points, or unpaired
 * surrogates. Other Unicode text, including emoji and other characters
 * outside the Basic Multilingual Plane, is accepted.
 *
 * @param {string} stringValue - The value to sanitize.
 * @param {number} [maxChars=1000] - Maximum allowed length after trimming.
 * @returns {string} The NFC-normalized, trimmed value.
 * @throws {Error} If `stringValue` is not a string, is empty after trimming,
 *   is too long, or contains a disallowed character.
 */
export function sanitizeValue(stringValue, maxChars=1000) {
    if (typeof stringValue !== 'string') {
        throw new Error('stringValue must be a string');
    }

    const sanitized = stringValue.normalize('NFC').trim();

    // Basic validation
    if (sanitized.length === 0) {
        throw new Error('stringValue name cannot be empty');
    }

    if (sanitized.length > maxChars) {
        throw new Error(`stringValue name too long (max ${maxChars} characters)`);
    }

    // Block C0/C1 control characters plus zero-width and bidirectional
    // formatting characters (primary security concern).
    if (/[\x00-\x1F\x7F-\x9F\u200B\u200E\u200F\u202A-\u202E\u2060-\u2069\uFEFF]/.test(sanitized)) {
        throw new Error('stringValue contains disallowed control characters');
    }

    // Block private-use code points (\p{Co}: U+E000-U+F8FF and the
    // supplementary private-use planes), the U+FFF0-U+FFFF specials range,
    // and the invisible tag characters U+E0000-U+E007F. The `u` flag makes
    // the regex work on code points, so the supplementary ranges match.
    if (/[\p{Co}\uFFF0-\uFFFF\u{E0000}-\u{E007F}]/u.test(sanitized)) {
        throw new Error('stringValue contains disallowed Unicode characters');
    }

    // Block unpaired surrogates only. Without the `u` flag this class would
    // match each half of a valid surrogate pair and reject every emoji.
    if (/[\uD800-\uDFFF]/u.test(sanitized)) {
        throw new Error('stringValue contains invalid Unicode characters');
    }

    return sanitized;
}
|