@sjovanovic/recall.js 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -94
- package/package.json +2 -2
- package/recall.js +341 -397
package/README.md
CHANGED
|
@@ -4,15 +4,11 @@
|
|
|
4
4
|
<img alt="Recall.js is long term memory for AI apps!" src="logo.svg" />
|
|
5
5
|
</p>
|
|
6
6
|
|
|
7
|
-
Recall.js
|
|
7
|
+
Recall.js provides long‑term memory for AI applications. It is a JavaScript library and command‑line tool for building Retrieval‑Augmented Generation (RAG) systems, with a focus on speed, ease of use, and embeddability.
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Beyond RAG, recall.js can be used for generic semantic search, as expert memory for your AI app, or as a recommendation system. It supports multilingual embeddings out of the box, allowing you to add data in one language and query it in another.
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
Recall.js supports multilingual embeddings out of the box so you can add data in one language and then query it in another.
|
|
14
|
-
|
|
15
|
-
Under the hood, recall.js uses [Transformers.js](https://huggingface.co/docs/transformers.js/index) feature extraction and a vector database to index and query your data. It is a light wrapper around local language models such as [Multilingual-MiniLM-L12-v2](https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2) and [CozoDB](https://www.cozodb.org/) vector database.
|
|
11
|
+
Under the hood, recall.js uses [Transformers.js](https://huggingface.co/docs/transformers.js/index) for feature extraction and a vector database (powered by [CozoDB](https://www.cozodb.org/)) for indexing and querying. It is a lightweight wrapper around local language models such as [Multilingual-MiniLM-L12-v2](https://huggingface.co/Xenova/paraphrase-multilingual-MiniLM-L12-v2).
|
|
16
12
|
|
|
17
13
|
## Install
|
|
18
14
|
|
|
@@ -20,62 +16,57 @@ Under the hood, recall.js uses [Transformers.js](https://huggingface.co/docs/tra
|
|
|
20
16
|
|
|
21
17
|
## Usage
|
|
22
18
|
|
|
23
|
-
|
|
19
|
+
### Command Line
|
|
24
20
|
|
|
25
21
|
```console
|
|
26
22
|
recall --add 'The quick brown fox jumps over the lazy dog|Fox|{"foo":"bar"}'
|
|
27
23
|
recall --query "Un animal saute par-dessus un autre animal" --limit 1
|
|
28
24
|
```
|
|
29
|
-
**
|
|
25
|
+
> **Note:** When the library is used for the first time, it will download a local language model (Multilingual-MiniLM-L12-v2). This may take a while depending on your internet connection. Please be patient.
|
|
30
26
|
|
|
31
|
-
|
|
27
|
+
### JavaScript
|
|
32
28
|
|
|
33
29
|
```javascript
|
|
34
30
|
|
|
35
|
-
import
|
|
31
|
+
import Recall from '@sjovanovic/recall.js'
|
|
36
32
|
|
|
37
33
|
const testRecall = async () => {
|
|
38
|
-
await RECALL.addBatch([
|
|
39
|
-
{
|
|
40
|
-
input: "The quick brown fox jumps over the lazy dog",
|
|
41
|
-
result: "Fox and dog",
|
|
42
|
-
data: { foo: "bar" }
|
|
43
|
-
}
|
|
44
|
-
])
|
|
45
|
-
|
|
46
|
-
// Semantic search query in different language (French) "Animal jumps over another animal"
|
|
47
|
-
let response = await RECALL.searchText("Un animal saute par-dessus un autre animal", 1)
|
|
48
|
-
console.log(response)
|
|
49
|
-
}
|
|
50
|
-
testRecall()
|
|
51
34
|
|
|
52
|
-
|
|
35
|
+
let config = {
|
|
36
|
+
SHOW_PROGRESS: true
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
let recall = new Recall(config)
|
|
53
40
|
|
|
54
|
-
|
|
41
|
+
await recall.addBatch([
|
|
42
|
+
{
|
|
43
|
+
input: "The quick brown fox jumps over the lazy dog",
|
|
44
|
+
result: "Fox and dog",
|
|
45
|
+
data: { foo: "bar" }
|
|
46
|
+
}
|
|
47
|
+
])
|
|
55
48
|
|
|
49
|
+
// Semantic search query in different language (French) "Animal jumps over another animal"
|
|
50
|
+
let response = await recall.searchText("Un animal saute par-dessus un autre animal", "", 1)
|
|
51
|
+
console.log(response)
|
|
52
|
+
}
|
|
53
|
+
testRecall()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Example response:**
|
|
57
|
+
```json
|
|
56
58
|
{
|
|
57
|
-
"headers": [
|
|
58
|
-
"dist",
|
|
59
|
-
"result",
|
|
60
|
-
"id",
|
|
61
|
-
"data",
|
|
62
|
-
"category"
|
|
63
|
-
],
|
|
59
|
+
"headers": ["dist", "result", "id", "data", "category"],
|
|
64
60
|
"rows": [
|
|
65
61
|
[
|
|
66
|
-
0.6840495824813843,
|
|
62
|
+
0.6840495824813843,
|
|
67
63
|
"Fox and dog",
|
|
68
64
|
"08840189191373282",
|
|
69
|
-
{
|
|
70
|
-
"foo": "bar"
|
|
71
|
-
},
|
|
65
|
+
{ "foo": "bar" },
|
|
72
66
|
""
|
|
73
67
|
]
|
|
74
68
|
]
|
|
75
69
|
}
|
|
76
|
-
|
|
77
|
-
*/
|
|
78
|
-
|
|
79
70
|
```
|
|
80
71
|
|
|
81
72
|
## Options
|
|
@@ -86,21 +77,20 @@ Easy way to view all the options is via command line:
|
|
|
86
77
|
recall --help
|
|
87
78
|
|
|
88
79
|
Usage:
|
|
89
|
-
recall
|
|
80
|
+
recall --query "Foo Bar"
|
|
90
81
|
|
|
91
82
|
Options:
|
|
92
|
-
--query "SEARCH_STRING" -
|
|
93
|
-
--limit
|
|
94
|
-
--add 'input|result|{"foo":"bar"}|categ' -
|
|
95
|
-
--remove 'id' -
|
|
96
|
-
--nuke -
|
|
97
|
-
--
|
|
98
|
-
--
|
|
99
|
-
--
|
|
100
|
-
--
|
|
101
|
-
--
|
|
102
|
-
--
|
|
103
|
-
--category "CATEGORY" - specify category when adding data and to filter by when querying (defaults to empty string)
|
|
83
|
+
--query "SEARCH_STRING" - Search the database.
|
|
84
|
+
--limit N - Limit number of results (used with --query).
|
|
85
|
+
--add 'input|result|{"foo":"bar"}|categ' - Add a data entry.
|
|
86
|
+
--remove 'id' - Remove data by ID.
|
|
87
|
+
--nuke - Destroy the database.
|
|
88
|
+
--db "FILE_NAME" - Specify database file (SQLite).
|
|
89
|
+
--import "file.csv | file.tsv" - Import from CSV or TSV with columns: input, result, additional data.
|
|
90
|
+
--input-header "foo" - When used with --import, designate a specific header column as input.
|
|
91
|
+
--result-header "bar" - When used with --import, designate a specific header column as result.
|
|
92
|
+
--json "FILE_NAME" - Import from a file with one JSON object per line: {input:"", result:"", data:{}}.
|
|
93
|
+
--category "CATEGORY" - Specify category when adding data and filter by it when querying (defaults to empty string).
|
|
104
94
|
```
|
|
105
95
|
|
|
106
96
|
**Note:** When adding data, recall generates a unique ID automatically. To set a custom ID, add it as a string property named "id" in the data object (e.g. `{"id":"customID"}`).
|
|
@@ -108,72 +98,79 @@ Options:
|
|
|
108
98
|
|
|
109
99
|
## JavaScript API Reference
|
|
110
100
|
|
|
111
|
-
###
|
|
101
|
+
### Configuration
|
|
112
102
|
|
|
113
|
-
|
|
103
|
+
The default configuration object is exported as config:
|
|
114
104
|
|
|
115
105
|
```javascript
|
|
116
106
|
export const config = {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
107
|
+
VECTOR_SIZE: 384, // Number of dimensions (must match the model's output)
|
|
108
|
+
MODEL_NAME: 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', // Model name for Transformers.js
|
|
109
|
+
SHOW_ERRORS: true, // Show error messages
|
|
110
|
+
DB_FILE: join(PATH, 'vector.db'), // Path to the SQLite database file (used by CozoDB)
|
|
111
|
+
PATH: PATH, // Directory of recall.js
|
|
112
|
+
DEVICE: undefined, // Transformers.js device
|
|
113
|
+
DTYPE: undefined, // Transformers.js dtype
|
|
114
|
+
PROGRESS_CALLBACK: undefined // Transformers.js progress callback
|
|
125
115
|
}
|
|
126
116
|
```
|
|
127
117
|
|
|
128
|
-
###
|
|
118
|
+
### Methods
|
|
119
|
+
|
|
120
|
+
**getDb()**
|
|
129
121
|
|
|
130
|
-
Returns reference to the CozoDB instance.
|
|
122
|
+
Returns reference to the underlying CozoDB instance.
|
|
131
123
|
|
|
132
|
-
|
|
124
|
+
**getEmbeddings(text) -> Promise<Array>**
|
|
133
125
|
|
|
134
126
|
Calculates the embedding vector for the given text.
|
|
135
127
|
|
|
136
|
-
|
|
128
|
+
**add(input, result, data={}, category="") -> Promise<Object>**
|
|
137
129
|
|
|
138
|
-
|
|
130
|
+
Adds a data entry.
|
|
139
131
|
|
|
140
|
-
|
|
132
|
+
- input – The sentence to generate embeddings from.
|
|
133
|
+
- result – The string to display in search results.
|
|
134
|
+
- data – Arbitrary object for additional information and references. If it contains an id property, that value will be used as the record’s unique ID.
|
|
135
|
+
- category – Optional category string.
|
|
141
136
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
137
|
+
**addBatch(batch) -> Promise<Object>**
|
|
138
|
+
|
|
139
|
+
Adds multiple entries in a batch (more efficient than repeated add calls).
|
|
140
|
+
batch is an array of objects with the same structure as add:
|
|
141
|
+
```javascript
|
|
142
|
+
let batch = [
|
|
143
|
+
{ input: "", result: "", data: {}, category: "" }
|
|
144
|
+
]
|
|
146
145
|
```
|
|
147
146
|
|
|
148
|
-
|
|
147
|
+
**remove(id, category="") -> Promise<Object>**
|
|
149
148
|
|
|
150
|
-
|
|
149
|
+
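Removes the record with the specified ID (string). A second argument, `category`, identifies the category the record was stored under; the call returns without removing anything when `category` is empty.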
|
|
151
150
|
|
|
152
|
-
|
|
151
|
+
**searchText(text, category="", numResults = 5, includeInput=false) -> Promise<Object>**
|
|
153
152
|
|
|
154
|
-
|
|
153
|
+
Queries the vector database.
|
|
155
154
|
|
|
156
|
-
|
|
155
|
+
- text – The query text.
|
|
156
|
+
- category – Optional category filter.
|
|
157
|
+
- numResults – Number of results to return.
|
|
158
|
+
- includeInput – If true, the original input text is included in the response.
|
|
157
159
|
|
|
158
|
-
|
|
160
|
+
**nuke()**
|
|
159
161
|
|
|
160
|
-
|
|
162
|
+
Deletes the entire database.
|
|
161
163
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
+
**importFromJSONStream(fileName) -> Promise<Object>**
|
|
165
|
+
|
|
166
|
+
Imports data from a readable stream or file containing one JSON object per line (JSONL). Example line format:
|
|
167
|
+
```json
|
|
164
168
|
{"input":"one", "result":"one result", "data":{"id":"123"}, "category":""}
|
|
165
|
-
{input:"", result:"", data:{}, category:""}
|
|
166
|
-
...
|
|
167
169
|
```
|
|
168
|
-
This is the most efficient
|
|
169
|
-
|
|
170
|
-
### RECALL.importFromCSVorTSV(fileName, inputHeader=null, resultHeader=null) -> Promise()
|
|
171
|
-
|
|
172
|
-
Imports from CSV or TSV file. By default fist column is used as input, second as result and remaining columns are put in the data object.
|
|
173
|
-
If `inputHeader` is specified, function will try to find the column by that name and use it as input.
|
|
174
|
-
If `resultHeader` is specified, function will try to find the column by that name and use it as result.
|
|
170
|
+
This is the most efficient import method.
|
|
175
171
|
|
|
176
|
-
|
|
172
|
+
**importFromCSVorTSV(fileName, inputHeader=null, resultHeader=null) -> Promise<Object>**
|
|
177
173
|
|
|
178
|
-
|
|
179
|
-
|
|
174
|
+
Imports data from a CSV or TSV file. By default, the first column is used as input, the second as result, and the remaining columns are merged into the data object.
|
|
175
|
+
If `inputHeader` is specified, the function looks for a column with that name and uses it as input.
|
|
176
|
+
If `resultHeader` is specified, it looks for a column with that name and uses it as result.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sjovanovic/recall.js",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "Easy RAG with semantic search and long term memory",
|
|
5
5
|
"main": "recall.js",
|
|
6
6
|
"bin": {
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
"type": "module",
|
|
10
10
|
"scripts": {
|
|
11
11
|
"start": "node recall.js",
|
|
12
|
-
"test": "
|
|
12
|
+
"test": "node recall.js --test",
|
|
13
13
|
"query": "node recall.js --query "
|
|
14
14
|
},
|
|
15
15
|
"author": "Slobodan Jovanovic",
|
package/recall.js
CHANGED
|
@@ -6,12 +6,6 @@ import fs from 'fs'
|
|
|
6
6
|
import { resolve, join, dirname, sep } from 'path'
|
|
7
7
|
import { fileURLToPath } from 'url'
|
|
8
8
|
|
|
9
|
-
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
10
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
11
|
-
import { z } from "zod";
|
|
12
|
-
|
|
13
|
-
// import {sanitizeValue} from './utils/sanitize.js'
|
|
14
|
-
|
|
15
9
|
const pathToThisFile = resolve(fileURLToPath(import.meta.url))
|
|
16
10
|
const pathPassedToNode = resolve(process.argv[1])
|
|
17
11
|
const isThisFileBeingRunViaCLI = pathToThisFile.includes(pathPassedToNode) || pathPassedToNode.includes('.npm-global')
|
|
@@ -21,182 +15,374 @@ export const config = {
|
|
|
21
15
|
VECTOR_SIZE: 384, // number of dimensions (must match the model's output)
|
|
22
16
|
MODEL_NAME: 'Xenova/paraphrase-multilingual-MiniLM-L12-v2', // model to use (passed to Transformers.js)
|
|
23
17
|
SHOW_ERRORS: true, // Show errors
|
|
18
|
+
SHOW_PROGRESS: false, // Show model loading progress in the console
|
|
24
19
|
DB_FILE: join(PATH, 'vector.db'), // Path to the database file (SQLite file used by CozoDB)
|
|
25
20
|
PATH: PATH, // directory of recall.js
|
|
26
21
|
DEVICE: undefined, // Transformers.js device
|
|
27
22
|
DTYPE: undefined, // Transformers.js dtype
|
|
28
23
|
PROGRESS_CALLBACK: undefined // Transformers.js progress_callback
|
|
29
24
|
}
|
|
25
|
+
var recal_instance = null
|
|
30
26
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
27
|
+
export class Recall {
|
|
28
|
+
constructor(opts = {}){
|
|
29
|
+
this.opts = {
|
|
30
|
+
...config,
|
|
31
|
+
...opts
|
|
32
|
+
}
|
|
33
|
+
this.initDone = false
|
|
34
|
+
this.db = new CozoDb('sqlite', this.opts.DB_FILE)
|
|
35
|
+
}
|
|
36
|
+
async printQuery(query, params = {}) {
|
|
37
|
+
try{
|
|
38
|
+
if(!this.initDone) {
|
|
39
|
+
this.initDone = true
|
|
40
|
+
await this.createTable()
|
|
41
|
+
}
|
|
42
|
+
}catch(err) {}
|
|
43
|
+
try {
|
|
44
|
+
let data = this.db.run(query, params)
|
|
45
|
+
return data
|
|
46
|
+
}catch(err){
|
|
47
|
+
if(this.opts.SHOW_ERRORS) console.error(err.display || err.message)
|
|
48
|
+
}
|
|
36
49
|
}
|
|
37
|
-
return db
|
|
38
|
-
}
|
|
39
50
|
|
|
40
|
-
async
|
|
41
|
-
|
|
42
|
-
if(!
|
|
43
|
-
|
|
44
|
-
|
|
51
|
+
async getEmbeddings(text){
|
|
52
|
+
let pipe = this.opts._pipe
|
|
53
|
+
if(!pipe) {
|
|
54
|
+
this.opts._pipe = await pipeline("feature-extraction", this.opts.MODEL_NAME, {
|
|
55
|
+
progress_callback:(progress) => {
|
|
56
|
+
if(this.opts.PROGRESS_CALLBACK) return this.opts.PROGRESS_CALLBACK(progress);
|
|
57
|
+
if(this.opts.SHOW_PROGRESS && progress.status === "progress_total"){
|
|
58
|
+
process.stdout.write(`\r\x1b[K✅ Loaded ${ Math.round(progress.progress)}% ${progress.name || "model"}`)
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
device: this.opts.DEVICE,
|
|
62
|
+
dtype: this.opts.DTYPE
|
|
63
|
+
});
|
|
64
|
+
pipe = this.opts._pipe
|
|
45
65
|
}
|
|
46
|
-
|
|
47
|
-
|
|
66
|
+
const embedding = await pipe(text, { pooling: "mean", normalize: true });
|
|
67
|
+
return Array.from(embedding.data)
|
|
48
68
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if(
|
|
69
|
+
|
|
70
|
+
async createTable() {
|
|
71
|
+
// create table
|
|
72
|
+
let tableCreated = await this.printQuery(`:create embeddings {id: String, category: String => v: <F32; ${this.opts.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
73
|
+
if(tableCreated){
|
|
74
|
+
// create vector index
|
|
75
|
+
let indexCreated = await printQuery(`::hnsw create embeddings:index_name {
|
|
76
|
+
dim: ${this.opts.VECTOR_SIZE},
|
|
77
|
+
m: 50,
|
|
78
|
+
dtype: F32,
|
|
79
|
+
fields: [v],
|
|
80
|
+
distance: L2, # Cosine, IP
|
|
81
|
+
ef_construction:50, # number of nearest neighbors
|
|
82
|
+
extend_candidates: false, # include nearest neighbors of the nearest neighbors
|
|
83
|
+
keep_pruned_connections: false,
|
|
84
|
+
}`)
|
|
85
|
+
return tableCreated && indexCreated
|
|
86
|
+
}
|
|
87
|
+
return false
|
|
54
88
|
}
|
|
55
|
-
}
|
|
56
89
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
},
|
|
67
|
-
device: config.DEVICE,
|
|
68
|
-
dtype: config.DTYPE
|
|
69
|
-
});
|
|
70
|
-
pipe = config._pipe
|
|
90
|
+
async add(input, result, data={}, category="") {
|
|
91
|
+
if(!input || !result) return
|
|
92
|
+
input = this.sanitizeString(input)
|
|
93
|
+
result = this.sanitizeString(result)
|
|
94
|
+
const embedding = await this.getEmbeddings(input)
|
|
95
|
+
let id = data.id || Math.random().toString().substring(2)
|
|
96
|
+
return await printQuery(`?[id, v, input, result, data, category] <- [["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input.replaceAll('"', "'"))}, ${JSON.stringify(result.replaceAll('"', "'"))}, ${JSON.stringify(data)}, ${JSON.stringify(category.replaceAll('"', "'"))} ]]
|
|
97
|
+
:put embeddings {id, category => v, input, result, data}
|
|
98
|
+
`)
|
|
71
99
|
}
|
|
72
|
-
const embedding = await pipe(text, { pooling: "mean", normalize: true });
|
|
73
|
-
return Array.from(embedding.data)
|
|
74
|
-
}
|
|
75
100
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
let tableCreated = await printQuery(`:create embeddings {id: String, category: String => v: <F32; ${config.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
79
|
-
if(tableCreated){
|
|
80
|
-
// create index
|
|
81
|
-
let indexCreated = await printQuery(`::hnsw create embeddings:index_name {
|
|
82
|
-
dim: ${config.VECTOR_SIZE},
|
|
83
|
-
m: 50,
|
|
84
|
-
dtype: F32,
|
|
85
|
-
fields: [v],
|
|
86
|
-
distance: L2, # Cosine, IP
|
|
87
|
-
ef_construction:50, # number of nearest neighbors
|
|
88
|
-
extend_candidates: false, # include nearest neighbors of the nearest neighbors
|
|
89
|
-
keep_pruned_connections: false,
|
|
90
|
-
}`)
|
|
91
|
-
return tableCreated && indexCreated
|
|
101
|
+
sanitizeString(str){
|
|
102
|
+
return str.replace(/[\/#$%\^&\*{}=_`~()\"]/g," ").replace(/\s{2,}/g, " ").trim()
|
|
92
103
|
}
|
|
93
|
-
return false
|
|
94
|
-
}
|
|
95
104
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
105
|
+
/**
|
|
106
|
+
*
|
|
107
|
+
* Batch array:
|
|
108
|
+
* [{input:"", result:"", data:{}}]
|
|
109
|
+
*
|
|
110
|
+
* @param {Array} batch
|
|
111
|
+
* @returns
|
|
112
|
+
*/
|
|
113
|
+
async addBatch(batch, opts={onProgress:null}) {
|
|
114
|
+
if(!batch || !Array.isArray(batch)) return
|
|
115
|
+
let vectorBatch = []
|
|
116
|
+
for(let i=0;i<batch.length; i++){
|
|
117
|
+
let {input, result, data, category} = batch[i]
|
|
118
|
+
|
|
119
|
+
if(!input || !result) continue
|
|
120
|
+
if(!data) data = {}
|
|
121
|
+
if(!category) category = ''
|
|
122
|
+
const embedding = await this.getEmbeddings(input)
|
|
123
|
+
batch[i].embedding = embedding
|
|
124
|
+
let item = ''
|
|
125
|
+
if(i == 0) {
|
|
126
|
+
item += `?[id, v, input, result, data, category] <- [`
|
|
127
|
+
}
|
|
106
128
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
if(!input || !result) continue
|
|
122
|
-
if(!data) data = {}
|
|
123
|
-
if(!category) category = ''
|
|
124
|
-
const embedding = await getEmbeddings(input)
|
|
125
|
-
batch[i].embedding = embedding
|
|
126
|
-
let item = ''
|
|
127
|
-
if(i == 0) {
|
|
128
|
-
item += `?[id, v, input, result, data, category] <- [`
|
|
129
|
+
input = this.sanitizeString(input)
|
|
130
|
+
result = this.sanitizeString(result)
|
|
131
|
+
|
|
132
|
+
let id = data?.id ? data.id : Math.random().toString().substring(2)
|
|
133
|
+
item += `["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input)}, ${JSON.stringify(result)}, ${JSON.stringify(data)}, ${JSON.stringify(category)} ],`
|
|
134
|
+
if(i == batch.length-1) {
|
|
135
|
+
item += `]
|
|
136
|
+
:put embeddings {id, category => v, input, result, data}`
|
|
137
|
+
}
|
|
138
|
+
vectorBatch.push(item)
|
|
139
|
+
|
|
140
|
+
if(opts.onProgress && typeof opts.onProgress == 'function') {
|
|
141
|
+
await opts.onProgress({index: i+1, total:batch.length, item: batch[i], embedding, percent: Math.round((i+1) / batch.length * 100)})
|
|
142
|
+
}
|
|
129
143
|
}
|
|
144
|
+
return await this.printQuery(vectorBatch.join("\n"))
|
|
145
|
+
}
|
|
130
146
|
|
|
131
|
-
|
|
132
|
-
|
|
147
|
+
async remove(id, category="") {
|
|
148
|
+
if(!id || typeof id != 'string') return
|
|
149
|
+
id = id.replace(/[^a-zA-Z0-9]/g, '')
|
|
150
|
+
category = this.sanitizeString(category)
|
|
151
|
+
if(!id || !category) return
|
|
152
|
+
let results = await this.printQuery(
|
|
153
|
+
`?[id, category] <- [['${id}', '${category}']]
|
|
154
|
+
::rm embeddings {id, category}`)
|
|
155
|
+
return results
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
async removeAllByCategory(category=""){
|
|
159
|
+
category = this.sanitizeString(category)
|
|
160
|
+
if(!category) return
|
|
161
|
+
let results
|
|
162
|
+
try {
|
|
163
|
+
results = await this.printQuery(
|
|
164
|
+
`?[id, category] := *embeddings{id, category}, category = "${category}"
|
|
165
|
+
:rm embeddings {id, category}`)
|
|
166
|
+
}catch(err){
|
|
167
|
+
console.error(err)
|
|
168
|
+
}
|
|
169
|
+
return results
|
|
170
|
+
}
|
|
133
171
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
:
|
|
172
|
+
async searchText(text, category="", numResults = 5, includeInput=false) {
|
|
173
|
+
const embedding = await this.getEmbeddings(text)
|
|
174
|
+
let results = await this.printQuery(`?[dist, result, id, data, category${includeInput? ', input' : ''}] := ~embeddings:index_name { id, v, input, result, data, category${includeInput? ', input' : ''} |
|
|
175
|
+
query: q,
|
|
176
|
+
k: ${numResults}, # number of results
|
|
177
|
+
ef: 50, # number of neighbours to consider
|
|
178
|
+
bind_distance: dist,
|
|
179
|
+
filter: category==${JSON.stringify(category)},
|
|
180
|
+
radius: 10.0
|
|
181
|
+
}, q = vec(${JSON.stringify(embedding)})
|
|
182
|
+
:sort dist`)
|
|
183
|
+
return results
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
async vectorSearch(query, category='', numResults=5) {
|
|
187
|
+
let result = undefined
|
|
188
|
+
try{
|
|
189
|
+
result = await this.searchText(query, category, numResults)
|
|
190
|
+
}catch(err){
|
|
191
|
+
if(config.SHOW_ERRORS) console.error(err.display || err.message)
|
|
139
192
|
}
|
|
140
|
-
|
|
193
|
+
return result
|
|
194
|
+
}
|
|
141
195
|
|
|
142
|
-
|
|
143
|
-
|
|
196
|
+
nuke() {
|
|
197
|
+
return fs.unlinkSync(this.opts.DB_FILE)
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
async importFromJSONStream(fileName) {
|
|
201
|
+
async function jsonStream(readable, callback = async function(){}) {
|
|
202
|
+
readable.setEncoding('utf8');
|
|
203
|
+
let data = '';
|
|
204
|
+
for await (const chunk of readable) {
|
|
205
|
+
if(chunk.indexOf("\n")) {
|
|
206
|
+
pts = chunk.split("\n")
|
|
207
|
+
for(let i=0;i<pts.length; i++){
|
|
208
|
+
data += pts[i]
|
|
209
|
+
try {
|
|
210
|
+
let json = JSON.parse(data)
|
|
211
|
+
await callback(json)
|
|
212
|
+
json = null
|
|
213
|
+
data = ''
|
|
214
|
+
}catch(err) {}
|
|
215
|
+
}
|
|
216
|
+
}else{
|
|
217
|
+
data += chunk;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
let batchSize = 40, batch = [], i=0, currentBatch = 0
|
|
222
|
+
let stream = typeof fileName == 'string' ? fs.createReadStream(fileName) : fileName
|
|
223
|
+
await jsonStream(stream, async (json) => {
|
|
224
|
+
if(json.input && json.result){
|
|
225
|
+
if(!json.data) json.data = {}
|
|
226
|
+
if(i % batchSize === 0){
|
|
227
|
+
if(batch.length) {
|
|
228
|
+
currentBatch = currentBatch + 1
|
|
229
|
+
console.log(`Adding batch ${currentBatch} (${batch.length} items)`)
|
|
230
|
+
await this.addBatch(batch)
|
|
231
|
+
batch = []
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
batch.push(json)
|
|
235
|
+
i=i+1
|
|
236
|
+
}
|
|
237
|
+
})
|
|
238
|
+
if(batch.length) {
|
|
239
|
+
console.log(`Adding batch ${currentBatch + 1} (${batch.length} items)`)
|
|
240
|
+
await this.addBatch(batch)
|
|
144
241
|
}
|
|
145
242
|
}
|
|
146
|
-
|
|
243
|
+
|
|
244
|
+
async importFromCSVorTSV(fileName, inputHeader, resultHeader) {
|
|
245
|
+
if(!fileName || !fileName.includes('.')) return
|
|
246
|
+
let ext = fileName.split('.').pop()
|
|
247
|
+
ext = ext.toLowerCase()
|
|
248
|
+
if(ext != 'csv' && ext != 'tsv') return console.log('File must have csv or tsv extension')
|
|
249
|
+
let parseOpts = {
|
|
250
|
+
separator: ext == 'tsv' ? '\t' : ',',
|
|
251
|
+
mapHeaders: ({ header, index }) => {
|
|
252
|
+
if(inputHeader) {
|
|
253
|
+
if(inputHeader == header){
|
|
254
|
+
return 'input'
|
|
255
|
+
}
|
|
256
|
+
}else if(index === 0){
|
|
257
|
+
return 'input'
|
|
258
|
+
}
|
|
259
|
+
if(resultHeader){
|
|
260
|
+
if(resultHeader == header){
|
|
261
|
+
return 'result'
|
|
262
|
+
}
|
|
263
|
+
}else if(index === 1){
|
|
264
|
+
return 'result'
|
|
265
|
+
}
|
|
266
|
+
return header.replaceAll(/\W/gi, '_').replaceAll(/[^a-zA-Z0-9\_]/g, '').toLowerCase()
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
let fetchFromFile = async (fileName) => {
|
|
270
|
+
return new Promise(async (resolve, reject)=>{
|
|
271
|
+
let results = []
|
|
272
|
+
fs.createReadStream(fileName)
|
|
273
|
+
.pipe(csv(parseOpts))
|
|
274
|
+
.on('data', async (data) => {
|
|
275
|
+
results.push(data)
|
|
276
|
+
})
|
|
277
|
+
.on('end', () => {
|
|
278
|
+
console.log(`${fileName} loaded.`);
|
|
279
|
+
resolve(results)
|
|
280
|
+
}).on('error', (err) => {
|
|
281
|
+
console.error(err);
|
|
282
|
+
})
|
|
283
|
+
})
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
let results = await fetchFromFile(fileName)
|
|
287
|
+
|
|
288
|
+
let batchSize = 40, batch = [], currentBatch = 0, totalBatches = Math.ceil(results.length / batchSize), dataHeaders = Object.keys(results[results.length-1]).filter(k => k != 'input' && k != 'result'), data
|
|
289
|
+
for(let i=0; i<results.length; i++){
|
|
290
|
+
if(i % batchSize === 0){
|
|
291
|
+
if(batch.length) {
|
|
292
|
+
currentBatch = currentBatch + 1
|
|
293
|
+
console.log(`Adding batch ${currentBatch} of ${totalBatches} (${batch.length} items)`)
|
|
294
|
+
await this.addBatch(batch)
|
|
295
|
+
batch = []
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
data = {}
|
|
299
|
+
dataHeaders.forEach(k => k && results[i][k] ? data[k] = results[i][k] : null)
|
|
300
|
+
batch.push({
|
|
301
|
+
input: results[i].input,
|
|
302
|
+
result: results[i].result,
|
|
303
|
+
data
|
|
304
|
+
})
|
|
305
|
+
}
|
|
306
|
+
if(batch.length) {
|
|
307
|
+
console.log(`Adding batch ${currentBatch + 1} of ${totalBatches} (${batch.length} items)`)
|
|
308
|
+
await this.addBatch(batch)
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
export const getDb = () => {
|
|
315
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
316
|
+
return recal_instance.db
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
async function printQuery(query, params = {}) {
|
|
320
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
321
|
+
return await recal_instance.printQuery(query, params)
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
export const getEmbeddings = async (text) => {
|
|
325
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
326
|
+
return await recal_instance.getEmbeddings(text)
|
|
147
327
|
}
|
|
148
328
|
|
|
149
|
-
const
|
|
150
|
-
|
|
329
|
+
export const createTable = async () => {
|
|
330
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
331
|
+
return await recal_instance.createTable()
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
export const add = async (input, result, data={}, category="") => {
|
|
335
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
336
|
+
return await recal_instance.add(input, result, data, category)
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
export const addBatch = async (batch, opts={onProgress:null}) => {
|
|
340
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
341
|
+
return await recal_instance.addBatch(batch, opts)
|
|
151
342
|
}
|
|
152
343
|
|
|
153
344
|
export const remove = async (id, category="") => {
|
|
154
|
-
if(!
|
|
155
|
-
|
|
156
|
-
category = sanitizeString(category)
|
|
157
|
-
if(!id || !category) return
|
|
158
|
-
let results = await printQuery(
|
|
159
|
-
`?[id, category] <- [['${id}', '${category}']]
|
|
160
|
-
::rm embeddings {id, category}`)
|
|
161
|
-
return results
|
|
345
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
346
|
+
return await recal_instance.remove(id, category)
|
|
162
347
|
}
|
|
163
348
|
|
|
164
349
|
export const removeAllByCategory = async (category="") => {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
let results
|
|
168
|
-
try {
|
|
169
|
-
results = await printQuery(
|
|
170
|
-
`?[id, category] := *embeddings{id, category}, category = "${category}"
|
|
171
|
-
:rm embeddings {id, category}`)
|
|
172
|
-
}catch(err){
|
|
173
|
-
console.error(err)
|
|
174
|
-
}
|
|
175
|
-
return results
|
|
350
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
351
|
+
return await recal_instance.removeAllByCategory(category)
|
|
176
352
|
}
|
|
177
353
|
|
|
178
354
|
export const searchText = async (text, category="", numResults = 5, includeInput=false) => {
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
query: q,
|
|
182
|
-
k: ${numResults}, # number of results
|
|
183
|
-
ef: 50, # number of neighbours to consider
|
|
184
|
-
bind_distance: dist,
|
|
185
|
-
filter: category==${JSON.stringify(category)},
|
|
186
|
-
radius: 10.0
|
|
187
|
-
}, q = vec(${JSON.stringify(embedding)})
|
|
188
|
-
:sort -dist`)
|
|
189
|
-
return results
|
|
355
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
356
|
+
return await recal_instance.searchText(text, category, numResults, includeInput)
|
|
190
357
|
}
|
|
191
358
|
|
|
192
359
|
export const vectorSearch = async (query, category='', numResults=5) => {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
return
|
|
360
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
361
|
+
return await recal_instance.vectorSearch(query, category, numResults)
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
export const nuke = () => {
|
|
365
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
366
|
+
return recal_instance.nuke()
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
export const importFromJSONStream = async (fileName) => {
|
|
370
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
371
|
+
return await recal_instance.importFromJSONStream(fileName)
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
export const importFromCSVorTSV = async (fileName, inputHeader, resultHeader) => {
|
|
375
|
+
if(!recal_instance) recal_instance = new Recall()
|
|
376
|
+
return await recal_instance.importFromCSVorTSV(fileName, inputHeader, resultHeader)
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
async function test(){
|
|
380
|
+
let recall = new Recall()
|
|
381
|
+
recall.nuke()
|
|
382
|
+
await recall.add('The quick brown fox jumps over the lazy dog', 'Fox jumps over dog', {foo:"bar"})
|
|
383
|
+
await recall.add('History of Serbia бегинс with emperor Heraclius', 'Serbia and Roman empire', {foo:"baz"})
|
|
384
|
+
let resp = await recall.vectorSearch('Un animal saute par-dessus un autre animal')
|
|
385
|
+
return JSON.stringify(resp)
|
|
200
386
|
}
|
|
201
387
|
|
|
202
388
|
const cmdArgs = (list = []) => {
|
|
@@ -216,246 +402,8 @@ const cmdArgs = (list = []) => {
|
|
|
216
402
|
return args
|
|
217
403
|
}
|
|
218
404
|
|
|
219
|
-
export const nuke = () => {
|
|
220
|
-
return fs.unlinkSync(config.DB_FILE)
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
/**
 * Import records from a newline-delimited JSON source and add them in batches.
 *
 * Each record is a JSON value with truthy `input` and `result` properties;
 * a missing `data` property defaults to `{}`. Values without `input`/`result`
 * are ignored. Records are handed to `addBatch` in groups of 40.
 *
 * @param fileName - path of the file to read, or an already-open readable
 *                   stream (anything that is not a string is used as-is)
 * @returns {Promise<undefined>}
 */
export const importFromJSONStream = async (fileName) => {
  const BATCH_SIZE = 40
  const stream = typeof fileName == 'string' ? fs.createReadStream(fileName) : fileName
  stream.setEncoding('utf8')

  let batch = []
  let batchNumber = 0
  const flushBatch = async () => {
    if(!batch.length) return
    batchNumber += 1
    console.log(`Adding batch ${batchNumber} (${batch.length} items)`)
    await addBatch(batch)
    batch = []
  }

  // Text accumulated since the last successfully parsed record. A line that
  // does not parse on its own is kept and joined with the following lines, so
  // a record spread over several lines is still picked up.
  let pending = ''
  const consumeLine = async (line) => {
    pending += line
    if(!pending.trim()) {
      pending = ''
      return
    }
    let json
    try {
      json = JSON.parse(pending)
    }catch(err) {
      // Not a complete JSON value yet - keep accumulating.
      return
    }
    pending = ''
    // Only the parse is guarded above: failures from addBatch must propagate
    // to the caller instead of being silently dropped.
    if(json && json.input && json.result){
      if(!json.data) json.data = {}
      batch.push(json)
      if(batch.length >= BATCH_SIZE) await flushBatch()
    }
  }

  // Chunk boundaries are arbitrary, so the unterminated tail of each chunk is
  // carried over and prefixed to the next one.
  let tail = ''
  for await (const chunk of stream) {
    const lines = (tail + chunk).split("\n")
    tail = lines.pop()
    for(const line of lines) await consumeLine(line)
  }
  // The last line may not end with a newline.
  if(tail) await consumeLine(tail)

  if(pending.trim()) {
    console.error('importFromJSONStream: trailing data is not valid JSON and was skipped')
  }
  await flushBatch()
}
|
|
268
|
-
|
|
269
|
-
/**
 * Import rows from a CSV or TSV file and add them in batches of 40.
 *
 * Column mapping: the column whose header equals `inputHeader` (or the first
 * column when `inputHeader` is not given) becomes `input`; the column whose
 * header equals `resultHeader` (or the second column when not given) becomes
 * `result`. Every other header is lower-cased with non-word characters
 * replaced by `_`, and its non-empty values are collected into `data`.
 *
 * Returns without doing anything when `fileName` is not a string containing
 * a dot, when the extension is not csv/tsv (a message is logged), or when the
 * file contains no rows. Rejects if reading or parsing the file fails.
 *
 * @param {string} fileName - path to a .csv or .tsv file
 * @param {string} [inputHeader] - header of the column to use as input
 * @param {string} [resultHeader] - header of the column to use as result
 * @returns {Promise<undefined>}
 */
export const importFromCSVorTSV = async (fileName, inputHeader, resultHeader) => {
  if(typeof fileName !== 'string' || !fileName.includes('.')) return
  const ext = fileName.split('.').pop().toLowerCase()
  if(ext != 'csv' && ext != 'tsv') return console.log('File must have csv or tsv extension')

  const parseOpts = {
    separator: ext == 'tsv' ? '\t' : ',',
    mapHeaders: ({ header, index }) => {
      if(inputHeader ? inputHeader == header : index === 0) return 'input'
      if(resultHeader ? resultHeader == header : index === 1) return 'result'
      return header.replace(/\W/g, '_').toLowerCase()
    }
  }

  const rows = await new Promise((resolve, reject) => {
    const collected = []
    const source = fs.createReadStream(fileName)
    // Errors on the file stream are not forwarded through pipe(), so both
    // ends need a handler; otherwise a missing file leaves this promise
    // pending forever.
    source.on('error', reject)
    source
      .pipe(csv(parseOpts))
      .on('data', (row) => collected.push(row))
      .on('end', () => {
        console.log(`${fileName} loaded.`);
        resolve(collected)
      })
      .on('error', reject)
  })

  // Nothing to import; also guards the last-row lookup below.
  if(!rows.length) return

  const batchSize = 40
  const totalBatches = Math.ceil(rows.length / batchSize)
  // The extra (non input/result) columns are taken from the last row's keys.
  const dataHeaders = Object.keys(rows[rows.length-1]).filter(k => k != 'input' && k != 'result')

  for(let start = 0, batchNumber = 1; start < rows.length; start += batchSize, batchNumber++){
    const batch = rows.slice(start, start + batchSize).map(row => {
      const data = {}
      for(const key of dataHeaders){
        if(key && row[key]) data[key] = row[key]
      }
      return { input: row.input, result: row.result, data }
    })
    console.log(`Adding batch ${batchNumber} of ${totalBatches} (${batch.length} items)`)
    await addBatch(batch)
  }
}
|
|
337
|
-
|
|
338
|
-
/**
 * Start an MCP server named "Recall" on a stdio transport, exposing a single
 * `search` tool.
 *
 * The tool takes `text` and an optional `numberOfResults` (capped at 50),
 * calls `searchText(text, numberOfResults)` and answers with a list of text
 * items: a header line containing the elapsed time followed by element `[1]`
 * of every row in `results.rows`, or a single "didn't find anything" item
 * when there are no rows.
 *
 * Resolves once `server.connect()` resolves.
 */
const mcp = async () => {
  const MAX_RESULTS = 50

  const server = new McpServer({
    name: "Recall",
    description: "Recall provides semantic search on the local vector database.",
    version: "1.0.0"
  });

  server.tool(
    "search",
    {
      text: z.string(),
      // Optional so existing clients that only send `text` keep working; it
      // must be declared here or the handler never receives it.
      numberOfResults: z.number().optional()
    },
    async ({ text, numberOfResults }) => {
      const limit = numberOfResults && numberOfResults > MAX_RESULTS ? MAX_RESULTS : numberOfResults

      const startTime = performance.now()
      const results = await searchText(text, limit)
      const timeDiff = ((performance.now() - startTime) / 1000).toFixed(2)

      const rows = results && results.rows ? results.rows : []
      if(!rows.length) {
        return {
          content: [{
            type: "text",
            text: `Sorry. Recal search didn't find anything.`
          }]
        }
      }

      return {
        content: [
          {
            type: "text",
            text: `Recal search found the following results in ${timeDiff}s:`
          },
          ...rows.map(row => ({ type: "text", text: row[1] }))
        ]
      }
    }
  );

  // Start receiving messages on stdin and sending messages on stdout
  const transport = new StdioServerTransport();
  await server.connect(transport);
}
|
|
452
|
-
|
|
453
|
-
/**
 * Split text into sentences.
 *
 * A boundary is a `.`, `?` or `!` followed by optional whitespace and an
 * uppercase ASCII letter; the punctuation stays with the preceding sentence
 * and the whitespace is dropped.
 *
 * Splits directly on the boundary rather than inserting a `|` marker first,
 * so text that itself contains `|` is not broken apart.
 *
 * @param {string} text
 * @returns {string[]} the sentences, in order
 */
const splitSentences = (text) => {
  return text.split(/(?<=[.?!])\s*(?=[A-Z])/)
}
|
|
456
|
-
|
|
457
405
|
const runCLI = async () => {
|
|
458
|
-
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--
|
|
406
|
+
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--nuke', '--input-header', '--result-header', '--test', '--limit', '--category'])
|
|
459
407
|
let query = args['--query'] || args['-q']
|
|
460
408
|
if(args['--db']){
|
|
461
409
|
config.DB_FILE = args['--db']
|
|
@@ -500,27 +448,23 @@ const runCLI = async () => {
|
|
|
500
448
|
}else if(args['--json']){
|
|
501
449
|
await importFromJSONStream(args['--json'])
|
|
502
450
|
console.log('Imported.')
|
|
503
|
-
}else if(args['--mcp'] != undefined){
|
|
504
|
-
await mcp()
|
|
505
|
-
console.log('MCP server running.')
|
|
506
451
|
}else if(args['--test'] != undefined){
|
|
507
452
|
console.log('Test: ', await test())
|
|
508
453
|
}else{
|
|
509
454
|
console.log('Usage:')
|
|
510
455
|
console.log(args._cmd + ' --query "Foo Bar"')
|
|
511
456
|
console.log("\n" + 'Options:')
|
|
512
|
-
console.log('--query "SEARCH_STRING" -
|
|
513
|
-
console.log('--limit
|
|
514
|
-
console.log(`--add 'input|result|{"foo":"bar"}|categ' -
|
|
515
|
-
console.log(`--remove 'id' -
|
|
516
|
-
console.log(`--nuke -
|
|
517
|
-
console.log(`--
|
|
518
|
-
console.log(`--
|
|
519
|
-
console.log(
|
|
520
|
-
console.log('--
|
|
521
|
-
console.log(
|
|
522
|
-
console.log(`--
|
|
523
|
-
console.log(`--category "CATEGORY" - specify category when adding data and to filter by when querying (defaults to empty string)`)
|
|
457
|
+
console.log('--query "SEARCH_STRING" - Search the database')
|
|
458
|
+
console.log('--limit N - Limit number of results (used with --query).')
|
|
459
|
+
console.log(`--add 'input|result|{"foo":"bar"}|categ' - Add a data entry.`)
|
|
460
|
+
console.log(`--remove 'id' - Remove data by ID.`)
|
|
461
|
+
console.log(`--nuke - Destroy the database.`)
|
|
462
|
+
console.log(`--db "FILE_NAME" - Specify database file (SQLite).`)
|
|
463
|
+
console.log(`--import "file.csv | file.tsv" - Import from CSV or TSV with columns: input, result, additional data.`)
|
|
464
|
+
console.log('--input-header "foo" - When used with --import, designate a specific header column as input.')
|
|
465
|
+
console.log('--result-header "bar" - When used with --import, designate a specific header column as result.')
|
|
466
|
+
console.log(`--json "FILE_NAME" - Import from a file with one JSON object per line: {input:"", result:"", data:{}}.`)
|
|
467
|
+
console.log(`--category "CATEGORY" - Specify category when adding data and filter by it when querying (defaults to empty string).`)
|
|
524
468
|
}
|
|
525
469
|
}
|
|
526
470
|
|