@sjovanovic/recall.js 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -25
- package/package.json +1 -1
- package/recall.js +58 -61
package/README.md
CHANGED
|
@@ -6,13 +6,13 @@
|
|
|
6
6
|
|
|
7
7
|
Recall.js is long term memory for AI apps!
|
|
8
8
|
|
|
9
|
-
It is a
|
|
9
|
+
It is a tool for building RAG (Retrieval-Augmented Generation) in the form of a JavaScript library and command-line utility focused on speed, ease of use and embeddability.
|
|
10
10
|
|
|
11
|
-
It is versatile
|
|
11
|
+
It is versatile, and you don't have to use it exclusively for RAG: use it for generic semantic search, as expert memory for your AI app, or as a recommendation system. There are so many possibilities...
|
|
12
12
|
|
|
13
13
|
Recall.js supports multilingual embeddings out of the box so you can add data in one language and then query it in another.
|
|
14
14
|
|
|
15
|
-
Under the hood, recall.js uses sentence vector embeddings and a vector database to index and query your data. It is a light wrapper around local language models such as [MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2)
|
|
15
|
+
Under the hood, recall.js uses sentence vector embeddings and a vector database to index and query your data. It is a light wrapper around local language models such as [MiniLM-L12-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2) and the [CozoDB](https://www.cozodb.org/) vector database.
|
|
16
16
|
|
|
17
17
|
## Install
|
|
18
18
|
|
|
@@ -20,7 +20,15 @@ Under the hood, recall.js uses sentence vector embeddings and a vector database
|
|
|
20
20
|
|
|
21
21
|
## Usage
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
Console:
|
|
24
|
+
|
|
25
|
+
```console
|
|
26
|
+
recall --add 'The quick brown fox jumps over the lazy dog|Fox|{"foo":"bar"}'
|
|
27
|
+
recall --query "Un animal saute par-dessus un autre animal" --limit 1
|
|
28
|
+
```
|
|
29
|
+
**Warning:** when this library is used for the first time, it will download the local language model MiniLM-L12-v2, which may take a long time depending on your Internet connection. Please be patient.
|
|
30
|
+
|
|
31
|
+
Below is the same example in JavaScript:
|
|
24
32
|
|
|
25
33
|
```javascript
|
|
26
34
|
|
|
@@ -68,18 +76,11 @@ response:
|
|
|
68
76
|
|
|
69
77
|
```
|
|
70
78
|
|
|
71
|
-
Here's how the above example looks like in CLI:
|
|
72
|
-
|
|
73
|
-
```log
|
|
74
|
-
recall --add 'The quick brown fox jumps over the lazy dog|Fox|{"foo":"bar"}'
|
|
75
|
-
recall --query "Un animal saute par-dessus un autre animal" --limit 1
|
|
76
|
-
```
|
|
77
|
-
|
|
78
79
|
## Options
|
|
79
80
|
|
|
80
|
-
|
|
81
|
+
An easy way to view all the options is via the command line:
|
|
81
82
|
|
|
82
|
-
```
|
|
83
|
+
```console
|
|
83
84
|
recall --help
|
|
84
85
|
|
|
85
86
|
Usage:
|
|
@@ -99,12 +100,12 @@ Options:
|
|
|
99
100
|
--json "FILE_NAME" - import from file which has one json object per line: {input:"", result:"", data:{}}
|
|
100
101
|
```
|
|
101
102
|
|
|
102
|
-
Note when adding data recall will generate unique id automatically. To set custom id add it as a string property named "id" in the data object (i.e. `{"id":"customID"}`).
|
|
103
|
+
**Note:** when adding data, recall will generate a unique id automatically. To set a custom id, add it as a string property named "id" in the data object (e.g. `{"id":"customID"}`).
|
|
103
104
|
|
|
104
105
|
|
|
105
106
|
## JavaScript API Reference
|
|
106
107
|
|
|
107
|
-
|
|
108
|
+
### RECALL.config
|
|
108
109
|
|
|
109
110
|
Configuration object.
|
|
110
111
|
|
|
@@ -118,19 +119,19 @@ export const config = {
|
|
|
118
119
|
}
|
|
119
120
|
```
|
|
120
121
|
|
|
121
|
-
|
|
122
|
+
### RECALL.getDb()
|
|
122
123
|
|
|
123
124
|
Returns reference to the CozoDB instance.
|
|
124
125
|
|
|
125
|
-
|
|
126
|
+
### RECALL.getEmbeddings(text) -> Promise(Array)
|
|
126
127
|
|
|
127
128
|
Given text, calculates the embeddings vector.
|
|
128
129
|
|
|
129
|
-
|
|
130
|
+
### RECALL.add(input, result, data={}) -> Promise(Object)
|
|
130
131
|
|
|
131
132
|
Add data. `input` is the sentence to get embeddings from. `result` is the string to show in the results. `data` is an arbitrary object intended to hold related pieces of information and references. If the `data` object contains an `id` property, it will be used as the unique id of the record.
|
|
132
133
|
|
|
133
|
-
|
|
134
|
+
### RECALL.addBatch(batch) -> Promise(Object)
|
|
134
135
|
|
|
135
136
|
Add data in batches (faster than using add repeatedly).
|
|
136
137
|
`batch` is an Array that looks like this:
|
|
@@ -138,19 +139,19 @@ Add data in batches (faster than using add repeteadely).
|
|
|
138
139
|
let batch = [{input:"", result:"", data:{}}]
|
|
139
140
|
```
|
|
140
141
|
|
|
141
|
-
|
|
142
|
+
### RECALL.remove(id) -> Promise(Object)
|
|
142
143
|
|
|
143
144
|
Remove data by id. id is a string.
|
|
144
145
|
|
|
145
|
-
|
|
146
|
+
### RECALL.searchText(text, category = "", numResults = 5) -> Promise(Object)
|
|
146
147
|
|
|
147
148
|
Query the vector database. Accepts the query text, an optional category to filter by (defaults to an empty string), and the number of results to return.
|
|
148
149
|
|
|
149
|
-
|
|
150
|
+
### RECALL.nuke()
|
|
150
151
|
|
|
151
152
|
Deletes the database.
|
|
152
153
|
|
|
153
|
-
|
|
154
|
+
### RECALL.importFromJSONStream(fileName) -> Promise(object)
|
|
154
155
|
|
|
155
156
|
Imports from a readable stream or file consisting of JSON objects, one per line, e.g.:
|
|
156
157
|
```
|
|
@@ -160,13 +161,13 @@ Imports from readable stream or file which consists of JSON objects, one per lin
|
|
|
160
161
|
```
|
|
161
162
|
This is the most efficient way to import data.
|
|
162
163
|
|
|
163
|
-
|
|
164
|
+
### RECALL.importFromCSVorTSV(fileName, inputHeader=null, resultHeader=null) -> Promise()
|
|
164
165
|
|
|
165
166
|
Imports from a CSV or TSV file. By default, the first column is used as input, the second as result, and the remaining columns are put in the data object.
|
|
166
167
|
If `inputHeader` is specified, function will try to find the column by that name and use it as input.
|
|
167
168
|
If `resultHeader` is specified, function will try to find the column by that name and use it as result.
|
|
168
169
|
|
|
169
|
-
|
|
170
|
+
### RECALL.mcp() -> Promise()
|
|
170
171
|
|
|
171
172
|
(Experimental)
|
|
172
173
|
Runs an MCP server and makes the results available when mentioning `Recall search` in the prompt. Currently only supports STDIO.
|
package/package.json
CHANGED
package/recall.js
CHANGED
|
@@ -23,22 +23,26 @@ export const config = {
|
|
|
23
23
|
PATH: PATH // directory of recall.js
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
var db = null
|
|
26
|
+
var db = null, initDone = false
|
|
27
27
|
|
|
28
28
|
export const getDb = () => {
|
|
29
|
-
if(!db)
|
|
29
|
+
if(!db) {
|
|
30
|
+
db = new CozoDb('sqlite', config.DB_FILE)
|
|
31
|
+
}
|
|
30
32
|
return db
|
|
31
33
|
}
|
|
32
34
|
|
|
33
35
|
async function printQuery(query, params = {}) {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if(isCreated) console.log('Created embeddings table.')
|
|
40
|
-
}catch(err) {}
|
|
36
|
+
|
|
37
|
+
try{
|
|
38
|
+
if(!initDone) {
|
|
39
|
+
initDone = true
|
|
40
|
+
await createTable()
|
|
41
41
|
}
|
|
42
|
+
}catch(err) {
|
|
43
|
+
//console.log('CREATE TABLE ERROR', err)
|
|
44
|
+
}
|
|
45
|
+
try {
|
|
42
46
|
let data = getDb().run(query, params)
|
|
43
47
|
return data
|
|
44
48
|
}catch(err){
|
|
@@ -57,7 +61,7 @@ export const getEmbeddings = async (text) => {
|
|
|
57
61
|
|
|
58
62
|
export const createTable = async () => {
|
|
59
63
|
// create table (id, v, input, result, data)
|
|
60
|
-
let tableCreated = await printQuery(`:create embeddings {id: String => v: <F32; ${config.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
64
|
+
let tableCreated = await printQuery(`:create embeddings {id: String, category: String => v: <F32; ${config.VECTOR_SIZE}>, input: String, result: String, data: Json}`)
|
|
61
65
|
if(tableCreated){
|
|
62
66
|
// create index
|
|
63
67
|
let indexCreated = await printQuery(`::hnsw create embeddings:index_name {
|
|
@@ -67,7 +71,6 @@ export const createTable = async () => {
|
|
|
67
71
|
fields: [v],
|
|
68
72
|
distance: L2, # Cosine, IP
|
|
69
73
|
ef_construction:50, # number of nearest neighbors
|
|
70
|
-
#filter: k != 'foo', # only those rows for which the expression evaluates to true are indexed
|
|
71
74
|
extend_candidates: false, # include nearest neighbors of the nearest neighbors
|
|
72
75
|
keep_pruned_connections: false,
|
|
73
76
|
}`)
|
|
@@ -76,21 +79,15 @@ export const createTable = async () => {
|
|
|
76
79
|
return false
|
|
77
80
|
}
|
|
78
81
|
|
|
79
|
-
export const add = async (input, result, data={}) => {
|
|
82
|
+
export const add = async (input, result, data={}, category="") => {
|
|
80
83
|
if(!input || !result) return
|
|
81
84
|
|
|
82
85
|
input = sanitizeString(input)
|
|
83
86
|
result = sanitizeString(result)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
87
|
const embedding = await getEmbeddings(input)
|
|
88
|
-
|
|
89
|
-
console.log('Adding', input, '->', result)
|
|
90
|
-
|
|
91
88
|
let id = data.id || Math.random().toString().substring(2)
|
|
92
|
-
return await printQuery(`?[id, v, input, result, data] <- [["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input.replaceAll('"', "'"))}, ${JSON.stringify(result.replaceAll('"', "'"))}, ${JSON.stringify(data)} ]]
|
|
93
|
-
:put embeddings {id => v, input, result, data}
|
|
89
|
+
return await printQuery(`?[id, v, input, result, data, category] <- [["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input.replaceAll('"', "'"))}, ${JSON.stringify(result.replaceAll('"', "'"))}, ${JSON.stringify(data)}, ${JSON.stringify(category.replaceAll('"', "'"))} ]]
|
|
90
|
+
:put embeddings {id, category => v, input, result, data}
|
|
94
91
|
`)
|
|
95
92
|
}
|
|
96
93
|
|
|
@@ -106,25 +103,26 @@ export const addBatch = async (batch) => {
|
|
|
106
103
|
if(!batch || !Array.isArray(batch)) return
|
|
107
104
|
let vectorBatch = []
|
|
108
105
|
for(let i=0;i<batch.length; i++){
|
|
109
|
-
let {input, result, data} = batch[i]
|
|
106
|
+
let {input, result, data, category} = batch[i]
|
|
110
107
|
|
|
111
108
|
if(!input || !result) continue
|
|
112
109
|
if(!data) data = {}
|
|
110
|
+
if(!category) category = ''
|
|
113
111
|
const embedding = await getEmbeddings(input)
|
|
114
112
|
batch[i].embedding = embedding
|
|
115
113
|
let item = ''
|
|
116
114
|
if(i == 0) {
|
|
117
|
-
item += `?[id, v, input, result, data] <- [`
|
|
115
|
+
item += `?[id, v, input, result, data, category] <- [`
|
|
118
116
|
}
|
|
119
117
|
|
|
120
118
|
input = sanitizeString(input)
|
|
121
119
|
result = sanitizeString(result)
|
|
122
120
|
|
|
123
121
|
let id = data?.id ? data.id : Math.random().toString().substring(2)
|
|
124
|
-
item += `["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input)}, ${JSON.stringify(result)}, ${JSON.stringify(data)} ],`
|
|
122
|
+
item += `["${id}", ${JSON.stringify(embedding)}, ${JSON.stringify(input)}, ${JSON.stringify(result)}, ${JSON.stringify(data)}, ${JSON.stringify(category)} ],`
|
|
125
123
|
if(i == batch.length-1) {
|
|
126
124
|
item += `]
|
|
127
|
-
:put embeddings {id => v, input, result, data}`
|
|
125
|
+
:put embeddings {id, category => v, input, result, data}`
|
|
128
126
|
}
|
|
129
127
|
vectorBatch.push(item)
|
|
130
128
|
}
|
|
@@ -135,31 +133,38 @@ const sanitizeString = (str)=>{
|
|
|
135
133
|
return str.replace(/[\/#$%\^&\*{}=_`~()\"]/g," ").replace(/\s{2,}/g, " ")
|
|
136
134
|
}
|
|
137
135
|
|
|
138
|
-
export const remove = async (id) => {
|
|
136
|
+
export const remove = async (id, category="") => {
|
|
139
137
|
if(!id || typeof id != 'string') return
|
|
140
138
|
id.replace(/[^a-zA-Z0-9]/g, '')
|
|
141
139
|
if(!id) return
|
|
142
140
|
let results = await printQuery(
|
|
143
|
-
`?[id] <- [['${id}']]
|
|
141
|
+
`?[id, category] <- [['${id}', '${category}']]
|
|
144
142
|
::remove embeddings {id}`)
|
|
145
143
|
return results
|
|
146
144
|
}
|
|
147
145
|
|
|
148
|
-
export const searchText = async (text, numResults = 5) => {
|
|
146
|
+
export const searchText = async (text, category="", numResults = 5) => {
|
|
149
147
|
const embedding = await getEmbeddings(text)
|
|
150
|
-
let results = await printQuery(`?[dist, result, id, data] := ~embeddings:index_name{ id, v, input, result, data |
|
|
148
|
+
let results = await printQuery(`?[dist, result, id, data, category] := ~embeddings:index_name { id, v, input, result, data, category |
|
|
151
149
|
query: q,
|
|
152
150
|
k: ${numResults}, # number of results
|
|
153
|
-
ef:
|
|
151
|
+
ef: 50, # number of neighbours to consider
|
|
154
152
|
bind_distance: dist,
|
|
153
|
+
filter: category==${JSON.stringify(category)},
|
|
155
154
|
radius: 10.0
|
|
156
155
|
}, q = vec(${JSON.stringify(embedding)})
|
|
157
|
-
:sort dist`)
|
|
156
|
+
:sort -dist`)
|
|
158
157
|
return results
|
|
159
158
|
}
|
|
160
159
|
|
|
161
|
-
export const vectorSearch = async (query, numResults=5) => {
|
|
162
|
-
|
|
160
|
+
export const vectorSearch = async (query, category='', numResults=5) => {
|
|
161
|
+
let result = undefined
|
|
162
|
+
try{
|
|
163
|
+
result = await searchText(query, category, numResults)
|
|
164
|
+
}catch(err){
|
|
165
|
+
if(config.SHOW_ERRORS) console.error(err.display || err.message)
|
|
166
|
+
}
|
|
167
|
+
return result
|
|
163
168
|
}
|
|
164
169
|
|
|
165
170
|
const cmdArgs = (list = []) => {
|
|
@@ -274,19 +279,6 @@ export const importFromCSVorTSV = async (fileName, inputHeader, resultHeader) =>
|
|
|
274
279
|
|
|
275
280
|
let results = await fetchFromFile(fileName)
|
|
276
281
|
|
|
277
|
-
// // split results to sentences
|
|
278
|
-
// let results_raw = await fetchFromFile(fileName)
|
|
279
|
-
// let results = []
|
|
280
|
-
// for(let i=0;i<results_raw.length; i++){
|
|
281
|
-
// let sentences = splitSentences(results_raw[i].input)
|
|
282
|
-
// for(let j=0; j<sentences.length; j++){
|
|
283
|
-
// results.push({
|
|
284
|
-
// ...results_raw[i],
|
|
285
|
-
// ...{ input: sentences[j] }
|
|
286
|
-
// })
|
|
287
|
-
// }
|
|
288
|
-
// }
|
|
289
|
-
|
|
290
282
|
let batchSize = 40, batch = [], currentBatch = 0, totalBatches = Math.ceil(results.length / batchSize), dataHeaders = Object.keys(results[results.length-1]).filter(k => k != 'input' && k != 'result'), data
|
|
291
283
|
for(let i=0; i<results.length; i++){
|
|
292
284
|
if(i % batchSize === 0){
|
|
@@ -431,18 +423,22 @@ const splitSentences = (text) => {
|
|
|
431
423
|
}
|
|
432
424
|
|
|
433
425
|
const runCLI = async () => {
|
|
434
|
-
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--mcp', '--nuke', '--input-header', '--result-header', '--test', '--limit'])
|
|
426
|
+
let args = cmdArgs(['--query', '-q', '--add', '--db', '--import', '--json', '--mcp', '--nuke', '--input-header', '--result-header', '--test', '--limit', '--category'])
|
|
435
427
|
let query = args['--query'] || args['-q']
|
|
436
428
|
if(args['--db']){
|
|
437
429
|
config.DB_FILE = args['--db']
|
|
438
430
|
}
|
|
431
|
+
let category = ''
|
|
432
|
+
if(args['--category']) {
|
|
433
|
+
category = args['--category']
|
|
434
|
+
}
|
|
439
435
|
if(query){
|
|
440
436
|
let numResults = 5
|
|
441
437
|
if(args['--limit'] && parseInt(args['--limit'])) {
|
|
442
438
|
numResults = parseInt(args['--limit'])
|
|
443
439
|
}
|
|
444
440
|
console.time('Search time')
|
|
445
|
-
let result = await vectorSearch(query, numResults)
|
|
441
|
+
let result = await vectorSearch(query, category, numResults)
|
|
446
442
|
console.timeEnd('Search time')
|
|
447
443
|
console.log('Results:')
|
|
448
444
|
console.log(JSON.stringify(result, null, 2))
|
|
@@ -451,17 +447,17 @@ const runCLI = async () => {
|
|
|
451
447
|
if(!input || !result) {
|
|
452
448
|
console.log('Usage:')
|
|
453
449
|
return console.log(args._cmd + `--add 'input|result|{"foo":"bar"}'`)
|
|
454
|
-
}
|
|
450
|
+
}
|
|
455
451
|
let data = {}
|
|
456
452
|
if(dataString) {
|
|
457
453
|
try {data = JSON.parse(dataString)}catch(err) {}
|
|
458
454
|
}
|
|
459
|
-
let resp = await add(input, result, data)
|
|
455
|
+
let resp = await add(input, result, data, category)
|
|
460
456
|
console.log(JSON.stringify(resp, null, 2))
|
|
461
457
|
}else if(args['--remove']){
|
|
462
458
|
let id = args['--remove']
|
|
463
459
|
if(!id) return console.log('Please specify ID to remove')
|
|
464
|
-
let resp = await remove(id)
|
|
460
|
+
let resp = await remove(id, category)
|
|
465
461
|
console.log(JSON.stringify(resp, null, 2))
|
|
466
462
|
}else if(args['--nuke'] != undefined){
|
|
467
463
|
nuke()
|
|
@@ -481,17 +477,18 @@ const runCLI = async () => {
|
|
|
481
477
|
console.log('Usage:')
|
|
482
478
|
console.log(args._cmd + ' --query "Foo Bar"')
|
|
483
479
|
console.log("\n" + 'Options:')
|
|
484
|
-
console.log('--query "SEARCH_STRING"
|
|
485
|
-
console.log('--limit 2
|
|
486
|
-
console.log(`--add 'input|result|{"foo":"bar"}'
|
|
487
|
-
console.log(`--remove 'id'
|
|
488
|
-
console.log(`--nuke
|
|
489
|
-
console.log(`--mcp
|
|
490
|
-
console.log(`--db "FILE_NAME"
|
|
491
|
-
console.log(`--import "file.csv | file.tsv"
|
|
492
|
-
console.log('--input-header "foo"
|
|
493
|
-
console.log('--result-header "bar"
|
|
494
|
-
console.log(`--json "FILE_NAME"
|
|
480
|
+
console.log('--query "SEARCH_STRING" - search')
|
|
481
|
+
console.log('--limit 2 - limit number of results (used with --query)')
|
|
482
|
+
console.log(`--add 'input|result|{"foo":"bar"}|categ' - add data`)
|
|
483
|
+
console.log(`--remove 'id' - remove data`)
|
|
484
|
+
console.log(`--nuke - destroy database`)
|
|
485
|
+
console.log(`--mcp - run as MCP server (experimental)`)
|
|
486
|
+
console.log(`--db "FILE_NAME" - database file (SQLite)`)
|
|
487
|
+
console.log(`--import "file.csv | file.tsv" - import from CSV or TSV w/ columns: 1. input 2. result 3. and remaining columns are additional data`)
|
|
488
|
+
console.log('--input-header "foo" - when used with --import designates specific header column as input')
|
|
489
|
+
console.log('--result-header "bar" - when used with --import designates specific header column as result')
|
|
490
|
+
console.log(`--json "FILE_NAME" - import from file which has one json object per line: {input:"", result:"", data:{}}`)
|
|
491
|
+
console.log(`--category "CATEGORY" - specify category when adding data and to filter by when querying (defaults to empty string)`)
|
|
495
492
|
}
|
|
496
493
|
}
|
|
497
494
|
|