@lancedb/lancedb 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +3 -0
- package/Cargo.toml +28 -0
- package/README.md +49 -0
- package/build.rs +5 -0
- package/eslint.config.js +28 -0
- package/examples/js/index.mjs +40 -0
- package/examples/js/package.json +14 -0
- package/examples/js-openai/index.mjs +43 -0
- package/examples/js-openai/package-lock.json +256 -0
- package/examples/js-openai/package.json +15 -0
- package/examples/js-transformers/index.mjs +65 -0
- package/examples/js-transformers/package-lock.json +1418 -0
- package/examples/js-transformers/package.json +15 -0
- package/examples/js-youtube-transcripts/index.mjs +135 -0
- package/examples/js-youtube-transcripts/package.json +15 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
- package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
- package/examples/ts/package-lock.json +1340 -0
- package/examples/ts/package.json +22 -0
- package/examples/ts/tsconfig.json +10 -0
- package/jest.config.js +7 -0
- package/lancedb/arrow.ts +650 -0
- package/lancedb/connection.ts +176 -0
- package/lancedb/embedding/embedding_function.ts +78 -0
- package/lancedb/embedding/index.ts +2 -0
- package/lancedb/embedding/openai.ts +62 -0
- package/lancedb/index.ts +69 -0
- package/lancedb/indices.ts +203 -0
- package/lancedb/query.ts +375 -0
- package/lancedb/sanitize.ts +516 -0
- package/lancedb/table.ts +353 -0
- package/package.json +82 -0
- package/tsconfig.json +23 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "vectordb-example-js-transformers",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Example for using transformers.js with lancedb",
|
|
5
|
+
"main": "index.mjs",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
8
|
+
},
|
|
9
|
+
"author": "Lance Devs",
|
|
10
|
+
"license": "Apache-2.0",
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@xenova/transformers": "^2.4.1",
|
|
13
|
+
"@lancedb/lancedb": "file:../.."
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
// Copyright 2023 Lance Developers.
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
"use strict";
|
|
16
|
+
|
|
17
|
+
// The OpenAI API does not follow the camelCase naming convention
|
|
18
|
+
/* eslint-disable @typescript-eslint/naming-convention */
|
|
19
|
+
|
|
20
|
+
/* global console, process */
|
|
21
|
+
|
|
22
|
+
import * as lancedb from "@lancedb/lancedb";
|
|
23
|
+
import * as fs from "fs/promises";
|
|
24
|
+
import * as readline from "readline/promises";
|
|
25
|
+
import { stdin as input, stdout as output } from "process";
|
|
26
|
+
import { Configuration, OpenAIApi } from "openai";
|
|
27
|
+
|
|
28
|
+
// Download file from XYZ
|
|
29
|
+
const INPUT_FILE_NAME = "data/youtube-transcriptions_sample.jsonl";
|
|
30
|
+
|
|
31
|
+
// Entry point: opens (or builds) the "vectors" table, then repeatedly reads a
// prompt from stdin, looks up the closest transcript contexts in LanceDB and
// prints the completion OpenAI generates from them.
(async () => {
  // The OpenAI API key is read from the OPENAI_API_KEY environment variable.
  const apiKey = process.env.OPENAI_API_KEY;
  // Embeddings are created from the 'context' column.
  const embedFunction = new lancedb.OpenAIEmbeddingFunction("context", apiKey);

  // Connect to LanceDB.
  const db = await lancedb.connect("data/youtube-lancedb");

  // Reuse the "vectors" table when it already exists, otherwise create it.
  const existingTables = await db.tableNames();
  const tbl = existingTables.includes("vectors")
    ? await db.openTable("vectors", embedFunction)
    : await createEmbeddingsTable(db, embedFunction);

  // The OpenAI Completion API generates an answer from the context that
  // LanceDB provides.
  const configuration = new Configuration({ apiKey });
  const openai = new OpenAIApi(configuration);
  const rl = readline.createInterface({ input, output });
  try {
    // eslint-disable-next-line no-constant-condition
    while (true) {
      const query = await rl.question("Prompt: ");

      // Fetch the three best matching rows for the prompt.
      const search = tbl
        .search(query)
        .select(["title", "text", "context"])
        .limit(3);
      const results = await search.execute();

      const request = {
        model: "text-davinci-003",
        prompt: createPrompt(query, results),
        max_tokens: 400,
        temperature: 0,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
      };
      const response = await openai.createCompletion(request);
      const answer = response.data.choices[0].text;
      console.log(answer);
    }
  } catch (err) {
    console.log("Error: ", err);
  } finally {
    rl.close();
  }
  // The loop above has no normal exit, so this line is only reached after an
  // error was caught; the process therefore ends with status 1.
  process.exit(1);
})();
|
|
82
|
+
|
|
83
|
+
/**
 * Builds the "vectors" table from the JSON Lines file at INPUT_FILE_NAME.
 *
 * Every non-blank line of the file is parsed as one JSON row, the rows are
 * given a `context` column by contextualize() (window of 20 previous rows,
 * grouped by `video_id`), and the result is passed to `db.createTable`.
 *
 * @param db - Connection object; `db.createTable(name, data, embedFunction)` is called on it.
 * @param embedFunction - Embedding function forwarded unchanged to `db.createTable`.
 * @returns Whatever `db.createTable` resolves to.
 * @throws If the file cannot be read or a non-blank line is not valid JSON.
 */
async function createEmbeddingsTable(db, embedFunction) {
  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`);

  // readFile already returns a string when an encoding is given.
  const contents = await fs.readFile(INPUT_FILE_NAME, "utf-8");

  // Skip blank lines. Whitespace-only lines (for example the "\r" left by an
  // empty line in a CRLF file) must be skipped too, otherwise JSON.parse
  // throws on them.
  const lines = contents
    .split("\n")
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line));

  const data = contextualize(lines, 20, "video_id");
  return await db.createTable("vectors", data, embedFunction);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Adds a `context` property to every row.
 *
 * Each transcript row only has a small `text` value, so the texts of the
 * preceding rows of the same group are included to give the embedding more
 * context information.
 *
 * Rows are grouped by the string form of `row[groupColumn]`. Within a group,
 * a row's `context` is the space-joined `text` of up to `contextSize`
 * preceding rows plus the row itself.
 *
 * @param {object[]} rows - Input rows; they are modified in place (a `context` property is set on each).
 * @param {number} contextSize - Maximum number of preceding rows to include.
 * @param {string} groupColumn - Name of the property to group rows by.
 * @returns {object[]} The same row objects, ordered group by group (groups in
 *   first-seen order, rows in their original order inside each group).
 */
function contextualize(rows, contextSize, groupColumn) {
  // A Map is used instead of properties on an array so that group values
  // such as "length" or "push" cannot collide with built-in members, and so
  // that groups keep their first-seen order even for numeric-looking keys.
  const groups = new Map();
  for (const row of rows) {
    const key = String(row[groupColumn]);
    const group = groups.get(key);
    if (group === undefined) {
      groups.set(key, [row]);
    } else {
      group.push(row);
    }
  }

  const data = [];
  for (const group of groups.values()) {
    for (let i = 0; i < group.length; i++) {
      // Window covers rows [i - contextSize, i], clamped at the group start.
      const start = Math.max(0, i - contextSize);
      group[i].context = group
        .slice(start, i + 1)
        .map((r) => r.text)
        .join(" ");
      data.push(group[i]);
    }
  }
  return data;
}
|
|
120
|
+
|
|
121
|
+
/**
 * Creates a completion prompt by aggregating all relevant contexts.
 *
 * @param query - The question to ask; interpolated after "Question: ".
 * @param {object[]} context - Rows whose `context` values are joined with a
 *   "---" separator line.
 * @returns {string} Instruction header, the joined contexts and the question,
 *   ending in "Answer:".
 */
function createPrompt(query, context) {
  const header =
    "Answer the question based on the context below.\n\n" + "Context:\n";

  const joinedContexts = context
    .map((c) => c.context)
    .join("\n\n---\n\n");

  // Cap the context part at 3750 characters so that the prompt is not larger
  // than the max size.
  const cappedContexts = joinedContexts.substring(0, 3750);

  return `${header}${cappedContexts}\n\nQuestion: ${query}\nAnswer:`;
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "vectordb-example-js-youtube",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.mjs",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
8
|
+
},
|
|
9
|
+
"author": "Lance Devs",
|
|
10
|
+
"license": "Apache-2.0",
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@lancedb/lancedb": "file:../..",
|
|
13
|
+
"openai": "^3.2.1"
|
|
14
|
+
}
|
|
15
|
+
}
|
|
Binary file
|
|
Binary file
|