@lancedb/lancedb 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/.eslintignore +3 -0
  2. package/Cargo.toml +28 -0
  3. package/README.md +49 -0
  4. package/build.rs +5 -0
  5. package/eslint.config.js +28 -0
  6. package/examples/js/index.mjs +40 -0
  7. package/examples/js/package.json +14 -0
  8. package/examples/js-openai/index.mjs +43 -0
  9. package/examples/js-openai/package-lock.json +256 -0
  10. package/examples/js-openai/package.json +15 -0
  11. package/examples/js-transformers/index.mjs +65 -0
  12. package/examples/js-transformers/package-lock.json +1418 -0
  13. package/examples/js-transformers/package.json +15 -0
  14. package/examples/js-youtube-transcripts/index.mjs +135 -0
  15. package/examples/js-youtube-transcripts/package.json +15 -0
  16. package/examples/ts/data/sample-lancedb/vectors.lance/_latest.manifest +0 -0
  17. package/examples/ts/data/sample-lancedb/vectors.lance/_transactions/0-adde4e05-fcfc-415c-86a6-5b252cb9e79a.txn +0 -0
  18. package/examples/ts/data/sample-lancedb/vectors.lance/_versions/1.manifest +0 -0
  19. package/examples/ts/data/sample-lancedb/vectors.lance/data/3618b33e-3eea-4b5e-a0fc-7d1f718d551e.lance +0 -0
  20. package/examples/ts/package-lock.json +1340 -0
  21. package/examples/ts/package.json +22 -0
  22. package/examples/ts/tsconfig.json +10 -0
  23. package/jest.config.js +7 -0
  24. package/lancedb/arrow.ts +650 -0
  25. package/lancedb/connection.ts +176 -0
  26. package/lancedb/embedding/embedding_function.ts +78 -0
  27. package/lancedb/embedding/index.ts +2 -0
  28. package/lancedb/embedding/openai.ts +62 -0
  29. package/lancedb/index.ts +69 -0
  30. package/lancedb/indices.ts +203 -0
  31. package/lancedb/query.ts +375 -0
  32. package/lancedb/sanitize.ts +516 -0
  33. package/lancedb/table.ts +353 -0
  34. package/package.json +82 -0
  35. package/tsconfig.json +23 -0
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "vectordb-example-js-transformers",
3
+ "version": "1.0.0",
4
+ "description": "Example for using transformers.js with lancedb",
5
+ "main": "index.mjs",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1"
8
+ },
9
+ "author": "Lance Devs",
10
+ "license": "Apache-2.0",
11
+ "dependencies": {
12
+ "@xenova/transformers": "^2.4.1",
13
+ "@lancedb/lancedb": "file:../.."
14
+ }
15
+ }
@@ -0,0 +1,135 @@
1
+ // Copyright 2023 Lance Developers.
2
+ //
3
+ // Licensed under the Apache License, Version 2.0 (the "License");
4
+ // you may not use this file except in compliance with the License.
5
+ // You may obtain a copy of the License at
6
+ //
7
+ // http://www.apache.org/licenses/LICENSE-2.0
8
+ //
9
+ // Unless required by applicable law or agreed to in writing, software
10
+ // distributed under the License is distributed on an "AS IS" BASIS,
11
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ // See the License for the specific language governing permissions and
13
+ // limitations under the License.
14
+
15
+ "use strict";
16
+
17
+ // The OpenAI API uses snake_case request fields (e.g. max_tokens), which do not follow the camelCase naming convention
18
+ /* eslint-disable @typescript-eslint/naming-convention */
19
+
20
+ /* global console, process */
21
+
22
+ import * as lancedb from "@lancedb/lancedb";
23
+ import * as fs from "fs/promises";
24
+ import * as readline from "readline/promises";
25
+ import { stdin as input, stdout as output } from "process";
26
+ import { Configuration, OpenAIApi } from "openai";
27
+
28
+ // Download file from XYZ
29
+ const INPUT_FILE_NAME = "data/youtube-transcriptions_sample.jsonl";
30
+
31
/**
 * Script entry point: an interactive question-answering loop.
 *
 * Connects to the local LanceDB database, opens (or builds) the "vectors"
 * table, then repeatedly reads a prompt from stdin, retrieves the three
 * closest rows and asks the OpenAI Completion API to answer the question
 * using those rows as context. The loop itself never terminates; the code
 * after it is only reached when something inside the loop throws, after
 * which the readline interface is closed and the process exits with status 1.
 */
(async () => {
  // The OpenAI API key is read from the OPENAI_API_KEY environment variable.
  const apiKey = process.env.OPENAI_API_KEY;
  // Embeddings are created for the 'context' column.
  const embedFunction = new lancedb.OpenAIEmbeddingFunction("context", apiKey);

  // Connect to LanceDB.
  const db = await lancedb.connect("data/youtube-lancedb");

  // Reuse the vectors table when it already exists, otherwise create it.
  const existingTables = await db.tableNames();
  const tbl = existingTables.includes("vectors")
    ? await db.openTable("vectors", embedFunction)
    : await createEmbeddingsTable(db, embedFunction);

  // The OpenAI Completion API generates an answer based on the context that LanceDB provides.
  const openai = new OpenAIApi(new Configuration({ apiKey }));
  const rl = readline.createInterface({ input, output });
  try {
    for (;;) {
      const query = await rl.question("Prompt: ");
      const nearest = tbl
        .search(query)
        .select(["title", "text", "context"])
        .limit(3);
      const results = await nearest.execute();

      const completionRequest = {
        model: "text-davinci-003",
        prompt: createPrompt(query, results),
        max_tokens: 400,
        temperature: 0,
        top_p: 1,
        frequency_penalty: 0,
        presence_penalty: 0,
      };
      const response = await openai.createCompletion(completionRequest);
      console.log(response.data.choices[0].text);
    }
  } catch (err) {
    console.log("Error: ", err);
  } finally {
    // Always release the terminal, whatever ended the loop.
    rl.close();
  }
  process.exit(1);
})();
82
+
83
/**
 * Builds the "vectors" table from the JSON Lines file named by INPUT_FILE_NAME.
 *
 * Each non-blank line of the file is parsed as one JSON document. The parsed
 * rows are then passed through `contextualize` (window of 20 previous rows,
 * grouped by the "video_id" column) before the table is created.
 *
 * @param db - connection object; only its `createTable` method is called here.
 * @param embedFunction - embedding function forwarded unchanged to `createTable`.
 * @returns whatever `db.createTable` resolves to.
 * @throws {SyntaxError} when a non-blank line is not valid JSON.
 */
async function createEmbeddingsTable(db, embedFunction) {
  console.log(`Creating embeddings from ${INPUT_FILE_NAME}`);
  // Passing an encoding makes readFile resolve to a string already.
  const contents = await fs.readFile(INPUT_FILE_NAME, "utf-8");
  const lines = contents
    .split("\n")
    // Skip blank lines, including whitespace-only ones such as the lone "\r"
    // left behind by CRLF line endings, which JSON.parse would reject.
    .filter((line) => line.trim().length > 0)
    .map((line) => JSON.parse(line));

  const data = contextualize(lines, 20, "video_id");
  return await db.createTable("vectors", data, embedFunction);
}
95
+
96
/**
 * Adds a `context` property to every row by joining its own `text` with the
 * `text` of up to `contextSize` preceding rows of the same group.
 *
 * Each transcript row only carries a small piece of text, so earlier rows are
 * included to give more context information when creating embeddings.
 *
 * The input row objects are modified in place (their `context` property is
 * set) and the very same objects are returned, ordered group by group.
 *
 * @param {object[]} rows - rows to process; `row[groupColumn]` and `row.text` are read.
 * @param {number} contextSize - how many preceding rows of the group to include.
 * @param {string} groupColumn - name of the property rows are grouped by.
 * @returns {object[]} all rows, grouped, each with `context` set.
 */
function contextualize(rows, contextSize, groupColumn) {
  // A prototype-less object is used as the dictionary so that group keys such
  // as "length", "map" or "constructor" cannot collide with inherited members.
  const grouped = Object.create(null);
  for (const row of rows) {
    const key = row[groupColumn];
    (grouped[key] ??= []).push(row);
  }

  const data = [];
  for (const group of Object.values(grouped)) {
    group.forEach((row, i) => {
      // The window spans the current row plus at most contextSize rows before it.
      const start = Math.max(0, i - contextSize);
      row.context = group
        .slice(start, i + 1)
        .map((r) => r.text)
        .join(" ");
      // Push one row at a time; spreading a very large group into push()
      // could exceed the engine's argument-count limit.
      data.push(row);
    });
  }
  return data;
}
120
+
121
/**
 * Creates a completion prompt by aggregating all relevant contexts.
 *
 * The `context` property of every row is joined with a "---" separator and
 * the joined text is cut to `maxContextLength` characters so the prompt does
 * not grow past the maximum size. Only the context part is truncated; the
 * instruction header and the question are always included in full.
 *
 * @param {string} query - the question to append to the prompt.
 * @param {object[]} context - rows whose `context` property is read.
 * @param {number} [maxContextLength=3750] - maximum number of characters
 *   (UTF-16 code units, as counted by `String.prototype.substring`) kept from
 *   the joined contexts.
 * @returns {string} the assembled prompt.
 */
function createPrompt(query, context, maxContextLength = 3750) {
  const header = "Answer the question based on the context below.\n\nContext:\n";
  const joinedContext = context
    .map((c) => c.context)
    .join("\n\n---\n\n")
    .substring(0, maxContextLength);
  return `${header}${joinedContext}\n\nQuestion: ${query}\nAnswer:`;
}
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "vectordb-example-js-youtube",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "index.mjs",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1"
8
+ },
9
+ "author": "Lance Devs",
10
+ "license": "Apache-2.0",
11
+ "dependencies": {
12
+ "@lancedb/lancedb": "file:../..",
13
+ "openai": "^3.2.1"
14
+ }
15
+ }