@mastra/rag 0.0.2-alpha.15 → 0.0.2-alpha.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/document/document.d.ts +6 -4
- package/dist/rag.cjs.development.js +58 -29
- package/dist/rag.cjs.development.js.map +1 -1
- package/dist/rag.cjs.production.min.js +1 -1
- package/dist/rag.cjs.production.min.js.map +1 -1
- package/dist/rag.esm.js +58 -29
- package/dist/rag.esm.js.map +1 -1
- package/package.json +2 -2
- package/src/document/document.test.ts +17 -2
- package/src/document/document.ts +48 -33
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# @mastra/rag
|
|
2
2
|
|
|
3
|
+
## 0.0.2-alpha.17
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- e1dd94a: update the api for embeddings
|
|
8
|
+
- Updated dependencies [e1dd94a]
|
|
9
|
+
- @mastra/core@0.1.27-alpha.33
|
|
10
|
+
|
|
11
|
+
## 0.0.2-alpha.16
|
|
12
|
+
|
|
13
|
+
### Patch Changes
|
|
14
|
+
|
|
15
|
+
- Updated dependencies [2712098]
|
|
16
|
+
- @mastra/core@0.1.27-alpha.32
|
|
17
|
+
|
|
3
18
|
## 0.0.2-alpha.15
|
|
4
19
|
|
|
5
20
|
### Patch Changes
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { EmbeddingOptions } from '@mastra/core';
|
|
2
|
+
import { Document as Chunk } from 'llamaindex';
|
|
2
3
|
import { ChunkOptions, ChunkStrategy, ExtractParams } from './types';
|
|
3
4
|
export declare class MastraDocument {
|
|
4
|
-
private
|
|
5
|
+
private chunks;
|
|
5
6
|
private type;
|
|
6
7
|
constructor({ docs, type }: {
|
|
7
8
|
docs: {
|
|
@@ -28,8 +29,9 @@ export declare class MastraDocument {
|
|
|
28
29
|
strategy?: ChunkStrategy;
|
|
29
30
|
options?: ChunkOptions;
|
|
30
31
|
extract?: ExtractParams;
|
|
31
|
-
}): Promise<MastraDocument>;
|
|
32
|
-
|
|
32
|
+
}): Promise<MastraDocument['chunks']>;
|
|
33
|
+
embed(chunk: Chunk | string | string[] | Chunk[], options: Omit<EmbeddingOptions, 'value'>): Promise<import("ai").EmbedManyResult<string> | import("ai").EmbedResult<string>>;
|
|
34
|
+
getDocs(): Chunk[];
|
|
33
35
|
getText(): string[];
|
|
34
36
|
getMetadata(): Record<string, any>[];
|
|
35
37
|
}
|
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
4
|
|
|
5
|
+
var core = require('@mastra/core');
|
|
5
6
|
var llamaindex = require('llamaindex');
|
|
6
7
|
var jsdom = require('jsdom');
|
|
7
8
|
var tiktoken = require('@dqbd/tiktoken');
|
|
8
|
-
var core = require('@mastra/core');
|
|
9
9
|
var pg = require('pg');
|
|
10
10
|
var pinecone = require('@pinecone-database/pinecone');
|
|
11
11
|
var astraDbTs = require('@datastax/astra-db-ts');
|
|
@@ -1602,9 +1602,9 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1602
1602
|
function MastraDocument(_ref) {
|
|
1603
1603
|
var docs = _ref.docs,
|
|
1604
1604
|
type = _ref.type;
|
|
1605
|
-
this.
|
|
1605
|
+
this.chunks = void 0;
|
|
1606
1606
|
this.type = void 0;
|
|
1607
|
-
this.
|
|
1607
|
+
this.chunks = docs.map(function (d) {
|
|
1608
1608
|
return new llamaindex.Document({
|
|
1609
1609
|
text: d.text,
|
|
1610
1610
|
metadata: d.metadata
|
|
@@ -1638,11 +1638,11 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1638
1638
|
});
|
|
1639
1639
|
_context.next = 9;
|
|
1640
1640
|
return pipeline.run({
|
|
1641
|
-
documents: this.
|
|
1641
|
+
documents: this.chunks
|
|
1642
1642
|
});
|
|
1643
1643
|
case 9:
|
|
1644
1644
|
nodes = _context.sent;
|
|
1645
|
-
this.
|
|
1645
|
+
this.chunks = this.chunks.map(function (doc, i) {
|
|
1646
1646
|
var _nodes$i;
|
|
1647
1647
|
return new llamaindex.Document({
|
|
1648
1648
|
text: doc.text,
|
|
@@ -1777,8 +1777,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1777
1777
|
break;
|
|
1778
1778
|
}
|
|
1779
1779
|
_rt = RecursiveCharacterTransformer.fromLanguage(options.language, options);
|
|
1780
|
-
_textSplit = _rt.transformDocuments(this.
|
|
1781
|
-
this.
|
|
1780
|
+
_textSplit = _rt.transformDocuments(this.chunks);
|
|
1781
|
+
this.chunks = _textSplit;
|
|
1782
1782
|
return _context3.abrupt("return");
|
|
1783
1783
|
case 5:
|
|
1784
1784
|
rt = new RecursiveCharacterTransformer({
|
|
@@ -1786,8 +1786,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1786
1786
|
isSeparatorRegex: options == null ? void 0 : options.isSeparatorRegex,
|
|
1787
1787
|
options: options
|
|
1788
1788
|
});
|
|
1789
|
-
textSplit = rt.transformDocuments(this.
|
|
1790
|
-
this.
|
|
1789
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1790
|
+
this.chunks = textSplit;
|
|
1791
1791
|
case 8:
|
|
1792
1792
|
case "end":
|
|
1793
1793
|
return _context3.stop();
|
|
@@ -1810,8 +1810,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1810
1810
|
isSeparatorRegex: options == null ? void 0 : options.isSeparatorRegex,
|
|
1811
1811
|
options: options
|
|
1812
1812
|
});
|
|
1813
|
-
textSplit = rt.transformDocuments(this.
|
|
1814
|
-
this.
|
|
1813
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1814
|
+
this.chunks = textSplit;
|
|
1815
1815
|
case 3:
|
|
1816
1816
|
case "end":
|
|
1817
1817
|
return _context4.stop();
|
|
@@ -1835,8 +1835,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1835
1835
|
break;
|
|
1836
1836
|
}
|
|
1837
1837
|
rt = new HTMLHeaderTransformer(options.headers, options == null ? void 0 : options.returnEachLine);
|
|
1838
|
-
textSplit = rt.transformDocuments(this.
|
|
1839
|
-
this.
|
|
1838
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1839
|
+
this.chunks = textSplit;
|
|
1840
1840
|
return _context5.abrupt("return");
|
|
1841
1841
|
case 5:
|
|
1842
1842
|
if (!(options != null && (_options$sections = options.sections) != null && _options$sections.length)) {
|
|
@@ -1844,8 +1844,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1844
1844
|
break;
|
|
1845
1845
|
}
|
|
1846
1846
|
_rt2 = new HTMLSectionTransformer(options.sections);
|
|
1847
|
-
_textSplit2 = _rt2.transformDocuments(this.
|
|
1848
|
-
this.
|
|
1847
|
+
_textSplit2 = _rt2.transformDocuments(this.chunks);
|
|
1848
|
+
this.chunks = _textSplit2;
|
|
1849
1849
|
return _context5.abrupt("return");
|
|
1850
1850
|
case 10:
|
|
1851
1851
|
throw new Error('HTML chunking requires either headers or sections to be specified');
|
|
@@ -1877,11 +1877,11 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1877
1877
|
minChunkSize: options == null ? void 0 : options.minChunkSize
|
|
1878
1878
|
});
|
|
1879
1879
|
textSplit = rt.transformDocuments({
|
|
1880
|
-
documents: this.
|
|
1880
|
+
documents: this.chunks,
|
|
1881
1881
|
ensureAscii: options == null ? void 0 : options.ensureAscii,
|
|
1882
1882
|
convertLists: options == null ? void 0 : options.convertLists
|
|
1883
1883
|
});
|
|
1884
|
-
this.
|
|
1884
|
+
this.chunks = textSplit;
|
|
1885
1885
|
case 5:
|
|
1886
1886
|
case "end":
|
|
1887
1887
|
return _context6.stop();
|
|
@@ -1900,8 +1900,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1900
1900
|
while (1) switch (_context7.prev = _context7.next) {
|
|
1901
1901
|
case 0:
|
|
1902
1902
|
rt = new LatexTransformer(options);
|
|
1903
|
-
textSplit = rt.transformDocuments(this.
|
|
1904
|
-
this.
|
|
1903
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1904
|
+
this.chunks = textSplit;
|
|
1905
1905
|
case 3:
|
|
1906
1906
|
case "end":
|
|
1907
1907
|
return _context7.stop();
|
|
@@ -1924,8 +1924,8 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1924
1924
|
encodingName: options == null ? void 0 : options.encodingName,
|
|
1925
1925
|
modelName: options == null ? void 0 : options.modelName
|
|
1926
1926
|
});
|
|
1927
|
-
textSplit = rt.transformDocuments(this.
|
|
1928
|
-
this.
|
|
1927
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1928
|
+
this.chunks = textSplit;
|
|
1929
1929
|
rt.dispose();
|
|
1930
1930
|
case 4:
|
|
1931
1931
|
case "end":
|
|
@@ -1949,13 +1949,13 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1949
1949
|
break;
|
|
1950
1950
|
}
|
|
1951
1951
|
_rt3 = new MarkdownHeaderTransformer(options.headers, options == null ? void 0 : options.returnEachLine, options == null ? void 0 : options.stripHeaders);
|
|
1952
|
-
_textSplit3 = _rt3.transformDocuments(this.
|
|
1953
|
-
this.
|
|
1952
|
+
_textSplit3 = _rt3.transformDocuments(this.chunks);
|
|
1953
|
+
this.chunks = _textSplit3;
|
|
1954
1954
|
return _context9.abrupt("return");
|
|
1955
1955
|
case 5:
|
|
1956
1956
|
rt = new MarkdownTransformer(options);
|
|
1957
|
-
textSplit = rt.transformDocuments(this.
|
|
1958
|
-
this.
|
|
1957
|
+
textSplit = rt.transformDocuments(this.chunks);
|
|
1958
|
+
this.chunks = textSplit;
|
|
1959
1959
|
case 8:
|
|
1960
1960
|
case "end":
|
|
1961
1961
|
return _context9.stop();
|
|
@@ -1985,7 +1985,7 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1985
1985
|
_context10.next = 6;
|
|
1986
1986
|
return this.extract(params.extract);
|
|
1987
1987
|
case 6:
|
|
1988
|
-
return _context10.abrupt("return", this);
|
|
1988
|
+
return _context10.abrupt("return", this.chunks);
|
|
1989
1989
|
case 7:
|
|
1990
1990
|
case "end":
|
|
1991
1991
|
return _context10.stop();
|
|
@@ -1997,16 +1997,45 @@ var MastraDocument = /*#__PURE__*/function () {
|
|
|
1997
1997
|
}
|
|
1998
1998
|
return chunk;
|
|
1999
1999
|
}();
|
|
2000
|
+
_proto.embed = /*#__PURE__*/function () {
|
|
2001
|
+
var _embed2 = /*#__PURE__*/_asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function _callee11(chunk, options) {
|
|
2002
|
+
var value;
|
|
2003
|
+
return _regeneratorRuntime().wrap(function _callee11$(_context11) {
|
|
2004
|
+
while (1) switch (_context11.prev = _context11.next) {
|
|
2005
|
+
case 0:
|
|
2006
|
+
if (Array.isArray(chunk)) {
|
|
2007
|
+
value = chunk.map(function (chunk) {
|
|
2008
|
+
return typeof chunk === 'string' ? chunk : chunk.getText();
|
|
2009
|
+
});
|
|
2010
|
+
} else if (chunk instanceof llamaindex.Document) {
|
|
2011
|
+
value = chunk.getText();
|
|
2012
|
+
} else {
|
|
2013
|
+
value = chunk;
|
|
2014
|
+
}
|
|
2015
|
+
return _context11.abrupt("return", core.embed(_extends({}, options, {
|
|
2016
|
+
value: value
|
|
2017
|
+
})));
|
|
2018
|
+
case 2:
|
|
2019
|
+
case "end":
|
|
2020
|
+
return _context11.stop();
|
|
2021
|
+
}
|
|
2022
|
+
}, _callee11);
|
|
2023
|
+
}));
|
|
2024
|
+
function embed(_x12, _x13) {
|
|
2025
|
+
return _embed2.apply(this, arguments);
|
|
2026
|
+
}
|
|
2027
|
+
return embed;
|
|
2028
|
+
}();
|
|
2000
2029
|
_proto.getDocs = function getDocs() {
|
|
2001
|
-
return this.
|
|
2030
|
+
return this.chunks;
|
|
2002
2031
|
};
|
|
2003
2032
|
_proto.getText = function getText() {
|
|
2004
|
-
return this.
|
|
2033
|
+
return this.chunks.map(function (doc) {
|
|
2005
2034
|
return doc.text;
|
|
2006
2035
|
});
|
|
2007
2036
|
};
|
|
2008
2037
|
_proto.getMetadata = function getMetadata() {
|
|
2009
|
-
return this.
|
|
2038
|
+
return this.chunks.map(function (doc) {
|
|
2010
2039
|
return doc.metadata;
|
|
2011
2040
|
});
|
|
2012
2041
|
};
|