@mastra/rag 0.0.2-alpha.15 → 0.0.2-alpha.17

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # @mastra/rag
2
2
 
3
+ ## 0.0.2-alpha.17
4
+
5
+ ### Patch Changes
6
+
7
+ - e1dd94a: update the api for embeddings
8
+ - Updated dependencies [e1dd94a]
9
+ - @mastra/core@0.1.27-alpha.33
10
+
11
+ ## 0.0.2-alpha.16
12
+
13
+ ### Patch Changes
14
+
15
+ - Updated dependencies [2712098]
16
+ - @mastra/core@0.1.27-alpha.32
17
+
3
18
  ## 0.0.2-alpha.15
4
19
 
5
20
  ### Patch Changes
@@ -1,7 +1,8 @@
1
- import { Document } from 'llamaindex';
1
+ import { EmbeddingOptions } from '@mastra/core';
2
+ import { Document as Chunk } from 'llamaindex';
2
3
  import { ChunkOptions, ChunkStrategy, ExtractParams } from './types';
3
4
  export declare class MastraDocument {
4
- private docs;
5
+ private chunks;
5
6
  private type;
6
7
  constructor({ docs, type }: {
7
8
  docs: {
@@ -28,8 +29,9 @@ export declare class MastraDocument {
28
29
  strategy?: ChunkStrategy;
29
30
  options?: ChunkOptions;
30
31
  extract?: ExtractParams;
31
- }): Promise<MastraDocument>;
32
- getDocs(): Document[];
32
+ }): Promise<MastraDocument['chunks']>;
33
+ embed(chunk: Chunk | string | string[] | Chunk[], options: Omit<EmbeddingOptions, 'value'>): Promise<import("ai").EmbedManyResult<string> | import("ai").EmbedResult<string>>;
34
+ getDocs(): Chunk[];
33
35
  getText(): string[];
34
36
  getMetadata(): Record<string, any>[];
35
37
  }
@@ -2,10 +2,10 @@
2
2
 
3
3
  Object.defineProperty(exports, '__esModule', { value: true });
4
4
 
5
+ var core = require('@mastra/core');
5
6
  var llamaindex = require('llamaindex');
6
7
  var jsdom = require('jsdom');
7
8
  var tiktoken = require('@dqbd/tiktoken');
8
- var core = require('@mastra/core');
9
9
  var pg = require('pg');
10
10
  var pinecone = require('@pinecone-database/pinecone');
11
11
  var astraDbTs = require('@datastax/astra-db-ts');
@@ -1602,9 +1602,9 @@ var MastraDocument = /*#__PURE__*/function () {
1602
1602
  function MastraDocument(_ref) {
1603
1603
  var docs = _ref.docs,
1604
1604
  type = _ref.type;
1605
- this.docs = void 0;
1605
+ this.chunks = void 0;
1606
1606
  this.type = void 0;
1607
- this.docs = docs.map(function (d) {
1607
+ this.chunks = docs.map(function (d) {
1608
1608
  return new llamaindex.Document({
1609
1609
  text: d.text,
1610
1610
  metadata: d.metadata
@@ -1638,11 +1638,11 @@ var MastraDocument = /*#__PURE__*/function () {
1638
1638
  });
1639
1639
  _context.next = 9;
1640
1640
  return pipeline.run({
1641
- documents: this.docs
1641
+ documents: this.chunks
1642
1642
  });
1643
1643
  case 9:
1644
1644
  nodes = _context.sent;
1645
- this.docs = this.docs.map(function (doc, i) {
1645
+ this.chunks = this.chunks.map(function (doc, i) {
1646
1646
  var _nodes$i;
1647
1647
  return new llamaindex.Document({
1648
1648
  text: doc.text,
@@ -1777,8 +1777,8 @@ var MastraDocument = /*#__PURE__*/function () {
1777
1777
  break;
1778
1778
  }
1779
1779
  _rt = RecursiveCharacterTransformer.fromLanguage(options.language, options);
1780
- _textSplit = _rt.transformDocuments(this.docs);
1781
- this.docs = _textSplit;
1780
+ _textSplit = _rt.transformDocuments(this.chunks);
1781
+ this.chunks = _textSplit;
1782
1782
  return _context3.abrupt("return");
1783
1783
  case 5:
1784
1784
  rt = new RecursiveCharacterTransformer({
@@ -1786,8 +1786,8 @@ var MastraDocument = /*#__PURE__*/function () {
1786
1786
  isSeparatorRegex: options == null ? void 0 : options.isSeparatorRegex,
1787
1787
  options: options
1788
1788
  });
1789
- textSplit = rt.transformDocuments(this.docs);
1790
- this.docs = textSplit;
1789
+ textSplit = rt.transformDocuments(this.chunks);
1790
+ this.chunks = textSplit;
1791
1791
  case 8:
1792
1792
  case "end":
1793
1793
  return _context3.stop();
@@ -1810,8 +1810,8 @@ var MastraDocument = /*#__PURE__*/function () {
1810
1810
  isSeparatorRegex: options == null ? void 0 : options.isSeparatorRegex,
1811
1811
  options: options
1812
1812
  });
1813
- textSplit = rt.transformDocuments(this.docs);
1814
- this.docs = textSplit;
1813
+ textSplit = rt.transformDocuments(this.chunks);
1814
+ this.chunks = textSplit;
1815
1815
  case 3:
1816
1816
  case "end":
1817
1817
  return _context4.stop();
@@ -1835,8 +1835,8 @@ var MastraDocument = /*#__PURE__*/function () {
1835
1835
  break;
1836
1836
  }
1837
1837
  rt = new HTMLHeaderTransformer(options.headers, options == null ? void 0 : options.returnEachLine);
1838
- textSplit = rt.transformDocuments(this.docs);
1839
- this.docs = textSplit;
1838
+ textSplit = rt.transformDocuments(this.chunks);
1839
+ this.chunks = textSplit;
1840
1840
  return _context5.abrupt("return");
1841
1841
  case 5:
1842
1842
  if (!(options != null && (_options$sections = options.sections) != null && _options$sections.length)) {
@@ -1844,8 +1844,8 @@ var MastraDocument = /*#__PURE__*/function () {
1844
1844
  break;
1845
1845
  }
1846
1846
  _rt2 = new HTMLSectionTransformer(options.sections);
1847
- _textSplit2 = _rt2.transformDocuments(this.docs);
1848
- this.docs = _textSplit2;
1847
+ _textSplit2 = _rt2.transformDocuments(this.chunks);
1848
+ this.chunks = _textSplit2;
1849
1849
  return _context5.abrupt("return");
1850
1850
  case 10:
1851
1851
  throw new Error('HTML chunking requires either headers or sections to be specified');
@@ -1877,11 +1877,11 @@ var MastraDocument = /*#__PURE__*/function () {
1877
1877
  minChunkSize: options == null ? void 0 : options.minChunkSize
1878
1878
  });
1879
1879
  textSplit = rt.transformDocuments({
1880
- documents: this.docs,
1880
+ documents: this.chunks,
1881
1881
  ensureAscii: options == null ? void 0 : options.ensureAscii,
1882
1882
  convertLists: options == null ? void 0 : options.convertLists
1883
1883
  });
1884
- this.docs = textSplit;
1884
+ this.chunks = textSplit;
1885
1885
  case 5:
1886
1886
  case "end":
1887
1887
  return _context6.stop();
@@ -1900,8 +1900,8 @@ var MastraDocument = /*#__PURE__*/function () {
1900
1900
  while (1) switch (_context7.prev = _context7.next) {
1901
1901
  case 0:
1902
1902
  rt = new LatexTransformer(options);
1903
- textSplit = rt.transformDocuments(this.docs);
1904
- this.docs = textSplit;
1903
+ textSplit = rt.transformDocuments(this.chunks);
1904
+ this.chunks = textSplit;
1905
1905
  case 3:
1906
1906
  case "end":
1907
1907
  return _context7.stop();
@@ -1924,8 +1924,8 @@ var MastraDocument = /*#__PURE__*/function () {
1924
1924
  encodingName: options == null ? void 0 : options.encodingName,
1925
1925
  modelName: options == null ? void 0 : options.modelName
1926
1926
  });
1927
- textSplit = rt.transformDocuments(this.docs);
1928
- this.docs = textSplit;
1927
+ textSplit = rt.transformDocuments(this.chunks);
1928
+ this.chunks = textSplit;
1929
1929
  rt.dispose();
1930
1930
  case 4:
1931
1931
  case "end":
@@ -1949,13 +1949,13 @@ var MastraDocument = /*#__PURE__*/function () {
1949
1949
  break;
1950
1950
  }
1951
1951
  _rt3 = new MarkdownHeaderTransformer(options.headers, options == null ? void 0 : options.returnEachLine, options == null ? void 0 : options.stripHeaders);
1952
- _textSplit3 = _rt3.transformDocuments(this.docs);
1953
- this.docs = _textSplit3;
1952
+ _textSplit3 = _rt3.transformDocuments(this.chunks);
1953
+ this.chunks = _textSplit3;
1954
1954
  return _context9.abrupt("return");
1955
1955
  case 5:
1956
1956
  rt = new MarkdownTransformer(options);
1957
- textSplit = rt.transformDocuments(this.docs);
1958
- this.docs = textSplit;
1957
+ textSplit = rt.transformDocuments(this.chunks);
1958
+ this.chunks = textSplit;
1959
1959
  case 8:
1960
1960
  case "end":
1961
1961
  return _context9.stop();
@@ -1985,7 +1985,7 @@ var MastraDocument = /*#__PURE__*/function () {
1985
1985
  _context10.next = 6;
1986
1986
  return this.extract(params.extract);
1987
1987
  case 6:
1988
- return _context10.abrupt("return", this);
1988
+ return _context10.abrupt("return", this.chunks);
1989
1989
  case 7:
1990
1990
  case "end":
1991
1991
  return _context10.stop();
@@ -1997,16 +1997,45 @@ var MastraDocument = /*#__PURE__*/function () {
1997
1997
  }
1998
1998
  return chunk;
1999
1999
  }();
2000
+ _proto.embed = /*#__PURE__*/function () {
2001
+ var _embed2 = /*#__PURE__*/_asyncToGenerator(/*#__PURE__*/_regeneratorRuntime().mark(function _callee11(chunk, options) {
2002
+ var value;
2003
+ return _regeneratorRuntime().wrap(function _callee11$(_context11) {
2004
+ while (1) switch (_context11.prev = _context11.next) {
2005
+ case 0:
2006
+ if (Array.isArray(chunk)) {
2007
+ value = chunk.map(function (chunk) {
2008
+ return typeof chunk === 'string' ? chunk : chunk.getText();
2009
+ });
2010
+ } else if (chunk instanceof llamaindex.Document) {
2011
+ value = chunk.getText();
2012
+ } else {
2013
+ value = chunk;
2014
+ }
2015
+ return _context11.abrupt("return", core.embed(_extends({}, options, {
2016
+ value: value
2017
+ })));
2018
+ case 2:
2019
+ case "end":
2020
+ return _context11.stop();
2021
+ }
2022
+ }, _callee11);
2023
+ }));
2024
+ function embed(_x12, _x13) {
2025
+ return _embed2.apply(this, arguments);
2026
+ }
2027
+ return embed;
2028
+ }();
2000
2029
  _proto.getDocs = function getDocs() {
2001
- return this.docs;
2030
+ return this.chunks;
2002
2031
  };
2003
2032
  _proto.getText = function getText() {
2004
- return this.docs.map(function (doc) {
2033
+ return this.chunks.map(function (doc) {
2005
2034
  return doc.text;
2006
2035
  });
2007
2036
  };
2008
2037
  _proto.getMetadata = function getMetadata() {
2009
- return this.docs.map(function (doc) {
2038
+ return this.chunks.map(function (doc) {
2010
2039
  return doc.metadata;
2011
2040
  });
2012
2041
  };