@meaningfully/core 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dist/Meaningfully.d.ts +3 -2
  2. package/dist/Meaningfully.d.ts.map +1 -1
  3. package/dist/Meaningfully.js +8 -4
  4. package/dist/Meaningfully.js.map +1 -1
  5. package/dist/api/embedding.d.ts +1 -1
  6. package/dist/api/embedding.d.ts.map +1 -1
  7. package/dist/api/embedding.js +3 -3
  8. package/dist/api/embedding.js.map +1 -1
  9. package/dist/services/embeddings.d.ts +0 -1
  10. package/dist/services/embeddings.d.ts.map +1 -1
  11. package/dist/services/embeddings.js +51 -72
  12. package/dist/services/embeddings.js.map +1 -1
  13. package/dist/types/index.d.ts +2 -0
  14. package/dist/types/index.d.ts.map +1 -1
  15. package/package.json +5 -2
  16. package/src/Meaningfully.ts +9 -5
  17. package/{tests → src/__tests__}/MetadataManager.test.ts +1 -1
  18. package/{tests → src/api/__tests__}/embedding.test.ts +20 -20
  19. package/src/api/embedding.ts +3 -3
  20. package/{tests → src/services/__tests__}/csvLoader.test.ts +1 -1
  21. package/{tests → src/services/__tests__}/embeddings.test.ts +2 -2
  22. package/{tests → src/services/__tests__}/sentenceSplitter.test.ts +1 -1
  23. package/src/services/embeddings.d.ts +0 -1
  24. package/src/services/embeddings.ts +57 -81
  25. package/src/types/index.ts +2 -0
  26. package/tsconfig.json +1 -1
  27. package/dist/api/embedding.test.d.ts +0 -2
  28. package/dist/api/embedding.test.d.ts.map +0 -1
  29. package/dist/api/embedding.test.js +0 -340
  30. package/dist/api/embedding.test.js.map +0 -1
  31. package/dist/services/csvLoader.test.d.ts +0 -2
  32. package/dist/services/csvLoader.test.d.ts.map +0 -1
  33. package/dist/services/csvLoader.test.js +0 -75
  34. package/dist/services/csvLoader.test.js.map +0 -1
  35. package/dist/services/embeddings.test.d.ts +0 -2
  36. package/dist/services/embeddings.test.d.ts.map +0 -1
  37. package/dist/services/embeddings.test.js +0 -115
  38. package/dist/services/embeddings.test.js.map +0 -1
  39. package/dist/services/sentenceSplitter.test.d.ts +0 -2
  40. package/dist/services/sentenceSplitter.test.d.ts.map +0 -1
  41. package/dist/services/sentenceSplitter.test.js +0 -68
  42. package/dist/services/sentenceSplitter.test.js.map +0 -1
  43. package/src/api/embedding.d.ts +0 -6
  44. package/tests/csvLoader.test.d.ts +0 -1
  45. package/tests/embedding.test.d.ts +0 -1
  46. package/tests/embeddings.test.d.ts +0 -1
  47. package/tests/sentenceSplitter.test.d.ts +0 -1
@@ -1,68 +0,0 @@
1
- //@ts-nocheck
2
- import { expect, test } from 'vitest';
3
- import { CustomSentenceSplitter } from './sentenceSplitter';
4
- import { SentenceSplitter, IngestionPipeline, Document } from "llamaindex";
5
- // do these tests just to make sure that we can factor out my hacky fixes when llamaindex is fixed.
6
- // test that original sentenceSplitter splits on abbreviations
7
- // test that original sentenceSplitter splits on abbreviations even when specified
8
- // test that my modified sentenceSplitter excludes metadata when arg is specified
9
- // test that my modified sentenceSplitter includes metadata when arg is specified the other way
10
- let documents = [
11
- new Document({ text: "JPMorgan Chase & Co. elected Mark Weinberger as a director, effective January 16, 2024, and the Board of Directors appointed him as a member of the Audit Committee. Mr. Weinberger was Global Chairman and Chief Executive Officer of Ernst & Young from 2013 to 2019. He was also elected a director of JPMorgan Chase Bank, N.A. and a manager of JPMorgan Chase Holdings LLC, and may be elected a director of such other subsidiary or subsidiaries as may be determined from time to time." }),
12
- ];
13
- let originalSentenceSplitterPipeline = new IngestionPipeline({
14
- transformations: [
15
- new SentenceSplitter({ chunkSize: 50, chunkOverlap: 10 }),
16
- ],
17
- });
18
- let customSentenceSplitterPipeline = new IngestionPipeline({
19
- transformations: [
20
- new CustomSentenceSplitter({ chunkSize: 50, chunkOverlap: 10 }),
21
- ],
22
- });
23
- test("my modified sentenceSplitter doesn't eliminate spaces", () => {
24
- customSentenceSplitterPipeline.run({ documents: documents }).then((nodes) => {
25
- expect(nodes.some((node) => node["text"].indexOf("Co.elected") > -1)).toEqual(false);
26
- expect(nodes.some((node) => node["text"].indexOf("Mr.Weinberger") > -1)).toEqual(false);
27
- expect(nodes.some((node) => node["text"].indexOf("A.and") > -1)).toEqual(false);
28
- });
29
- });
30
- // test("original sentenceSplitter does eliminate spaces", () => {
31
- // originalSentenceSplitterPipeline.run({documents: documents}).then((nodes) => {
32
- // expect(nodes.some((node) => node["text"].indexOf("Co.elected") > -1)).toEqual(true);
33
- // expect(nodes.some((node) => node["text"].indexOf("Mr.Weinberger") > -1)).toEqual(true);
34
- // expect(nodes.some((node) => node["text"].indexOf("A.and") > -1)).toEqual(true);
35
- // });
36
- // });
37
- let noAbbrevsCustomSentenceSplitterPipeline = new IngestionPipeline({
38
- transformations: [
39
- new CustomSentenceSplitter({ chunkSize: 50, chunkOverlap: 10, abbreviations: [] }),
40
- ],
41
- });
42
- test("my modified sentenceSplitter doesn't split on specified abbreviations", () => {
43
- customSentenceSplitterPipeline.run({ documents: documents }).then((nodes) => {
44
- expect(nodes.map((node) => !!node["text"].match(/Mr\.$/))).not.toContainEqual(true);
45
- });
46
- });
47
- // this is only a problem on branch fix/sentence-splitter-spaces
48
- // where the chunker is eliminated entirely in favor of just splitting by sentences with natural.
49
- test("original sentenceSplitter splits in silly places, like Mr", () => {
50
- noAbbrevsCustomSentenceSplitterPipeline.run({ documents: documents }).then((nodes) => {
51
- expect(nodes.map((node) => !!node["text"].match(/Mr\.$/))).toContainEqual(true);
52
- });
53
- });
54
- const testcases = [
55
- ["USA v. 4227 JENIFER STREET N.W. WASHINGTON, D.C., AND ELECTRONIC DEVICES THEREIN UNDER RULE 41", "USA v. 4227 JENIFER STREET N.W. WASHINGTON, D.C., AND ELECTRONIC DEVICES THEREIN UNDER RULE 41"],
56
- ["JPMorgan Chase & Co. elected Mark Weinberger as a director, effective January 16, 2024, and the Board of Directors appointed him as a member of the Audit Committee.", "JPMorgan Chase & Co. elected Mark Weinberger as a director, effective January 16, 2024, and the Board of Directors appointed him as a member of the Audit Committee."],
57
- ["Mr. Weinberger was Global Chairman and Chief Executive Officer of Ernst & Young from 2013 to 2019.", "Mr. Weinberger was Global Chairman and Chief Executive Officer of Ernst & Young from 2013 to 2019."],
58
- ["He was also elected a director of JPMorgan Chase Bank, N.A. and a manager of JPMorgan Chase Holdings LLC, and may be elected a director of such other subsidiary or subsidiaries as may be determined from time to time.", "He was also elected a director of JPMorgan Chase Bank, N.A. and a manager of JPMorgan Chase Holdings LLC, and may be elected a director of such other subsidiary or subsidiaries as may be determined from time to time."],
59
- ];
60
- testcases.forEach(([testcase_input, testcase_expected_output]) => {
61
- test(`my sentenceSplitter correctly handles short sentence ${testcase_input}`, () => {
62
- customSentenceSplitterPipeline.run({ documents: [new Document({ text: testcase_input })] }).then((nodes) => {
63
- expect(nodes.length).toEqual(1);
64
- expect(nodes[0]["text"]).toEqual(testcase_expected_output);
65
- });
66
- });
67
- });
68
- //# sourceMappingURL=sentenceSplitter.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"sentenceSplitter.test.js","sourceRoot":"","sources":["../../src/services/sentenceSplitter.test.ts"],"names":[],"mappings":"AAAA,aAAa;AACb,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AACrC,OAAO,EAAE,sBAAsB,EAAE,MAAM,oBAAoB,CAAA;AAC3D,OAAO,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3E,mGAAmG;AACnG,8DAA8D;AAC9D,kFAAkF;AAElF,iFAAiF;AACjF,+FAA+F;AAI/F,IAAI,SAAS,GAAG;IACZ,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,oeAAoe,EAAE,CAAC;CAC/f,CAAC;AAEF,IAAI,gCAAgC,GAAG,IAAI,iBAAiB,CAAC;IACzD,eAAe,EAAE;QACb,IAAI,gBAAgB,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;KACxD;CACJ,CAAC,CAAC;AACP,IAAI,8BAA8B,GAAG,IAAI,iBAAiB,CAAC;IACvD,eAAe,EAAE;QACf,IAAI,sBAAsB,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;KAChE;CACF,CAAC,CAAC;AAEL,IAAI,CAAC,uDAAuD,EAAE,GAAG,EAAE;IAC/D,8BAA8B,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,SAAS,EAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;QACtE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACrF,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACxF,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IACpF,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,kEAAkE;AAClE,qFAAqF;AACrF,+FAA+F;AAC/F,kGAAkG;AAClG,0FAA0F;AAC1F,UAAU;AACV,MAAM;AAEN,IAAI,uCAAuC,GAAG,IAAI,iBAAiB,CAAC;IAChE,eAAe,EAAE;QACf,IAAI,sBAAsB,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAC,CAAC;KAClF;CACF,CAAC,CAAC;AAGH,IAAI,CAAC,uEAAuE,EAAE,GAAG,EAAE;IACjF,8BAA8B,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,SAAS,EAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;QACtE,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;IACxF,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,
CAAC;AAEH,gEAAgE;AAChE,iGAAiG;AACjG,IAAI,CAAC,2DAA2D,EAAE,GAAG,EAAE;IACnE,uCAAuC,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,SAAS,EAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;QAC/E,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;IACpF,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,MAAM,SAAS,GAAG;IACd,CAAC,gGAAgG,EAAE,gGAAgG,CAAC;IACpM,CAAC,sKAAsK,EAAE,sKAAsK,CAAC;IAChV,CAAC,oGAAoG,EAAE,oGAAoG,CAAC;IAC5M,CAAC,0NAA0N,EAAE,0NAA0N,CAAC;CAE3b,CAAC;AACF,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,EAAE,wBAAwB,CAAC,EAAE,EAAE;IAC7D,IAAI,CAAC,wDAAwD,cAAc,EAAE,EAAE,GAAG,EAAE;QAChF,8BAA8B,CAAC,GAAG,CAAC,EAAC,SAAS,EAAE,CAAC,IAAI,QAAQ,CAAC,EAAC,IAAI,EAAE,cAAc,EAAC,CAAC,CAAC,EAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE;YACnG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,wBAAwB,CAAC,CAAC;QAC/D,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAA;AACN,CAAC,CAAC,CAAC"}
@@ -1,6 +0,0 @@
1
- import type { EmbeddingConfig, EmbeddingResult, SearchResult, PreviewResult, Settings, MetadataFilter, Clients } from "../types";
2
- export declare function createEmbeddings(csvPath: string, textColumnName: string, config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<EmbeddingResult>;
3
- export declare function previewResults(csvPath: string, textColumnName: string, config: EmbeddingConfig): Promise<PreviewResult>;
4
- export declare function getDocStore(config: EmbeddingConfig): Promise<import("llamaindex").BaseDocumentStore>;
5
- export declare function getIndex(config: EmbeddingConfig, settings: Settings, clients: Clients): Promise<import("llamaindex").VectorStoreIndex>;
6
- export declare function search(index: any, query: string, numResults?: number, filters?: MetadataFilter[]): Promise<SearchResult[]>;
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};