@vertesia/workflow 0.50.1 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/lib/cjs/activities/createDocumentFromOther.js.map +1 -1
  2. package/lib/cjs/activities/executeInteraction.js +1 -2
  3. package/lib/cjs/activities/executeInteraction.js.map +1 -1
  4. package/lib/cjs/activities/extractDocumentText.js +1 -7
  5. package/lib/cjs/activities/extractDocumentText.js.map +1 -1
  6. package/lib/cjs/dsl/setup/ActivityContext.js +2 -2
  7. package/lib/cjs/dsl/setup/ActivityContext.js.map +1 -1
  8. package/lib/cjs/dsl.js +20 -0
  9. package/lib/cjs/dsl.js.map +1 -0
  10. package/lib/cjs/index.js +8 -2
  11. package/lib/cjs/index.js.map +1 -1
  12. package/lib/cjs/iterative-generation/activities/extractToc.js +2 -2
  13. package/lib/cjs/iterative-generation/activities/extractToc.js.map +1 -1
  14. package/lib/cjs/iterative-generation/activities/finalizeOutput.js +1 -1
  15. package/lib/cjs/iterative-generation/activities/finalizeOutput.js.map +1 -1
  16. package/lib/cjs/iterative-generation/activities/generatePart.js +1 -1
  17. package/lib/cjs/iterative-generation/activities/generatePart.js.map +1 -1
  18. package/lib/cjs/iterative-generation/activities/generateToc.js +1 -1
  19. package/lib/cjs/iterative-generation/activities/generateToc.js.map +1 -1
  20. package/lib/cjs/iterative-generation/utils.js +7 -4
  21. package/lib/cjs/iterative-generation/utils.js.map +1 -1
  22. package/lib/cjs/utils/blobs.js +2 -2
  23. package/lib/cjs/utils/blobs.js.map +1 -1
  24. package/lib/cjs/utils/client.js +2 -2
  25. package/lib/cjs/utils/client.js.map +1 -1
  26. package/lib/cjs/workflows.js +1 -3
  27. package/lib/cjs/workflows.js.map +1 -1
  28. package/lib/esm/activities/createDocumentFromOther.js.map +1 -1
  29. package/lib/esm/activities/executeInteraction.js +1 -2
  30. package/lib/esm/activities/executeInteraction.js.map +1 -1
  31. package/lib/esm/activities/extractDocumentText.js +1 -7
  32. package/lib/esm/activities/extractDocumentText.js.map +1 -1
  33. package/lib/esm/dsl/setup/ActivityContext.js +3 -3
  34. package/lib/esm/dsl/setup/ActivityContext.js.map +1 -1
  35. package/lib/esm/dsl.js +4 -0
  36. package/lib/esm/dsl.js.map +1 -0
  37. package/lib/esm/index.js +8 -2
  38. package/lib/esm/index.js.map +1 -1
  39. package/lib/esm/iterative-generation/activities/extractToc.js +3 -3
  40. package/lib/esm/iterative-generation/activities/extractToc.js.map +1 -1
  41. package/lib/esm/iterative-generation/activities/finalizeOutput.js +2 -2
  42. package/lib/esm/iterative-generation/activities/finalizeOutput.js.map +1 -1
  43. package/lib/esm/iterative-generation/activities/generatePart.js +2 -2
  44. package/lib/esm/iterative-generation/activities/generatePart.js.map +1 -1
  45. package/lib/esm/iterative-generation/activities/generateToc.js +2 -2
  46. package/lib/esm/iterative-generation/activities/generateToc.js.map +1 -1
  47. package/lib/esm/iterative-generation/utils.js +7 -4
  48. package/lib/esm/iterative-generation/utils.js.map +1 -1
  49. package/lib/esm/utils/blobs.js +2 -2
  50. package/lib/esm/utils/blobs.js.map +1 -1
  51. package/lib/esm/utils/client.js +1 -1
  52. package/lib/esm/utils/client.js.map +1 -1
  53. package/lib/esm/workflows.js +0 -1
  54. package/lib/esm/workflows.js.map +1 -1
  55. package/lib/types/activities/createDocumentFromOther.d.ts.map +1 -1
  56. package/lib/types/activities/executeInteraction.d.ts +5 -1
  57. package/lib/types/activities/executeInteraction.d.ts.map +1 -1
  58. package/lib/types/activities/extractDocumentText.d.ts.map +1 -1
  59. package/lib/types/dsl/dslProxyActivities.d.ts +2 -2
  60. package/lib/types/dsl/dslProxyActivities.d.ts.map +1 -1
  61. package/lib/types/dsl/setup/ActivityContext.d.ts.map +1 -1
  62. package/lib/types/dsl.d.ts +4 -0
  63. package/lib/types/dsl.d.ts.map +1 -0
  64. package/lib/types/index.d.ts +8 -2
  65. package/lib/types/index.d.ts.map +1 -1
  66. package/lib/types/iterative-generation/activities/extractToc.d.ts.map +1 -1
  67. package/lib/types/iterative-generation/activities/finalizeOutput.d.ts.map +1 -1
  68. package/lib/types/iterative-generation/activities/generatePart.d.ts.map +1 -1
  69. package/lib/types/iterative-generation/utils.d.ts +2 -2
  70. package/lib/types/iterative-generation/utils.d.ts.map +1 -1
  71. package/lib/types/utils/client.d.ts +2 -2
  72. package/lib/types/utils/client.d.ts.map +1 -1
  73. package/lib/types/workflows.d.ts +0 -1
  74. package/lib/types/workflows.d.ts.map +1 -1
  75. package/lib/workflows-bundle.js +1889 -160
  76. package/package.json +24 -8
  77. package/src/activities/createDocumentFromOther.ts +1 -1
  78. package/src/activities/executeInteraction.ts +7 -4
  79. package/src/activities/extractDocumentText.ts +2 -7
  80. package/src/dsl/dslProxyActivities.ts +2 -2
  81. package/src/dsl/setup/ActivityContext.ts +4 -4
  82. package/src/dsl.ts +3 -0
  83. package/src/index.ts +9 -2
  84. package/src/iterative-generation/activities/extractToc.ts +3 -3
  85. package/src/iterative-generation/activities/finalizeOutput.ts +3 -3
  86. package/src/iterative-generation/activities/generatePart.ts +3 -3
  87. package/src/iterative-generation/activities/generateToc.ts +2 -2
  88. package/src/iterative-generation/utils.ts +10 -6
  89. package/src/system/notifyWebhookWorkflow.ts +1 -1
  90. package/src/utils/blobs.ts +2 -2
  91. package/src/utils/client.ts +2 -2
  92. package/src/workflows.ts +0 -2
  93. package/lib/cjs/conversion/pdf.js +0 -13
  94. package/lib/cjs/conversion/pdf.js.map +0 -1
  95. package/lib/cjs/system/generateObjectText.js +0 -76
  96. package/lib/cjs/system/generateObjectText.js.map +0 -1
  97. package/lib/esm/conversion/pdf.js +0 -7
  98. package/lib/esm/conversion/pdf.js.map +0 -1
  99. package/lib/esm/system/generateObjectText.js +0 -73
  100. package/lib/esm/system/generateObjectText.js.map +0 -1
  101. package/lib/types/conversion/pdf.d.ts +0 -2
  102. package/lib/types/conversion/pdf.d.ts.map +0 -1
  103. package/lib/types/system/generateObjectText.d.ts +0 -4
  104. package/lib/types/system/generateObjectText.d.ts.map +0 -1
  105. package/src/conversion/pdf.test.ts +0 -35
  106. package/src/conversion/pdf.ts +0 -8
  107. package/src/system/generateObjectText.ts +0 -95
@@ -1,8 +0,0 @@
1
- import pdf2md from "@opendocsg/pdf2md";
2
-
3
- const pdf2mdFn = pdf2md as unknown as (buffer: Uint8Array) => Promise<string>;
4
-
5
- export function trasformPdfToMarkdown(buffer: Buffer) {
6
- const arr = new Uint8Array(buffer);
7
- return pdf2mdFn(arr);
8
- }
@@ -1,95 +0,0 @@
1
-
2
- import { log } from "@temporalio/workflow";
3
- import { WorkflowExecutionPayload } from "@vertesia/common";
4
- import * as activities from "../activities/index-dsl.js";
5
- import { dslProxyActivities } from "../dsl/dslProxyActivities.js";
6
- import { NoDocumentFound } from "../errors.js";
7
- import { TextExtractionResult } from "../index.js";
8
-
9
- const {
10
- getObjectFromStore,
11
- extractDocumentText
12
- } = dslProxyActivities<typeof activities>("generateTextWorkflow", {
13
- startToCloseTimeout: "5 minute",
14
- retry: {
15
- initialInterval: '5s',
16
- backoffCoefficient: 2,
17
- maximumAttempts: 5,
18
- maximumInterval: 100 * 30 * 1000, //ms
19
- nonRetryableErrorTypes: [],
20
- },
21
- });
22
-
23
- const {
24
- transcribeMedia,
25
- convertPdfToStructuredText
26
- } = dslProxyActivities<typeof activities>("generateTextWorkflow", {
27
- startToCloseTimeout: "30 minute",
28
- retry: {
29
- initialInterval: '30s',
30
- backoffCoefficient: 2,
31
- maximumAttempts: 5,
32
- maximumInterval: 100 * 30 * 1000, //ms
33
- nonRetryableErrorTypes: [],
34
- },
35
- });
36
-
37
-
38
- export async function generateObjectText(payload: WorkflowExecutionPayload): Promise<TextExtractionResult> {
39
-
40
- const { objectIds } = payload;
41
- const objectId = objectIds[0];
42
-
43
- const object = await getObjectFromStore(payload, {});
44
-
45
- if (!object.content?.source) {
46
- throw new NoDocumentFound(`No source or mimetype found for object ${objectId}`, objectIds);
47
- }
48
- const mimetype = object.content.type;
49
- if (!mimetype) {
50
- throw new NoDocumentFound(`No mimetype found for object ${objectId}`, objectIds);
51
- }
52
-
53
- const converter = ConverterActivity.find(({ type }) => type.test(mimetype));
54
- if (!converter) {
55
- throw new NoDocumentFound(`No converter found for mimetype ${mimetype}`, objectIds);
56
- }
57
- log.info(`Converting file type ${mimetype} to text with ${converter.name}`);
58
-
59
- const res = await converter.activity(payload)(payload, converter.params);
60
-
61
- log.info("Generated text for object", { res, objectId });
62
- return res;
63
-
64
- }
65
-
66
-
67
- const ConverterActivity = [
68
- {
69
- type: /application\/pdf/,
70
- activity: (payload: WorkflowExecutionPayload) => {
71
- const useTextractForPDF = payload.vars?.useTextractForPdf ?? false;
72
- return useTextractForPDF ? convertPdfToStructuredText : extractDocumentText;
73
- },
74
- name: "ConvertPdfToStructuredText",
75
- params: {},
76
- },
77
- {
78
- type: /audio\/.+/,
79
- activity: () => transcribeMedia,
80
- name: "TranscribeMedia",
81
- params: {},
82
- },
83
- {
84
- type: /video\/.+/,
85
- activity: () => transcribeMedia,
86
- name: "TranscribeMedia",
87
- params: {},
88
- },
89
- {
90
- type: /.+/,
91
- activity: () => extractDocumentText,
92
- name: "extractText",
93
- params: {},
94
- }
95
- ]