@bubblelab/bubble-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275) hide show
  1. package/LICENSE.txt +202 -0
  2. package/dist/bubble-bundle.d.ts +2021 -0
  3. package/dist/bubble-factory.d.ts +161 -0
  4. package/dist/bubble-factory.d.ts.map +1 -0
  5. package/dist/bubble-factory.js +426 -0
  6. package/dist/bubble-factory.js.map +1 -0
  7. package/dist/bubble-flow/bubble-flow-class.d.ts +19 -0
  8. package/dist/bubble-flow/bubble-flow-class.d.ts.map +1 -0
  9. package/dist/bubble-flow/bubble-flow-class.js +23 -0
  10. package/dist/bubble-flow/bubble-flow-class.js.map +1 -0
  11. package/dist/bubble-flow/sample/data-analyst-flow.d.ts +15 -0
  12. package/dist/bubble-flow/sample/data-analyst-flow.d.ts.map +1 -0
  13. package/dist/bubble-flow/sample/data-analyst-flow.js +63 -0
  14. package/dist/bubble-flow/sample/data-analyst-flow.js.map +1 -0
  15. package/dist/bubble-flow/sample/error-ts.d.ts +23 -0
  16. package/dist/bubble-flow/sample/error-ts.d.ts.map +1 -0
  17. package/dist/bubble-flow/sample/error-ts.js +31 -0
  18. package/dist/bubble-flow/sample/error-ts.js.map +1 -0
  19. package/dist/bubble-flow/sample/sanitytest.d.ts +10 -0
  20. package/dist/bubble-flow/sample/sanitytest.d.ts.map +1 -0
  21. package/dist/bubble-flow/sample/sanitytest.js +13 -0
  22. package/dist/bubble-flow/sample/sanitytest.js.map +1 -0
  23. package/dist/bubble-flow/sample/simple-webhook-2.d.ts +19 -0
  24. package/dist/bubble-flow/sample/simple-webhook-2.d.ts.map +1 -0
  25. package/dist/bubble-flow/sample/simple-webhook-2.js +23 -0
  26. package/dist/bubble-flow/sample/simple-webhook-2.js.map +1 -0
  27. package/dist/bubble-flow/sample/simple-webhook.d.ts +10 -0
  28. package/dist/bubble-flow/sample/simple-webhook.d.ts.map +1 -0
  29. package/dist/bubble-flow/sample/simple-webhook.js +18 -0
  30. package/dist/bubble-flow/sample/simple-webhook.js.map +1 -0
  31. package/dist/bubble-flow/sample/simplified-data-analysis.flow.d.ts +29 -0
  32. package/dist/bubble-flow/sample/simplified-data-analysis.flow.d.ts.map +1 -0
  33. package/dist/bubble-flow/sample/simplified-data-analysis.flow.js +150 -0
  34. package/dist/bubble-flow/sample/simplified-data-analysis.flow.js.map +1 -0
  35. package/dist/bubble-flow/sample/slack-v0.1.d.ts +10 -0
  36. package/dist/bubble-flow/sample/slack-v0.1.d.ts.map +1 -0
  37. package/dist/bubble-flow/sample/slack-v0.1.js +59 -0
  38. package/dist/bubble-flow/sample/slack-v0.1.js.map +1 -0
  39. package/dist/bubble-flow/sample/slackagenttest.d.ts +10 -0
  40. package/dist/bubble-flow/sample/slackagenttest.d.ts.map +1 -0
  41. package/dist/bubble-flow/sample/slackagenttest.js +59 -0
  42. package/dist/bubble-flow/sample/slackagenttest.js.map +1 -0
  43. package/dist/bubble-trigger/index.d.ts +2 -0
  44. package/dist/bubble-trigger/index.d.ts.map +1 -0
  45. package/dist/bubble-trigger/index.js +2 -0
  46. package/dist/bubble-trigger/index.js.map +1 -0
  47. package/dist/bubble-trigger/types.d.ts +87 -0
  48. package/dist/bubble-trigger/types.d.ts.map +1 -0
  49. package/dist/bubble-trigger/types.js +14 -0
  50. package/dist/bubble-trigger/types.js.map +1 -0
  51. package/dist/bubbles/service-bubble/ai-agent.d.ts +428 -0
  52. package/dist/bubbles/service-bubble/ai-agent.d.ts.map +1 -0
  53. package/dist/bubbles/service-bubble/ai-agent.js +881 -0
  54. package/dist/bubbles/service-bubble/ai-agent.js.map +1 -0
  55. package/dist/bubbles/service-bubble/gmail.d.ts +3073 -0
  56. package/dist/bubbles/service-bubble/gmail.d.ts.map +1 -0
  57. package/dist/bubbles/service-bubble/gmail.js +908 -0
  58. package/dist/bubbles/service-bubble/gmail.js.map +1 -0
  59. package/dist/bubbles/service-bubble/google-calendar.d.ts +3377 -0
  60. package/dist/bubbles/service-bubble/google-calendar.d.ts.map +1 -0
  61. package/dist/bubbles/service-bubble/google-calendar.js +527 -0
  62. package/dist/bubbles/service-bubble/google-calendar.js.map +1 -0
  63. package/dist/bubbles/service-bubble/google-drive.d.ts +1152 -0
  64. package/dist/bubbles/service-bubble/google-drive.d.ts.map +1 -0
  65. package/dist/bubbles/service-bubble/google-drive.js +943 -0
  66. package/dist/bubbles/service-bubble/google-drive.js.map +1 -0
  67. package/dist/bubbles/service-bubble/google-sheets.d.ts +1811 -0
  68. package/dist/bubbles/service-bubble/google-sheets.d.ts.map +1 -0
  69. package/dist/bubbles/service-bubble/google-sheets.js +904 -0
  70. package/dist/bubbles/service-bubble/google-sheets.js.map +1 -0
  71. package/dist/bubbles/service-bubble/hello-world.d.ts +74 -0
  72. package/dist/bubbles/service-bubble/hello-world.d.ts.map +1 -0
  73. package/dist/bubbles/service-bubble/hello-world.js +67 -0
  74. package/dist/bubbles/service-bubble/hello-world.js.map +1 -0
  75. package/dist/bubbles/service-bubble/http.d.ts +134 -0
  76. package/dist/bubbles/service-bubble/http.d.ts.map +1 -0
  77. package/dist/bubbles/service-bubble/http.js +184 -0
  78. package/dist/bubbles/service-bubble/http.js.map +1 -0
  79. package/dist/bubbles/service-bubble/postgresql.d.ts +180 -0
  80. package/dist/bubbles/service-bubble/postgresql.d.ts.map +1 -0
  81. package/dist/bubbles/service-bubble/postgresql.js +448 -0
  82. package/dist/bubbles/service-bubble/postgresql.js.map +1 -0
  83. package/dist/bubbles/service-bubble/resend.d.ts +301 -0
  84. package/dist/bubbles/service-bubble/resend.d.ts.map +1 -0
  85. package/dist/bubbles/service-bubble/resend.js +253 -0
  86. package/dist/bubbles/service-bubble/resend.js.map +1 -0
  87. package/dist/bubbles/service-bubble/slack.d.ts +5869 -0
  88. package/dist/bubbles/service-bubble/slack.d.ts.map +1 -0
  89. package/dist/bubbles/service-bubble/slack.js +1536 -0
  90. package/dist/bubbles/service-bubble/slack.js.map +1 -0
  91. package/dist/bubbles/service-bubble/storage.d.ts +571 -0
  92. package/dist/bubbles/service-bubble/storage.d.ts.map +1 -0
  93. package/dist/bubbles/service-bubble/storage.js +504 -0
  94. package/dist/bubbles/service-bubble/storage.js.map +1 -0
  95. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.d.ts +308 -0
  96. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.d.ts.map +1 -0
  97. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.js +285 -0
  98. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.js.map +1 -0
  99. package/dist/bubbles/tool-bubble/chart-js-tool.d.ts +416 -0
  100. package/dist/bubbles/tool-bubble/chart-js-tool.d.ts.map +1 -0
  101. package/dist/bubbles/tool-bubble/chart-js-tool.js +570 -0
  102. package/dist/bubbles/tool-bubble/chart-js-tool.js.map +1 -0
  103. package/dist/bubbles/tool-bubble/get-bubble-details-tool.d.ts +99 -0
  104. package/dist/bubbles/tool-bubble/get-bubble-details-tool.d.ts.map +1 -0
  105. package/dist/bubbles/tool-bubble/get-bubble-details-tool.js +645 -0
  106. package/dist/bubbles/tool-bubble/get-bubble-details-tool.js.map +1 -0
  107. package/dist/bubbles/tool-bubble/list-bubbles-tool.d.ts +112 -0
  108. package/dist/bubbles/tool-bubble/list-bubbles-tool.d.ts.map +1 -0
  109. package/dist/bubbles/tool-bubble/list-bubbles-tool.js +82 -0
  110. package/dist/bubbles/tool-bubble/list-bubbles-tool.js.map +1 -0
  111. package/dist/bubbles/tool-bubble/reddit-scrape-tool.d.ts +413 -0
  112. package/dist/bubbles/tool-bubble/reddit-scrape-tool.d.ts.map +1 -0
  113. package/dist/bubbles/tool-bubble/reddit-scrape-tool.js +327 -0
  114. package/dist/bubbles/tool-bubble/reddit-scrape-tool.js.map +1 -0
  115. package/dist/bubbles/tool-bubble/research-agent-tool.d.ts +122 -0
  116. package/dist/bubbles/tool-bubble/research-agent-tool.d.ts.map +1 -0
  117. package/dist/bubbles/tool-bubble/research-agent-tool.js +343 -0
  118. package/dist/bubbles/tool-bubble/research-agent-tool.js.map +1 -0
  119. package/dist/bubbles/tool-bubble/sql-query-tool.d.ts +131 -0
  120. package/dist/bubbles/tool-bubble/sql-query-tool.d.ts.map +1 -0
  121. package/dist/bubbles/tool-bubble/sql-query-tool.js +147 -0
  122. package/dist/bubbles/tool-bubble/sql-query-tool.js.map +1 -0
  123. package/dist/bubbles/tool-bubble/tool-template.d.ts +257 -0
  124. package/dist/bubbles/tool-bubble/tool-template.d.ts.map +1 -0
  125. package/dist/bubbles/tool-bubble/tool-template.js +238 -0
  126. package/dist/bubbles/tool-bubble/tool-template.js.map +1 -0
  127. package/dist/bubbles/tool-bubble/virtual-file-editor-example.d.ts +8 -0
  128. package/dist/bubbles/tool-bubble/virtual-file-editor-example.d.ts.map +1 -0
  129. package/dist/bubbles/tool-bubble/virtual-file-editor-example.js +65 -0
  130. package/dist/bubbles/tool-bubble/virtual-file-editor-example.js.map +1 -0
  131. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.d.ts +125 -0
  132. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.d.ts.map +1 -0
  133. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.js +169 -0
  134. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.js.map +1 -0
  135. package/dist/bubbles/tool-bubble/web-crawl-tool.d.ts +218 -0
  136. package/dist/bubbles/tool-bubble/web-crawl-tool.d.ts.map +1 -0
  137. package/dist/bubbles/tool-bubble/web-crawl-tool.js +255 -0
  138. package/dist/bubbles/tool-bubble/web-crawl-tool.js.map +1 -0
  139. package/dist/bubbles/tool-bubble/web-extract-tool.d.ts +134 -0
  140. package/dist/bubbles/tool-bubble/web-extract-tool.d.ts.map +1 -0
  141. package/dist/bubbles/tool-bubble/web-extract-tool.js +175 -0
  142. package/dist/bubbles/tool-bubble/web-extract-tool.js.map +1 -0
  143. package/dist/bubbles/tool-bubble/web-scrape-tool.d.ts +228 -0
  144. package/dist/bubbles/tool-bubble/web-scrape-tool.d.ts.map +1 -0
  145. package/dist/bubbles/tool-bubble/web-scrape-tool.js +214 -0
  146. package/dist/bubbles/tool-bubble/web-scrape-tool.js.map +1 -0
  147. package/dist/bubbles/tool-bubble/web-search-tool.d.ts +134 -0
  148. package/dist/bubbles/tool-bubble/web-search-tool.d.ts.map +1 -0
  149. package/dist/bubbles/tool-bubble/web-search-tool.js +155 -0
  150. package/dist/bubbles/tool-bubble/web-search-tool.js.map +1 -0
  151. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.d.ts +114 -0
  152. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.d.ts.map +1 -0
  153. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.js +777 -0
  154. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.js.map +1 -0
  155. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.d.ts +97 -0
  156. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.d.ts.map +1 -0
  157. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.js +327 -0
  158. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.js.map +1 -0
  159. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.d.ts +303 -0
  160. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.d.ts.map +1 -0
  161. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.js +297 -0
  162. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.js.map +1 -0
  163. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.d.ts +157 -0
  164. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.d.ts.map +1 -0
  165. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.js +310 -0
  166. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.js.map +1 -0
  167. package/dist/bubbles/workflow-bubble/generate-document.workflow.d.ts +543 -0
  168. package/dist/bubbles/workflow-bubble/generate-document.workflow.d.ts.map +1 -0
  169. package/dist/bubbles/workflow-bubble/generate-document.workflow.js +628 -0
  170. package/dist/bubbles/workflow-bubble/generate-document.workflow.js.map +1 -0
  171. package/dist/bubbles/workflow-bubble/parse-document.workflow.d.ts +679 -0
  172. package/dist/bubbles/workflow-bubble/parse-document.workflow.d.ts.map +1 -0
  173. package/dist/bubbles/workflow-bubble/parse-document.workflow.js +604 -0
  174. package/dist/bubbles/workflow-bubble/parse-document.workflow.js.map +1 -0
  175. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.d.ts +1011 -0
  176. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.d.ts.map +1 -0
  177. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.js +841 -0
  178. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.js.map +1 -0
  179. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.d.ts +883 -0
  180. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.d.ts.map +1 -0
  181. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.js +781 -0
  182. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.js.map +1 -0
  183. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.d.ts +300 -0
  184. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.d.ts.map +1 -0
  185. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.js +508 -0
  186. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.js.map +1 -0
  187. package/dist/bubbles/workflow-bubble/slack-formatter-agent.d.ts +731 -0
  188. package/dist/bubbles/workflow-bubble/slack-formatter-agent.d.ts.map +1 -0
  189. package/dist/bubbles/workflow-bubble/slack-formatter-agent.js +690 -0
  190. package/dist/bubbles/workflow-bubble/slack-formatter-agent.js.map +1 -0
  191. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.d.ts +401 -0
  192. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.d.ts.map +1 -0
  193. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.js +382 -0
  194. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.js.map +1 -0
  195. package/dist/bubbles/workflow-bubble/workflow-template.d.ts +144 -0
  196. package/dist/bubbles/workflow-bubble/workflow-template.d.ts.map +1 -0
  197. package/dist/bubbles/workflow-bubble/workflow-template.js +124 -0
  198. package/dist/bubbles/workflow-bubble/workflow-template.js.map +1 -0
  199. package/dist/index.d.ts +46 -0
  200. package/dist/index.d.ts.map +1 -0
  201. package/dist/index.js +53 -0
  202. package/dist/index.js.map +1 -0
  203. package/dist/logging/BubbleLogger.d.ts +146 -0
  204. package/dist/logging/BubbleLogger.d.ts.map +1 -0
  205. package/dist/logging/BubbleLogger.js +472 -0
  206. package/dist/logging/BubbleLogger.js.map +1 -0
  207. package/dist/logging/StreamingBubbleLogger.d.ts +85 -0
  208. package/dist/logging/StreamingBubbleLogger.d.ts.map +1 -0
  209. package/dist/logging/StreamingBubbleLogger.js +340 -0
  210. package/dist/logging/StreamingBubbleLogger.js.map +1 -0
  211. package/dist/types/ai-models.d.ts +4 -0
  212. package/dist/types/ai-models.d.ts.map +1 -0
  213. package/dist/types/ai-models.js +14 -0
  214. package/dist/types/ai-models.js.map +1 -0
  215. package/dist/types/available-tools.d.ts +4 -0
  216. package/dist/types/available-tools.d.ts.map +1 -0
  217. package/dist/types/available-tools.js +19 -0
  218. package/dist/types/available-tools.js.map +1 -0
  219. package/dist/types/base-bubble-class.d.ts +47 -0
  220. package/dist/types/base-bubble-class.d.ts.map +1 -0
  221. package/dist/types/base-bubble-class.js +212 -0
  222. package/dist/types/base-bubble-class.js.map +1 -0
  223. package/dist/types/bubble-errors.d.ts +44 -0
  224. package/dist/types/bubble-errors.d.ts.map +1 -0
  225. package/dist/types/bubble-errors.js +51 -0
  226. package/dist/types/bubble-errors.js.map +1 -0
  227. package/dist/types/bubble.d.ts +73 -0
  228. package/dist/types/bubble.d.ts.map +1 -0
  229. package/dist/types/bubble.js +2 -0
  230. package/dist/types/bubble.js.map +1 -0
  231. package/dist/types/credentials.d.ts +6 -0
  232. package/dist/types/credentials.d.ts.map +1 -0
  233. package/dist/types/credentials.js +6 -0
  234. package/dist/types/credentials.js.map +1 -0
  235. package/dist/types/service-bubble-class.d.ts +31 -0
  236. package/dist/types/service-bubble-class.d.ts.map +1 -0
  237. package/dist/types/service-bubble-class.js +36 -0
  238. package/dist/types/service-bubble-class.js.map +1 -0
  239. package/dist/types/streaming-events.d.ts +18 -0
  240. package/dist/types/streaming-events.d.ts.map +1 -0
  241. package/dist/types/streaming-events.js +5 -0
  242. package/dist/types/streaming-events.js.map +1 -0
  243. package/dist/types/tool-bubble-class.d.ts +19 -0
  244. package/dist/types/tool-bubble-class.d.ts.map +1 -0
  245. package/dist/types/tool-bubble-class.js +48 -0
  246. package/dist/types/tool-bubble-class.js.map +1 -0
  247. package/dist/types/workflow-bubble-class.d.ts +25 -0
  248. package/dist/types/workflow-bubble-class.d.ts.map +1 -0
  249. package/dist/types/workflow-bubble-class.js +30 -0
  250. package/dist/types/workflow-bubble-class.js.map +1 -0
  251. package/dist/utils/bubbleflow-parser.d.ts +32 -0
  252. package/dist/utils/bubbleflow-parser.d.ts.map +1 -0
  253. package/dist/utils/bubbleflow-parser.js +332 -0
  254. package/dist/utils/bubbleflow-parser.js.map +1 -0
  255. package/dist/utils/bubbleflow-validation.d.ts +9 -0
  256. package/dist/utils/bubbleflow-validation.d.ts.map +1 -0
  257. package/dist/utils/bubbleflow-validation.js +116 -0
  258. package/dist/utils/bubbleflow-validation.js.map +1 -0
  259. package/dist/utils/json-parsing.d.ts +20 -0
  260. package/dist/utils/json-parsing.d.ts.map +1 -0
  261. package/dist/utils/json-parsing.js +394 -0
  262. package/dist/utils/json-parsing.js.map +1 -0
  263. package/dist/utils/mock-data-generator.d.ts +43 -0
  264. package/dist/utils/mock-data-generator.d.ts.map +1 -0
  265. package/dist/utils/mock-data-generator.js +312 -0
  266. package/dist/utils/mock-data-generator.js.map +1 -0
  267. package/dist/utils/param-helper.d.ts +2 -0
  268. package/dist/utils/param-helper.d.ts.map +1 -0
  269. package/dist/utils/param-helper.js +5 -0
  270. package/dist/utils/param-helper.js.map +1 -0
  271. package/dist/utils/source-bubble-parser.d.ts +31 -0
  272. package/dist/utils/source-bubble-parser.d.ts.map +1 -0
  273. package/dist/utils/source-bubble-parser.js +231 -0
  274. package/dist/utils/source-bubble-parser.js.map +1 -0
  275. package/package.json +63 -0
@@ -0,0 +1,841 @@
1
+ /**
2
+ * PDF FORM OPERATIONS WORKFLOW
3
+ *
4
+ * A unified workflow for PDF form operations including field discovery,
5
+ * form filling, checkbox analysis, and form validation using PyMuPDF via Python scripts.
6
+ *
7
+ * Provides multiple operations similar to Resend email service:
8
+ * - discover: Extract all form fields with metadata
9
+ * - fill: Fill form fields with provided values
10
+ * - analyze-checkboxes: Get checkbox states and possible values
11
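+ * - validate: Verify form field values and completion
+ * - convert-to-images: Convert PDF pages to PNG/JPEG images
+ * - convert-to-markdown: Convert PDF to markdown using AI analysis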
12
+ */
13
+ import { z } from 'zod';
14
+ import { WorkflowBubble } from '../../types/workflow-bubble-class.js';
15
+ import { CredentialType } from '@bubblelab/shared-schemas';
16
+ import { spawn } from 'child_process';
17
+ import path from 'path';
18
+ import { writeFileSync, unlinkSync, mkdtempSync } from 'fs';
19
+ import { tmpdir } from 'os';
20
+ import { AIAgentBubble } from '../service-bubble/ai-agent.js';
21
+ /**
22
+ * Operation-specific parameter schemas
23
+ */
24
+ const DiscoverOperationSchema = z.object({
25
+ operation: z.literal('discover'),
26
+ pdfData: z.string().describe('Base64 encoded PDF data'),
27
+ targetPage: z
28
+ .number()
29
+ .optional()
30
+ .describe('Extract fields from specific page only (default: all pages)'),
31
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
32
+ });
33
+ const FillOperationSchema = z.object({
34
+ operation: z.literal('fill'),
35
+ pdfData: z.string().describe('Base64 encoded PDF data'),
36
+ fieldValues: z
37
+ .record(z.string(), z.string())
38
+ .describe('Field name to value mapping'),
39
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
40
+ });
41
+ const AnalyzeCheckboxesOperationSchema = z.object({
42
+ operation: z.literal('analyze-checkboxes'),
43
+ pdfData: z.string().describe('Base64 encoded PDF data'),
44
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
45
+ });
46
+ const ValidateOperationSchema = z.object({
47
+ operation: z.literal('validate'),
48
+ pdfData: z.string().describe('Base64 encoded PDF data'),
49
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
50
+ });
51
+ const ConvertToImagesOperationSchema = z.object({
52
+ operation: z.literal('convert-to-images'),
53
+ pdfData: z.string().describe('Base64 encoded PDF data'),
54
+ format: z
55
+ .enum(['png', 'jpeg'])
56
+ .default('png')
57
+ .describe('Output image format'),
58
+ quality: z
59
+ .number()
60
+ .min(0.1)
61
+ .max(1.0)
62
+ .default(0.8)
63
+ .describe('JPEG quality (0.1-1.0, only for JPEG format)'),
64
+ dpi: z
65
+ .number()
66
+ .min(72)
67
+ .max(300)
68
+ .default(150)
69
+ .describe('Output DPI (dots per inch)'),
70
+ pages: z
71
+ .array(z.number())
72
+ .optional()
73
+ .describe('Specific page numbers to convert (1-indexed). If not provided, converts all pages'),
74
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
75
+ });
76
+ const ConvertToMarkdownOperationSchema = z.object({
77
+ operation: z.literal('convert-to-markdown'),
78
+ pdfData: z.string().describe('Base64 encoded PDF data'),
79
+ pages: z
80
+ .array(z.number())
81
+ .optional()
82
+ .describe('Specific page numbers to convert (1-indexed). If not provided, converts all pages'),
83
+ includeFormFields: z
84
+ .boolean()
85
+ .default(true)
86
+ .describe('Whether to include form field information in the markdown'),
87
+ credentials: z.record(z.nativeEnum(CredentialType), z.string()).optional(),
88
+ });
89
+ /**
90
+ * Combined parameters schema using discriminated union
91
+ */
92
+ const PDFFormOperationsParamsSchema = z.discriminatedUnion('operation', [
93
+ DiscoverOperationSchema,
94
+ FillOperationSchema,
95
+ AnalyzeCheckboxesOperationSchema,
96
+ ValidateOperationSchema,
97
+ ConvertToImagesOperationSchema,
98
+ ConvertToMarkdownOperationSchema,
99
+ ]);
100
+ /**
101
+ * Operation-specific result schemas
102
+ */
103
+ const DiscoverResultSchema = z.object({
104
+ operation: z.literal('discover'),
105
+ fields: z.array(z.object({
106
+ id: z.number(),
107
+ page: z.number(),
108
+ name: z.string(),
109
+ type: z.string(),
110
+ field_type: z.string(),
111
+ current_value: z.string(),
112
+ choices: z.array(z.string()),
113
+ rect: z.tuple([z.number(), z.number(), z.number(), z.number()]),
114
+ x: z.number(),
115
+ y: z.number(),
116
+ width: z.number(),
117
+ height: z.number(),
118
+ field_flags: z.number(),
119
+ label: z.string(),
120
+ potential_labels: z.array(z.string()),
121
+ })),
122
+ totalFields: z.number(),
123
+ success: z.boolean(),
124
+ error: z.string(),
125
+ });
126
+ const FillResultSchema = z.object({
127
+ operation: z.literal('fill'),
128
+ filledPdfData: z.string().describe('Base64 encoded filled PDF data'),
129
+ filledFields: z.number(),
130
+ verification: z.record(z.string(), z.object({
131
+ value: z.string(),
132
+ type: z.string(),
133
+ page: z.number(),
134
+ })),
135
+ success: z.boolean(),
136
+ error: z.string(),
137
+ });
138
+ const AnalyzeCheckboxesResultSchema = z.object({
139
+ operation: z.literal('analyze-checkboxes'),
140
+ checkboxes: z.record(z.string(), z.object({
141
+ page: z.number(),
142
+ current_value: z.string(),
143
+ possible_values: z.array(z.string()),
144
+ field_flags: z.number(),
145
+ })),
146
+ totalCheckboxes: z.number(),
147
+ success: z.boolean(),
148
+ error: z.string(),
149
+ });
150
+ const ValidateResultSchema = z.object({
151
+ operation: z.literal('validate'),
152
+ fields: z.record(z.string(), z.object({
153
+ value: z.string(),
154
+ type: z.string(),
155
+ page: z.number(),
156
+ })),
157
+ totalFields: z.number(),
158
+ filledFields: z.number(),
159
+ emptyFields: z.number(),
160
+ success: z.boolean(),
161
+ error: z.string(),
162
+ });
163
+ const ConvertToImagesResultSchema = z.object({
164
+ operation: z.literal('convert-to-images'),
165
+ images: z.array(z.object({
166
+ pageNumber: z.number(),
167
+ imageData: z.string().describe('Base64 encoded image data'),
168
+ format: z.string(),
169
+ width: z.number(),
170
+ height: z.number(),
171
+ })),
172
+ totalPages: z.number(),
173
+ convertedPages: z.number(),
174
+ success: z.boolean(),
175
+ error: z.string(),
176
+ });
177
+ const ConvertToMarkdownResultSchema = z.object({
178
+ operation: z.literal('convert-to-markdown'),
179
+ markdown: z.string().describe('Markdown representation of the PDF content'),
180
+ pages: z.array(z.object({
181
+ pageNumber: z.number(),
182
+ markdown: z.string().describe('Markdown content for this page'),
183
+ formFields: z
184
+ .array(z.object({
185
+ id: z.number(),
186
+ name: z.string(),
187
+ type: z.string(),
188
+ value: z.string(),
189
+ x: z.number(),
190
+ y: z.number(),
191
+ }))
192
+ .optional(),
193
+ })),
194
+ totalPages: z.number(),
195
+ convertedPages: z.number(),
196
+ success: z.boolean(),
197
+ error: z.string(),
198
+ });
199
+ /**
200
+ * Combined result schema using discriminated union
201
+ */
202
+ const PDFFormOperationsResultSchema = z.discriminatedUnion('operation', [
203
+ DiscoverResultSchema,
204
+ FillResultSchema,
205
+ AnalyzeCheckboxesResultSchema,
206
+ ValidateResultSchema,
207
+ ConvertToImagesResultSchema,
208
+ ConvertToMarkdownResultSchema,
209
+ ]);
210
+ /**
211
+ * PDF Form Operations Workflow
212
+ * Provides unified interface for PDF form operations using PyMuPDF via Python scripts
213
+ */
214
+ export class PDFFormOperationsWorkflow extends WorkflowBubble {
215
+ static type = 'workflow';
216
+ static bubbleName = 'pdf-form-operations';
217
+ static schema = PDFFormOperationsParamsSchema;
218
+ static resultSchema = PDFFormOperationsResultSchema;
219
+ static shortDescription = 'PDF form field operations (discover, fill, analyze, validate, convert-to-images, convert-to-markdown)';
220
+ static longDescription = `
221
+ Unified PDF form operations workflow providing comprehensive form field manipulation.
222
+
223
+ Operations:
224
+ - discover: Extract all form fields with coordinates and metadata
225
+ - fill: Fill form fields with provided values and return filled PDF
226
+ - analyze-checkboxes: Analyze checkbox fields and their possible values
227
+ - validate: Verify form field values and completion status
228
+ - convert-to-images: Convert PDF pages to PNG/JPEG images with customizable quality and DPI
229
+ - convert-to-markdown: Convert PDF to markdown format using AI analysis of visual content
230
+
231
+ Uses PyMuPDF (fitz) library via Python scripts for all PDF operations.
232
+
233
+ Input: Base64 encoded PDF data
234
+ Output: Operation-specific results with success/error handling
235
+ `;
236
+ static alias = 'pdf-forms';
237
+ constructor(params = {
238
+ operation: 'discover',
239
+ pdfData: '',
240
+ }, context) {
241
+ super(params, context);
242
+ }
243
+ /**
244
+ * Execute a Python script with the given arguments (stdin input)
245
+ */
246
+ async executePythonScript(scriptName, args = [], stdinData) {
247
+ const scriptsDir = path.join(__dirname, '..', '..', '..', 'scripts');
248
+ const scriptPath = path.join(scriptsDir, scriptName);
249
+ const pythonPath = process.env.PYTHON_PATH || 'python3';
250
+ return new Promise((resolve, reject) => {
251
+ const child = spawn(pythonPath, [scriptPath, ...args], {
252
+ stdio: ['pipe', 'pipe', 'pipe'],
253
+ cwd: scriptsDir,
254
+ });
255
+ let stdout = '';
256
+ let stderr = '';
257
+ const stdoutChunks = [];
258
+ child.stdout.on('data', (data) => {
259
+ stdout += data.toString();
260
+ stdoutChunks.push(data);
261
+ });
262
+ child.stderr.on('data', (data) => {
263
+ stderr += data.toString();
264
+ });
265
+ child.on('close', (code) => {
266
+ if (code === 0) {
267
+ const stdoutBuffer = Buffer.concat(stdoutChunks);
268
+ resolve({ stdout, stderr, stdoutBuffer });
269
+ }
270
+ else {
271
+ reject(new Error(`Script ${scriptName} exited with code ${code}: ${stderr}`));
272
+ }
273
+ });
274
+ child.on('error', (error) => {
275
+ reject(new Error(`Failed to spawn script ${scriptName}: ${error.message}`));
276
+ });
277
+ // Send stdin data if provided
278
+ if (stdinData) {
279
+ child.stdin.write(stdinData);
280
+ }
281
+ child.stdin.end();
282
+ });
283
+ }
284
+ /**
285
+ * Execute a Python script with file input (for optimized memory usage)
286
+ */
287
+ async executePythonFileScript(scriptName, args = [], inputFilePath) {
288
+ const scriptsDir = path.join(__dirname, '..', '..', '..', 'scripts');
289
+ const scriptPath = path.join(scriptsDir, scriptName);
290
+ const pythonPath = process.env.PYTHON_PATH || 'python3';
291
+ const { createReadStream } = await import('fs');
292
+ return new Promise((resolve, reject) => {
293
+ const child = spawn(pythonPath, [scriptPath, ...args], {
294
+ stdio: ['pipe', 'pipe', 'pipe'],
295
+ cwd: scriptsDir,
296
+ });
297
+ let stdout = '';
298
+ let stderr = '';
299
+ const stdoutChunks = [];
300
+ child.stdout.on('data', (data) => {
301
+ stdout += data.toString();
302
+ stdoutChunks.push(data);
303
+ });
304
+ child.stderr.on('data', (data) => {
305
+ stderr += data.toString();
306
+ });
307
+ child.on('close', (code) => {
308
+ if (code === 0) {
309
+ const stdoutBuffer = Buffer.concat(stdoutChunks);
310
+ resolve({ stdout, stderr, stdoutBuffer });
311
+ }
312
+ else {
313
+ reject(new Error(`Script ${scriptName} exited with code ${code}: ${stderr}`));
314
+ }
315
+ });
316
+ child.on('error', (error) => {
317
+ reject(new Error(`Failed to spawn script ${scriptName}: ${error.message}`));
318
+ });
319
+ // For file-based scripts, we read the file and pipe it to stdin
320
+ const fileStream = createReadStream(inputFilePath);
321
+ fileStream.on('error', (error) => {
322
+ reject(new Error(`Failed to read input file: ${error.message}`));
323
+ });
324
+ fileStream.pipe(child.stdin);
325
+ fileStream.on('end', () => {
326
+ child.stdin.end();
327
+ });
328
+ });
329
+ }
330
+ async performAction(context) {
331
+ void context;
332
+ console.log('[PDFFormOperations] Starting operation:', this.params.operation);
333
+ try {
334
+ // Decode PDF data
335
+ const pdfBuffer = Buffer.from(this.params.pdfData, 'base64');
336
+ // TypeScript can't narrow the generic T inside the switch, so we need to help it
337
+ const result = await (async () => {
338
+ switch (this.params.operation) {
339
+ case 'discover':
340
+ return await this.discoverFields(pdfBuffer, this.params.targetPage);
341
+ case 'fill':
342
+ return await this.fillFields(pdfBuffer, this.params.fieldValues);
343
+ case 'analyze-checkboxes':
344
+ return await this.analyzeCheckboxes(pdfBuffer);
345
+ case 'validate':
346
+ return await this.validateFields(pdfBuffer);
347
+ case 'convert-to-images':
348
+ return await this.convertToImages(pdfBuffer, this.params.pages);
349
+ case 'convert-to-markdown':
350
+ return await this.convertToMarkdown(pdfBuffer, this.params.pages, this.params.includeFormFields);
351
+ default:
352
+ // TypeScript should prevent this, but adding for safety
353
+ throw new Error(`Unknown operation: ${this.params.operation}`);
354
+ }
355
+ })();
356
+ // The result is guaranteed to match T['operation'] because of the discriminated union
357
+ return result;
358
+ }
359
+ catch (error) {
360
+ console.error('[PDFFormOperations] Error during operation:', error);
361
+ // Return error result with proper operation type
362
+ const baseError = {
363
+ success: false,
364
+ error: error instanceof Error
365
+ ? error.message
366
+ : 'Unknown error during PDF operation',
367
+ };
368
+ // Return type-safe error result based on operation
369
+ const errorResult = (() => {
370
+ switch (this.params.operation) {
371
+ case 'discover':
372
+ return {
373
+ ...baseError,
374
+ operation: 'discover',
375
+ fields: [],
376
+ totalFields: 0,
377
+ };
378
+ case 'fill':
379
+ return {
380
+ ...baseError,
381
+ operation: 'fill',
382
+ filledPdfData: '',
383
+ filledFields: 0,
384
+ verification: {},
385
+ };
386
+ case 'analyze-checkboxes':
387
+ return {
388
+ ...baseError,
389
+ operation: 'analyze-checkboxes',
390
+ checkboxes: {},
391
+ totalCheckboxes: 0,
392
+ };
393
+ case 'validate':
394
+ return {
395
+ ...baseError,
396
+ operation: 'validate',
397
+ fields: {},
398
+ totalFields: 0,
399
+ filledFields: 0,
400
+ emptyFields: 0,
401
+ };
402
+ case 'convert-to-images':
403
+ return {
404
+ ...baseError,
405
+ operation: 'convert-to-images',
406
+ images: [],
407
+ totalPages: 0,
408
+ convertedPages: 0,
409
+ };
410
+ case 'convert-to-markdown':
411
+ return {
412
+ ...baseError,
413
+ operation: 'convert-to-markdown',
414
+ markdown: '',
415
+ pages: [],
416
+ totalPages: 0,
417
+ convertedPages: 0,
418
+ };
419
+ default:
420
+ throw error; // Should never reach here due to TypeScript
421
+ }
422
+ })();
423
+ return errorResult;
424
+ }
425
+ }
426
+ /**
427
+ * Discover all form fields in the PDF
428
+ */
429
+ async discoverFields(pdfBuffer, targetPage) {
430
+ console.log('[PDFFormOperations] Discovering fields...');
431
+ try {
432
+ const args = targetPage !== undefined ? ['--page', targetPage.toString()] : [];
433
+ const result = await this.executePythonScript('discover_pdf_fields.py', args, pdfBuffer);
434
+ const fields = JSON.parse(result.stdout);
435
+ console.log('[PDFFormOperations] Discovered', fields.length, 'fields');
436
+ return {
437
+ operation: 'discover',
438
+ fields,
439
+ totalFields: fields.length,
440
+ success: true,
441
+ error: '',
442
+ };
443
+ }
444
+ catch (error) {
445
+ console.error('[PDFFormOperations] Error discovering fields:', error);
446
+ return {
447
+ operation: 'discover',
448
+ fields: [],
449
+ totalFields: 0,
450
+ success: false,
451
+ error: error instanceof Error
452
+ ? error.message
453
+ : 'Failed to discover PDF fields',
454
+ };
455
+ }
456
+ }
457
+ /**
458
+ * Fill form fields with provided values
459
+ */
460
+ async fillFields(pdfBuffer, fieldValues) {
461
+ console.log('[PDFFormOperations] Filling fields...');
462
+ console.log('[PDFFormOperations] Field values to set:', Object.keys(fieldValues));
463
+ try {
464
+ const args = [JSON.stringify(fieldValues)];
465
+ const result = await this.executePythonScript('fill_form_fields_fitz_v2.py', args, pdfBuffer);
466
+ // The Python script returns the filled PDF binary data via stdout.buffer
467
+ const filledPdfData = result.stdoutBuffer.toString('base64');
468
+ // Get verification data by calling the discover script on the filled PDF
469
+ const verifyResult = await this.executePythonScript('discover_pdf_fields.py', [], result.stdoutBuffer);
470
+ const fields = JSON.parse(verifyResult.stdout);
471
+ // Build verification object
472
+ const verification = {};
473
+ let filledCount = 0;
474
+ for (const field of fields) {
475
+ if (field.name in fieldValues) {
476
+ verification[field.name] = {
477
+ value: field.current_value,
478
+ type: field.field_type,
479
+ page: field.page,
480
+ };
481
+ if (field.current_value) {
482
+ filledCount++;
483
+ }
484
+ }
485
+ }
486
+ console.log('[PDFFormOperations] Successfully filled', filledCount, 'fields');
487
+ return {
488
+ operation: 'fill',
489
+ filledPdfData,
490
+ filledFields: filledCount,
491
+ verification,
492
+ success: true,
493
+ error: '',
494
+ };
495
+ }
496
+ catch (error) {
497
+ console.error('[PDFFormOperations] Error filling fields:', error);
498
+ return {
499
+ operation: 'fill',
500
+ filledPdfData: '',
501
+ filledFields: 0,
502
+ verification: {},
503
+ success: false,
504
+ error: error instanceof Error ? error.message : 'Failed to fill PDF fields',
505
+ };
506
+ }
507
+ }
508
+ /**
509
+ * Analyze checkbox fields and their possible values
510
+ */
511
+ async analyzeCheckboxes(pdfBuffer) {
512
+ console.log('[PDFFormOperations] Analyzing checkboxes...');
513
+ try {
514
+ const result = await this.executePythonScript('get_checkbox_values.py', [], pdfBuffer);
515
+ const checkboxes = JSON.parse(result.stdout);
516
+ console.log('[PDFFormOperations] Found', Object.keys(checkboxes).length, 'checkboxes');
517
+ return {
518
+ operation: 'analyze-checkboxes',
519
+ checkboxes,
520
+ totalCheckboxes: Object.keys(checkboxes).length,
521
+ success: true,
522
+ error: '',
523
+ };
524
+ }
525
+ catch (error) {
526
+ console.error('[PDFFormOperations] Error analyzing checkboxes:', error);
527
+ return {
528
+ operation: 'analyze-checkboxes',
529
+ checkboxes: {},
530
+ totalCheckboxes: 0,
531
+ success: false,
532
+ error: error instanceof Error
533
+ ? error.message
534
+ : 'Failed to analyze PDF checkboxes',
535
+ };
536
+ }
537
+ }
538
+ /**
539
+ * Validate form fields and their values
540
+ */
541
+ async validateFields(pdfBuffer) {
542
+ console.log('[PDFFormOperations] Validating fields...');
543
+ try {
544
+ const result = await this.executePythonScript('discover_pdf_fields.py', [], pdfBuffer);
545
+ const fields = JSON.parse(result.stdout);
546
+ const validationResults = {};
547
+ let filledFields = 0;
548
+ for (const field of fields) {
549
+ const hasValue = field.current_value && field.current_value.trim().length > 0;
550
+ // For checkboxes, consider 'Yes' as filled, 'Off' as empty
551
+ const isFilledCheckbox = field.field_type === 'CheckBox' && field.current_value !== 'Off';
552
+ if (hasValue || isFilledCheckbox) {
553
+ filledFields++;
554
+ }
555
+ validationResults[field.name] = {
556
+ value: field.current_value,
557
+ type: field.field_type,
558
+ page: field.page,
559
+ };
560
+ }
561
+ const totalFields = Object.keys(validationResults).length;
562
+ const emptyFields = totalFields - filledFields;
563
+ console.log('[PDFFormOperations] Validation complete:', {
564
+ total: totalFields,
565
+ filled: filledFields,
566
+ empty: emptyFields,
567
+ });
568
+ return {
569
+ operation: 'validate',
570
+ fields: validationResults,
571
+ totalFields,
572
+ filledFields,
573
+ emptyFields,
574
+ success: true,
575
+ error: '',
576
+ };
577
+ }
578
+ catch (error) {
579
+ console.error('[PDFFormOperations] Error validating fields:', error);
580
+ return {
581
+ operation: 'validate',
582
+ fields: {},
583
+ totalFields: 0,
584
+ filledFields: 0,
585
+ emptyFields: 0,
586
+ success: false,
587
+ error: error instanceof Error
588
+ ? error.message
589
+ : 'Failed to validate PDF fields',
590
+ };
591
+ }
592
+ }
593
+ /**
594
+ * Convert PDF pages to images
595
+ */
596
+ async convertToImages(pdfBuffer, pages) {
597
+ console.log('[PDFFormOperations] Converting PDF to images...');
598
+ console.log('[PDFFormOperations] Using optimized memory-efficient conversion');
599
+ let tempDir = null;
600
+ let tempPdfPath = null;
601
+ try {
602
+ // Create temporary directory and save PDF file
603
+ tempDir = mkdtempSync(path.join(tmpdir(), 'pdf-conversion-'));
604
+ tempPdfPath = path.join(tempDir, 'input.pdf');
605
+ writeFileSync(tempPdfPath, pdfBuffer);
606
+ console.log('[PDFFormOperations] Saved PDF to temp file:', tempPdfPath);
607
+ // Prepare arguments for the optimized script
608
+ const args = [];
609
+ // Add specific pages if provided (convert to JSON array format)
610
+ if (pages && pages.length > 0) {
611
+ args.push(JSON.stringify(pages));
612
+ }
613
+ // Use the memory-optimized script
614
+ const result = await this.executePythonFileScript('pdf_to_markdown_images.py', args, tempPdfPath);
615
+ console.log('[PDFFormOperations] Python stderr:', result.stderr);
616
+ console.log('[PDFFormOperations] Python stdout length:', result.stdout.length);
617
+ console.log('[PDFFormOperations] Python stdout preview:', result.stdout.substring(0, 200));
618
+ // Parse and immediately transform to avoid storing duplicate data
619
+ const imageResults = JSON.parse(result.stdout);
620
+ // Transform to final format and clear original data
621
+ const images = [];
622
+ for (const img of imageResults) {
623
+ images.push({
624
+ pageNumber: img.page,
625
+ imageData: img.data,
626
+ format: img.format,
627
+ width: img.width,
628
+ height: img.height,
629
+ });
630
+ // Clear the original image data immediately after processing
631
+ img.data = undefined;
632
+ }
633
+ // Clear the original array and result stdout to free memory
634
+ imageResults.length = 0;
635
+ result.stdout = undefined;
636
+ result.stdoutBuffer =
637
+ undefined;
638
+ const convertedPages = images.length;
639
+ const totalPages = pages ? pages.length : convertedPages;
640
+ console.log('[PDFFormOperations] Successfully converted', convertedPages, 'pages using memory-optimized JPEG format');
641
+ return {
642
+ operation: 'convert-to-images',
643
+ images,
644
+ totalPages,
645
+ convertedPages,
646
+ success: true,
647
+ error: '',
648
+ };
649
+ }
650
+ catch (error) {
651
+ console.error('[PDFFormOperations] Error converting PDF to images:', error);
652
+ return {
653
+ operation: 'convert-to-images',
654
+ images: [],
655
+ totalPages: 0,
656
+ convertedPages: 0,
657
+ success: false,
658
+ error: error instanceof Error
659
+ ? error.message
660
+ : 'Failed to convert PDF to images',
661
+ };
662
+ }
663
+ finally {
664
+ // Cleanup temporary files
665
+ try {
666
+ if (tempPdfPath) {
667
+ unlinkSync(tempPdfPath);
668
+ }
669
+ if (tempDir) {
670
+ // Remove the temp directory (should be empty now)
671
+ const { rmSync } = await import('fs');
672
+ rmSync(tempDir, { recursive: true, force: true });
673
+ }
674
+ }
675
+ catch (cleanupError) {
676
+ console.warn('[PDFFormOperations] Failed to cleanup temp files:', cleanupError);
677
+ }
678
+ }
679
+ }
680
+ /**
681
+ * Convert PDF to markdown using AI analysis of visual content
682
+ */
683
+ async convertToMarkdown(pdfBuffer, pages, includeFormFields = true) {
684
+ console.log('[PDFFormOperations] Converting PDF to markdown...');
685
+ let tempDir = null;
686
+ let tempPdfPath = null;
687
+ try {
688
+ // Create temporary directory and save PDF file for better memory management
689
+ tempDir = mkdtempSync(path.join(tmpdir(), 'pdf-markdown-'));
690
+ tempPdfPath = path.join(tempDir, 'input.pdf');
691
+ writeFileSync(tempPdfPath, pdfBuffer);
692
+ console.log('[PDFFormOperations] Saved PDF to temp file for markdown conversion:', tempPdfPath);
693
+ // First, convert PDF pages to images for AI analysis
694
+ const args = pages ? [JSON.stringify(pages)] : [];
695
+ const imageResult = await this.executePythonFileScript('pdf_to_markdown_images.py', args, tempPdfPath);
696
+ const images = JSON.parse(imageResult.stdout);
697
+ console.log('[PDFFormOperations] Generated', images.length, 'images for AI analysis');
698
+ // Get form fields if requested
699
+ let formFieldsData = [];
700
+ if (includeFormFields) {
701
+ try {
702
+ const fieldsResult = await this.executePythonScript('discover_pdf_fields.py', [], pdfBuffer);
703
+ formFieldsData = JSON.parse(fieldsResult.stdout);
704
+ }
705
+ catch (error) {
706
+ console.warn('[PDFFormOperations] Could not extract form fields:', error);
707
+ }
708
+ }
709
+ // Process each page with AI
710
+ const pageResults = [];
711
+ let combinedMarkdown = '';
712
+ for (const image of images) {
713
+ console.log(`[PDFFormOperations] Analyzing page ${image.page} with AI...`);
714
+ // Get form fields for this page
715
+ const pageFormFields = formFieldsData
716
+ .filter((field) => field.page === image.page)
717
+ .map((field) => ({
718
+ id: field.id,
719
+ name: field.name,
720
+ type: field.field_type,
721
+ value: field.current_value,
722
+ x: field.x,
723
+ y: field.y,
724
+ }));
725
+ // Prepare prompt for AI analysis
726
+ let prompt = `Convert this PDF form page to markdown format, reading from left to right, top to bottom in natural reading order.
727
+
728
+ The form fields are numbered in this same natural reading order (top-left to bottom-right). For fillable form fields, use these EXACT formats:
729
+ - Text fields: **[FIELD-ID]** (where ID is the numeric identifier, surrounded by double asterisks and brackets)
730
+ - Checkboxes: [ ] **[FIELD-ID]** or [x] **[FIELD-ID]** (if checked)
731
+
732
+ Use markdown tables to preserve layout structure. Include all text, labels, and instructions exactly as they appear.
733
+
734
+ Example: **[25]** for field ID 25, or [x] **[42]** for a checked checkbox with ID 42.`;
735
+ if (pageFormFields.length > 0) {
736
+ prompt += `\n\nForm fields (in natural reading order):\n`;
737
+ pageFormFields
738
+ .sort((a, b) => a.id - b.id) // Sort by ID to ensure natural order
739
+ .forEach((field) => {
740
+ prompt += `- ID ${field.id}: ${field.type}\n`;
741
+ });
742
+ prompt +=
743
+ '\nUse these numeric IDs in the markdown where you see the corresponding form fields.';
744
+ }
745
+ try {
746
+ const aiAgent = new AIAgentBubble({
747
+ message: prompt,
748
+ images: [
749
+ {
750
+ type: 'base64',
751
+ data: image.data,
752
+ mimeType: 'image/png',
753
+ description: `PDF page ${image.page} (${image.width}x${image.height}px)`,
754
+ },
755
+ ],
756
+ systemPrompt: 'You are an expert at analyzing PDF documents and converting them to clean, well-structured markdown format.',
757
+ model: {
758
+ model: 'google/gemini-2.5-pro',
759
+ temperature: 0.3,
760
+ jsonMode: false,
761
+ },
762
+ }, this.context);
763
+ // Set credentials from the workflow context if available
764
+ if (this.params.credentials) {
765
+ aiAgent.setCredentials(this.params.credentials);
766
+ }
767
+ const aiResult = await aiAgent.action();
768
+ if (!aiResult.success) {
769
+ throw new Error(`AI analysis failed: ${aiResult.error}`);
770
+ }
771
+ const pageMarkdown = aiResult.data?.response || '';
772
+ pageResults.push({
773
+ pageNumber: image.page,
774
+ markdown: pageMarkdown,
775
+ formFields: pageFormFields.length > 0 ? pageFormFields : undefined,
776
+ });
777
+ // Add to combined markdown
778
+ if (images.length > 1) {
779
+ combinedMarkdown += `\n\n---\n# Page ${image.page}\n\n`;
780
+ }
781
+ combinedMarkdown += pageMarkdown;
782
+ console.log(`[PDFFormOperations] Page ${image.page} analyzed successfully`);
783
+ }
784
+ catch (error) {
785
+ console.error(`[PDFFormOperations] Error analyzing page ${image.page}:`, error);
786
+ const fallbackMarkdown = `*(Error analyzing page ${image.page}: ${error instanceof Error ? error.message : 'Unknown error'})*`;
787
+ pageResults.push({
788
+ pageNumber: image.page,
789
+ markdown: fallbackMarkdown,
790
+ formFields: pageFormFields.length > 0 ? pageFormFields : undefined,
791
+ });
792
+ if (images.length > 1) {
793
+ combinedMarkdown += `\n\n---\n# Page ${image.page}\n\n`;
794
+ }
795
+ combinedMarkdown += fallbackMarkdown;
796
+ }
797
+ }
798
+ console.log('[PDFFormOperations] Successfully converted', pageResults.length, 'pages to markdown');
799
+ return {
800
+ operation: 'convert-to-markdown',
801
+ markdown: combinedMarkdown.trim(),
802
+ pages: pageResults,
803
+ totalPages: images.length,
804
+ convertedPages: pageResults.length,
805
+ success: true,
806
+ error: '',
807
+ };
808
+ }
809
+ catch (error) {
810
+ console.error('[PDFFormOperations] Error converting PDF to markdown:', error);
811
+ return {
812
+ operation: 'convert-to-markdown',
813
+ markdown: '',
814
+ pages: [],
815
+ totalPages: 0,
816
+ convertedPages: 0,
817
+ success: false,
818
+ error: error instanceof Error
819
+ ? error.message
820
+ : 'Failed to convert PDF to markdown',
821
+ };
822
+ }
823
+ finally {
824
+ // Cleanup temporary files
825
+ try {
826
+ if (tempPdfPath) {
827
+ unlinkSync(tempPdfPath);
828
+ }
829
+ if (tempDir) {
830
+ // Remove the temp directory (should be empty now)
831
+ const { rmSync } = await import('fs');
832
+ rmSync(tempDir, { recursive: true, force: true });
833
+ }
834
+ }
835
+ catch (cleanupError) {
836
+ console.warn('[PDFFormOperations] Failed to cleanup temp files:', cleanupError);
837
+ }
838
+ }
839
+ }
840
+ }
841
+ //# sourceMappingURL=pdf-form-operations.workflow.js.map