@bubblelab/bubble-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (275)
  1. package/LICENSE.txt +202 -0
  2. package/dist/bubble-bundle.d.ts +2021 -0
  3. package/dist/bubble-factory.d.ts +161 -0
  4. package/dist/bubble-factory.d.ts.map +1 -0
  5. package/dist/bubble-factory.js +426 -0
  6. package/dist/bubble-factory.js.map +1 -0
  7. package/dist/bubble-flow/bubble-flow-class.d.ts +19 -0
  8. package/dist/bubble-flow/bubble-flow-class.d.ts.map +1 -0
  9. package/dist/bubble-flow/bubble-flow-class.js +23 -0
  10. package/dist/bubble-flow/bubble-flow-class.js.map +1 -0
  11. package/dist/bubble-flow/sample/data-analyst-flow.d.ts +15 -0
  12. package/dist/bubble-flow/sample/data-analyst-flow.d.ts.map +1 -0
  13. package/dist/bubble-flow/sample/data-analyst-flow.js +63 -0
  14. package/dist/bubble-flow/sample/data-analyst-flow.js.map +1 -0
  15. package/dist/bubble-flow/sample/error-ts.d.ts +23 -0
  16. package/dist/bubble-flow/sample/error-ts.d.ts.map +1 -0
  17. package/dist/bubble-flow/sample/error-ts.js +31 -0
  18. package/dist/bubble-flow/sample/error-ts.js.map +1 -0
  19. package/dist/bubble-flow/sample/sanitytest.d.ts +10 -0
  20. package/dist/bubble-flow/sample/sanitytest.d.ts.map +1 -0
  21. package/dist/bubble-flow/sample/sanitytest.js +13 -0
  22. package/dist/bubble-flow/sample/sanitytest.js.map +1 -0
  23. package/dist/bubble-flow/sample/simple-webhook-2.d.ts +19 -0
  24. package/dist/bubble-flow/sample/simple-webhook-2.d.ts.map +1 -0
  25. package/dist/bubble-flow/sample/simple-webhook-2.js +23 -0
  26. package/dist/bubble-flow/sample/simple-webhook-2.js.map +1 -0
  27. package/dist/bubble-flow/sample/simple-webhook.d.ts +10 -0
  28. package/dist/bubble-flow/sample/simple-webhook.d.ts.map +1 -0
  29. package/dist/bubble-flow/sample/simple-webhook.js +18 -0
  30. package/dist/bubble-flow/sample/simple-webhook.js.map +1 -0
  31. package/dist/bubble-flow/sample/simplified-data-analysis.flow.d.ts +29 -0
  32. package/dist/bubble-flow/sample/simplified-data-analysis.flow.d.ts.map +1 -0
  33. package/dist/bubble-flow/sample/simplified-data-analysis.flow.js +150 -0
  34. package/dist/bubble-flow/sample/simplified-data-analysis.flow.js.map +1 -0
  35. package/dist/bubble-flow/sample/slack-v0.1.d.ts +10 -0
  36. package/dist/bubble-flow/sample/slack-v0.1.d.ts.map +1 -0
  37. package/dist/bubble-flow/sample/slack-v0.1.js +59 -0
  38. package/dist/bubble-flow/sample/slack-v0.1.js.map +1 -0
  39. package/dist/bubble-flow/sample/slackagenttest.d.ts +10 -0
  40. package/dist/bubble-flow/sample/slackagenttest.d.ts.map +1 -0
  41. package/dist/bubble-flow/sample/slackagenttest.js +59 -0
  42. package/dist/bubble-flow/sample/slackagenttest.js.map +1 -0
  43. package/dist/bubble-trigger/index.d.ts +2 -0
  44. package/dist/bubble-trigger/index.d.ts.map +1 -0
  45. package/dist/bubble-trigger/index.js +2 -0
  46. package/dist/bubble-trigger/index.js.map +1 -0
  47. package/dist/bubble-trigger/types.d.ts +87 -0
  48. package/dist/bubble-trigger/types.d.ts.map +1 -0
  49. package/dist/bubble-trigger/types.js +14 -0
  50. package/dist/bubble-trigger/types.js.map +1 -0
  51. package/dist/bubbles/service-bubble/ai-agent.d.ts +428 -0
  52. package/dist/bubbles/service-bubble/ai-agent.d.ts.map +1 -0
  53. package/dist/bubbles/service-bubble/ai-agent.js +881 -0
  54. package/dist/bubbles/service-bubble/ai-agent.js.map +1 -0
  55. package/dist/bubbles/service-bubble/gmail.d.ts +3073 -0
  56. package/dist/bubbles/service-bubble/gmail.d.ts.map +1 -0
  57. package/dist/bubbles/service-bubble/gmail.js +908 -0
  58. package/dist/bubbles/service-bubble/gmail.js.map +1 -0
  59. package/dist/bubbles/service-bubble/google-calendar.d.ts +3377 -0
  60. package/dist/bubbles/service-bubble/google-calendar.d.ts.map +1 -0
  61. package/dist/bubbles/service-bubble/google-calendar.js +527 -0
  62. package/dist/bubbles/service-bubble/google-calendar.js.map +1 -0
  63. package/dist/bubbles/service-bubble/google-drive.d.ts +1152 -0
  64. package/dist/bubbles/service-bubble/google-drive.d.ts.map +1 -0
  65. package/dist/bubbles/service-bubble/google-drive.js +943 -0
  66. package/dist/bubbles/service-bubble/google-drive.js.map +1 -0
  67. package/dist/bubbles/service-bubble/google-sheets.d.ts +1811 -0
  68. package/dist/bubbles/service-bubble/google-sheets.d.ts.map +1 -0
  69. package/dist/bubbles/service-bubble/google-sheets.js +904 -0
  70. package/dist/bubbles/service-bubble/google-sheets.js.map +1 -0
  71. package/dist/bubbles/service-bubble/hello-world.d.ts +74 -0
  72. package/dist/bubbles/service-bubble/hello-world.d.ts.map +1 -0
  73. package/dist/bubbles/service-bubble/hello-world.js +67 -0
  74. package/dist/bubbles/service-bubble/hello-world.js.map +1 -0
  75. package/dist/bubbles/service-bubble/http.d.ts +134 -0
  76. package/dist/bubbles/service-bubble/http.d.ts.map +1 -0
  77. package/dist/bubbles/service-bubble/http.js +184 -0
  78. package/dist/bubbles/service-bubble/http.js.map +1 -0
  79. package/dist/bubbles/service-bubble/postgresql.d.ts +180 -0
  80. package/dist/bubbles/service-bubble/postgresql.d.ts.map +1 -0
  81. package/dist/bubbles/service-bubble/postgresql.js +448 -0
  82. package/dist/bubbles/service-bubble/postgresql.js.map +1 -0
  83. package/dist/bubbles/service-bubble/resend.d.ts +301 -0
  84. package/dist/bubbles/service-bubble/resend.d.ts.map +1 -0
  85. package/dist/bubbles/service-bubble/resend.js +253 -0
  86. package/dist/bubbles/service-bubble/resend.js.map +1 -0
  87. package/dist/bubbles/service-bubble/slack.d.ts +5869 -0
  88. package/dist/bubbles/service-bubble/slack.d.ts.map +1 -0
  89. package/dist/bubbles/service-bubble/slack.js +1536 -0
  90. package/dist/bubbles/service-bubble/slack.js.map +1 -0
  91. package/dist/bubbles/service-bubble/storage.d.ts +571 -0
  92. package/dist/bubbles/service-bubble/storage.d.ts.map +1 -0
  93. package/dist/bubbles/service-bubble/storage.js +504 -0
  94. package/dist/bubbles/service-bubble/storage.js.map +1 -0
  95. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.d.ts +308 -0
  96. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.d.ts.map +1 -0
  97. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.js +285 -0
  98. package/dist/bubbles/tool-bubble/bubbleflow-validation-tool.js.map +1 -0
  99. package/dist/bubbles/tool-bubble/chart-js-tool.d.ts +416 -0
  100. package/dist/bubbles/tool-bubble/chart-js-tool.d.ts.map +1 -0
  101. package/dist/bubbles/tool-bubble/chart-js-tool.js +570 -0
  102. package/dist/bubbles/tool-bubble/chart-js-tool.js.map +1 -0
  103. package/dist/bubbles/tool-bubble/get-bubble-details-tool.d.ts +99 -0
  104. package/dist/bubbles/tool-bubble/get-bubble-details-tool.d.ts.map +1 -0
  105. package/dist/bubbles/tool-bubble/get-bubble-details-tool.js +645 -0
  106. package/dist/bubbles/tool-bubble/get-bubble-details-tool.js.map +1 -0
  107. package/dist/bubbles/tool-bubble/list-bubbles-tool.d.ts +112 -0
  108. package/dist/bubbles/tool-bubble/list-bubbles-tool.d.ts.map +1 -0
  109. package/dist/bubbles/tool-bubble/list-bubbles-tool.js +82 -0
  110. package/dist/bubbles/tool-bubble/list-bubbles-tool.js.map +1 -0
  111. package/dist/bubbles/tool-bubble/reddit-scrape-tool.d.ts +413 -0
  112. package/dist/bubbles/tool-bubble/reddit-scrape-tool.d.ts.map +1 -0
  113. package/dist/bubbles/tool-bubble/reddit-scrape-tool.js +327 -0
  114. package/dist/bubbles/tool-bubble/reddit-scrape-tool.js.map +1 -0
  115. package/dist/bubbles/tool-bubble/research-agent-tool.d.ts +122 -0
  116. package/dist/bubbles/tool-bubble/research-agent-tool.d.ts.map +1 -0
  117. package/dist/bubbles/tool-bubble/research-agent-tool.js +343 -0
  118. package/dist/bubbles/tool-bubble/research-agent-tool.js.map +1 -0
  119. package/dist/bubbles/tool-bubble/sql-query-tool.d.ts +131 -0
  120. package/dist/bubbles/tool-bubble/sql-query-tool.d.ts.map +1 -0
  121. package/dist/bubbles/tool-bubble/sql-query-tool.js +147 -0
  122. package/dist/bubbles/tool-bubble/sql-query-tool.js.map +1 -0
  123. package/dist/bubbles/tool-bubble/tool-template.d.ts +257 -0
  124. package/dist/bubbles/tool-bubble/tool-template.d.ts.map +1 -0
  125. package/dist/bubbles/tool-bubble/tool-template.js +238 -0
  126. package/dist/bubbles/tool-bubble/tool-template.js.map +1 -0
  127. package/dist/bubbles/tool-bubble/virtual-file-editor-example.d.ts +8 -0
  128. package/dist/bubbles/tool-bubble/virtual-file-editor-example.d.ts.map +1 -0
  129. package/dist/bubbles/tool-bubble/virtual-file-editor-example.js +65 -0
  130. package/dist/bubbles/tool-bubble/virtual-file-editor-example.js.map +1 -0
  131. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.d.ts +125 -0
  132. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.d.ts.map +1 -0
  133. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.js +169 -0
  134. package/dist/bubbles/tool-bubble/virtual-file-editor.tool.js.map +1 -0
  135. package/dist/bubbles/tool-bubble/web-crawl-tool.d.ts +218 -0
  136. package/dist/bubbles/tool-bubble/web-crawl-tool.d.ts.map +1 -0
  137. package/dist/bubbles/tool-bubble/web-crawl-tool.js +255 -0
  138. package/dist/bubbles/tool-bubble/web-crawl-tool.js.map +1 -0
  139. package/dist/bubbles/tool-bubble/web-extract-tool.d.ts +134 -0
  140. package/dist/bubbles/tool-bubble/web-extract-tool.d.ts.map +1 -0
  141. package/dist/bubbles/tool-bubble/web-extract-tool.js +175 -0
  142. package/dist/bubbles/tool-bubble/web-extract-tool.js.map +1 -0
  143. package/dist/bubbles/tool-bubble/web-scrape-tool.d.ts +228 -0
  144. package/dist/bubbles/tool-bubble/web-scrape-tool.d.ts.map +1 -0
  145. package/dist/bubbles/tool-bubble/web-scrape-tool.js +214 -0
  146. package/dist/bubbles/tool-bubble/web-scrape-tool.js.map +1 -0
  147. package/dist/bubbles/tool-bubble/web-search-tool.d.ts +134 -0
  148. package/dist/bubbles/tool-bubble/web-search-tool.d.ts.map +1 -0
  149. package/dist/bubbles/tool-bubble/web-search-tool.js +155 -0
  150. package/dist/bubbles/tool-bubble/web-search-tool.js.map +1 -0
  151. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.d.ts +114 -0
  152. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.d.ts.map +1 -0
  153. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.js +777 -0
  154. package/dist/bubbles/workflow-bubble/bubbleflow-generator.workflow.js.map +1 -0
  155. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.d.ts +97 -0
  156. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.d.ts.map +1 -0
  157. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.js +327 -0
  158. package/dist/bubbles/workflow-bubble/bubblscript-generateor.workflow.js.map +1 -0
  159. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.d.ts +303 -0
  160. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.d.ts.map +1 -0
  161. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.js +297 -0
  162. package/dist/bubbles/workflow-bubble/database-analyzer.workflow.js.map +1 -0
  163. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.d.ts +157 -0
  164. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.d.ts.map +1 -0
  165. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.js +310 -0
  166. package/dist/bubbles/workflow-bubble/file-editor-agent.workflow.js.map +1 -0
  167. package/dist/bubbles/workflow-bubble/generate-document.workflow.d.ts +543 -0
  168. package/dist/bubbles/workflow-bubble/generate-document.workflow.d.ts.map +1 -0
  169. package/dist/bubbles/workflow-bubble/generate-document.workflow.js +628 -0
  170. package/dist/bubbles/workflow-bubble/generate-document.workflow.js.map +1 -0
  171. package/dist/bubbles/workflow-bubble/parse-document.workflow.d.ts +679 -0
  172. package/dist/bubbles/workflow-bubble/parse-document.workflow.d.ts.map +1 -0
  173. package/dist/bubbles/workflow-bubble/parse-document.workflow.js +604 -0
  174. package/dist/bubbles/workflow-bubble/parse-document.workflow.js.map +1 -0
  175. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.d.ts +1011 -0
  176. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.d.ts.map +1 -0
  177. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.js +841 -0
  178. package/dist/bubbles/workflow-bubble/pdf-form-operations.workflow.js.map +1 -0
  179. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.d.ts +883 -0
  180. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.d.ts.map +1 -0
  181. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.js +781 -0
  182. package/dist/bubbles/workflow-bubble/pdf-ocr.workflow.js.map +1 -0
  183. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.d.ts +300 -0
  184. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.d.ts.map +1 -0
  185. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.js +508 -0
  186. package/dist/bubbles/workflow-bubble/slack-data-assistant.workflow.js.map +1 -0
  187. package/dist/bubbles/workflow-bubble/slack-formatter-agent.d.ts +731 -0
  188. package/dist/bubbles/workflow-bubble/slack-formatter-agent.d.ts.map +1 -0
  189. package/dist/bubbles/workflow-bubble/slack-formatter-agent.js +690 -0
  190. package/dist/bubbles/workflow-bubble/slack-formatter-agent.js.map +1 -0
  191. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.d.ts +401 -0
  192. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.d.ts.map +1 -0
  193. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.js +382 -0
  194. package/dist/bubbles/workflow-bubble/slack-notifier.workflow.js.map +1 -0
  195. package/dist/bubbles/workflow-bubble/workflow-template.d.ts +144 -0
  196. package/dist/bubbles/workflow-bubble/workflow-template.d.ts.map +1 -0
  197. package/dist/bubbles/workflow-bubble/workflow-template.js +124 -0
  198. package/dist/bubbles/workflow-bubble/workflow-template.js.map +1 -0
  199. package/dist/index.d.ts +46 -0
  200. package/dist/index.d.ts.map +1 -0
  201. package/dist/index.js +53 -0
  202. package/dist/index.js.map +1 -0
  203. package/dist/logging/BubbleLogger.d.ts +146 -0
  204. package/dist/logging/BubbleLogger.d.ts.map +1 -0
  205. package/dist/logging/BubbleLogger.js +472 -0
  206. package/dist/logging/BubbleLogger.js.map +1 -0
  207. package/dist/logging/StreamingBubbleLogger.d.ts +85 -0
  208. package/dist/logging/StreamingBubbleLogger.d.ts.map +1 -0
  209. package/dist/logging/StreamingBubbleLogger.js +340 -0
  210. package/dist/logging/StreamingBubbleLogger.js.map +1 -0
  211. package/dist/types/ai-models.d.ts +4 -0
  212. package/dist/types/ai-models.d.ts.map +1 -0
  213. package/dist/types/ai-models.js +14 -0
  214. package/dist/types/ai-models.js.map +1 -0
  215. package/dist/types/available-tools.d.ts +4 -0
  216. package/dist/types/available-tools.d.ts.map +1 -0
  217. package/dist/types/available-tools.js +19 -0
  218. package/dist/types/available-tools.js.map +1 -0
  219. package/dist/types/base-bubble-class.d.ts +47 -0
  220. package/dist/types/base-bubble-class.d.ts.map +1 -0
  221. package/dist/types/base-bubble-class.js +212 -0
  222. package/dist/types/base-bubble-class.js.map +1 -0
  223. package/dist/types/bubble-errors.d.ts +44 -0
  224. package/dist/types/bubble-errors.d.ts.map +1 -0
  225. package/dist/types/bubble-errors.js +51 -0
  226. package/dist/types/bubble-errors.js.map +1 -0
  227. package/dist/types/bubble.d.ts +73 -0
  228. package/dist/types/bubble.d.ts.map +1 -0
  229. package/dist/types/bubble.js +2 -0
  230. package/dist/types/bubble.js.map +1 -0
  231. package/dist/types/credentials.d.ts +6 -0
  232. package/dist/types/credentials.d.ts.map +1 -0
  233. package/dist/types/credentials.js +6 -0
  234. package/dist/types/credentials.js.map +1 -0
  235. package/dist/types/service-bubble-class.d.ts +31 -0
  236. package/dist/types/service-bubble-class.d.ts.map +1 -0
  237. package/dist/types/service-bubble-class.js +36 -0
  238. package/dist/types/service-bubble-class.js.map +1 -0
  239. package/dist/types/streaming-events.d.ts +18 -0
  240. package/dist/types/streaming-events.d.ts.map +1 -0
  241. package/dist/types/streaming-events.js +5 -0
  242. package/dist/types/streaming-events.js.map +1 -0
  243. package/dist/types/tool-bubble-class.d.ts +19 -0
  244. package/dist/types/tool-bubble-class.d.ts.map +1 -0
  245. package/dist/types/tool-bubble-class.js +48 -0
  246. package/dist/types/tool-bubble-class.js.map +1 -0
  247. package/dist/types/workflow-bubble-class.d.ts +25 -0
  248. package/dist/types/workflow-bubble-class.d.ts.map +1 -0
  249. package/dist/types/workflow-bubble-class.js +30 -0
  250. package/dist/types/workflow-bubble-class.js.map +1 -0
  251. package/dist/utils/bubbleflow-parser.d.ts +32 -0
  252. package/dist/utils/bubbleflow-parser.d.ts.map +1 -0
  253. package/dist/utils/bubbleflow-parser.js +332 -0
  254. package/dist/utils/bubbleflow-parser.js.map +1 -0
  255. package/dist/utils/bubbleflow-validation.d.ts +9 -0
  256. package/dist/utils/bubbleflow-validation.d.ts.map +1 -0
  257. package/dist/utils/bubbleflow-validation.js +116 -0
  258. package/dist/utils/bubbleflow-validation.js.map +1 -0
  259. package/dist/utils/json-parsing.d.ts +20 -0
  260. package/dist/utils/json-parsing.d.ts.map +1 -0
  261. package/dist/utils/json-parsing.js +394 -0
  262. package/dist/utils/json-parsing.js.map +1 -0
  263. package/dist/utils/mock-data-generator.d.ts +43 -0
  264. package/dist/utils/mock-data-generator.d.ts.map +1 -0
  265. package/dist/utils/mock-data-generator.js +312 -0
  266. package/dist/utils/mock-data-generator.js.map +1 -0
  267. package/dist/utils/param-helper.d.ts +2 -0
  268. package/dist/utils/param-helper.d.ts.map +1 -0
  269. package/dist/utils/param-helper.js +5 -0
  270. package/dist/utils/param-helper.js.map +1 -0
  271. package/dist/utils/source-bubble-parser.d.ts +31 -0
  272. package/dist/utils/source-bubble-parser.d.ts.map +1 -0
  273. package/dist/utils/source-bubble-parser.js +231 -0
  274. package/dist/utils/source-bubble-parser.js.map +1 -0
  275. package/package.json +63 -0
@@ -0,0 +1,781 @@
1
+ /**
2
+ * PDF OCR WORKFLOW
3
+ *
4
+ * A comprehensive workflow that converts PDF documents to images and passes them
5
+ * to an AI agent along with discovered form fields to parse and extract schema information.
6
+ *
7
+ * This workflow combines:
8
+ * 1. PDF field discovery using pdf-lib
9
+ * 2. PDF to images conversion using pdf-img-convert
10
+ * 3. AI agent analysis for schema parsing and field extraction
11
+ *
12
+ * Returns structured JSON containing field IDs from discovery and extracted field names
13
+ * with their values from AI analysis.
14
+ */
15
+ import { z } from 'zod';
16
+ import { WorkflowBubble } from '../../types/workflow-bubble-class.js';
17
+ import { CredentialType } from '@bubblelab/shared-schemas';
18
+ import { PDFFormOperationsWorkflow } from './pdf-form-operations.workflow.js';
19
+ import { AIAgentBubble } from '../service-bubble/ai-agent.js';
20
+ import { AvailableModels } from '../../types/ai-models.js';
21
+ /**
22
+ * System prompts for different modes
23
+ */
24
+ const IDENTIFY_MODE_PROMPT = `You are an expert OCR and form field extraction specialist. Analyze the provided PDF images and form field discovery data to extract structured information.
25
+
26
+ IMPORTANT: The form field IDs are numbered in natural reading order (left to right, top to bottom). Use this ordering to help identify what each field represents.
27
+
28
+ Your task:
29
+ 1. Examine the PDF images to identify all visible text and form fields
30
+ 2. Cross-reference with the discovered form field metadata (field IDs follow natural reading order)
31
+ 3. Generate descriptive field names based on the PDF content, context, and field position
32
+ 4. Return a JSON array with field information
33
+
34
+ Return format: JSON array of objects with:
35
+ - id: number (MUST use the exact ID from discovery data when available - these IDs are in natural reading order)
36
+ - fieldName: string (descriptive name based on PDF content, context, and field position)
37
+ - confidence: number (0.0-1.0, your confidence in the field identification)
38
+
39
+ Focus on generating meaningful, descriptive field names that accurately represent what each field is for based on the PDF context and natural reading order.`;
40
/**
 * System prompt for `autofill` mode.
 *
 * Extends the identification task: the model must return one entry for every
 * discovered field ID, echo the discovered `originalFieldName`, and assign a
 * `value` taken from the supplied client information (empty string when
 * nothing fits).
 *
 * The prompt is written one line per array entry and joined with newlines;
 * empty entries are the blank separator lines of the prompt (the two
 * consecutive blanks before "Your task:" are part of the original text).
 */
const AUTOFILL_MODE_PROMPT = [
  'You are an expert OCR and form field extraction specialist with autofill capabilities. Analyze the provided PDF images, form field discovery data, and client information to extract and fill structured information.',
  '',
  'IMPORTANT: The form field IDs are numbered in natural reading order (left to right, top to bottom). Use this ordering to help identify what each field represents and to match client information appropriately.',
  '',
  '',
  'Your task:',
  '1. Examine the PDF images to identify all visible text and form fields',
  '2. Cross-reference with the discovered form field metadata (field IDs follow natural reading order)',
  '3. Generate descriptive field names based on the PDF content, context, and field position',
  '4. Use the provided client information to determine appropriate values for each field',
  '5. Return a JSON array with ALL discovered fields (every single field ID must have an entry)',
  '',
  'Return format: JSON array of objects with:',
  '- id: number (MUST use the exact ID from discovery data when available - these IDs are in natural reading order)',
  '- originalFieldName: string (MUST use the exact field name from discovery data when available, for precise field matching)',
  '- fieldName: string (descriptive name based on PDF content, context, and natural reading order)',
  '- value: string (appropriate value from client information, or empty string if not applicable)',
  '- confidence: number (0.0-1.0, your confidence in the field identification and value assignment)',
  '',
  'CRITICAL:',
  '- You MUST return an entry for EVERY field ID from the discovery data - no field should be omitted',
  '- For fields that match discovered form field metadata, you MUST include the originalFieldName exactly as provided in the discovery data. This is essential for proper form filling.',
  '',
  'Rules for value assignment:',
  '- Use the natural reading order (field ID sequence) to understand form structure and field relationships',
  '- Only fill values that clearly match the client information provided',
  '- Use empty string for fields where no appropriate value can be determined from client information',
  '- Format values appropriately for the field type (dates, numbers, etc.)',
  '- Be conservative - if unsure, use empty string rather than guessing',
  '- EVERY discovered field ID must appear in your response, even if the value is empty',
  '',
  'Focus on accuracy and appropriate value mapping based on the client information context and natural field ordering.',
].join('\n');
72
/**
 * Builds the `pdfData` parameter: a required, non-empty string described as
 * Base64 encoded PDF data. A fresh schema instance is returned on each call so
 * the two union members do not share objects.
 */
const createPdfDataParam = () =>
  z.string().min(1, 'PDF data is required').describe('Base64 encoded PDF data');

/**
 * Builds the option fields that both workflow modes accept, in the order they
 * appear in each mode's object: `discoveryOptions`, `imageOptions`,
 * `aiOptions`, `credentials`.
 *
 * @param {string} modelDescription - Description attached to `aiOptions.model`
 *   (the only text that differs between the two modes).
 * @returns {object} A zod shape fragment meant to be spread into `z.object`.
 */
const createSharedOptionParams = (modelDescription) => ({
  discoveryOptions: z
    .object({
      targetPage: z
        .number()
        .positive()
        .optional()
        .describe('Extract fields from specific page only (default: all pages)'),
    })
    .default({})
    .describe('Options for PDF field discovery'),
  imageOptions: z
    .object({
      format: z
        .enum(['png', 'jpeg'])
        .default('png')
        .describe('Output image format'),
      quality: z
        .number()
        .min(0.1)
        .max(1.0)
        .default(0.8)
        .describe('JPEG quality (0.1-1.0, only for JPEG format)'),
      dpi: z
        .number()
        .min(72)
        .max(300)
        .default(150)
        .describe('Output DPI (dots per inch)'),
      pages: z
        .array(z.number().positive())
        .optional()
        .describe('Specific page numbers to convert (1-indexed). If not provided, converts all pages'),
    })
    .default({
      format: 'png',
      quality: 0.8,
      dpi: 150,
    })
    .describe('Options for PDF to images conversion'),
  aiOptions: z
    .object({
      model: AvailableModels.default('google/gemini-2.5-flash').describe(modelDescription),
      temperature: z
        .number()
        .min(0)
        .max(2)
        .default(0.3)
        .describe('Temperature for AI responses (lower = more consistent)'),
      maxTokens: z
        .number()
        .positive()
        .default(50000)
        .describe('Maximum tokens for AI response'),
      jsonMode: z
        .boolean()
        .default(true)
        .describe('Enable JSON mode to ensure clean JSON output'),
    })
    .default({
      model: 'google/gemini-2.5-flash',
      temperature: 0.3,
      maxTokens: 50000,
      jsonMode: true,
    })
    .describe('AI agent configuration options'),
  credentials: z
    .record(z.nativeEnum(CredentialType), z.string())
    .optional()
    .describe('Credentials for AI model access (GOOGLE_GEMINI_CRED, OPENAI_CRED, etc.)'),
});

/**
 * Parameters schema for the PDF OCR workflow.
 *
 * A discriminated union keyed on `mode`:
 * - `identify` – identify form fields and generate descriptive names.
 * - `autofill` – additionally requires `clientInformation`, the free text used
 *   to fill the fields.
 *
 * Every other field is identical in both modes and comes from the factories
 * above, so the two members cannot drift apart. Key order inside each member
 * matches the original hand-written definitions.
 */
const PDFOcrWorkflowParamsSchema = z.discriminatedUnion('mode', [
  // Identify mode - just identifies fields and generates descriptive names
  z.object({
    mode: z
      .literal('identify')
      .describe('Identify form fields and generate descriptive names'),
    pdfData: createPdfDataParam(),
    ...createSharedOptionParams('AI model to use for field identification'),
  }),
  // Autofill mode - identifies fields and fills them based on client information
  z.object({
    mode: z
      .literal('autofill')
      .describe('Identify form fields and autofill with client information'),
    pdfData: createPdfDataParam(),
    clientInformation: z
      .string()
      .min(1, 'Client information is required for autofill mode')
      .describe('Free text containing client information to use for autofilling form fields'),
    ...createSharedOptionParams('AI model to use for field identification and autofill'),
  }),
]);
239
/**
 * Builds the three summary objects present in every result, in their original
 * order: `discoveryData`, `imageData`, `aiAnalysis`.
 *
 * @returns {object} A zod shape fragment meant to be spread into `z.object`.
 */
const createSummaryResultFields = () => ({
  discoveryData: z
    .object({
      totalFields: z.number(),
      fieldsWithCoordinates: z.number(),
      pages: z.array(z.number()),
    })
    .describe('Summary of field discovery results'),
  imageData: z
    .object({
      totalPages: z.number(),
      convertedPages: z.number(),
      format: z.string(),
      dpi: z.number(),
    })
    .describe('Summary of image conversion results'),
  aiAnalysis: z
    .object({
      model: z.string(),
      iterations: z.number(),
      processingTime: z.number().optional(),
    })
    .describe('AI analysis metadata'),
});

/**
 * Builds the trailing outcome fields present in every result: `success` and
 * `error`. Note that `error` is a required string in both modes.
 *
 * @returns {object} A zod shape fragment meant to be spread into `z.object`.
 */
const createOutcomeResultFields = () => ({
  success: z
    .boolean()
    .describe('Whether the workflow completed successfully'),
  error: z.string().describe('Error message if workflow failed'),
});

/**
 * Result schema for the PDF OCR workflow.
 *
 * A discriminated union keyed on `mode`:
 * - `identify` – extracted fields carry `id`, `fieldName`, `confidence`.
 * - `autofill` – extracted fields additionally carry `originalFieldName`
 *   (optional) and `value`, and the result adds `filledPdfData` and
 *   `fillResults`.
 *
 * The summary and outcome fields are shared through the factories above; key
 * order inside each member matches the original hand-written definitions.
 */
const PDFOcrWorkflowResultSchema = z.discriminatedUnion('mode', [
  // Identify mode result
  z.object({
    mode: z.literal('identify').describe('Result from identify mode'),
    extractedFields: z
      .array(z.object({
        id: z.number().describe('Field ID from discovery or auto-generated'),
        fieldName: z
          .string()
          .describe('Descriptive name generated based on PDF content'),
        confidence: z
          .number()
          .min(0)
          .max(1)
          .describe('AI confidence in the field identification (0.0-1.0)'),
      }))
      .describe('Array of identified fields with descriptive names'),
    ...createSummaryResultFields(),
    ...createOutcomeResultFields(),
  }),
  // Autofill mode result
  z.object({
    mode: z.literal('autofill').describe('Result from autofill mode'),
    extractedFields: z
      .array(z.object({
        id: z.number().describe('Field ID from discovery or auto-generated'),
        originalFieldName: z
          .string()
          .optional()
          .describe('Original field name from discovery for precise matching'),
        fieldName: z
          .string()
          .describe('Descriptive name generated based on PDF content'),
        value: z
          .string()
          .describe('Value to fill in the field based on client information'),
        confidence: z
          .number()
          .min(0)
          .max(1)
          .describe('AI confidence in the field identification and value assignment (0.0-1.0)'),
      }))
      .describe('Array of identified fields with values for autofill'),
    filledPdfData: z.string().describe('Base64 encoded filled PDF data'),
    ...createSummaryResultFields(),
    fillResults: z
      .object({
        filledFields: z.number(),
        successfullyFilled: z.number(),
      })
      .describe('Summary of PDF filling results'),
    ...createOutcomeResultFields(),
  }),
]);
344
/**
 * Normalizes a confidence score taken from the AI response.
 *
 * Missing, empty or non-numeric scores fall back to `fallback`; anything
 * numeric (including an explicit 0) is clamped into the [0, 1] range that the
 * result schema requires.
 *
 * @param {unknown} raw - Confidence value as returned by the model.
 * @param {number} [fallback=0.8] - Score used when `raw` is unusable.
 * @returns {number} A number between 0 and 1 inclusive.
 */
function pdfOcrNormalizeConfidence(raw, fallback = 0.8) {
    if (raw === null || raw === undefined || raw === '') {
        return fallback;
    }
    const numeric = Number(raw);
    if (Number.isNaN(numeric)) {
        return fallback;
    }
    return Math.min(Math.max(numeric, 0), 1);
}
/**
 * Converts a field value from the AI response into the string the result
 * schema expects. Numbers and booleans are stringified (so `0` and `false`
 * survive); anything else that is not already a string becomes ''.
 *
 * @param {unknown} raw - Value as returned by the model.
 * @returns {string} String form of the value, or '' when there is none.
 */
function pdfOcrNormalizeValue(raw) {
    if (typeof raw === 'string') {
        return raw;
    }
    if (typeof raw === 'number' || typeof raw === 'boolean') {
        return String(raw);
    }
    return '';
}
/**
 * Removes a surrounding Markdown code fence (``` or ```json) from a model
 * reply so the payload inside can be handed to JSON.parse. Input without a
 * fence (or non-string input) is returned unchanged.
 *
 * @param {unknown} text - Raw model reply.
 * @returns {unknown} The unfenced payload, or `text` itself.
 */
function pdfOcrStripCodeFence(text) {
    if (typeof text !== 'string') {
        return text;
    }
    const fenced = text.trim().match(/^```(?:\w+)?\s*([\s\S]*?)\s*```$/);
    return fenced ? fenced[1] : text;
}
/**
 * Turns the raw AI reply into the `extractedFields` array.
 *
 * Strategy, in order:
 *  1. Parse the reply as JSON (after stripping an optional code fence).
 *     - Array: each element becomes one field.
 *     - Object: each key becomes one field (confidence 0.7).
 *     - Any other JSON value: no fields.
 *  2. If the reply is not valid JSON, fall back to scanning it line by line
 *     for `label: value` pairs (confidence 0.6); lines that do not match are
 *     still reported as `extracted_text_N` (confidence 0.5).
 *
 * In autofill mode every field additionally carries `originalFieldName` and
 * `value`; in identify mode only `id`, `fieldName` and `confidence` are set.
 *
 * @param {string|undefined} response - Reply text from the AI agent.
 * @param {string} mode - Workflow mode ('identify' or 'autofill').
 * @returns {Array<object>} Extracted field descriptors.
 */
function pdfOcrParseFields(response, mode) {
    // Adds the autofill-only properties while keeping the original key order.
    const withAutofillExtras = (base, originalFieldName, value) => mode === 'autofill' ? { ...base, originalFieldName, value } : base;
    let parsed;
    try {
        parsed = JSON.parse(pdfOcrStripCodeFence(response || '[]'));
    }
    catch {
        console.warn('[PDFOcrWorkflow] Failed to parse AI response as JSON, attempting text extraction');
        // Fallback: try to extract field-value pairs from text response
        const lines = (response || '')
            .split('\n')
            .filter((line) => line.trim().length > 0);
        return lines.map((line, index) => {
            const match = line.match(/^[•\-*]?\s*(.+?):\s*(.+)$/);
            if (match) {
                return withAutofillExtras({
                    id: index + 1000,
                    fieldName: match[1].trim(),
                    confidence: 0.6, // Lower confidence for regex-extracted data
                }, undefined, match[2]?.trim() || '');
            }
            return withAutofillExtras({
                id: index + 1000,
                fieldName: `extracted_text_${index + 1}`,
                confidence: 0.5,
            }, undefined, '');
        });
    }
    if (Array.isArray(parsed)) {
        return parsed.map((entry, index) => {
            // A null or primitive element would otherwise throw on property access.
            const field = entry !== null && typeof entry === 'object' ? entry : {};
            return withAutofillExtras({
                // `??` keeps a legitimate id of 0; only a missing id is generated.
                id: field.id ?? index + 1000,
                // Coerced so later name matching can rely on string methods.
                fieldName: String(field.fieldName || field.name || `field_${index + 1}`),
                confidence: pdfOcrNormalizeConfidence(field.confidence),
            },
            // Models often emit null for "absent"; the schema only allows string or undefined.
            typeof field.originalFieldName === 'string'
                ? field.originalFieldName
                : undefined, pdfOcrNormalizeValue(field.value));
        });
    }
    console.warn('[PDFOcrWorkflow] AI response was not an array, attempting to extract fields from object');
    // If AI returned an object instead of array, try to convert it
    if (typeof parsed === 'object' && parsed !== null) {
        return Object.entries(parsed).map(([key, rawValue], index) => withAutofillExtras({
            id: index + 1000,
            fieldName: key,
            confidence: 0.7, // Lower confidence for converted data
        }, undefined,
        // Keep primitive values, matching what the text fallback does for `label: value`.
        pdfOcrNormalizeValue(rawValue)));
    }
    return [];
}
/**
 * Finds the discovered PDF field that an AI-extracted field refers to.
 *
 * Tries, in order: the AI-reported `originalFieldName`, an exact match on
 * `fieldName`, then a case- and whitespace-insensitive match on `fieldName`.
 * Each successful strategy logs which rule matched.
 *
 * @param {{fieldName: string, originalFieldName?: string, value: string}} field - AI-extracted field.
 * @param {Array<{name: string}>} discoveredFields - Fields returned by the discovery step.
 * @returns {object|null} The matching discovered field, or null when none matches.
 */
function pdfOcrFindDiscoveredField(field, discoveredFields) {
    // First try: Use originalFieldName if available (most precise)
    if (field.originalFieldName) {
        const direct = discoveredFields.find((f) => f.name === field.originalFieldName);
        if (direct) {
            console.log(`[PDFOcrWorkflow] DEBUG: Direct match via originalFieldName: "${field.originalFieldName}" = "${field.value}"`);
            return direct;
        }
    }
    // Second try: Match by exact field name
    const exact = discoveredFields.find((f) => f.name === field.fieldName);
    if (exact) {
        console.log(`[PDFOcrWorkflow] DEBUG: Exact match via fieldName: "${field.fieldName}" -> "${exact.name}" = "${field.value}"`);
        return exact;
    }
    // Third try: Fuzzy match by normalized field name (remove spaces, lowercase)
    const normalize = (name) => name.toLowerCase().replace(/\s+/g, '');
    const wanted = normalize(String(field.fieldName));
    const fuzzy = discoveredFields.find((f) => typeof f.name === 'string' && normalize(f.name) === wanted);
    if (fuzzy) {
        console.log(`[PDFOcrWorkflow] DEBUG: Fuzzy match: "${field.fieldName}" -> "${fuzzy.name}" = "${field.value}"`);
        return fuzzy;
    }
    return null;
}
/**
 * Assembles the mode-specific result object. Key order mirrors the result
 * schema: identify results have no `filledPdfData` / `fillResults`.
 *
 * @param {object} parts
 * @param {boolean} parts.isIdentify - True when the workflow ran in identify mode.
 * @param {Array<object>} parts.extractedFields - Fields produced by the AI step.
 * @param {object} parts.summary - `{ discoveryData, imageData, aiAnalysis }`.
 * @param {string} parts.filledPdfData - Base64 PDF (autofill only).
 * @param {object} parts.fillResults - Fill counters (autofill only).
 * @param {boolean} parts.success - Whether the workflow succeeded.
 * @param {string} parts.error - Error message, '' on success.
 * @returns {object} Result matching the identify or autofill result shape.
 */
function pdfOcrAssembleResult({ isIdentify, extractedFields, summary, filledPdfData, fillResults, success, error, }) {
    if (isIdentify) {
        return {
            mode: 'identify',
            extractedFields,
            ...summary,
            success,
            error,
        };
    }
    return {
        mode: 'autofill',
        extractedFields,
        filledPdfData,
        ...summary,
        fillResults,
        success,
        error,
    };
}
/**
 * PDF OCR Workflow
 * Combines PDF field discovery, image conversion, and AI analysis for comprehensive form field extraction.
 *
 * In 'identify' mode the workflow only names the fields it finds; in
 * 'autofill' mode it also asks the AI for values and writes them into the PDF.
 */
export class PDFOcrWorkflow extends WorkflowBubble {
    static type = 'workflow';
    static bubbleName = 'pdf-ocr-workflow';
    static schema = PDFOcrWorkflowParamsSchema;
    static resultSchema = PDFOcrWorkflowResultSchema;
    static shortDescription = 'PDF OCR workflow: identify fields or autofill forms using AI analysis';
    static longDescription = `
Comprehensive PDF OCR workflow with two modes for form field processing:

**Identify Mode:**
- Discovers and names form fields from PDF documents
- Returns field IDs, descriptive names, and confidence scores
- Useful for form schema generation and document understanding

**Autofill Mode:**
- Identifies form fields AND fills them using provided client information
- Returns field data with values plus a filled PDF
- Uses AI to intelligently map client data to appropriate form fields

Process:
1. Discover form fields using PyMuPDF (field names, types, coordinates)
2. Convert PDF pages to high-quality images using PyMuPDF
3. Send images + discovery data + client info (autofill mode) to AI agent
4. For autofill mode: Use PDF Form Operations to fill the form with AI-determined values

Features:
- Two distinct modes: identify vs autofill
- Cross-references visual analysis with form field metadata
- Supports both fillable PDFs and scanned documents
- Generates meaningful field names based on PDF content and context
- Intelligent value mapping from client information (autofill mode)
- Configurable image quality and AI model selection
- Returns confidence scores for field identification accuracy

Use cases:
- **Identify**: Form schema generation, document structure analysis
- **Autofill**: Automated form filling, client onboarding, data entry automation

Input: Base64 encoded PDF data + mode + client information (autofill mode)
Output: Mode-specific results with field data and optional filled PDF
`;
    static alias = 'pdf-ocr';
    /**
     * @param {object} params - Workflow parameters (see `PDFOcrWorkflow.schema`).
     * @param {object} [context] - Execution context forwarded to the base class and sub-workflows.
     */
    constructor(params, context) {
        super(params, context);
    }
    /**
     * Runs the workflow: discover fields, render pages to images, ask the AI
     * agent to name (and in autofill mode, fill) the fields, then optionally
     * write the values into the PDF.
     *
     * Never rejects for a failed step: any error thrown after the initial
     * logging is caught and reported as a result with `success: false` and
     * the error message in `error`.
     *
     * @returns {Promise<object>} Identify- or autofill-shaped result object.
     */
    async performAction() {
        const startTime = Date.now();
        console.log('[PDFOcrWorkflow] Starting comprehensive PDF OCR analysis');
        console.log('[PDFOcrWorkflow] PDF data length:', this.params.pdfData.length);
        const isIdentify = this.params.mode === 'identify';
        const isAutofill = this.params.mode === 'autofill';
        // Resolved once so the success and failure results report the same settings.
        const imageFormat = this.params.imageOptions?.format || 'png';
        const imageDpi = this.params.imageOptions?.dpi || 150;
        const modelName = this.params.aiOptions?.model || 'google/gemini-2.5-flash';
        try {
            // Step 1: Discover form fields
            console.log('[PDFOcrWorkflow] Step 1: Discovering form fields...');
            const discoveryWorkflow = new PDFFormOperationsWorkflow({
                operation: 'discover',
                pdfData: this.params.pdfData,
                targetPage: this.params.discoveryOptions?.targetPage,
                credentials: this.params.credentials,
            }, this.context);
            const discoveryResult = await discoveryWorkflow.action();
            if (!discoveryResult.success) {
                throw new Error(`Field discovery failed: ${discoveryResult.error}`);
            }
            console.log(`[PDFOcrWorkflow] Discovered ${discoveryResult.data?.totalFields} fields`);
            // Step 2: Convert PDF to images
            console.log('[PDFOcrWorkflow] Step 2: Converting PDF to images...');
            const imageWorkflow = new PDFFormOperationsWorkflow({
                operation: 'convert-to-images',
                pdfData: this.params.pdfData,
                format: imageFormat,
                quality: this.params.imageOptions?.quality || 0.8,
                dpi: imageDpi,
                pages: this.params.imageOptions?.pages,
                credentials: this.params.credentials,
            }, this.context);
            const imageResult = await imageWorkflow.action();
            if (!imageResult.success) {
                throw new Error(`Image conversion failed: ${imageResult.error}`);
            }
            console.log(`[PDFOcrWorkflow] Converted ${imageResult.data?.convertedPages} pages to images`);
            // Step 3: Prepare data for AI analysis
            console.log('[PDFOcrWorkflow] Step 3: Preparing data for AI analysis...');
            // Prepare images for AI agent
            const imageInputs = imageResult.data?.images?.map((image) => ({
                type: 'base64',
                data: image.imageData,
                mimeType: image.format === 'png' ? 'image/png' : 'image/jpeg',
                description: `Page ${image.pageNumber} - PDF form field extraction`,
            })) || [];
            // Prepare discovery data summary for AI prompt
            const fieldsData = discoveryResult.data?.fields || [];
            const discoveredFieldLines = fieldsData
                .map((field) => `- ID: ${field.id}, Name: "${field.name}", Type: ${field.type}, Page: ${field.page}, Value: "${field.current_value}", Coordinates: (${field.x}, ${field.y}, ${field.width}x${field.height})`)
                .join('\n');
            const discoveryContext = fieldsData.length > 0
                ? `\nDiscovered form fields metadata:\n${discoveredFieldLines}`
                : '\nNo structured form fields discovered. Perform pure OCR analysis of the images.';
            // Choose system prompt based on mode
            const basePrompt = isIdentify ? IDENTIFY_MODE_PROMPT : AUTOFILL_MODE_PROMPT;
            // Add client information context for autofill mode
            const clientContext = isAutofill
                ? `\n\nClient Information:\n${this.params.clientInformation}\n\nUse this information to fill appropriate field values.`
                : '';
            const enhancedPrompt = basePrompt + discoveryContext + clientContext;
            const identifyMessage = `Please analyze these PDF pages and identify all form fields.

Please return a JSON array of field objects as specified in the system prompt. Focus on:
1. Identifying all text fields, checkboxes, and form elements
2. Generating descriptive field names based on labels, context, and purpose
3. Cross-referencing with any discovered form field metadata provided
4. Providing confidence scores for field identification

Return only the JSON array, no additional text or formatting.`;
            const autofillMessage = `Please analyze these PDF pages and identify all form fields, then fill them using the provided client information.

Please return a JSON array of field objects as specified in the system prompt. Focus on:
1. Identifying all text fields, checkboxes, and form elements
2. Generating descriptive field names based on labels, context, and purpose
3. Cross-referencing with any discovered form field metadata provided
4. Using the client information to determine appropriate values for each field
5. Providing confidence scores for field identification and value assignment

Return only the JSON array, no additional text or formatting.`;
            // Step 4: AI analysis
            console.log('[PDFOcrWorkflow] Step 4: Performing AI analysis...');
            const aiAgent = new AIAgentBubble({
                message: isIdentify ? identifyMessage : autofillMessage,
                images: imageInputs,
                systemPrompt: enhancedPrompt,
                model: {
                    model: modelName,
                    // `??` so an explicit temperature of 0 (fully deterministic) is honored.
                    temperature: this.params.aiOptions?.temperature ?? 0.3,
                    maxTokens: this.params.aiOptions?.maxTokens || 50000,
                    jsonMode: this.params.aiOptions?.jsonMode ?? true,
                },
                credentials: this.params.credentials,
                tools: [], // No tools needed for this analysis
                maxIterations: 3,
            }, this.context);
            const aiResult = await aiAgent.action();
            if (!aiResult.success) {
                throw new Error(`AI analysis failed: ${aiResult.error}`);
            }
            console.log('[PDFOcrWorkflow] AI analysis completed');
            // Step 5: Parse AI response and structure results
            console.log('[PDFOcrWorkflow] Step 5: Processing AI results...');
            const extractedFields = pdfOcrParseFields(aiResult.data?.response, this.params.mode);
            const processingTime = Date.now() - startTime;
            console.log(`[PDFOcrWorkflow] Extracted ${extractedFields.length} fields`);
            console.log(`[PDFOcrWorkflow] Total processing time: ${processingTime}ms`);
            // Handle autofill mode - fill the PDF with extracted values
            let filledPdfData = '';
            let fillResults = { filledFields: 0, successfullyFilled: 0 };
            if (isAutofill) {
                console.log('[PDFOcrWorkflow] Step 6: Filling PDF with extracted values...');
                // Create field values map (discovered field name -> value) from autofill results
                const fieldValues = {};
                for (const field of extractedFields) {
                    // Fields without a value have nothing to write into the PDF.
                    if (!('value' in field && field.value)) {
                        continue;
                    }
                    const matchingDiscoveredField = pdfOcrFindDiscoveredField(field, fieldsData);
                    if (matchingDiscoveredField) {
                        fieldValues[matchingDiscoveredField.name] = field.value;
                    }
                    else {
                        console.log(`[PDFOcrWorkflow] DEBUG: No match found for AI field: "${field.fieldName}" (originalFieldName: "${field.originalFieldName}", value: "${field.value}")`);
                    }
                }
                const fillableCount = Object.keys(fieldValues).length;
                if (fillableCount > 0) {
                    console.log(`[PDFOcrWorkflow] Attempting to fill ${fillableCount} fields`);
                    // Use PDF Form Operations to fill the form
                    const fillWorkflow = new PDFFormOperationsWorkflow({
                        operation: 'fill',
                        pdfData: this.params.pdfData,
                        fieldValues,
                        credentials: this.params.credentials,
                    }, this.context);
                    const fillResult = await fillWorkflow.action();
                    if (fillResult.success && fillResult.data) {
                        filledPdfData = fillResult.data.filledPdfData;
                        fillResults = {
                            filledFields: fillableCount,
                            successfullyFilled: fillResult.data.filledFields,
                        };
                        console.log(`[PDFOcrWorkflow] Successfully filled ${fillResults.successfullyFilled} fields`);
                    }
                    else {
                        console.warn(`[PDFOcrWorkflow] PDF filling failed: ${fillResult.error}`);
                        // Fall back to original PDF
                        filledPdfData = this.params.pdfData;
                    }
                }
                else {
                    console.log('[PDFOcrWorkflow] No field values found for filling, returning original PDF');
                    filledPdfData = this.params.pdfData;
                }
            }
            // Return appropriate result based on mode
            return pdfOcrAssembleResult({
                isIdentify,
                extractedFields,
                summary: {
                    discoveryData: {
                        totalFields: discoveryResult.data?.totalFields || 0,
                        fieldsWithCoordinates: fieldsData.filter((f) => f.x !== 0 || f.y !== 0).length,
                        pages: [...new Set(fieldsData.map((f) => f.page))],
                    },
                    imageData: {
                        totalPages: imageResult.data?.totalPages || 0,
                        convertedPages: imageResult.data?.convertedPages || 0,
                        format: imageFormat,
                        dpi: imageDpi,
                    },
                    aiAnalysis: {
                        model: modelName,
                        iterations: aiResult.data?.iterations || 0,
                        processingTime,
                    },
                },
                filledPdfData,
                fillResults,
                success: true,
                error: '',
            });
        }
        catch (error) {
            const processingTime = Date.now() - startTime;
            console.error('[PDFOcrWorkflow] Workflow failed:', error);
            // Return appropriate error result based on mode: same shape as a
            // success result, with every counter zeroed.
            return pdfOcrAssembleResult({
                isIdentify,
                extractedFields: [],
                summary: {
                    discoveryData: {
                        totalFields: 0,
                        fieldsWithCoordinates: 0,
                        pages: [],
                    },
                    imageData: {
                        totalPages: 0,
                        convertedPages: 0,
                        format: imageFormat,
                        dpi: imageDpi,
                    },
                    aiAnalysis: {
                        model: modelName,
                        iterations: 0,
                        processingTime,
                    },
                },
                filledPdfData: '',
                fillResults: {
                    filledFields: 0,
                    successfullyFilled: 0,
                },
                success: false,
                error: error instanceof Error
                    ? error.message
                    : 'Unknown error during PDF OCR workflow',
            });
        }
    }
}
781
+ //# sourceMappingURL=pdf-ocr.workflow.js.map