Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -23,15 +23,10 @@ from flowfile_core.flowfile.sources.external_sources.sql_source.models import (D
23
23
  DatabaseExternalWriteSettings)
24
24
  from flowfile_core.schemas.cloud_storage_schemas import CloudStorageWriteSettingsWorkerInterface
25
25
  from flowfile_core.schemas.input_schema import (
26
- ReceivedCsvTable,
27
- ReceivedExcelTable,
28
- ReceivedJsonTable,
29
- ReceivedParquetTable
26
+ ReceivedTable
30
27
  )
31
28
  from flowfile_core.utils.arrow_reader import read
32
29
 
33
- ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
34
-
35
30
 
36
31
  def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, file_ref: str, operation_type: OperationType = 'store') -> Status:
37
32
  encoded_operation = encodebytes(lf.serialize()).decode()
@@ -39,7 +34,7 @@ def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, fil
39
34
  'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
40
35
  v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
41
36
  if not v.ok:
42
- raise Exception(f'Could not cache the data, {v.text}')
37
+ raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
43
38
  return Status(**v.json())
44
39
 
45
40
 
@@ -49,7 +44,7 @@ def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node
49
44
  'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
50
45
  v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
51
46
  if not v.ok:
52
- raise Exception(f'Could not cache the data, {v.text}')
47
+ raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
53
48
  return Status(**v.json())
54
49
 
55
50
 
@@ -67,18 +62,19 @@ def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
67
62
  flowfile_flow_id=flow_id,
68
63
  flowfile_node_id=node_id
69
64
  )
65
+ print("fuzzy join input", fuzzy_join_input)
70
66
  v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
71
67
  if not v.ok:
72
- raise Exception(f'Could not cache the data, {v.text}')
68
+ raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
73
69
  return Status(**v.json())
74
70
 
75
71
 
76
- def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTableCollection,
72
+ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: ReceivedTable,
77
73
  file_type: str = Literal['csv', 'parquet', 'json', 'excel']):
78
74
  f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
79
75
  params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
80
76
  if not f.ok:
81
- raise Exception(f'Could not cache the data, {f.text}')
77
+ raise Exception(f'trigger_create_operation: Could not cache the data, {f.text}')
82
78
  return Status(**f.json())
83
79
 
84
80
 
@@ -86,7 +82,7 @@ def trigger_database_read_collector(database_external_read_settings: DatabaseExt
86
82
  f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
87
83
  data=database_external_read_settings.model_dump_json())
88
84
  if not f.ok:
89
- raise Exception(f'Could not cache the data, {f.text}')
85
+ raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
90
86
  return Status(**f.json())
91
87
 
92
88
 
@@ -94,7 +90,7 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
94
90
  f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
95
91
  data=database_external_write_settings.model_dump_json())
96
92
  if not f.ok:
97
- raise Exception(f'Could not cache the data, {f.text}')
93
+ raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
98
94
  return Status(**f.json())
99
95
 
100
96
 
@@ -102,7 +98,7 @@ def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWr
102
98
  f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
103
99
  data=database_external_write_settings.model_dump_json())
104
100
  if not f.ok:
105
- raise Exception(f'Could not cache the data, {f.text}')
101
+ raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
106
102
  return Status(**f.json())
107
103
 
108
104
 
@@ -111,7 +107,7 @@ def get_results(file_ref: str) -> Status | None:
111
107
  if f.status_code == 200:
112
108
  return Status(**f.json())
113
109
  else:
114
- raise Exception(f'Could not fetch the data, {f.text}')
110
+ raise Exception(f'get_results: Could not fetch the data, {f.text}')
115
111
 
116
112
 
117
113
  def results_exists(file_ref: str):
@@ -128,6 +124,25 @@ def results_exists(file_ref: str):
128
124
  return False
129
125
 
130
126
 
127
+ def clear_task_from_worker(file_ref: str) -> bool:
128
+ """
129
+ Clears a task from the worker service by making a DELETE request. It also removes associated cached files.
130
+ Args:
131
+ file_ref (str): The unique identifier of the task to clear.
132
+
133
+ Returns:
134
+ bool: True if the task was successfully cleared, False otherwise.
135
+ """
136
+ try:
137
+ f = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
138
+ if f.status_code == 200:
139
+ return True
140
+ return False
141
+ except requests.RequestException as e:
142
+ logger.error(f"Failed to remove results: {str(e)}")
143
+ return False
144
+
145
+
131
146
  def get_df_result(encoded_df: str) -> pl.LazyFrame:
132
147
  r = decodebytes(encoded_df.encode())
133
148
  return pl.LazyFrame.deserialize(io.BytesIO(r))
@@ -174,87 +189,161 @@ def cancel_task(file_ref: str) -> bool:
174
189
 
175
190
 
176
191
  class BaseFetcher:
177
- result: Optional[Any] = None
178
- started: bool = False
179
- running: bool = False
180
- error_code: int = 0
181
- error_description: Optional[str] = None
182
- file_ref: Optional[str] = None
192
+ """
193
+ Thread-safe fetcher for polling worker status and retrieving results.
194
+ """
183
195
 
184
196
  def __init__(self, file_ref: str = None):
185
197
  self.file_ref = file_ref if file_ref else str(uuid4())
186
- self.stop_event = threading.Event()
187
- self.thread = threading.Thread(target=self._fetch_cached_df)
188
- self.result = None
189
- self.error_description = None
190
- self.running = False
191
- self.started = False
192
- self.condition = threading.Condition()
193
- self.error_code = 0
198
+
199
+ # Thread synchronization
200
+ self._lock = threading.Lock()
201
+ self._condition = threading.Condition(self._lock)
202
+ self._stop_event = threading.Event()
203
+ self._thread = None
204
+
205
+ # State variables - use properties for thread-safe access
206
+ self._result: Optional[Any] = None
207
+ self._started: bool = False
208
+ self._running: bool = False
209
+ self._error_code: int = 0
210
+ self._error_description: Optional[str] = None
211
+
212
+ # Public properties for compatibility with subclasses
213
+ @property
214
+ def result(self) -> Optional[Any]:
215
+ with self._lock:
216
+ return self._result
217
+
218
+ @property
219
+ def started(self) -> bool:
220
+ with self._lock:
221
+ return self._started
222
+
223
+ @property
224
+ def running(self) -> bool:
225
+ with self._lock:
226
+ return self._running
227
+
228
+ @running.setter
229
+ def running(self, value: bool):
230
+ """Allow subclasses to set running status and auto-start if needed."""
231
+ with self._lock:
232
+ self._running = value
233
+ # If subclass sets running=True, auto-start the thread
234
+ if value and not self._started:
235
+ self._start_thread()
236
+
237
+ @property
238
+ def error_code(self) -> int:
239
+ with self._lock:
240
+ return self._error_code
241
+
242
+ @property
243
+ def error_description(self) -> Optional[str]:
244
+ with self._lock:
245
+ return self._error_description
246
+
247
+ def _start_thread(self):
248
+ """Internal method to start thread (must be called under lock)."""
249
+ if not self._started:
250
+ self._thread = threading.Thread(target=self._fetch_cached_df, daemon=True)
251
+ self._thread.start()
252
+ self._started = True
194
253
 
195
254
  def _fetch_cached_df(self):
196
- with self.condition:
197
- if self.running:
198
- logger.info('Already running the fetching')
199
- return
255
+ """Background thread that polls for results."""
256
+ sleep_time = 0.5
200
257
 
201
- sleep_time = .5
202
- self.running = True
203
- while not self.stop_event.is_set():
258
+ # Don't check _running here - subclasses already set it
259
+ try:
260
+ while not self._stop_event.is_set():
204
261
  try:
205
- r = requests.get(f'{WORKER_URL}/status/{self.file_ref}')
262
+ r = requests.get(f'{WORKER_URL}/status/{self.file_ref}', timeout=10)
263
+
206
264
  if r.status_code == 200:
207
265
  status = Status(**r.json())
266
+
208
267
  if status.status == 'Completed':
209
268
  self._handle_completion(status)
210
269
  return
211
270
  elif status.status == 'Error':
212
271
  self._handle_error(1, status.error_message)
213
- break
272
+ return
214
273
  elif status.status == 'Unknown Error':
215
- self._handle_error(-1,
216
- 'There was an unknown error with the process, '
217
- 'and the process got killed by the server')
218
- break
274
+ self._handle_error(
275
+ -1,
276
+ 'There was an unknown error with the process, '
277
+ 'and the process got killed by the server'
278
+ )
279
+ return
219
280
  else:
220
- self._handle_error(2, r.text)
221
- break
281
+ self._handle_error(2, f"HTTP {r.status_code}: {r.text}")
282
+ return
283
+
222
284
  except requests.RequestException as e:
223
285
  self._handle_error(2, f"Request failed: {e}")
224
- break
286
+ return
225
287
 
226
- sleep(sleep_time)
288
+ # Sleep without holding the lock
289
+ if not self._stop_event.wait(timeout=sleep_time):
290
+ continue
291
+ else:
292
+ break
227
293
 
294
+ # Only reached if stop_event was set
228
295
  self._handle_cancellation()
229
296
 
297
+ except Exception as e:
298
+ # Catch any unexpected errors
299
+ logger.exception("Unexpected error in fetch thread")
300
+ self._handle_error(-1, f"Unexpected error: {e}")
301
+
230
302
  def _handle_completion(self, status):
231
- self.running = False
232
- self.condition.notify_all()
233
- if status.result_type == 'polars':
234
- self.result = get_df_result(status.results)
235
- else:
236
- self.result = status.results
237
-
238
- def _handle_error(self, code, description):
239
- self.error_code = code
240
- self.error_description = description
241
- self.running = False
242
- self.condition.notify_all()
303
+ """Handle successful completion. Must be called from fetch thread."""
304
+ with self._condition:
305
+ try:
306
+ if status.result_type == 'polars':
307
+ self._result = get_df_result(status.results)
308
+ else:
309
+ self._result = status.results
310
+ except Exception as e:
311
+ logger.exception("Error processing result")
312
+ self._error_code = -1
313
+ self._error_description = f"Error processing result: {e}"
314
+ finally:
315
+ self._running = False
316
+ self._condition.notify_all()
317
+
318
+ def _handle_error(self, code: int, description: str):
319
+ """Handle error state. Must be called from fetch thread."""
320
+ with self._condition:
321
+ self._error_code = code
322
+ self._error_description = description
323
+ self._running = False
324
+ self._condition.notify_all()
243
325
 
244
326
  def _handle_cancellation(self):
245
- logger.warning("Fetch operation cancelled")
246
- if self.error_description is not None:
247
- logger.warning(self.error_description)
248
- self.running = False
249
- self.condition.notify_all()
327
+ """Handle cancellation. Must be called from fetch thread."""
328
+ with self._condition:
329
+ if self._error_description is None:
330
+ self._error_description = "Task cancelled"
331
+ logger.warning(f"Fetch operation cancelled: {self._error_description}")
332
+ self._running = False
333
+ self._condition.notify_all()
250
334
 
251
335
  def start(self):
252
- if self.running:
253
- logger.info('Already running the fetching')
254
- return
255
- if not self.started:
256
- self.thread.start()
257
- self.started = True
336
+ """Start the background fetch thread."""
337
+ with self._lock:
338
+ if self._started:
339
+ logger.info('Fetcher already started')
340
+ return
341
+ if self._running:
342
+ logger.info('Already running the fetching')
343
+ return
344
+
345
+ self._running = True
346
+ self._start_thread()
258
347
 
259
348
  def cancel(self):
260
349
  """
@@ -262,30 +351,67 @@ class BaseFetcher:
262
351
  Also cleans up any resources being used.
263
352
  """
264
353
  logger.warning('Cancelling the operation')
354
+
355
+ # Cancel on the worker side
265
356
  try:
266
357
  cancel_task(self.file_ref)
267
358
  except Exception as e:
268
359
  logger.error(f'Failed to cancel task on worker: {str(e)}')
269
360
 
270
- # Then stop the local monitoring thread
271
- self.stop_event.set()
272
- self.thread.join()
361
+ # Signal the thread to stop
362
+ self._stop_event.set()
273
363
 
274
- # Update local state
275
- with self.condition:
276
- self.running = False
277
- self.error_description = "Task cancelled by user"
278
- self.condition.notify_all()
364
+ # Wait for thread to finish
365
+ if self._thread and self._thread.is_alive():
366
+ self._thread.join(timeout=5.0)
367
+ if self._thread.is_alive():
368
+ logger.warning("Fetch thread did not stop within timeout")
279
369
 
280
370
  def get_result(self) -> Optional[Any]:
281
- if not self.started:
282
- self.start()
283
- with self.condition:
284
- while self.running and self.result is None:
285
- self.condition.wait() # Wait until notified
286
- if self.error_description is not None:
287
- raise Exception(self.error_description)
288
- return self.result
371
+ """
372
+ Get the result, blocking until it's available.
373
+
374
+ Returns:
375
+ The fetched result.
376
+
377
+ Raises:
378
+ Exception: If an error occurred during fetching.
379
+ """
380
+ # Start if not already started (for manual usage)
381
+ with self._lock:
382
+ if not self._started:
383
+ if not self._running:
384
+ self._running = True
385
+ self._start_thread()
386
+
387
+ # Wait for completion
388
+ with self._condition:
389
+ while self._running:
390
+ self._condition.wait()
391
+
392
+ # Check for errors
393
+ with self._lock:
394
+ if self._error_description is not None:
395
+ raise Exception(self._error_description)
396
+ return self._result
397
+
398
+ @property
399
+ def is_running(self) -> bool:
400
+ """Check if the fetcher is currently running."""
401
+ with self._lock:
402
+ return self._running
403
+
404
+ @property
405
+ def has_error(self) -> bool:
406
+ """Check if the fetcher encountered an error."""
407
+ with self._lock:
408
+ return self._error_description is not None
409
+
410
+ @property
411
+ def error_info(self) -> tuple[int, Optional[str]]:
412
+ """Get error code and description."""
413
+ with self._lock:
414
+ return self._error_code, self._error_description
289
415
 
290
416
 
291
417
  class ExternalDfFetcher(BaseFetcher):
@@ -334,7 +460,7 @@ class ExternalFuzzyMatchFetcher(BaseFetcher):
334
460
 
335
461
 
336
462
  class ExternalCreateFetcher(BaseFetcher):
337
- def __init__(self, received_table: ReceivedTableCollection, node_id: int, flow_id: int,
463
+ def __init__(self, received_table: ReceivedTable, node_id: int, flow_id: int,
338
464
  file_type: str = 'csv', wait_on_completion: bool = True):
339
465
  r = trigger_create_operation(received_table=received_table, file_type=file_type,
340
466
  node_id=node_id, flow_id=flow_id)
@@ -23,8 +23,10 @@ def get_data_type(vals: Iterable[Any]):
23
23
  def calculate_schema(lf: pl.LazyFrame) -> List[Dict]:
24
24
  r = ExternalDfFetcher(lf=lf, operation_type='calculate_schema', wait_on_completion=False, flow_id=-1, node_id=-1)
25
25
  schema_stats: List[Dict] = r.get_result()
26
+
26
27
  for schema_stat in schema_stats:
27
28
  schema_stat['pl_datatype'] = getattr(pl.datatypes, schema_stat['pl_datatype'])
29
+
28
30
  return schema_stats
29
31
 
30
32