Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -0,0 +1,103 @@
1
+ from typing import TypedDict, List
2
+
3
+
4
+ # === Transform Schema YAML Types ===
5
+
6
+ class SelectInputYaml(TypedDict, total=False):
7
+ old_name: str
8
+ new_name: str
9
+ keep: bool
10
+ data_type: str
11
+
12
+
13
+ class JoinInputsYaml(TypedDict):
14
+ select: List[SelectInputYaml]
15
+
16
+
17
+ class JoinMapYaml(TypedDict):
18
+ left_col: str
19
+ right_col: str
20
+
21
+
22
+ class JoinInputYaml(TypedDict):
23
+ join_mapping: List[JoinMapYaml]
24
+ left_select: JoinInputsYaml
25
+ right_select: JoinInputsYaml
26
+ how: str
27
+
28
+
29
+ class CrossJoinInputYaml(TypedDict):
30
+ left_select: JoinInputsYaml
31
+ right_select: JoinInputsYaml
32
+
33
+
34
+ class FuzzyMappingYaml(TypedDict, total=False):
35
+ left_col: str
36
+ right_col: str
37
+ threshold_score: float
38
+ fuzzy_type: str
39
+ perc_unique: float
40
+ output_column_name: str
41
+ valid: bool
42
+
43
+
44
+ class FuzzyMatchInputYaml(TypedDict):
45
+ join_mapping: List[FuzzyMappingYaml]
46
+ left_select: JoinInputsYaml
47
+ right_select: JoinInputsYaml
48
+ how: str
49
+ aggregate_output: bool
50
+
51
+
52
+ # === Input Schema YAML Types ===
53
+
54
+ class OutputSettingsYaml(TypedDict, total=False):
55
+ name: str
56
+ directory: str
57
+ file_type: str
58
+ write_mode: str
59
+ abs_file_path: str
60
+ fields: List[str]
61
+ table_settings: dict
62
+
63
+
64
+ class NodeSelectYaml(TypedDict):
65
+ cache_results: bool
66
+ keep_missing: bool
67
+ select_input: List[SelectInputYaml]
68
+ sorted_by: str
69
+
70
+
71
+ class NodeJoinYaml(TypedDict):
72
+ cache_results: bool
73
+ auto_generate_selection: bool
74
+ verify_integrity: bool
75
+ join_input: JoinInputYaml
76
+ auto_keep_all: bool
77
+ auto_keep_right: bool
78
+ auto_keep_left: bool
79
+
80
+
81
+ class NodeCrossJoinYaml(TypedDict):
82
+ cache_results: bool
83
+ auto_generate_selection: bool
84
+ verify_integrity: bool
85
+ cross_join_input: CrossJoinInputYaml
86
+ auto_keep_all: bool
87
+ auto_keep_right: bool
88
+ auto_keep_left: bool
89
+
90
+
91
+ class NodeFuzzyMatchYaml(TypedDict):
92
+ cache_results: bool
93
+ auto_generate_selection: bool
94
+ verify_integrity: bool
95
+ join_input: FuzzyMatchInputYaml
96
+ auto_keep_all: bool
97
+ auto_keep_right: bool
98
+ auto_keep_left: bool
99
+
100
+
101
+ class NodeOutputYaml(TypedDict):
102
+ cache_results: bool
103
+ output_settings: OutputSettingsYaml
flowfile_core/types.py ADDED
@@ -0,0 +1,156 @@
1
+ # types.py - Public API for type specifications
2
+ """
3
+ Public type system for column selection and data type specification.
4
+
5
+ Usage:
6
+ from flowfile_core.types import Types
7
+
8
+ # Use type groups
9
+ ColumnSelector(data_types=Types.Numeric)
10
+ ColumnSelector(data_types=Types.String)
11
+
12
+ # Use specific types
13
+ ColumnSelector(data_types=Types.Int64)
14
+ ColumnSelector(data_types=Types.Float)
15
+
16
+ # Mix and match
17
+ ColumnSelector(data_types=[Types.Numeric, Types.String])
18
+ """
19
+
20
+ from enum import Enum
21
+ from typing import List, Literal, Union
22
+ import polars as pl
23
+
24
+
25
+ DataTypeStr = Literal[
26
+ "Int8", "Int16", "Int32", "Int64",
27
+ "UInt8", "UInt16", "UInt32", "UInt64",
28
+ "Float32", "Float64", "Decimal",
29
+ "String",
30
+ "Date", "Datetime", "Time", "Duration",
31
+ "Boolean", "Binary", "List", "Struct", "Array", "Integer", "Double", "Utf8"
32
+ ]
33
+
34
+
35
+ class TypeGroup(str, Enum):
36
+ """High-level type groups for column selection."""
37
+ Numeric = "Numeric"
38
+ String = "String"
39
+ Date = "Date"
40
+ Boolean = "Boolean"
41
+ Binary = "Binary"
42
+ Complex = "Complex"
43
+ All = "ALL"
44
+
45
+ def __str__(self) -> str:
46
+ return self.value
47
+
48
+ def __repr__(self) -> str:
49
+ return f"Types.{self.name}"
50
+
51
+
52
+ class DataType(str, Enum):
53
+ """Specific data types for fine-grained control."""
54
+ # Numeric types
55
+ Int8 = "Int8"
56
+ Int16 = "Int16"
57
+ Int32 = "Int32"
58
+ Int64 = "Int64"
59
+ UInt8 = "UInt8"
60
+ UInt16 = "UInt16"
61
+ UInt32 = "UInt32"
62
+ UInt64 = "UInt64"
63
+ Float32 = "Float32"
64
+ Float64 = "Float64"
65
+ Decimal = "Decimal"
66
+
67
+ # String types
68
+ String = "String"
69
+ Categorical = "Categorical"
70
+
71
+ # Date types
72
+ Date = "Date"
73
+ Datetime = "Datetime"
74
+ Time = "Time"
75
+ Duration = "Duration"
76
+
77
+ # Other types
78
+ Boolean = "Boolean"
79
+ Binary = "Binary"
80
+ List = "List"
81
+ Struct = "Struct"
82
+ Array = "Array"
83
+
84
+ def __str__(self) -> str:
85
+ return self.value
86
+
87
+ def __repr__(self) -> str:
88
+ return f"Types.{self.name}"
89
+
90
+
91
+ class Types:
92
+ """
93
+ Main entry point for type specifications.
94
+
95
+ Examples:
96
+ Types.Numeric # All numeric columns
97
+ Types.String # All string columns
98
+ Types.Int64 # 64-bit integers only
99
+ Types.Float # Alias for Float64
100
+ Types.All # All column types
101
+ """
102
+
103
+ # Type groups (most common use case)
104
+ Numeric = TypeGroup.Numeric
105
+ String = TypeGroup.String
106
+ AnyDate = TypeGroup.Date
107
+ Boolean = TypeGroup.Boolean
108
+ Binary = TypeGroup.Binary
109
+ Complex = TypeGroup.Complex
110
+ All = TypeGroup.All
111
+
112
+ # Specific numeric types
113
+ Int = DataType.Int64 # Default integer
114
+ Int8 = DataType.Int8
115
+ Int16 = DataType.Int16
116
+ Int32 = DataType.Int32
117
+ Int64 = DataType.Int64
118
+ UInt8 = DataType.UInt8
119
+ UInt16 = DataType.UInt16
120
+ UInt32 = DataType.UInt32
121
+ UInt64 = DataType.UInt64
122
+
123
+ Float = DataType.Float64 # Default float
124
+ Float32 = DataType.Float32
125
+ Float64 = DataType.Float64
126
+ Decimal = DataType.Decimal
127
+
128
+ # String types
129
+ Str = DataType.String
130
+ Text = DataType.String # Alias
131
+ Categorical = DataType.Categorical
132
+ Cat = DataType.Categorical # Short alias
133
+
134
+ # Date/time types
135
+ Date = DataType.Date
136
+ Datetime = DataType.Datetime
137
+ Time = DataType.Time
138
+ Duration = DataType.Duration
139
+
140
+ # Other types
141
+ Bool = DataType.Boolean
142
+ Bytes = DataType.Binary
143
+ List = DataType.List
144
+ Struct = DataType.Struct
145
+ Array = DataType.Array
146
+
147
+
148
+ # Type alias for better type hints
149
+ TypeSpec = Union[
150
+ TypeGroup,
151
+ DataType,
152
+ str,
153
+ List[Union[TypeGroup, DataType, str, type[pl.DataType], pl.DataType]],
154
+ type[pl.DataType],
155
+ pl.DataType
156
+ ]
@@ -3,7 +3,7 @@ as have a component in flowfile_frontend"""
3
3
 
4
4
  from flowfile_core.schemas import input_schema
5
5
  from flowfile_core.flowfile.flow_graph import FlowGraph
6
- from flowfile_core.configs.node_store.nodes import nodes_list, NodeTemplate
6
+ from flowfile_core.configs.node_store import nodes_list, NodeTemplate
7
7
  import inspect
8
8
 
9
9
 
@@ -31,6 +31,8 @@ def validate_setup():
31
31
  Raises ValueError if any node is missing either.
32
32
  """
33
33
  for node in nodes_list:
34
+ if node.custom_node:
35
+ continue
34
36
  check_if_node_has_add_function_in_flow_graph(node)
35
37
  check_if_node_has_input_schema_definition(node)
36
38
 
@@ -1,6 +1,8 @@
1
1
  # flowframe/__init__.py
2
2
  """A Polars-like API for building ETL graphs."""
3
3
 
4
+ from importlib.metadata import version
5
+
4
6
  # Core classes
5
7
  from flowfile_frame.flow_frame import FlowFrame # noqa: F401
6
8
  from pl_fuzzy_frame_match.models import FuzzyMapping # noqa: F401
@@ -64,4 +66,4 @@ from polars.datatypes import ( # noqa: F401
64
66
  DataType, DataTypeClass, Field
65
67
  )
66
68
 
67
- __version__ = "0.1.0"
69
+ __version__ = version("Flowfile")
@@ -10,7 +10,7 @@ from flowfile_frame.lazy_methods import add_lazyframe_methods
10
10
  from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
11
11
  from collections.abc import Iterator
12
12
 
13
- from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
13
+ from pl_fuzzy_frame_match import FuzzyMapping
14
14
 
15
15
  from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
16
16
  from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
@@ -27,6 +27,8 @@ from flowfile_frame.join import _normalize_columns_to_list, _create_join_mapping
27
27
  from flowfile_frame.utils import _check_if_convertible_to_code
28
28
  from flowfile_frame.config import logger
29
29
  from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage
30
+ from collections.abc import Mapping
31
+
30
32
 
31
33
 
32
34
  def can_be_expr(param: inspect.Parameter) -> bool:
@@ -624,7 +626,6 @@ class FlowFrame:
624
626
  left_columns, right_columns = self._parse_join_columns(
625
627
  on, left_on, right_on, how
626
628
  )
627
-
628
629
  # Step 5: Validate column lists have same length (except for cross join)
629
630
  if how != 'cross' and left_columns is not None and right_columns is not None:
630
631
  if len(left_columns) != len(right_columns):
@@ -796,33 +797,36 @@ class FlowFrame:
796
797
  ) -> "FlowFrame":
797
798
  """Execute join using native FlowFile join nodes."""
798
799
  # Create select inputs for both frames
800
+
799
801
  left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
800
802
  right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
801
803
  # Create appropriate join input based on join type
802
804
  if how == 'cross':
803
805
  join_input = transform_schema.CrossJoinInput(
804
- left_select=left_select.renames,
806
+ left_select=transform_schema.JoinInputs(renames=left_select.renames),
805
807
  right_select=right_select.renames,
806
808
  )
809
+ join_input_manager = transform_schema.CrossJoinInputManager(join_input)
810
+
807
811
  else:
808
812
  join_input = transform_schema.JoinInput(
809
813
  join_mapping=join_mappings,
810
- left_select=left_select.renames,
814
+ left_select=transform_schema.JoinInputs(renames=left_select.renames),
811
815
  right_select=right_select.renames,
812
816
  how=how,
813
817
  )
818
+ join_input_manager = transform_schema.JoinInputManager(join_input)
814
819
 
815
820
  # Configure join input
816
- join_input.auto_rename()
817
- for right_column in right_select.renames:
821
+ for right_column in join_input_manager.right_select.renames:
818
822
  if right_column.join_key:
819
823
  right_column.keep = False
820
824
 
821
825
  # Create and add appropriate node
822
826
  if how == 'cross':
823
- self._add_cross_join_node(new_node_id, join_input, description, other)
827
+ self._add_cross_join_node(new_node_id, join_input_manager.to_cross_join_input(), description, other)
824
828
  else:
825
- self._add_regular_join_node(new_node_id, join_input, description, other)
829
+ self._add_regular_join_node(new_node_id, join_input_manager.to_join_input(), description, other)
826
830
 
827
831
  # Add connections
828
832
  self._add_connection(self.node_id, new_node_id, "main")
@@ -890,13 +894,18 @@ class FlowFrame:
890
894
  self.flow_graph.add_record_count(node_number_of_records)
891
895
  return self._create_child_frame(new_node_id)
892
896
 
893
- def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None) -> "FlowFrame":
897
+ def rename(self, mapping: Mapping[str, str], *, strict: bool = True,
898
+ description: str = None) -> "FlowFrame":
899
+ """Rename columns based on a mapping or function."""
900
+ return self.select([col(old_name).alias(new_name) for old_name, new_name in mapping.items()],
901
+ description=description, _keep_missing=True)
902
+
903
+ def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None, _keep_missing: bool = False) -> "FlowFrame":
894
904
  """
895
905
  Select columns from the frame.
896
906
  """
897
907
  columns_iterable = list(_parse_inputs_as_iterable(columns))
898
908
  new_node_id = generate_node_id()
899
-
900
909
  if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
901
910
  and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
902
911
  return self._add_number_of_records(new_node_id, description)
@@ -914,7 +923,6 @@ class FlowFrame:
914
923
  for expr_input in effective_columns_iterable:
915
924
  current_expr_obj = expr_input
916
925
  is_simple_col_for_native = False
917
-
918
926
  if isinstance(expr_input, str):
919
927
  current_expr_obj = col(expr_input)
920
928
  selected_col_names_for_native.append(transform_schema.SelectInput(old_name=expr_input))
@@ -942,14 +950,18 @@ class FlowFrame:
942
950
  if can_use_native_node:
943
951
  existing_cols = self.columns
944
952
  selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
945
- dropped_columns = [transform_schema.SelectInput(c, keep=False) for c in existing_cols if
953
+ not_selected_columns = [transform_schema.SelectInput(c, keep=_keep_missing) for c in existing_cols if
946
954
  c not in selected_col_names]
947
- selected_col_names_for_native.extend(dropped_columns)
955
+ selected_col_names_for_native.extend(not_selected_columns)
956
+ if _keep_missing:
957
+ lookup_selection = {_col.old_name: _col for _col in selected_col_names_for_native}
958
+ selected_col_names_for_native = [lookup_selection.get(_col) for
959
+ _col in existing_cols if _col in lookup_selection]
948
960
  select_settings = input_schema.NodeSelect(
949
961
  flow_id=self.flow_graph.flow_id,
950
962
  node_id=new_node_id,
951
963
  select_input=selected_col_names_for_native,
952
- keep_missing=False,
964
+ keep_missing=_keep_missing,
953
965
  pos_x=200,
954
966
  pos_y=100,
955
967
  is_setup=True,
@@ -1130,16 +1142,11 @@ class FlowFrame:
1130
1142
  file_name = file_str.split(os.sep)[-1]
1131
1143
  use_polars_code = bool(kwargs.items()) or not is_path_input
1132
1144
 
1133
- output_parquet_table = input_schema.OutputParquetTable(
1134
- file_type="parquet"
1135
- )
1136
1145
  output_settings = input_schema.OutputSettings(
1137
1146
  file_type='parquet',
1138
1147
  name=file_name,
1139
1148
  directory=file_str if is_path_input else str(file_str),
1140
- output_parquet_table=output_parquet_table,
1141
- output_csv_table=input_schema.OutputCsvTable(),
1142
- output_excel_table=input_schema.OutputExcelTable()
1149
+ table_settings=input_schema.OutputParquetTable()
1143
1150
  )
1144
1151
 
1145
1152
  if is_path_input:
@@ -1210,10 +1217,10 @@ class FlowFrame:
1210
1217
  file_type='csv',
1211
1218
  name=file_name,
1212
1219
  directory=file_str if is_path_input else str(file_str),
1213
- output_csv_table=input_schema.OutputCsvTable(
1214
- file_type="csv", delimiter=separator, encoding=encoding),
1215
- output_excel_table=input_schema.OutputExcelTable(),
1216
- output_parquet_table=input_schema.OutputParquetTable()
1220
+ table_settings=input_schema.OutputCsvTable(
1221
+ delimiter=separator,
1222
+ encoding=encoding
1223
+ )
1217
1224
  )
1218
1225
  if is_path_input:
1219
1226
  try:
@@ -186,15 +186,17 @@ def read_csv(
186
186
  file_type='csv',
187
187
  path=current_source_path_for_native,
188
188
  name=Path(current_source_path_for_native).name,
189
- delimiter=separator,
190
- has_headers=has_header,
191
- encoding=encoding,
192
- starting_from_line=skip_rows,
193
- quote_char=quote_char if quote_char is not None else '"',
194
- infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
195
- truncate_ragged_lines=truncate_ragged_lines,
196
- ignore_errors=ignore_errors,
197
- row_delimiter=eol_char
189
+ table_settings=input_schema.InputCsvTable(
190
+ delimiter=separator,
191
+ has_headers=has_header,
192
+ encoding=encoding,
193
+ starting_from_line=skip_rows,
194
+ quote_char=quote_char if quote_char is not None else '"',
195
+ infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
196
+ truncate_ragged_lines=truncate_ragged_lines,
197
+ ignore_errors=ignore_errors,
198
+ row_delimiter=eol_char
199
+ )
198
200
  )
199
201
  if convert_to_absolute_path:
200
202
  try:
@@ -407,6 +409,7 @@ def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = Non
407
409
  file_type='parquet',
408
410
  path=source,
409
411
  name=Path(source).name,
412
+ table_settings=input_schema.InputParquetTable()
410
413
  )
411
414
  if convert_to_absolute_path:
412
415
  received_table.path = received_table.abs_file_path
@@ -1,15 +1,18 @@
1
1
  from typing import Dict
2
- import tempfile
3
2
  import threading
4
3
  import multiprocessing
5
- import os
6
- import shutil
7
- multiprocessing.set_start_method('spawn', force=True)
4
+ from shared.storage_config import storage
5
+ from importlib.metadata import version
6
+
7
+ __version__ = version("Flowfile")
8
8
 
9
+ multiprocessing.set_start_method('spawn', force=True)
9
10
 
10
11
  from multiprocessing import get_context
11
12
  from flowfile_worker.models import Status
13
+
12
14
  mp_context = get_context("spawn")
15
+
13
16
  status_dict: Dict[str, Status] = dict()
14
17
  process_dict = dict()
15
18
 
@@ -17,39 +20,10 @@ status_dict_lock = threading.Lock()
17
20
  process_dict_lock = threading.Lock()
18
21
 
19
22
 
20
- class SharedTempDirectory:
21
- """A class that mimics tempfile.TemporaryDirectory but uses a fixed directory"""
22
- def __init__(self, dir_path):
23
- self._path = dir_path
24
- os.makedirs(self._path, exist_ok=True)
25
-
26
- @property
27
- def name(self):
28
- return self._path
29
-
30
- def cleanup(self):
31
- """Remove all contents of the temp directory"""
32
- try:
33
- shutil.rmtree(self._path)
34
- os.makedirs(self._path, exist_ok=True)
35
- print(f"Cleaned up temporary directory: {self._path}")
36
- except Exception as e:
37
- print(f"Error during cleanup: {e}")
38
-
39
- def __enter__(self):
40
- return self.name
41
-
42
- def __exit__(self, exc, value, tb):
43
- self.cleanup()
44
-
45
-
46
23
  CACHE_EXPIRATION_TIME = 24 * 60 * 60
47
24
 
48
25
 
49
- TEMP_DIR = os.getenv('TEMP_DIR')
50
- if TEMP_DIR:
51
- CACHE_DIR = SharedTempDirectory(TEMP_DIR)
52
- else:
53
- CACHE_DIR = tempfile.TemporaryDirectory()
26
+ CACHE_DIR = storage.cache_directory
27
+
54
28
 
55
29
  PROCESS_MEMORY_USAGE: Dict[str, float] = dict()
@@ -1,29 +1,11 @@
1
- from flowfile_worker.create.models import (ReceivedCsvTable, ReceivedParquetTable, ReceivedExcelTable,
2
- ReceivedJsonTable)
1
+
3
2
  from flowfile_worker.create.funcs import (create_from_path_csv, create_from_path_parquet, create_from_path_excel,
4
3
  create_from_path_json)
5
- from typing import Dict, Literal
4
+ from typing import Literal
6
5
 
7
- ReceivedTableCollection = ReceivedCsvTable | ReceivedParquetTable | ReceivedJsonTable | ReceivedExcelTable
8
6
  FileType = Literal['csv', 'parquet', 'json', 'excel']
9
7
 
10
-
11
- def received_table_parser(received_table_raw: Dict, file_type: FileType) -> ReceivedTableCollection:
12
- match file_type:
13
- case 'csv':
14
- received_table = ReceivedCsvTable.model_validate(received_table_raw)
15
- case 'parquet':
16
- received_table = ReceivedParquetTable.model_validate(received_table_raw)
17
- case 'excel':
18
- received_table = ReceivedExcelTable.model_validate(received_table_raw)
19
- case 'json':
20
- return ReceivedJsonTable.model_validate(received_table_raw)
21
- case _:
22
- raise ValueError(f'Unsupported file type: {file_type}')
23
- return received_table
24
-
25
-
26
- def table_creator_factory_method(file_type: Literal['csv', 'parquet', 'json', 'excel']) -> callable:
8
+ def table_creator_factory_method(file_type: FileType) -> callable:
27
9
  match file_type:
28
10
  case 'csv':
29
11
  return create_from_path_csv