Flowfile 0.3.4.1__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (122)
  1. flowfile/__init__.py +3 -3
  2. flowfile/api.py +36 -15
  3. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  4. flowfile/web/static/assets/CloudConnectionManager-d004942f.js +784 -0
  5. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  6. flowfile/web/static/assets/CloudStorageReader-eccf9fc2.js +437 -0
  7. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  8. flowfile/web/static/assets/CloudStorageWriter-b1ba6bba.js +430 -0
  9. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-68981877.js} +8 -8
  10. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-0b06649c.js} +2 -2
  11. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-8349a426.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-905344f8.js} +9 -9
  13. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-9f5b8638.js} +9 -9
  14. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-131a6d53.js} +5 -5
  15. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-e3549dcc.js} +6 -6
  16. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-6e0730ae.js} +8 -8
  17. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-02f033e6.js} +75 -9
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-54c14036.js} +9 -9
  20. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-08a3f499.js} +5 -5
  21. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-2ae38139.js} +6 -6
  22. flowfile/web/static/assets/{Join-eec38203.js → Join-493b9772.js} +23 -15
  23. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  24. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-4373d163.js} +106 -34
  25. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  26. flowfile/web/static/assets/{Output-3b2ca045.js → Output-b534f3c7.js} +4 -4
  27. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-2968ff65.js} +6 -6
  28. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-65136536.js} +6 -6
  29. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c56339ed.js} +6 -6
  30. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-1c641a5e.js} +5 -5
  31. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-df308b8f.js} +6 -6
  32. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-293e8a64.js} +5 -5
  33. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-03911655.js} +2 -2
  34. flowfile/web/static/assets/{Select-32b28406.js → Select-3058a13d.js} +8 -8
  35. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-fbf4fb39.js} +1 -1
  36. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-a29bbaf7.js} +6 -6
  37. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-c7d7760e.js} +8 -8
  38. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-118f1d20.js} +2 -2
  39. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f0589571.js} +5 -5
  40. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-7329a207.js} +8 -8
  41. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-30b0be15.js} +5 -5
  42. flowfile/web/static/assets/{api-44ca9e9c.js → api-602fb95c.js} +1 -1
  43. flowfile/web/static/assets/api-fb67319c.js +80 -0
  44. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  45. flowfile/web/static/assets/{designer-267d44f1.js → designer-94a6bf4d.js} +36 -34
  46. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-a224831e.js} +1 -1
  47. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-c2d2aa97.js} +1 -1
  48. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-921ac5fd.js} +2 -2
  49. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-7013cc94.js} +3 -3
  50. flowfile/web/static/assets/{index-e235a8bc.js → index-3a75211d.js} +19 -6
  51. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-a63d4680.js} +3 -3
  52. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-763aec6e.js} +1 -1
  53. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-08464729.js} +3 -3
  54. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-f15a5f87.js} +2 -1
  55. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-93bd09d7.js} +1 -1
  56. flowfile/web/static/index.html +1 -1
  57. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/METADATA +8 -3
  58. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/RECORD +109 -104
  59. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/entry_points.txt +2 -0
  60. flowfile_core/__init__.py +2 -0
  61. flowfile_core/configs/node_store/nodes.py +8 -6
  62. flowfile_core/database/connection.py +63 -15
  63. flowfile_core/database/init_db.py +0 -1
  64. flowfile_core/database/models.py +49 -2
  65. flowfile_core/flowfile/code_generator/code_generator.py +402 -18
  66. flowfile_core/flowfile/connection_manager/models.py +1 -1
  67. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  68. flowfile_core/flowfile/extensions.py +1 -1
  69. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  70. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  71. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +522 -59
  72. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  73. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  74. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  75. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  76. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  77. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  78. flowfile_core/flowfile/flow_graph.py +119 -82
  79. flowfile_core/flowfile/flow_node/flow_node.py +68 -33
  80. flowfile_core/flowfile/flow_node/models.py +32 -3
  81. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  82. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  83. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  84. flowfile_core/flowfile/utils.py +1 -23
  85. flowfile_core/main.py +3 -2
  86. flowfile_core/routes/cloud_connections.py +81 -0
  87. flowfile_core/routes/logs.py +0 -1
  88. flowfile_core/routes/routes.py +3 -39
  89. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  90. flowfile_core/schemas/input_schema.py +37 -15
  91. flowfile_core/schemas/schemas.py +7 -2
  92. flowfile_core/schemas/transform_schema.py +97 -22
  93. flowfile_core/utils/utils.py +40 -1
  94. flowfile_core/utils/validate_setup.py +41 -0
  95. flowfile_frame/flow_frame.py +253 -102
  96. flowfile_frame/flow_frame_methods.py +13 -13
  97. flowfile_worker/external_sources/s3_source/main.py +216 -0
  98. flowfile_worker/external_sources/s3_source/models.py +142 -0
  99. flowfile_worker/funcs.py +51 -6
  100. flowfile_worker/models.py +22 -2
  101. flowfile_worker/routes.py +40 -38
  102. flowfile_worker/utils.py +1 -1
  103. test_utils/s3/commands.py +46 -0
  104. test_utils/s3/data_generator.py +291 -0
  105. test_utils/s3/fixtures.py +209 -0
  106. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  107. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  108. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  109. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  110. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  111. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  112. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  114. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  115. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  116. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  117. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  118. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  119. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/LICENSE +0 -0
  120. {flowfile-0.3.4.1.dist-info → flowfile-0.3.6.dist-info}/WHEEL +0 -0
  121. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  122. {flowfile_core/schemas/external_sources → test_utils/s3}/__init__.py +0 -0
@@ -102,17 +102,17 @@ def _extract_expr_parts(expr_obj) -> tuple[str, str]:
102
102
  def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr | None = None,
103
103
  group_expr: pl.Expr | None = None) -> None:
104
104
  if method_name is None:
105
- raise NotImplemented("Cannot create a polars lambda expression without the method")
105
+ raise NotImplementedError("Cannot create a polars lambda expression without the method")
106
106
  if polars_expr is None:
107
- raise NotImplemented("Cannot create polars expressions with lambda function")
107
+ raise NotImplementedError("Cannot create polars expressions with lambda function")
108
108
  method_ref = getattr(pl.LazyFrame, method_name)
109
109
  if method_ref is None:
110
110
  raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
111
111
  if method_name == 'group_by':
112
112
  if group_expr is None:
113
- raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
113
+ raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")
114
114
  if not all(isinstance(ge, pl.Expr) for ge in group_expr):
115
- raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
115
+ raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")
116
116
 
117
117
 
118
118
  def generate_node_id() -> int:
@@ -272,7 +272,6 @@ class FlowFrame:
272
272
  data = pl.LazyFrame()
273
273
  if not isinstance(data, pl.LazyFrame):
274
274
  return
275
-
276
275
  self.node_id = node_id or generate_node_id()
277
276
  self.parent_node_id = parent_node_id
278
277
 
@@ -535,18 +534,18 @@ class FlowFrame:
535
534
  self.flow_graph.add_polars_code(polars_code_settings)
536
535
 
537
536
  def join(
538
- self,
539
- other,
540
- on: List[str | Column] | str | Column = None,
541
- how: str = "inner",
542
- left_on: List[str | Column] | str | Column = None,
543
- right_on: List[str | Column] | str | Column = None,
544
- suffix: str = "_right",
545
- validate: str = None,
546
- nulls_equal: bool = False,
547
- coalesce: bool = None,
548
- maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
549
- description: str = None,
537
+ self,
538
+ other,
539
+ on: List[str | Column] | str | Column = None,
540
+ how: str = "inner",
541
+ left_on: List[str | Column] | str | Column = None,
542
+ right_on: List[str | Column] | str | Column = None,
543
+ suffix: str = "_right",
544
+ validate: str = None,
545
+ nulls_equal: bool = False,
546
+ coalesce: bool = None,
547
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
548
+ description: str = None,
550
549
  ):
551
550
  """
552
551
  Add a join operation to the Logical Plan.
@@ -591,27 +590,90 @@ class FlowFrame:
591
590
  FlowFrame
592
591
  New FlowFrame with join operation applied.
593
592
  """
594
- use_polars_code = not(maintain_order is None and
595
- coalesce is None and
596
- nulls_equal is False and
597
- validate is None and
598
- suffix == '_right')
593
+ # Step 1: Determine if we need to use Polars code
594
+ use_polars_code = self._should_use_polars_code_for_join(
595
+ maintain_order, coalesce, nulls_equal, validate, suffix
596
+ )
597
+
598
+ # Step 2: Ensure both FlowFrames are in the same graph
599
+ self._ensure_same_graph(other)
600
+
601
+ # Step 3: Generate new node ID
602
+ new_node_id = generate_node_id()
603
+
604
+ # Step 4: Parse and validate join columns
605
+ left_columns, right_columns = self._parse_join_columns(
606
+ on, left_on, right_on, how
607
+ )
608
+
609
+ # Step 5: Validate column lists have same length (except for cross join)
610
+ if how != 'cross' and left_columns is not None and right_columns is not None:
611
+ if len(left_columns) != len(right_columns):
612
+ raise ValueError(
613
+ f"Length mismatch: left columns ({len(left_columns)}) != right columns ({len(right_columns)})"
614
+ )
599
615
 
616
+ # Step 6: Create join mappings if not using Polars code
600
617
  join_mappings = None
618
+ if not use_polars_code and how != 'cross':
619
+ join_mappings, use_polars_code = _create_join_mappings(
620
+ left_columns or [], right_columns or []
621
+ )
622
+
623
+ # Step 7: Execute join based on approach
624
+ if use_polars_code or suffix != '_right':
625
+ return self._execute_polars_code_join(
626
+ other, new_node_id, on, left_on, right_on, left_columns, right_columns,
627
+ how, suffix, validate, nulls_equal, coalesce, maintain_order, description
628
+ )
629
+ elif join_mappings or how == 'cross':
630
+ return self._execute_native_join(
631
+ other, new_node_id, join_mappings, how, description
632
+ )
633
+ else:
634
+ raise ValueError("Could not execute join")
635
+
636
+ def _should_use_polars_code_for_join(
637
+ self, maintain_order, coalesce, nulls_equal, validate, suffix
638
+ ) -> bool:
639
+ """Determine if we should use Polars code instead of native join."""
640
+ return not (
641
+ maintain_order is None and
642
+ coalesce is None and
643
+ nulls_equal is False and
644
+ validate is None and
645
+ suffix == '_right'
646
+ )
647
+
648
+ def _ensure_same_graph(self, other: "FlowFrame") -> None:
649
+ """Ensure both FlowFrames are in the same graph, combining if necessary."""
601
650
  if self.flow_graph.flow_id != other.flow_graph.flow_id:
602
- combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
651
+ combined_graph, node_mappings = combine_flow_graphs_with_mapping(
652
+ self.flow_graph, other.flow_graph
653
+ )
654
+
603
655
  new_self_node_id = node_mappings.get((self.flow_graph.flow_id, self.node_id), None)
604
656
  new_other_node_id = node_mappings.get((other.flow_graph.flow_id, other.node_id), None)
657
+
605
658
  if new_other_node_id is None or new_self_node_id is None:
606
659
  raise ValueError("Cannot remap the nodes")
660
+
607
661
  self.node_id = new_self_node_id
608
662
  other.node_id = new_other_node_id
609
663
  self.flow_graph = combined_graph
610
664
  other.flow_graph = combined_graph
665
+
611
666
  global node_id_counter
612
667
  node_id_counter += len(combined_graph.nodes)
613
- new_node_id = generate_node_id()
614
668
 
669
+ def _parse_join_columns(
670
+ self,
671
+ on: List[str | Column] | str | Column,
672
+ left_on: List[str | Column] | str | Column,
673
+ right_on: List[str | Column] | str | Column,
674
+ how: str
675
+ ) -> tuple[List[str] | None, List[str] | None]:
676
+ """Parse and validate join column specifications."""
615
677
  if on is not None:
616
678
  left_columns = right_columns = _normalize_columns_to_list(on)
617
679
  elif left_on is not None and right_on is not None:
@@ -623,93 +685,182 @@ class FlowFrame:
623
685
  else:
624
686
  raise ValueError("Must specify either 'on' or both 'left_on' and 'right_on'")
625
687
 
626
- # Ensure left and right column lists have same length
627
- if how != 'cross' and len(left_columns) != len(right_columns):
628
- raise ValueError(
629
- f"Length mismatch: left columns ({len(left_columns)}) != right columns ({len(right_columns)})"
630
- )
631
- if not use_polars_code:
632
- join_mappings, use_polars_code = _create_join_mappings(
633
- left_columns or [], right_columns or []
634
- )
688
+ return left_columns, right_columns
635
689
 
636
- if use_polars_code or suffix != '_right':
690
+ def _execute_polars_code_join(
691
+ self,
692
+ other: "FlowFrame",
693
+ new_node_id: int,
694
+ on: List[str | Column] | str | Column,
695
+ left_on: List[str | Column] | str | Column,
696
+ right_on: List[str | Column] | str | Column,
697
+ left_columns: List[str] | None,
698
+ right_columns: List[str] | None,
699
+ how: str,
700
+ suffix: str,
701
+ validate: str,
702
+ nulls_equal: bool,
703
+ coalesce: bool,
704
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
705
+ description: str,
706
+ ) -> "FlowFrame":
707
+ """Execute join using Polars code approach."""
708
+ # Build the code arguments
709
+ code_kwargs = self._build_polars_join_kwargs(
710
+ on, left_on, right_on, left_columns, right_columns,
711
+ how, suffix, validate, nulls_equal, coalesce, maintain_order
712
+ )
637
713
 
638
- _on = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(on)) + "]" if on else None
639
- _left = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in left_columns) + "]" if left_on else None
640
- _right = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in right_columns) + "]" if right_on else None
641
- code_kwargs = {"other": "input_df_2", "how": _to_string_val(how), "on": _on, "left_on": _left,
642
- "right_on": _right, "suffix": _to_string_val(suffix), "validate": _to_string_val(validate),
643
- "nulls_equal": nulls_equal, "coalesce": coalesce,
644
- "maintain_order": _to_string_val(maintain_order)}
645
- kwargs_str = ", ".join(f"{k}={v}" for k, v in code_kwargs.items() if v is not None)
646
- code = f"input_df_1.join({kwargs_str})"
647
- self._add_polars_code(new_node_id, code, description, depending_on_ids=[self.node_id, other.node_id])
648
- self._add_connection(self.node_id, new_node_id, "main")
649
- other._add_connection(other.node_id, new_node_id, "main")
650
- result_frame = FlowFrame(
651
- data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
652
- flow_graph=self.flow_graph,
653
- node_id=new_node_id,
654
- parent_node_id=self.node_id,
655
- )
714
+ kwargs_str = ", ".join(f"{k}={v}" for k, v in code_kwargs.items() if v is not None)
715
+ code = f"input_df_1.join({kwargs_str})"
656
716
 
657
- elif join_mappings or how == 'cross':
717
+ # Add the Polars code node
718
+ self._add_polars_code(
719
+ new_node_id, code, description,
720
+ depending_on_ids=[self.node_id, other.node_id]
721
+ )
658
722
 
659
- left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
660
- right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
723
+ # Add connections
724
+ self._add_connection(self.node_id, new_node_id, "main")
725
+ other._add_connection(other.node_id, new_node_id, "main")
661
726
 
662
- if how == 'cross':
663
- join_input = transform_schema.CrossJoinInput(left_select=left_select.renames,
664
- right_select=right_select.renames,)
665
- else:
666
- join_input = transform_schema.JoinInput(
667
- join_mapping=join_mappings,
668
- left_select=left_select.renames,
669
- right_select=right_select.renames,
670
- how=how,
671
- )
672
-
673
- join_input.auto_rename()
674
- if how == 'cross':
675
- cross_join_settings = input_schema.NodeCrossJoin(
676
- flow_id=self.flow_graph.flow_id,
677
- node_id=new_node_id,
678
- cross_join_input=join_input,
679
- is_setup=True,
680
- depending_on_ids=[self.node_id, other.node_id],
681
- description=description or f"Join with {how} strategy",
682
- auto_generate_selection=True,
683
- verify_integrity=True,
684
- )
727
+ # Create and return result frame
728
+ return FlowFrame(
729
+ data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
730
+ flow_graph=self.flow_graph,
731
+ node_id=new_node_id,
732
+ parent_node_id=self.node_id,
733
+ )
685
734
 
686
- self.flow_graph.add_cross_join(cross_join_settings)
687
- else:
688
- join_settings = input_schema.NodeJoin(
689
- flow_id=self.flow_graph.flow_id,
690
- node_id=new_node_id,
691
- join_input=join_input,
692
- auto_generate_selection=True,
693
- verify_integrity=True,
694
- pos_x=200,
695
- pos_y=150,
696
- is_setup=True,
697
- depending_on_ids=[self.node_id, other.node_id],
698
- description=description or f"Join with {how} strategy",
699
- )
700
- self.flow_graph.add_join(join_settings)
701
- self._add_connection(self.node_id, new_node_id, "main")
702
- other._add_connection(other.node_id, new_node_id, "right")
703
- result_frame = FlowFrame(
704
- data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
705
- flow_graph=self.flow_graph,
706
- node_id=new_node_id,
707
- parent_node_id=self.node_id,
735
+ def _build_polars_join_kwargs(
736
+ self,
737
+ on: List[str | Column] | str | Column,
738
+ left_on: List[str | Column] | str | Column,
739
+ right_on: List[str | Column] | str | Column,
740
+ left_columns: List[str] | None,
741
+ right_columns: List[str] | None,
742
+ how: str,
743
+ suffix: str,
744
+ validate: str,
745
+ nulls_equal: bool,
746
+ coalesce: bool,
747
+ maintain_order: Literal[None, "left", "right", "left_right", "right_left"],
748
+ ) -> dict:
749
+ """Build kwargs dictionary for Polars join code."""
750
+
751
+ def format_column_list(cols):
752
+ if cols is None:
753
+ return None
754
+ return "[" + ', '.join(
755
+ f"'{v}'" if isinstance(v, str) else str(v)
756
+ for v in _normalize_columns_to_list(cols)
757
+ ) + "]"
758
+
759
+ return {
760
+ "other": "input_df_2",
761
+ "how": _to_string_val(how),
762
+ "on": format_column_list(on) if on else None,
763
+ "left_on": format_column_list(left_columns) if left_on else None,
764
+ "right_on": format_column_list(right_columns) if right_on else None,
765
+ "suffix": _to_string_val(suffix),
766
+ "validate": _to_string_val(validate),
767
+ "nulls_equal": nulls_equal,
768
+ "coalesce": coalesce,
769
+ "maintain_order": _to_string_val(maintain_order)
770
+ }
771
+
772
+ def _execute_native_join(
773
+ self,
774
+ other: "FlowFrame",
775
+ new_node_id: int,
776
+ join_mappings: List | None,
777
+ how: str,
778
+ description: str,
779
+ ) -> "FlowFrame":
780
+ """Execute join using native FlowFile join nodes."""
781
+ # Create select inputs for both frames
782
+ left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
783
+ right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
784
+
785
+ # Create appropriate join input based on join type
786
+ if how == 'cross':
787
+ join_input = transform_schema.CrossJoinInput(
788
+ left_select=left_select.renames,
789
+ right_select=right_select.renames,
708
790
  )
709
791
  else:
710
- raise ValueError("Could not execute join")
792
+ join_input = transform_schema.JoinInput(
793
+ join_mapping=join_mappings,
794
+ left_select=left_select.renames,
795
+ right_select=right_select.renames,
796
+ how=how,
797
+ )
798
+
799
+ # Configure join input
800
+ join_input.auto_rename()
801
+ for right_column in right_select.renames:
802
+ if right_column.join_key:
803
+ right_column.keep = False
804
+
805
+ # Create and add appropriate node
806
+ if how == 'cross':
807
+ self._add_cross_join_node(new_node_id, join_input, description, other)
808
+ else:
809
+ self._add_regular_join_node(new_node_id, join_input, description, other)
810
+
811
+ # Add connections
812
+ self._add_connection(self.node_id, new_node_id, "main")
813
+ other._add_connection(other.node_id, new_node_id, "right")
814
+
815
+ # Create and return result frame
816
+ return FlowFrame(
817
+ data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
818
+ flow_graph=self.flow_graph,
819
+ node_id=new_node_id,
820
+ parent_node_id=self.node_id,
821
+ )
711
822
 
712
- return result_frame
823
+ def _add_cross_join_node(
824
+ self,
825
+ new_node_id: int,
826
+ join_input: "transform_schema.CrossJoinInput",
827
+ description: str,
828
+ other: "FlowFrame",
829
+ ) -> None:
830
+ """Add a cross join node to the graph."""
831
+ cross_join_settings = input_schema.NodeCrossJoin(
832
+ flow_id=self.flow_graph.flow_id,
833
+ node_id=new_node_id,
834
+ cross_join_input=join_input,
835
+ is_setup=True,
836
+ depending_on_ids=[self.node_id, other.node_id],
837
+ description=description or f"Join with cross strategy",
838
+ auto_generate_selection=True,
839
+ verify_integrity=True,
840
+ )
841
+ self.flow_graph.add_cross_join(cross_join_settings)
842
+
843
+ def _add_regular_join_node(
844
+ self,
845
+ new_node_id: int,
846
+ join_input: "transform_schema.JoinInput",
847
+ description: str,
848
+ other: "FlowFrame",
849
+ ) -> None:
850
+ """Add a regular join node to the graph."""
851
+ join_settings = input_schema.NodeJoin(
852
+ flow_id=self.flow_graph.flow_id,
853
+ node_id=new_node_id,
854
+ join_input=join_input,
855
+ auto_generate_selection=True,
856
+ verify_integrity=True,
857
+ pos_x=200,
858
+ pos_y=150,
859
+ is_setup=True,
860
+ depending_on_ids=[self.node_id, other.node_id],
861
+ description=description or f"Join with {join_input.how} strategy",
862
+ )
863
+ self.flow_graph.add_join(join_settings)
713
864
 
714
865
  def _add_number_of_records(self, new_node_id: int, description: str = None) -> "FlowFrame":
715
866
  node_number_of_records = input_schema.NodeRecordCount(
@@ -1,22 +1,20 @@
1
- import logging
1
+ import io
2
2
  import os
3
- from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable
4
3
  from pathlib import Path
4
+ from typing import Any, List, Optional, Union, Dict, Callable
5
5
 
6
- import io
7
6
  import polars as pl
8
- from polars._typing import (SchemaDict, IO,PolarsDataType,
7
+ from polars._typing import (SchemaDict, IO, PolarsDataType,
9
8
  Sequence, CsvEncoding)
10
9
 
11
- from flowfile_core.flowfile.flow_graph import FlowGraph
12
10
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
11
+ from flowfile_core.flowfile.flow_graph import FlowGraph
13
12
  from flowfile_core.schemas import input_schema, transform_schema
14
-
13
+ from flowfile_frame.config import logger
15
14
  from flowfile_frame.expr import col
16
-
17
- from flowfile_frame.utils import create_flow_graph
18
15
  from flowfile_frame.flow_frame import generate_node_id, FlowFrame
19
- from flowfile_frame.config import logger
16
+ from flowfile_frame.utils import create_flow_graph
17
+
20
18
 
21
19
  def sum(expr):
22
20
  """Sum aggregation function."""
@@ -140,11 +138,10 @@ def read_csv(
140
138
  Returns:
141
139
  A FlowFrame with the CSV data.
142
140
  """
143
- node_id = generate_node_id() # Assuming generate_node_id is defined
141
+ node_id = generate_node_id()
144
142
  if flow_graph is None:
145
- flow_graph = create_flow_graph() # Assuming create_flow_graph is defined
143
+ flow_graph = create_flow_graph()
146
144
  flow_id = flow_graph.flow_id
147
-
148
145
  current_source_path_for_native = None
149
146
  if isinstance(source, (str, os.PathLike)):
150
147
  current_source_path_for_native = str(source)
@@ -216,11 +213,14 @@ def read_csv(
216
213
  description=read_node_description
217
214
  )
218
215
  flow_graph.add_read(read_node)
216
+ flow_graph.get_node(1)
217
+
219
218
  result_frame = FlowFrame(
220
219
  data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
221
220
  flow_graph=flow_graph,
222
221
  node_id=node_id
223
222
  )
223
+ flow_graph.get_node(1)
224
224
  return result_frame
225
225
  else:
226
226
  polars_source_arg = source
@@ -449,7 +449,7 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
449
449
  input_node = input_schema.NodeManualInput(
450
450
  flow_id=flow_id,
451
451
  node_id=node_id,
452
- raw_data=FlowDataEngine(data).to_pylist(),
452
+ raw_data_format=FlowDataEngine(data).to_raw_data(),
453
453
  pos_x=100,
454
454
  pos_y=100,
455
455
  is_setup=True,