Flowfile 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +4 -3
- flowfile/api.py +1 -1
- flowfile/web/static/assets/{CloudConnectionManager-c20a740f.js → CloudConnectionManager-d7c2c028.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-960b400a.js → CloudStorageReader-d467329f.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-e3decbdd.js → CloudStorageWriter-071b8b00.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
- flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-d67e2405.js → CrossJoin-cf68ec7a.js} +14 -84
- flowfile/web/static/assets/{DatabaseConnectionSettings-a81e0f7e.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-9ea35e84.js → DatabaseManager-349e33a8.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-9578bfa5.js → DatabaseReader-8075bd28.js} +14 -114
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseWriter-19531098.js → DatabaseWriter-3e2dda89.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
- flowfile/web/static/assets/{ExternalSource-2297ef96.js → ExternalSource-609a265c.js} +8 -79
- flowfile/web/static/assets/{Filter-f211c03a.js → Filter-97cff793.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-4207ea31.js → Formula-09de0ec9.js} +18 -85
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-bf120df0.js → FuzzyMatch-bdf70248.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-5bb7497a.js → GraphSolver-0b5a0e05.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{GroupBy-92c81b65.js → GroupBy-eaddadde.js} +12 -75
- flowfile/web/static/assets/{Join-4e49a274.js → Join-3313371b.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-90998ae8.js → ManualInput-e8bfc0be.js} +11 -82
- flowfile/web/static/assets/{Output-81e3e917.js → Output-7303bb09.js} +13 -243
- flowfile/web/static/assets/Output-ddc9079f.css +37 -0
- flowfile/web/static/assets/{Pivot-a3419842.js → Pivot-3b1c54ef.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
- flowfile/web/static/assets/{PolarsCode-72710deb.js → PolarsCode-aa12e25d.js} +13 -80
- flowfile/web/static/assets/Read-6b17491f.css +62 -0
- flowfile/web/static/assets/Read-a2bfc618.js +243 -0
- flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
- flowfile/web/static/assets/{RecordId-10baf191.js → RecordId-48ee1a3b.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
- flowfile/web/static/assets/{Sample-3ed9a0ae.js → Sample-f06cb97a.js} +8 -77
- flowfile/web/static/assets/{SecretManager-0d49c0e8.js → SecretManager-37f34886.js} +2 -2
- flowfile/web/static/assets/{Select-8a02a0b3.js → Select-b60e6c47.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/{SettingsSection-4c0f45f5.js → SettingsSection-75b6cf4f.js} +2 -40
- flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-f55c9f9d.js → Sort-51b1ee4d.js} +12 -97
- flowfile/web/static/assets/{TextToRows-5dbc2145.js → TextToRows-26835f8f.js} +14 -83
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{UnavailableFields-a1768e52.js → UnavailableFields-88a4cd0c.js} +2 -2
- flowfile/web/static/assets/Union-4d0088eb.js +77 -0
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/{Unique-46b250da.js → Unique-7d554a62.js} +22 -91
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-25ac84cc.js → Unpivot-4668595c.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
- flowfile/web/static/assets/{ExploreData-40476474.js → VueGraphicWalker-5324d566.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-6ef0dcef.js → api-271ed117.js} +1 -1
- flowfile/web/static/assets/{api-a0abbdc7.js → api-31e4fea6.js} +1 -1
- flowfile/web/static/assets/{designer-186f2e71.css → designer-091bdc3f.css} +819 -184
- flowfile/web/static/assets/{designer-13eabd83.js → designer-bf3d9487.js} +2214 -680
- flowfile/web/static/assets/{documentation-b87e7f6f.js → documentation-4d0a1cea.js} +1 -1
- flowfile/web/static/assets/{dropDown-13564764.js → dropDown-025888df.js} +1 -1
- flowfile/web/static/assets/{fullEditor-fd2cd6f9.js → fullEditor-1df991ec.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-71e11604.js → genericNodeSettings-d3b2b2ac.js} +3 -3
- flowfile/web/static/assets/{index-f6c15e76.js → index-d0518598.js} +210 -31
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
- flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
- flowfile/web/static/assets/readCsv-e8359522.js +178 -0
- flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
- flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
- flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
- flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
- flowfile/web/static/assets/{secretApi-dd636aa2.js → secretApi-ce823eee.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-af36165e.js → selectDynamic-5476546e.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/{vue-codemirror.esm-2847001e.js → vue-codemirror.esm-9ed00d50.js} +29 -33
- flowfile/web/static/assets/{vue-content-loader.es-0371da73.js → vue-content-loader.es-7bca2d9b.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/METADATA +2 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/RECORD +147 -117
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/nodes.py +303 -44
- flowfile_core/configs/settings.py +6 -3
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/code_generator/code_generator.py +36 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +60 -80
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +61 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +44 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +3 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +33 -10
- flowfile_core/flowfile/flow_graph.py +223 -118
- flowfile_core/flowfile/flow_node/flow_node.py +56 -19
- flowfile_core/flowfile/flow_node/models.py +0 -2
- flowfile_core/flowfile/flow_node/schema_callback.py +138 -43
- flowfile_core/flowfile/graph_tree/graph_tree.py +250 -0
- flowfile_core/flowfile/graph_tree/models.py +15 -0
- flowfile_core/flowfile/handler.py +22 -3
- flowfile_core/flowfile/manage/compatibility_enhancements.py +1 -1
- flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py} +72 -16
- flowfile_core/flowfile/setting_generator/settings.py +2 -2
- flowfile_core/flowfile/util/execution_orderer.py +9 -0
- flowfile_core/flowfile/util/node_skipper.py +8 -0
- flowfile_core/main.py +4 -1
- flowfile_core/routes/routes.py +59 -10
- flowfile_core/schemas/input_schema.py +0 -1
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +48 -3
- flowfile_core/schemas/transform_schema.py +28 -38
- flowfile_frame/__init__.py +1 -4
- flowfile_frame/flow_frame.py +33 -4
- flowfile_frame/flow_frame.pyi +2 -0
- flowfile_worker/__init__.py +6 -35
- flowfile_worker/funcs.py +7 -3
- flowfile_worker/main.py +5 -2
- flowfile_worker/models.py +3 -1
- flowfile_worker/routes.py +47 -5
- shared/__init__.py +15 -0
- shared/storage_config.py +243 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c4059daf.js +0 -701
- flowfile/web/static/assets/RecordCount-c2b5e095.js +0 -122
- flowfile/web/static/assets/Union-f2aefdc9.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-988d9efe.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_worker/polars_fuzzy_match/matcher.py +0 -435
- flowfile_worker/polars_fuzzy_match/models.py +0 -36
- flowfile_worker/polars_fuzzy_match/pre_process.py +0 -213
- flowfile_worker/polars_fuzzy_match/process.py +0 -86
- flowfile_worker/polars_fuzzy_match/utils.py +0 -50
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
- {flowfile_worker/polars_fuzzy_match → flowfile_core/flowfile/graph_tree}/__init__.py +0 -0
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
import polars as pl
|
|
2
|
-
import polars_distance as pld
|
|
3
|
-
from flowfile_worker.polars_fuzzy_match.utils import cache_polars_frame_to_temp
|
|
4
|
-
from flowfile_worker.utils import collect_lazy_frame
|
|
5
|
-
from flowfile_worker.polars_fuzzy_match.models import FuzzyTypeLiteral
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def calculate_fuzzy_score(mapping_table: pl.LazyFrame, left_col_name: str, right_col_name: str,
                          fuzzy_method: FuzzyTypeLiteral, th_score: float) -> pl.LazyFrame:
    """
    Calculate fuzzy matching scores between two columns of a LazyFrame.

    Both columns are lower-cased into temporary 'left' / 'right' columns, a
    pairwise string distance is computed into column 's', rows whose distance
    is above ``th_score`` are removed, and finally 's' is converted from a
    distance into a similarity (``1 - distance``).

    Args:
        mapping_table: The LazyFrame containing the columns to compare
        left_col_name: Name of the left column for comparison
        right_col_name: Name of the right column for comparison
        fuzzy_method: Name of the distance method looked up on
            ``pld.DistancePairWiseString``
        th_score: Maximum *distance* a pair may have to be kept. The filter is
            applied before the ``1 - distance`` conversion, so this is not a
            minimum similarity.

    Returns:
        A LazyFrame with the input columns plus the similarity column 's'.
        The temporary 'left' and 'right' columns are dropped again.
    """
    mapping_table = mapping_table.with_columns(
        pl.col(left_col_name).str.to_lowercase().alias('left'),
        pl.col(right_col_name).str.to_lowercase().alias('right'),
    )
    dist_col = pld.DistancePairWiseString(pl.col('left'))
    distance_fn = getattr(dist_col, fuzzy_method)
    # The previous check `fuzzy_method in ("jaro_winkler")` compared against a
    # plain string (no trailing comma, so not a tuple) and therefore did a
    # substring match: "jaro" only reached this branch by accident. The tuple
    # below spells out the same two methods explicitly.
    # NOTE(review): presumably the Jaro family is already normalized and thus
    # called without `normalized=True` - confirm against polars_distance.
    if fuzzy_method in ("jaro", "jaro_winkler"):
        fm_method = distance_fn(pl.col('right')).alias('s')
    else:
        fm_method = distance_fn(pl.col('right'), normalized=True).alias('s')
    return (
        mapping_table.with_columns(fm_method)
        .drop(['left', 'right'])
        .filter(pl.col('s') <= th_score)
        .with_columns((1 - pl.col('s')).alias('s'))
    )
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def process_fuzzy_frames(left_df: pl.LazyFrame, right_df: pl.LazyFrame, left_col_name: str, right_col_name: str,
                         temp_dir_ref: str):
    """
    Build the per-value lookup frames for a fuzzy match and order them by size.

    Each input is grouped by its match column, the matching index column
    ('__left_index' / '__right_index') is aggregated per group, null keys are
    filtered out, and the result is cached via ``cache_polars_frame_to_temp``.
    If the left frame has fewer groups than the right one, the two frames and
    their column names are returned in swapped order.

    Args:
        left_df: The left frame; must contain ``left_col_name`` and '__left_index'.
        right_df: The right frame; must contain ``right_col_name`` and '__right_index'.
        left_col_name: Column of ``left_df`` to group on.
        right_col_name: Column of ``right_df`` to group on.
        temp_dir_ref: Directory passed to ``cache_polars_frame_to_temp``.

    Returns:
        A 6-tuple ``(first_frame, second_frame, first_col_name, second_col_name,
        len_left_df, len_right_df)``. The two lengths are always the group
        counts of the original left and right inputs; they are not swapped
        together with the frames.
    """

    def _grouped_and_cached(frame, key_col, index_col):
        # One row per distinct non-null key, carrying the aggregated row indices.
        grouped = frame.group_by(key_col).agg(index_col)
        non_null = grouped.filter(pl.col(key_col).is_not_null())
        return cache_polars_frame_to_temp(non_null, temp_dir_ref)

    left_fuzzy_frame = _grouped_and_cached(left_df, left_col_name, '__left_index')
    right_fuzzy_frame = _grouped_and_cached(right_df, right_col_name, '__right_index')

    # Row counts of the cached frames (number of distinct non-null keys).
    len_left_df = collect_lazy_frame(left_fuzzy_frame.select(pl.len()))[0, 0]
    len_right_df = collect_lazy_frame(right_fuzzy_frame.select(pl.len()))[0, 0]

    if len_left_df < len_right_df:
        # Right side is larger: hand it back in the first position.
        return (right_fuzzy_frame, left_fuzzy_frame, right_col_name, left_col_name,
                len_left_df, len_right_df)
    return (left_fuzzy_frame, right_fuzzy_frame, left_col_name, right_col_name,
            len_left_df, len_right_df)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def calculate_and_parse_fuzzy(mapping_table: pl.LazyFrame, left_col_name: str, right_col_name: str,
                              fuzzy_method: FuzzyTypeLiteral, th_score: float) -> pl.LazyFrame:
    """
    Score a mapping table and flatten the result to one row per index pair.

    Args:
        mapping_table: The LazyFrame containing the columns to compare, plus
            the '__left_index' and '__right_index' columns
        left_col_name: Name of the left column for comparison
        right_col_name: Name of the right column for comparison
        fuzzy_method: Type of fuzzy matching algorithm to use
        th_score: Threshold forwarded unchanged to ``calculate_fuzzy_score``,
            which keeps rows whose raw distance is ``<= th_score``

    Returns:
        A LazyFrame with the columns 's', '__left_index' and '__right_index',
        where both index columns have been exploded.
    """
    scored = calculate_fuzzy_score(mapping_table, left_col_name, right_col_name, fuzzy_method, th_score)
    # Keep only the score and the two index columns.
    trimmed = scored.select(pl.col('s'), pl.col('__left_index'), pl.col('__right_index'))
    # Explode left first, then right, so every left/right index combination gets a row.
    left_exploded = trimmed.explode(pl.col('__left_index'))
    return left_exploded.explode(pl.col('__right_index'))
|
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import polars as pl
|
|
2
|
-
from flowfile_worker.configs import logger
|
|
3
|
-
from flowfile_worker.utils import collect_lazy_frame
|
|
4
|
-
import os
|
|
5
|
-
import uuid
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def write_polars_frame(_df: pl.LazyFrame | pl.DataFrame, path: str,
                       estimated_size: int = 0) -> bool:
    """
    Write a polars frame to ``path`` in IPC format.

    A LazyFrame is collected up front when ``estimated_size`` is positive and
    below the in-memory threshold. Otherwise it is streamed with ``sink_ipc``;
    if streaming fails, the frame is collected with ``collect_lazy_frame`` and
    written with ``write_ipc`` instead. A DataFrame is written directly with
    ``write_ipc``.

    Args:
        _df: The frame to write.
        path: Destination file path.
        estimated_size: Size estimate used for the in-memory decision; ``0``
            (the default) disables the eager collect.

    Returns:
        True if the file was written, False if the final ``write_ipc`` failed.
    """
    is_lazy = isinstance(_df, pl.LazyFrame)
    logger.info('Caching data frame')
    if is_lazy and estimated_size > 0:
        # Threshold kept exactly as before (mixed 1024/1000 divisors).
        # NOTE(review): presumably "estimated size in bytes < ~8 GB" - confirm.
        fit_memory = estimated_size / 1024 / 1000 / 1000 < 8
        if fit_memory:
            _df = _df.collect()
            is_lazy = False

    if is_lazy:
        logger.info("Writing in memory efficient mode")
        try:
            _df.sink_ipc(path)
            return True
        except Exception as e:
            # Streaming is best effort; report why it failed and fall back to
            # materializing the frame instead of swallowing the error.
            logger.warning(f'Streaming write to {path} failed, collecting instead: {e}')
        _df = collect_lazy_frame(_df)

    try:
        _df.write_ipc(path)
        return True
    except Exception as e:
        logger.error(f'Could not write data frame to {path}: {e}')
        return False
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def cache_polars_frame_to_temp(_df: pl.LazyFrame | pl.DataFrame, tempdir: str | None = None) -> pl.LazyFrame:
    """
    Write a frame to a uniquely named IPC file and return it as a LazyFrame.

    Args:
        _df: The frame to cache.
        tempdir: Directory to write the cache file into. When omitted, the
            system temporary directory is used; previously the default
            ``None`` was formatted into the path as the literal text "None".

    Returns:
        A LazyFrame obtained by reading the written file back with
        ``pl.read_ipc``.

    Raises:
        Exception: If ``write_polars_frame`` reports that writing failed.

    Note:
        The cache file is not removed here; cleanup is left to whoever owns
        the directory.
    """
    if tempdir is None:
        import tempfile  # local import: only needed for the fallback directory
        tempdir = tempfile.gettempdir()
    path = os.path.join(tempdir, str(uuid.uuid4()))
    if not write_polars_frame(_df, path):
        raise Exception('Could not cache the data')
    return pl.read_ipc(path).lazy()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|