Flowfile 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic.
- flowfile/__init__.py +3 -3
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-d004942f.js +784 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-eccf9fc2.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-b1ba6bba.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-68981877.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-0b06649c.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-8349a426.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-905344f8.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-9f5b8638.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-131a6d53.js} +5 -5
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-e3549dcc.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-6e0730ae.js} +8 -8
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-02f033e6.js} +75 -9
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-54c14036.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-08a3f499.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-2ae38139.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-493b9772.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-4373d163.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-b534f3c7.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-2968ff65.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-65136536.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c56339ed.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-1c641a5e.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-df308b8f.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-293e8a64.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-03911655.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-3058a13d.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-fbf4fb39.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-a29bbaf7.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-c7d7760e.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-118f1d20.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f0589571.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-7329a207.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-30b0be15.js} +5 -5
- flowfile/web/static/assets/{api-44ca9e9c.js → api-602fb95c.js} +1 -1
- flowfile/web/static/assets/api-fb67319c.js +80 -0
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-94a6bf4d.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-a224831e.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-c2d2aa97.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-921ac5fd.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-7013cc94.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-3a75211d.js} +19 -6
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-a63d4680.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-763aec6e.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-08464729.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-f15a5f87.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-93bd09d7.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/METADATA +8 -3
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/RECORD +108 -103
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +2 -0
- flowfile_core/configs/node_store/nodes.py +8 -6
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +401 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +522 -59
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +119 -82
- flowfile_core/flowfile/flow_node/flow_node.py +68 -33
- flowfile_core/flowfile/flow_node/models.py +32 -3
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/utils.py +1 -23
- flowfile_core/main.py +3 -2
- flowfile_core/routes/cloud_connections.py +81 -0
- flowfile_core/routes/logs.py +0 -1
- flowfile_core/routes/routes.py +3 -39
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +37 -15
- flowfile_core/schemas/schemas.py +7 -2
- flowfile_core/schemas/transform_schema.py +97 -22
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/flow_frame.py +253 -102
- flowfile_frame/flow_frame_methods.py +13 -13
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +291 -0
- test_utils/s3/fixtures.py +209 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → test_utils/s3}/__init__.py +0 -0
flowfile_worker/external_sources/s3_source/main.py
ADDED
@@ -0,0 +1,216 @@
+"""Cloud storage writer module for FlowFile Worker.
+
+This module provides functionality to write Polars LazyFrames to various cloud storage
+services (S3, Azure ADLS, Google Cloud Storage) in different file formats.
+"""
+
+import polars as pl
+from typing import Dict, Any
+from logging import Logger
+
+from flowfile_worker.external_sources.s3_source.models import (
+    CloudStorageWriteSettings,
+    WriteSettings
+)
+from flowfile_worker.utils import collect_lazy_frame
+
+
+def _write_parquet_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a Parquet file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including compression settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {
+            "path": resource_path,
+            "compression": write_settings.parquet_compression,
+        }
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        try:
+            # Try to use sink_parquet for lazy execution
+            df.sink_parquet(**sink_kwargs)
+        except Exception as e:
+            # Fall back to collecting and writing if sink fails
+            logger.warning(f"Failed to use sink_parquet, falling back to collect and write: {str(e)}")
+            pl_df = collect_lazy_frame(df)
+            sink_kwargs['file'] = sink_kwargs.pop("path")
+            pl_df.write_parquet(**sink_kwargs)
+
+    except Exception as e:
+        logger.error(f"Failed to write Parquet to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write Parquet to cloud storage: {str(e)}")
+
+
+def _write_delta_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to Delta Lake format in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the Delta table will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including write mode.
+        logger: Logger instance for logging operations.
+    """
+    sink_kwargs = {
+        "target": resource_path,
+        "mode": write_settings.write_mode,
+    }
+    if storage_options:
+        sink_kwargs["storage_options"] = storage_options
+
+    # Delta format requires collecting the LazyFrame first
+    collect_lazy_frame(df).write_delta(**sink_kwargs)
+
+
+def _write_csv_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a CSV file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the CSV file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including delimiter settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {
+            "path": resource_path,
+            "separator": write_settings.csv_delimiter,
+        }
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        # sink_csv executes the lazy query and writes the result
+        df.sink_csv(**sink_kwargs)
+
+    except Exception as e:
+        logger.error(f"Failed to write CSV to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write CSV to cloud storage: {str(e)}")
+
+
+def _write_json_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a line-delimited JSON (NDJSON) file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the NDJSON file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {"path": resource_path}
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        try:
+            # Try to use sink_ndjson for lazy execution
+            df.sink_ndjson(**sink_kwargs)
+        except Exception as e:
+            # Fall back to collecting and writing if sink fails
+            pl_df = collect_lazy_frame(df)
+            sink_kwargs['file'] = sink_kwargs.pop("path")
+            pl_df.write_ndjson(**sink_kwargs)
+            logger.error(f"Failed to use sink_ndjson, falling back to collect and write: {str(e)}")
+
+    except Exception as e:
+        logger.error(f"Failed to write JSON to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write JSON to cloud storage: {str(e)}")
+
+writers = {
+    "parquet": _write_parquet_to_cloud,
+    "delta": _write_delta_to_cloud,
+    "csv": _write_csv_to_cloud,
+    "json": _write_json_to_cloud,
+}
+
+
+def write_df_to_cloud(
+        df: pl.LazyFrame,
+        settings: CloudStorageWriteSettings,
+        logger: Logger
+) -> None:
+    """Write a Polars LazyFrame to an object in cloud storage.
+
+    Supports writing to S3, Azure ADLS, and Google Cloud Storage. Currently supports
+    'overwrite' write mode. The 'append' mode is not yet implemented for most formats.
+
+    Args:
+        df: Polars LazyFrame to write to cloud storage.
+        settings: Cloud storage write settings containing connection details and write options.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        ValueError: If the specified file format is not supported.
+        NotImplementedError: If 'append' write mode is used for non-delta formats.
+        Exception: If writing to cloud storage fails.
+    """
+    connection = settings.connection
+    write_settings = settings.write_settings
+    logger.info(
+        f"Writing to {connection.storage_type} storage: {write_settings.resource_path}"
+    )
+    # Validate write mode
+    if write_settings.write_mode == 'append' and write_settings.file_format != "delta":
+        raise NotImplementedError(
+            "The 'append' write mode is not yet supported for this destination."
+        )
+
+    storage_options = connection.get_storage_options()
+
+    # Dispatch to the appropriate writer
+    writer_func = writers.get(write_settings.file_format)
+    if not writer_func:
+        raise ValueError(
+            f"Unsupported file format for writing: {write_settings.file_format}"
+        )
+
+    writer_func(
+        df,
+        write_settings.resource_path,
+        storage_options,
+        write_settings,
+        logger
+    )
+
+    logger.info(f"Successfully wrote data to {write_settings.resource_path}")
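For reference, a minimal standalone sketch (not part of the package) of the sink-first, collect-and-write fallback pattern the writer helpers above use, pointed at a local path so it runs without any cloud credentials; the file name is a placeholder:

import polars as pl


def write_parquet_with_fallback(df: pl.LazyFrame, path: str) -> None:
    """Prefer the streaming sink; fall back to collect + write when sinking fails."""
    kwargs = {"path": path, "compression": "snappy"}
    try:
        df.sink_parquet(**kwargs)  # lazy, streaming write
    except Exception as exc:
        print(f"sink_parquet failed ({exc}); collecting and writing eagerly instead")
        kwargs["file"] = kwargs.pop("path")  # the eager writer takes `file`, not `path`
        df.collect().write_parquet(**kwargs)


if __name__ == "__main__":
    write_parquet_with_fallback(pl.LazyFrame({"id": [1, 2, 3]}), "example.parquet")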
flowfile_worker/external_sources/s3_source/models.py
ADDED
@@ -0,0 +1,142 @@
+"""Cloud storage connection schemas for S3, ADLS, and other cloud providers."""
+
+from typing import Optional, Literal, Dict, Any
+import boto3
+from pydantic import BaseModel, SecretStr
+from flowfile_worker.secrets import decrypt_secret
+
+CloudStorageType = Literal["s3", "adls", "gcs"]
+AuthMethod = Literal["access_key", "iam_role", "service_principal", "managed_identity", "sas_token", "aws-cli", "env_vars"]
+
+
+def create_storage_options_from_boto_credentials(profile_name: Optional[str],
+                                                 region_name: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Create a storage options dictionary from AWS credentials using a boto3 profile.
+    This is the most robust way to handle profile-based authentication as it
+    bypasses Polars' internal credential provider chain, avoiding conflicts.
+
+    Parameters
+    ----------
+    profile_name
+        The name of the AWS profile in ~/.aws/credentials.
+    region_name
+        The AWS region to use.
+
+    Returns
+    -------
+    Dict[str, Any]
+        A storage options dictionary for Polars with explicit credentials.
+    """
+    session = boto3.Session(profile_name=profile_name, region_name=region_name)
+    credentials = session.get_credentials()
+    frozen_creds = credentials.get_frozen_credentials()
+
+    storage_options = {
+        "aws_access_key_id": frozen_creds.access_key,
+        "aws_secret_access_key": frozen_creds.secret_key,
+        "aws_session_token": frozen_creds.token,
+    }
+    # Use the session's region if one was resolved, otherwise use the provided one
+    if session.region_name:
+        storage_options["aws_region"] = session.region_name
+
+    print("Boto3: Successfully created storage options with explicit credentials.")
+    return storage_options
+
+
+class FullCloudStorageConnection(BaseModel):
+    """Internal model with decrypted secrets"""
+    storage_type: CloudStorageType
+    auth_method: AuthMethod
+    connection_name: Optional[str] = "None"  # This is the reference to the item we will fetch that contains the data
+
+    # AWS S3
+    aws_region: Optional[str] = None
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[SecretStr] = None
+    aws_role_arn: Optional[str] = None
+    aws_allow_unsafe_html: Optional[bool] = None
+
+    # Azure ADLS
+    azure_account_name: Optional[str] = None
+    azure_account_key: Optional[SecretStr] = None
+    azure_tenant_id: Optional[str] = None
+    azure_client_id: Optional[str] = None
+    azure_client_secret: Optional[SecretStr] = None
+
+    # Common
+    endpoint_url: Optional[str] = None
+    verify_ssl: bool = True
+
+    def get_storage_options(self) -> Dict[str, Any]:
+        """
+        Build storage options dict based on the connection type and auth method.
+
+        Returns:
+            Dict containing appropriate storage options for the provider
+        """
+        if self.storage_type == "s3":
+            return self._get_s3_storage_options()
+
+    def _get_s3_storage_options(self) -> Dict[str, Any]:
+        """Build S3-specific storage options."""
+        auth_method = self.auth_method
+        print(f"Building S3 storage options for auth_method: '{auth_method}'")
+
+        if auth_method == "aws-cli":
+            return create_storage_options_from_boto_credentials(
+                profile_name=self.connection_name,
+                region_name=self.aws_region
+            )
+
+        storage_options = {}
+        if self.aws_region:
+            storage_options["aws_region"] = self.aws_region
+        if self.endpoint_url:
+            storage_options["endpoint_url"] = self.endpoint_url
+        if not self.verify_ssl:
+            storage_options["verify"] = "False"
+        if self.aws_allow_unsafe_html:  # Note: Polars uses aws_allow_http
+            storage_options["aws_allow_http"] = "true"
+
+        if auth_method == "access_key":
+            storage_options["aws_access_key_id"] = self.aws_access_key_id
+            storage_options["aws_secret_access_key"] = decrypt_secret(
+                self.aws_secret_access_key.get_secret_value()).get_secret_value()
+            # Explicitly clear any session token from the environment
+            storage_options["aws_session_token"] = ""
+
+        elif auth_method == "iam_role":
+            # Correctly implement IAM role assumption using boto3 STS client.
+            sts_client = boto3.client('sts', region_name=self.aws_region)
+            assumed_role_object = sts_client.assume_role(
+                RoleArn=self.aws_role_arn,
+                RoleSessionName="PolarsCloudStorageReaderSession"  # A descriptive session name
+            )
+            credentials = assumed_role_object['Credentials']
+            storage_options["aws_access_key_id"] = credentials['AccessKeyId']
+            storage_options["aws_secret_access_key"] = decrypt_secret(credentials['SecretAccessKey']).get_secret_value()
+            storage_options["aws_session_token"] = decrypt_secret(credentials['SessionToken']).get_secret_value()
+
+        return storage_options
+
+
+class WriteSettings(BaseModel):
+    """Settings for writing to cloud storage"""
+    resource_path: str  # s3://bucket/path/to/file.csv
+
+    write_mode: Literal["overwrite", "append"] = "overwrite"
+    file_format: Literal["csv", "parquet", "json", "delta"] = "parquet"
+
+    parquet_compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy"
+
+    csv_delimiter: str = ","
+    csv_encoding: str = "utf8"
+
+
+class CloudStorageWriteSettings(BaseModel):
+    write_settings: WriteSettings
+    connection: FullCloudStorageConnection
+    flowfile_flow_id: int = 1
+    flowfile_node_id: int | str = -1
flowfile_worker/funcs.py
CHANGED
@@ -6,7 +6,9 @@ from flowfile_worker.polars_fuzzy_match.matcher import fuzzy_match_dfs
 from flowfile_worker.polars_fuzzy_match.models import FuzzyMapping
 from flowfile_worker.flow_logger import get_worker_logger
 from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
-from flowfile_worker.external_sources.sql_source.main import
+from flowfile_worker.external_sources.sql_source.main import write_df_to_database
+from flowfile_worker.external_sources.s3_source.main import write_df_to_cloud
+from flowfile_worker.external_sources.s3_source.models import CloudStorageWriteSettings
 from base64 import encodebytes
 from logging import Logger
 import logging
@@ -205,9 +207,9 @@ def execute_write_method(write_method: Callable, path: str, data_type: str = Non
         logger.info('Writing as csv file')
         if write_mode == 'append':
             with open(path, 'ab') as f:
-                write_method(
+                write_method(f, separator=delimiter, quote_style='always')
         else:
-            write_method(
+            write_method(path, separator=delimiter, quote_style='always')
     elif data_type == 'parquet':
         logger.info('Writing as parquet file')
         write_method(path)
@@ -243,6 +245,49 @@ def write_to_database(polars_serializable_object: bytes,
         progress.value = -1


+def write_to_cloud_storage(polars_serializable_object: bytes,
+                           progress: Value,
+                           error_message: Array,
+                           queue: Queue,
+                           file_path: str,
+                           cloud_write_settings: CloudStorageWriteSettings,
+                           flowfile_flow_id: int = -1,
+                           flowfile_node_id: int | str = -1
+                           ) -> None:
+    """
+    Writes a Polars DataFrame to cloud storage using the provided settings.
+    Args:
+        polars_serializable_object (): # Serialized Polars DataFrame object
+        progress (): Multiprocessing Value to track progress
+        error_message (): Array to store error messages
+        queue (): Queue to send results back
+        file_path (): Path to the file where the DataFrame will be written
+        cloud_write_settings (): CloudStorageWriteSettings object containing write settings and connection details
+        flowfile_flow_id (): Flowfile flow ID for logging
+        flowfile_node_id (): Flowfile node ID for logging
+
+    Returns:
+        None
+    """
+    flowfile_logger = get_worker_logger(flowfile_flow_id, flowfile_node_id)
+    flowfile_logger.info(f"Starting write operation to: {cloud_write_settings.write_settings.resource_path}")
+    df = pl.LazyFrame.deserialize(io.BytesIO(polars_serializable_object))
+    flowfile_logger.info(f"Starting to sync the data to cloud, execution plan: \n"
+                         f"{df.explain(format='plain')}")
+    try:
+        write_df_to_cloud(df, cloud_write_settings, flowfile_logger)
+        flowfile_logger.info("Write operation completed successfully")
+        with progress.get_lock():
+            progress.value = 100
+    except Exception as e:
+        error_msg = str(e).encode()[:1024]
+        flowfile_logger.error(f'Error during write operation: {str(e)}')
+        with error_message.get_lock():
+            error_message[:len(error_msg)] = error_msg
+        with progress.get_lock():
+            progress.value = -1
+
+
 def write_output(polars_serializable_object: bytes,
                  progress: Value,
                  error_message: Array,
@@ -263,16 +308,16 @@ def write_output(polars_serializable_object: bytes,
     if isinstance(df, pl.LazyFrame):
         flowfile_logger.info(f'Execution plan explanation:\n{df.explain(format="plain")}')
     flowfile_logger.info("Successfully deserialized dataframe")
-    is_lazy = False
     sink_method_str = 'sink_'+data_type
     write_method_str = 'write_'+data_type
     has_sink_method = hasattr(df, sink_method_str)
     write_method = None
     if os.path.exists(path) and write_mode == 'create':
         raise Exception('File already exists')
-    if has_sink_method and
+    if has_sink_method and write_method != 'append':
+        flowfile_logger.info(f'Using sink method: {sink_method_str}')
         write_method = getattr(df, 'sink_' + data_type)
-    elif not
+    elif not has_sink_method:
         if isinstance(df, pl.LazyFrame):
             df = collect_lazy_frame(df)
         write_method = getattr(df, write_method_str)
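A simplified standalone sketch of the getattr-based dispatch that write_output relies on: prefer the streaming sink_<type> method on a LazyFrame unless appending, otherwise collect and use write_<type>. The helper name and the write-mode check here are illustrative, not the package's exact code:

import polars as pl


def pick_write_method(df, data_type: str, write_mode: str):
    sink_name, write_name = f"sink_{data_type}", f"write_{data_type}"
    if hasattr(df, sink_name) and write_mode != "append":
        return getattr(df, sink_name)      # e.g. LazyFrame.sink_parquet
    if isinstance(df, pl.LazyFrame):
        df = df.collect()                  # eager writers need a DataFrame
    return getattr(df, write_name)         # e.g. DataFrame.write_parquet


pick_write_method(pl.LazyFrame({"a": [1, 2]}), "parquet", "create")("out.parquet")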
flowfile_worker/models.py
CHANGED
@@ -3,11 +3,12 @@ from typing import Optional, Literal, Any
 from base64 import decodebytes
 from flowfile_worker.polars_fuzzy_match.models import FuzzyMapping
 from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
+from flowfile_worker.external_sources.s3_source.models import CloudStorageWriteSettings


 OperationType = Literal[
     'store', 'calculate_schema', 'calculate_number_of_records', 'write_output', 'fuzzy', 'store_sample',
-    'write_to_database']
+    'write_to_database', "write_to_cloud_storage",]
 ResultType = Literal['polars', 'other']


@@ -55,7 +56,6 @@ class DatabaseScriptWrite(DatabaseWriteSettings):
         Returns:
             DatabaseWriteSettings: The corresponding DatabaseWriteSettings object.
         """
-
         return DatabaseWriteSettings(
             connection=self.connection,
             table_name=self.table_name,
@@ -65,6 +65,26 @@ class DatabaseScriptWrite(DatabaseWriteSettings):
         )


+class CloudStorageScriptWrite(CloudStorageWriteSettings):
+    operation: bytes
+
+    def polars_serializable_object(self):
+        return decodebytes(self.operation)
+
+    def get_cloud_storage_write_settings(self) -> CloudStorageWriteSettings:
+        """
+        Converts the current instance to a DatabaseWriteSettings object.
+        Returns:
+            DatabaseWriteSettings: The corresponding DatabaseWriteSettings object.
+        """
+        return CloudStorageWriteSettings(
+            write_settings=self.write_settings,
+            connection=self.connection,
+            flowfile_flow_id=self.flowfile_flow_id,
+            flowfile_node_id=self.flowfile_node_id
+        )
+
+
 class FuzzyJoinInput(BaseModel):
     task_id: Optional[str] = None
     cache_dir: Optional[str] = None
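A small sketch of the payload round trip behind CloudStorageScriptWrite.operation and polars_serializable_object(): a client base64-encodes a serialized LazyFrame plan and the worker decodes it back to bytes. The encode guard is there because older Polars versions serialize to JSON text rather than bytes:

import io
from base64 import decodebytes, encodebytes

import polars as pl

plan = pl.LazyFrame({"x": [1, 2, 3]}).with_columns(doubled=pl.col("x") * 2).serialize()
if isinstance(plan, str):          # older Polars returns JSON text instead of bytes
    plan = plan.encode()

operation = encodebytes(plan)      # what the `operation: bytes` field carries
restored = pl.LazyFrame.deserialize(io.BytesIO(decodebytes(operation)))
print(restored.collect())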
flowfile_worker/routes.py
CHANGED
@@ -10,10 +10,8 @@ from flowfile_worker import models
 from flowfile_worker.spawner import start_process, start_fuzzy_process, start_generic_process, process_manager
 from flowfile_worker.create import table_creator_factory_method, received_table_parser, FileType
 from flowfile_worker.configs import logger
-from flowfile_worker.external_sources.airbyte_sources.models import AirbyteSettings
 from flowfile_worker.external_sources.sql_source.models import DatabaseReadSettings
 from flowfile_worker.external_sources.sql_source.main import read_sql_source, write_serialized_df_to_database
-from flowfile_worker.external_sources.airbyte_sources.main import read_airbyte_source


 router = APIRouter()
@@ -74,6 +72,44 @@ def store_sample(polars_script: models.PolarsScriptSample, background_tasks: Bac
         raise HTTPException(status_code=500, detail=str(e))


+@router.post("/write_data_to_cloud/")
+def write_data_to_cloud(cloud_storage_script_write: models.CloudStorageScriptWrite,
+                        background_tasks: BackgroundTasks) -> models.Status:
+    """
+    Write polars dataframe to a file in cloud storage.
+    Args:
+        cloud_storage_script_write (): Contains dataframe and write options for cloud storage
+        background_tasks (): FastAPI background tasks handler
+
+    Returns:
+        models.Status: Status object tracking the write operation
+    """
+    try:
+        logger.info("Starting write operation to: cloud storage")
+        task_id = str(uuid.uuid4())
+        polars_serializable_object = cloud_storage_script_write.polars_serializable_object()
+        status = models.Status(background_task_id=task_id, status="Starting", file_ref='',
+                               result_type="other")
+        status_dict[task_id] = status
+        background_tasks.add_task(
+            start_process,
+            polars_serializable_object=polars_serializable_object,
+            task_id=task_id,
+            operation="write_to_cloud_storage",
+            file_ref='',
+            flowfile_flow_id=cloud_storage_script_write.flowfile_flow_id,
+            flowfile_node_id=cloud_storage_script_write.flowfile_node_id,
+            kwargs=dict(cloud_write_settings=cloud_storage_script_write.get_cloud_storage_write_settings()),
+        )
+        logger.info(
+            f"Started write task: {task_id} to database"
+        )
+        return status
+    except Exception as e:
+        logger.error(f"Error in write operation: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.post('/store_database_write_result/')
 def store_in_database(database_script_write: models.DatabaseScriptWrite, background_tasks: BackgroundTasks) -> models.Status:
     """
@@ -158,44 +194,10 @@ def write_results(polars_script_write: models.PolarsScriptWrite, background_task
         raise HTTPException(status_code=500, detail=str(e))


-@router.post('/store_airbyte_result')
-def store_airbyte_result(airbyte_settings: AirbyteSettings, background_tasks: BackgroundTasks) -> models.Status:
-    """
-    Store the result of an Airbyte source operation.
-
-    Args:
-        airbyte_settings (AirbyteSettings): Settings for the Airbyte source operation
-        background_tasks (BackgroundTasks): FastAPI background tasks handler
-
-    Returns:
-        models.Status: Status object tracking the Airbyte source operation
-    """
-    logger.info("Processing Airbyte source operation")
-
-    try:
-        task_id = str(uuid.uuid4())
-        file_path = os.path.join(CACHE_DIR.name, f"{task_id}.arrow")
-        status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_path,
-                               result_type="polars")
-        status_dict[task_id] = status
-        logger.info(f"Starting Airbyte source task: {task_id}")
-        background_tasks.add_task(start_generic_process, func_ref=read_airbyte_source, file_ref=file_path,
-                                  flowfile_flow_id=airbyte_settings.flowfile_flow_id,
-                                  flowfile_node_id=airbyte_settings.flowfile_node_id,
-                                  task_id=task_id, kwargs=dict(airbyte_settings=airbyte_settings))
-        logger.info(f"Started Airbyte source task: {task_id}")
-
-        return status
-
-    except Exception as e:
-        logger.error(f"Error processing Airbyte source: {str(e)}", exc_info=True)
-        raise HTTPException(status_code=500, detail=str(e))
-
-
 @router.post('/store_database_read_result')
 def store_sql_db_result(database_read_settings: DatabaseReadSettings, background_tasks: BackgroundTasks) -> models.Status:
     """
-    Store the result of an
+    Store the result of an sql source operation.

     Args:
         database_read_settings (SQLSourceSettings): Settings for the SQL source operation
@@ -204,7 +206,7 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background
     Returns:
         models.Status: Status object tracking the Sql operation
     """
-    logger.info("Processing
+    logger.info("Processing Sql source operation")

     try:
         task_id = str(uuid.uuid4())
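A minimal, self-contained sketch (hypothetical names, not the package's code) of the pattern the new /write_data_to_cloud/ route follows: register a status record, hand the heavy write off to a FastAPI background task, and return the status immediately:

import uuid

from fastapi import BackgroundTasks, FastAPI
from pydantic import BaseModel

app = FastAPI()
status_dict: dict = {}


class Status(BaseModel):
    background_task_id: str
    status: str


def do_write(task_id: str) -> None:
    # Stand-in for start_process(...): the real route spawns the worker process here.
    status_dict[task_id].status = "Completed"


@app.post("/write_data_to_cloud/")
def write_data_to_cloud(background_tasks: BackgroundTasks) -> Status:
    task_id = str(uuid.uuid4())
    status = Status(background_task_id=task_id, status="Starting")
    status_dict[task_id] = status
    background_tasks.add_task(do_write, task_id)
    return status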
flowfile_worker/utils.py
CHANGED
test_utils/s3/commands.py
ADDED
@@ -0,0 +1,46 @@
+import logging
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+logger = logging.getLogger("postgres_commands")
+
+
+def start_minio():
+    """Start MinIO container for S3 testing"""
+    from . import fixtures
+    if not fixtures.is_docker_available():
+        logger.warning("Docker is not available. Cannot start PostgreSQL container.")
+        print("\n" + "=" * 50)
+        print("SKIPPING: Docker is not available on this system")
+        print("Tests requiring Docker will need to be skipped")
+        print("=" * 50 + "\n")
+        return 0  # Return success to allow pipeline to continue
+
+
+    if fixtures.start_minio_container():
+        print(f"MinIO started at http://localhost:{fixtures.MINIO_PORT}")
+        print(f"Access Key: {fixtures.MINIO_ACCESS_KEY}")
+        return 0
+    return 1
+
+
+def stop_minio():
+    """Stop MinIO container"""
+    from . import fixtures
+
+    if not fixtures.is_docker_available():
+        logger.warning("Docker is not available. Cannot stop MinIO container.")
+        print("\n" + "=" * 50)
+        print("SKIPPING: Docker is not available on this system")
+        print("Tests requiring Docker will need to be skipped")
+        print("=" * 50 + "\n")
+        return 0
+
+    if fixtures.stop_minio_container():
+        print("MinIO stopped successfully")
+        return 0
+    return 1