Flowfile 0.3.5__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +3 -3
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-d004942f.js +784 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-eccf9fc2.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-b1ba6bba.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-68981877.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-0b06649c.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-8349a426.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-905344f8.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-9f5b8638.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-131a6d53.js} +5 -5
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-e3549dcc.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-6e0730ae.js} +8 -8
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-02f033e6.js} +75 -9
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-54c14036.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-08a3f499.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-2ae38139.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-493b9772.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-4373d163.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-b534f3c7.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-2968ff65.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-65136536.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c56339ed.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-1c641a5e.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-df308b8f.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-293e8a64.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-03911655.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-3058a13d.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-fbf4fb39.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-a29bbaf7.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-c7d7760e.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-118f1d20.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f0589571.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-7329a207.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-30b0be15.js} +5 -5
- flowfile/web/static/assets/{api-44ca9e9c.js → api-602fb95c.js} +1 -1
- flowfile/web/static/assets/api-fb67319c.js +80 -0
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-94a6bf4d.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-a224831e.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-c2d2aa97.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-921ac5fd.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-7013cc94.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-3a75211d.js} +19 -6
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-a63d4680.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-763aec6e.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-08464729.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-f15a5f87.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-93bd09d7.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/METADATA +8 -3
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/RECORD +108 -103
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +2 -0
- flowfile_core/configs/node_store/nodes.py +8 -6
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +401 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +522 -59
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +119 -82
- flowfile_core/flowfile/flow_node/flow_node.py +68 -33
- flowfile_core/flowfile/flow_node/models.py +32 -3
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/utils.py +1 -23
- flowfile_core/main.py +3 -2
- flowfile_core/routes/cloud_connections.py +81 -0
- flowfile_core/routes/logs.py +0 -1
- flowfile_core/routes/routes.py +3 -39
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +37 -15
- flowfile_core/schemas/schemas.py +7 -2
- flowfile_core/schemas/transform_schema.py +97 -22
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/flow_frame.py +253 -102
- flowfile_frame/flow_frame_methods.py +13 -13
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +291 -0
- test_utils/s3/fixtures.py +209 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.6.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → test_utils/s3}/__init__.py +0 -0
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import { C as ColumnSelector } from "./dropDown-52790b15.js";
|
|
2
|
-
import { d as defineComponent, r as ref, m as watch, c as openBlock, e as createElementBlock, t as toDisplayString, i as createCommentVNode, p as createBaseVNode, f as createVNode, _ as _export_sfc } from "./index-e235a8bc.js";
|
|
3
|
-
// Attribute objects for the static parts of the template; created once at
// module load and reused by every render call.
const _hoisted_1 = { key: 0, class: "label" };
const _hoisted_2 = { class: "select-wrapper" };

// Generic drop-down: an optional title paragraph above a ColumnSelector whose
// value is kept in sync with the `modelValue` prop in both directions.
const _sfc_main = /* @__PURE__ */ defineComponent({
  __name: "dropDownGeneric",
  props: {
    modelValue: { type: String, default: "NewField" },
    optionList: { type: Array, required: true },
    title: { type: String, default: "" },
    allowOther: { type: Boolean, default: true },
    placeholder: { type: String, default: "Select an option" },
    isLoading: { type: Boolean, default: false }
  },
  emits: ["update:modelValue", "change"],
  setup(__props, { emit: __emit }) {
    // Local copy of the selection, seeded from the prop.
    const selection = ref(__props.modelValue);

    // Parent -> local: follow changes made to the prop from outside.
    watch(
      () => __props.modelValue,
      (incoming) => {
        selection.value = incoming;
      }
    );

    // Local -> parent: every change of the selection emits both events.
    watch(selection, (picked) => {
      __emit("update:modelValue", picked);
      __emit("change", picked);
    });

    // NOTE(review): the openBlock()/create* call order below is presumably
    // significant to Vue's runtime, so the render expression is left in the
    // exact shape the compiler produced.
    return (_ctx, _cache) => {
      return openBlock(), createElementBlock("div", null, [
        __props.title !== ""
          ? (openBlock(), createElementBlock("p", _hoisted_1, toDisplayString(__props.title), 1))
          : createCommentVNode("", true),
        createBaseVNode("div", _hoisted_2, [
          createVNode(ColumnSelector, {
            modelValue: selection.value,
            // The update handler is stored in _cache[0] so the same function
            // object is passed on every render.
            "onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => selection.value = $event),
            "column-options": __props.optionList,
            "allow-other": __props.allowOther,
            placeholder: __props.placeholder,
            "is-loading": __props.isLoading
          }, null, 8, ["modelValue", "column-options", "allow-other", "placeholder", "is-loading"])
        ])
      ]);
    };
  }
});
const dropDownGeneric_vue_vue_type_style_index_0_scoped_f2958f57_lang = "";
const DropDownGeneric = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-f2958f57"]]);
export {
  DropDownGeneric as D
};
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from ast import literal_eval
|
|
3
|
-
import polars as pl
|
|
4
|
-
from typing import Any, Dict, Generator, List, Optional
|
|
5
|
-
from flowfile_core.configs import logger
|
|
6
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
7
|
-
from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
|
|
8
|
-
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.models import (
|
|
9
|
-
AirbyteProperty, JsonSchema, AirbyteResponse, AirbyteSettings
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class LazyAirbyteImporter:
    """Import the ``airbyte`` module on first request and cache it on the class."""

    # Cached module object; stays None until get_airbyte() has imported it.
    _airbyte = None

    @classmethod
    def get_airbyte(cls):
        """Return the cached ``airbyte`` module, importing it if not yet cached."""
        cached_module = cls._airbyte
        if cached_module is not None:
            return cached_module

        logger.info("Importing airbyte module")
        import airbyte as ab
        cls._airbyte = ab
        return cls._airbyte
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class AirbyteSource(ExternalDataSource):
    """External data source that reads one stream from an Airbyte connector.

    The connector is only contacted when the settings carry no column schema
    (to discover one) or when data is actually requested.
    """

    stream: str
    source_name: str
    cache_store: Optional['airbyte.results.ReadResult'] = None
    _type: str
    is_collected: bool
    _airbyte_response: Optional[AirbyteResponse] = None
    _airbyte_module = None
    _enforce_full_refresh: Optional[bool] = True
    version: Optional[str] = None

    def __init__(self, airbyte_settings: AirbyteSettings):
        """Build the source from ``airbyte_settings``.

        The connector config is taken from ``airbyte_settings.config``; when
        that is empty and ``config_ref`` is set, the config is read from the
        environment variable named by ``config_ref`` and parsed with
        ``ast.literal_eval``.

        Raises:
            ValueError: if the referenced environment variable is not set, or
                if no config could be determined.
        """
        self.is_collected = False
        self._airbyte_response = None
        self.stream = airbyte_settings.stream
        self.source_name = airbyte_settings.source_name
        self._enforce_full_refresh = airbyte_settings.enforce_full_refresh
        if hasattr(airbyte_settings, 'version'):
            self.version = airbyte_settings.version

        # Handle config
        if airbyte_settings.config_ref and not airbyte_settings.config:
            logger.info(f"Getting config from {airbyte_settings.config_ref}")
            raw_config = os.environ.get(airbyte_settings.config_ref)
            if raw_config is None:
                # literal_eval(None) would also raise ValueError, but with a
                # message that says nothing about the missing variable.
                raise ValueError(
                    f"Environment variable {airbyte_settings.config_ref} is not set"
                )
            config = literal_eval(raw_config)
        else:
            logger.info("Using provided config")
            config = airbyte_settings.config

        if config is None:
            raise ValueError("Config must be provided")

        self.config = config
        self._type = 'airbyte'
        self.read_result = None

        # Only load source if fields aren't provided
        if not airbyte_settings.fields:
            self.load_source(airbyte_settings)
        else:
            logger.info('Using provided schema')
            self.schema = [
                FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type)
                for col in airbyte_settings.fields
            ]

    def load_source(self, airbyte_settings: AirbyteSettings):
        """Set ``self.schema`` from the provided fields or from the connector."""
        logger.info(f"Loading source {self.source_name}")
        if airbyte_settings.fields:
            logger.info('Using provided schema')
            self.schema = [
                FlowfileColumn.from_input(column_name=col.name, data_type=col.data_type)
                for col in airbyte_settings.fields
            ]
        else:
            logger.info('Using airbyte schema')
            # Accessing the property builds the response and sets self.schema.
            _ = self.airbyte_response

    @property
    def airbyte_response(self) -> AirbyteResponse:
        """Return the connector response, creating and caching it on first access.

        On first access the connector is instantiated, checked (a failing
        check is logged and ignored), its stream JSON schema is read, and
        ``self.schema`` is derived from it.
        """
        if self._airbyte_response is None:
            # Lazy import airbyte
            ab = LazyAirbyteImporter.get_airbyte()

            source = ab.get_source(
                name=self.source_name,
                config=self.config,
                streams=self.stream,
                docker_image=True,
                version=self.version
            )

            try:
                source.check()
            except Exception:
                # Deliberately best-effort: a failed check does not abort loading.
                logger.warning('Source check failed, trying to continue')
            else:
                # Only report success when the check really succeeded.
                logger.info(f'Source check passed, starting to load data for {self.stream}')

            json_schema = source.get_stream_json_schema(self.stream)['properties']
            properties = [
                AirbyteProperty(name=name, json_schema=JsonSchema(**schema))
                for name, schema in json_schema.items()
            ]

            logger.info(f"Loaded source {self.source_name}")
            self._airbyte_response = AirbyteResponse(properties=properties, source=source)
            self.schema = self.parse_schema(self._airbyte_response)

        return self._airbyte_response

    def get_initial_data(self):
        """Return an empty list; no initial data is provided by this source."""
        return []

    def get_iter(self) -> Generator[Dict[str, Any], None, None]:
        """Yield every record as a dict and mark the source collected when exhausted."""
        logger.warning('Getting data in iteration, this is suboptimal')
        data = self.data_getter()
        for row in data:
            yield row
        self.is_collected = True

    def get_sample(self, n: int = 10000):
        """Yield at most ``n`` records from :meth:`get_iter`."""
        from itertools import islice

        logger.warning('Getting sample in iteration, this is suboptimal')
        # max() keeps the old behaviour of yielding nothing for a negative n,
        # where islice itself would raise.
        yield from islice(self.get_iter(), max(n, 0))

    @staticmethod
    def parse_schema(airbyte_response: AirbyteResponse) -> List[FlowfileColumn]:
        """Return the Flowfile columns described by ``airbyte_response``."""
        return airbyte_response.get_flow_file_columns()

    def get_df(self):
        """Read the stream (once) and return it without ``_airbyte*`` columns.

        The read result is cached in ``self.read_result``; the frame is
        obtained through the result's ``to_pandas()``.
        """
        if self.read_result is None:
            self.read_result = self.airbyte_response.source.read()

        df = self.read_result[self.stream].to_pandas()
        drop_cols = [c for c in df.columns if c.startswith('_airbyte')]
        df.drop(drop_cols, axis=1, inplace=True)
        return df

    def get_pl_df(self) -> pl.DataFrame:
        """Return the stream as a polars DataFrame and mark the source collected."""
        frame = pl.from_pandas(self.get_df())
        # Set the flag only after the read succeeded.
        self.is_collected = True
        return frame

    def data_getter(self) -> List[Dict]:
        """Return all records of the stream as a list of dicts."""
        return self.get_df().to_dict(orient='records')

    @classmethod
    def create_from_frontend_input(cls, config: Any, stream_name: str, source_name: str):
        """Placeholder; currently does nothing and returns ``None``."""
        # Implementation details to be added
        pass
|
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
|
|
2
|
-
from pydantic import BaseModel, field_validator, ConfigDict
|
|
3
|
-
import polars as pl
|
|
4
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
5
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
6
|
-
from flowfile_core.schemas.input_schema import MinimalFieldInfo
|
|
7
|
-
from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
|
|
8
|
-
from flowfile_core.configs import logger
|
|
9
|
-
|
|
10
|
-
# Use TYPE_CHECKING to avoid circular imports
|
|
11
|
-
if TYPE_CHECKING:
|
|
12
|
-
from airbyte import Source
|
|
13
|
-
else:
|
|
14
|
-
Source = Any
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class LazyAirbyteSource:
    """Resolve the airbyte ``Source`` class on demand and cache it on this class."""

    # Cached Source class; stays None until get_source_class() has imported it.
    _source_class = None

    @classmethod
    def get_source_class(cls):
        """Return the cached ``airbyte.Source`` class, importing it if not yet cached."""
        resolved = cls._source_class
        if resolved is not None:
            return resolved

        logger.info("Importing airbyte Source class")
        from airbyte import Source
        cls._source_class = Source
        return cls._source_class
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# JSON-schema fragment describing one stream property. `type` may be a single
# type name, a list of type names, or None; `format` takes precedence over
# `type` when a polars type is derived.
class JsonSchema(BaseModel):
    type: Optional[Union[str, List[str]]]
    airbyte_type: Optional[Union[str, List[str]]] = None
    format: Optional[str] = None

    def get_pl_type(self) -> pl.DataType:
        """Translate this schema fragment into a polars data type.

        A non-empty ``format`` decides the type (unknown formats map to
        'string'). Otherwise ``type`` is used: for a list, the first entry
        with a known mapping wins, falling back to the first entry as-is;
        an empty list or an unknown/None single type maps to 'string'.
        """
        if self.format:
            by_format = {
                'date-time': 'datetime',
                'date': 'date',
                'time': 'time'
            }
            return cast_str_to_polars_type(by_format.get(self.format, 'string'))

        by_type = {
            'string': 'string',
            'boolean': 'bool',
            'integer': 'int',
            'number': 'float',
            'array': 'string',
            'object': 'string'
        }
        declared = self.type
        if not isinstance(declared, list):
            return cast_str_to_polars_type(by_type.get(declared, 'string'))
        if len(declared) == 0:
            return cast_str_to_polars_type('string')

        for candidate in declared:
            mapped = by_type.get(candidate)
            if mapped is not None:
                return cast_str_to_polars_type(mapped)
        # No entry had a known mapping: pass the first raw type name through.
        return cast_str_to_polars_type(declared[0])
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
# A named stream property together with its JSON-schema fragment.
class AirbyteProperty(BaseModel):
    name: str
    json_schema: JsonSchema

    def get_pl_type(self) -> PlType:
        """Return a ``PlType`` pairing this property's name with its polars type."""
        polars_dtype = self.json_schema.get_pl_type()
        return PlType(column_name=self.name, pl_datatype=polars_dtype)
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
# Bundles a connector instance with the properties of the selected stream.
class AirbyteResponse(BaseModel):
    source: Any  # Using Any to avoid direct Source import
    properties: list[AirbyteProperty]

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @field_validator('source')
    @classmethod
    def validate_source(cls, v: Any) -> Any:
        """Accept ``v`` only when it is an instance of the airbyte ``Source`` class."""
        expected_class = LazyAirbyteSource.get_source_class()
        if isinstance(v, expected_class):
            return v
        raise ValueError(f"Source must be an instance of airbyte.Source, got {type(v)}")

    def get_flow_file_columns(self) -> List[FlowfileColumn]:
        """Build one ``FlowfileColumn`` per property, indexed by its position."""
        columns = []
        for position, prop in enumerate(self.properties):
            columns.append(
                FlowfileColumn.create_from_polars_type(prop.get_pl_type(), col_index=position)
            )
        return columns
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
# One property entry of a connector config spec; `items` and `properties`
# may themselves hold nested property definitions.
class GenericProperties(BaseModel):
    type: str
    title: Optional[str] = None
    description: Optional[str] = None
    order: Optional[int] = None
    required: Optional[List[str]] = None
    airbyte_secret: Optional[bool] = None
    pattern: Optional[str] = None
    pattern_descriptor: Optional[str] = None
    format: Optional[str] = None
    examples: Optional[List[Any]] = None
    enum: Optional[List[str]] = None
    minimum: Optional[float] = None
    maximum: Optional[float] = None
    items: Optional[Any] = None
    properties: Optional[Dict[str, Any]] = None

    @field_validator('items', 'properties')
    @classmethod
    def validate_nested(cls, value: Any) -> Any:
        """Convert nested dict definitions into ``GenericProperties`` objects.

        A dict that has a 'type' key becomes a single ``GenericProperties``;
        any other dict is converted value by value (non-dict values are kept).
        Non-dict input is returned unchanged.
        """
        if not isinstance(value, dict):
            return value
        if 'type' in value:
            return GenericProperties(**value)

        converted = {}
        for key, entry in value.items():
            converted[key] = GenericProperties(**entry) if isinstance(entry, dict) else entry
        return converted
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
# Top-level shape of a config spec: a title, a type, and a mapping of
# property name -> GenericProperties. `required` and `additionalProperties`
# are optional and default to None.
class GenericSchema(BaseModel):
    title: str
    type: str
    required: Optional[List[str]] = None
    additionalProperties: Optional[bool] = None
    properties: Dict[str, GenericProperties]
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
# A single config field. `type`, `key` and `default` declare no default
# value here; the remaining attributes default to None.
class FieldProperty(BaseModel):
    title: Optional[str] = None
    type: str
    key: str
    description: Optional[str] = None
    airbyte_secret: Optional[bool] = None
    input_value: Optional[str] = None
    default: Any
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
# A config field that groups child FieldProperty entries (`properties`,
# `items`) and carries an `isOpen` flag.
# NOTE(review): `isOpen` looks like UI expand/collapse state - confirm.
class OverallFieldProperty(BaseModel):
    title: Optional[str] = None
    type: str
    key: str
    required: bool
    properties: List[FieldProperty]
    items: Optional[List[FieldProperty]]  # may be None, but no default is declared
    isOpen: bool
    description: Optional[str] = None
    input_value: Optional[str] = None
    airbyte_secret: Optional[bool] = None
    default: Any
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
# Cached description of a connector: its name, config spec, optional docs
# URL and, when known, the list of available stream names.
class AirbyteConfigTemplate(BaseModel):
    source_name: str
    docs_url: Optional[str] = None
    config_spec: Dict
    available_streams: Optional[List[str]] = None
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
# Everything needed to read one stream from a connector: source and stream
# names, the config (inline via `config`, or by name via `config_ref`),
# optional pre-known fields, and the ids of the owning flow and node.
class AirbyteSettings(BaseModel):
    source_name: str
    stream: str
    config_ref: Optional[str] = None
    config: Optional[Dict] = None
    fields: Optional[List[MinimalFieldInfo]] = None
    enforce_full_refresh: Optional[bool] = True
    flowfile_flow_id: int
    flowfile_node_id: int
    version: Optional[str] = None
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def get_source_instance(*args, **kwargs) -> 'Source':
    """Instantiate the lazily resolved airbyte ``Source`` class.

    All positional and keyword arguments are forwarded to the constructor.
    """
    return LazyAirbyteSource.get_source_class()(*args, **kwargs)
|
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
from typing import List, Dict, Optional, Any, Type
|
|
2
|
-
from flowfile_core.configs import logger
|
|
3
|
-
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.models import AirbyteConfigTemplate, \
|
|
4
|
-
AirbyteSettings
|
|
5
|
-
from flowfile_core.schemas.external_sources.airbyte_schemas import AirbyteConfig
|
|
6
|
-
from flowfile_core.flowfile.connection_manager import connection_manager
|
|
7
|
-
from flowfile_core.flowfile.connection_manager._connection_manager import Connection
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class LazyAirbyteImporter:
    """Import the ``airbyte`` module on first request and cache it on the class."""

    # Cached module object; stays None until get_airbyte() has imported it.
    _airbyte = None

    @classmethod
    def get_airbyte(cls):
        """Return the cached ``airbyte`` module, importing it if not yet cached."""
        already_loaded = cls._airbyte
        if already_loaded is not None:
            return already_loaded

        logger.info("Importing airbyte module")
        import airbyte as ab
        cls._airbyte = ab
        return cls._airbyte
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class AirbyteConfigHandler:
    """Cache of connector config templates, keyed by connector name.

    The ``airbyte`` module is loaded lazily through ``LazyAirbyteImporter``.
    """

    _available_connectors: Optional[List[str]] = None
    configs: Dict[str, AirbyteConfigTemplate]
    _airbyte = None

    def __init__(self):
        self.configs = {}
        self._available_connectors = None

    @property
    def airbyte(self):
        """Lazy load airbyte module when needed."""
        if self._airbyte is None:
            self._airbyte = LazyAirbyteImporter.get_airbyte()
        return self._airbyte

    @property
    def available_connectors(self) -> List[str]:
        """Names of all ``source-*`` connectors with the ``source-`` text removed.

        The list is computed on first access and cached afterwards.
        """
        if self._available_connectors is None:
            self._available_connectors = [
                c.replace('source-', '')
                for c in self.airbyte.get_available_connectors()
                if c.startswith('source-')
            ]
        return self._available_connectors

    @property
    def available_configs(self) -> List[str]:
        """Names of the configs that have been cached so far."""
        return list(self.configs)

    def get_config(self, config_name: str) -> AirbyteConfigTemplate:
        """Get configuration for a specific source.

        On a cache miss the connector is fetched (installing it if missing)
        and its spec is stored. Any error is logged and re-raised.
        """
        logger.info(f"Getting config for {config_name}")

        if config_name not in self.configs:
            try:
                source = self.airbyte.get_source(
                    name=config_name,
                    install_if_missing=True,
                    docker_image=True
                )
                logger.info(f"Got source {config_name}")

                self.configs[config_name] = AirbyteConfigTemplate(
                    config_spec=source.config_spec,
                    docs_url=source.docs_url,
                    source_name=config_name
                )
            except Exception as e:
                logger.error(f"Error getting config for {config_name}: {str(e)}")
                raise

        return self.configs[config_name]

    def get_available_streams(self, config_name: str, config_settings: Any) -> List[str]:
        """Get available streams for a specific configuration.

        The stream list is also stored on the cached config template.

        Raises:
            ValueError: if ``config_name`` has not been cached via
                :meth:`get_config`, or if the connector reports no streams.
        """
        if config_name not in self.configs:
            raise ValueError(f"Config {config_name} not found")

        logger.info(f'Getting available streams for {config_name}')

        try:
            source = self.airbyte.get_source(
                name=config_name,
                install_if_missing=True,
                config=config_settings,
                docker_image=True
            )
            streams = source.get_available_streams()
            # Truthiness covers both None and an empty result; the previous
            # `len(streams) == 0 or streams is None` called len() on None first.
            if not streams:
                raise ValueError(f"No streams found for {config_name}")
            self.configs[config_name].available_streams = streams
            return self.configs[config_name].available_streams

        except Exception as e:
            logger.error(f"Error getting streams for {config_name}: {str(e)}")
            raise


# Create singleton instance
airbyte_config_handler = AirbyteConfigHandler()
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
class AirbyteHandler:
    """Wraps one ``AirbyteConfig`` and exposes stream lookup and connection saving."""

    config: AirbyteConfig

    def __init__(self, airbyte_config: AirbyteConfig):
        self.config = airbyte_config

    def set_airbyte_config(self, airbyte_config_in: AirbyteConfig) -> AirbyteConfig:
        """Copy the mapped spec and parsed config from ``airbyte_config_in``.

        Returns the object that was passed in, not the handler's own config.
        """
        current = self.config
        current.mapped_config_spec = airbyte_config_in.mapped_config_spec
        current.parsed_config = airbyte_config_in.parsed_config
        return airbyte_config_in

    def get_available_streams(self) -> List[str]:
        """Return the streams available for the current configuration.

        If the shared handler has no template cached for this source, the
        template is recreated first; a failure there is logged and re-raised.
        """
        full_name = self.config.full_source_name

        if not airbyte_config_handler.configs.get(full_name):
            logger.warning(
                f"Config {self.config.source_name} not found, trying to recreate the config"
            )
            try:
                airbyte_config_handler.get_config(full_name)
                logger.info(f"Config {self.config.source_name} recreated")
            except Exception as e:
                logger.error(f"Error recreating config: {str(e)}")
                raise

        return airbyte_config_handler.get_available_streams(
            full_name,
            self.config.mapped_config_spec
        )

    def save_connection(self, connection_name: str) -> None:
        """Register the current configuration under ``connection_name``.

        The connection is grouped by the config's ``source_name``.
        """
        group_name = self.config.source_name
        new_connection = Connection(
            group=group_name,
            name=connection_name,
            config_setting=self.config,
            type='airbyte'
        )
        connection_manager.add_connection(
            group_name,
            connection_name=connection_name,
            connection=new_connection
        )
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def airbyte_settings_from_config(airbyte_config: AirbyteConfig, flow_id: int, node_id: int|str) -> AirbyteSettings:
    """Build ``AirbyteSettings`` from an ``AirbyteConfig``.

    In 'key_vault' mode the connector config is taken from the stored
    connection identified by the source and connection names; in any other
    mode the config's own ``mapped_config_spec`` is used.
    """
    if airbyte_config.config_mode != 'key_vault':
        resolved_config = airbyte_config.mapped_config_spec
    else:
        stored_connection = connection_manager.get_connection(
            connection_group=airbyte_config.source_name,
            connection_name=airbyte_config.connection_name
        )
        resolved_config = stored_connection.config_setting.mapped_config_spec

    return AirbyteSettings(
        source_name=airbyte_config.full_source_name,
        stream=airbyte_config.selected_stream,
        config=resolved_config,
        flowfile_flow_id=flow_id,
        flowfile_node_id=node_id,
        version=airbyte_config.version
    )
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
from typing import TypeAlias, Optional, Dict, Any, Literal
|
|
2
|
-
from pydantic import BaseModel, Field
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
# Allowed values for AirbyteConfig.config_mode.
config_options: TypeAlias = Literal["in_line", "key_vault"]


# User-facing connector configuration: which source and stream to read and
# how the config is supplied (`config_mode`, default "in_line").
class AirbyteConfig(BaseModel):
    source_name: str
    selected_stream: Optional[str] = None
    config_mode: config_options = "in_line"
    mapped_config_spec: Optional[Dict[str, Any]] = Field(default_factory=dict)
    parsed_config: Optional[Any] = None
    connection_name: Optional[str] = None
    version: Optional[str] = None

    @property
    def full_source_name(self) -> str:
        """Return ``source_name`` prefixed with ``source-``."""
        return "source-" + f"{self.source_name}"
|
|
20
|
-
|
|
File without changes
|