deltafi 0.109.0__py3-none-any.whl → 2.40.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltafi/input.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -17,170 +17,15 @@
17
17
  #
18
18
 
19
19
  from deltafi.domain import *
20
- from deltafi.exception import MissingMetadataException, ExpectedContentException, MissingDomainException, \
21
- MissingEnrichmentException
22
-
23
-
24
- class DomainInput(NamedTuple):
25
- content: List[Content]
26
- metadata: Dict[str, str]
27
- domains: Dict[str, Domain]
28
-
29
- def has_content(self) -> bool:
30
- return len(self.content) > 0
31
-
32
- def content_at(self, index: int) -> Content:
33
- if len(self.content) < index + 1:
34
- raise ExpectedContentException(index, len(self.content))
35
- return self.content[index]
36
-
37
- def first_content(self):
38
- return self.content_at(0)
39
-
40
- def get_metadata(self, key: str):
41
- if key in self.metadata:
42
- return self.metadata[key]
43
- else:
44
- raise MissingMetadataException(key)
45
-
46
- def get_metadata_or_else(self, key: str, default: str) -> str:
47
- if key in self.metadata:
48
- return self.metadata[key]
49
- else:
50
- return default
51
-
52
- def has_domain(self, name: str) -> bool:
53
- return name in self.domains
54
-
55
- def domain(self, name: str) -> Domain:
56
- if not self.has_domain(name):
57
- raise MissingDomainException(name)
58
- return self.domains[name]
20
+ from deltafi.exception import MissingMetadataException, ExpectedContentException
59
21
 
60
22
 
61
23
  class EgressInput(NamedTuple):
62
24
  content: Content
63
25
  metadata: dict
64
26
 
65
-
66
- class EnrichInput(NamedTuple):
67
- content: List[Content]
68
- metadata: dict
69
- domains: Dict[str, Domain]
70
- enrichment: Dict[str, Domain]
71
-
72
- def has_content(self) -> bool:
73
- return len(self.content) > 0
74
-
75
- def content_at(self, index: int) -> Content:
76
- if len(self.content) < index + 1:
77
- raise ExpectedContentException(index, len(self.content))
78
- return self.content[index]
79
-
80
- def first_content(self):
81
- return self.content_at(0)
82
-
83
- def get_metadata(self, key: str):
84
- if key in self.metadata:
85
- return self.metadata[key]
86
- else:
87
- raise MissingMetadataException(key)
88
-
89
- def get_metadata_or_else(self, key: str, default: str) -> str:
90
- if key in self.metadata:
91
- return self.metadata[key]
92
- else:
93
- return default
94
-
95
- def has_domain(self, name: str) -> bool:
96
- return name in self.domains
97
-
98
- def domain(self, name: str) -> Domain:
99
- if not self.has_domain(name):
100
- raise MissingDomainException(name)
101
- return self.domains[name]
102
-
103
- def has_enrichment(self, name: str) -> bool:
104
- return name in self.enrichment
105
-
106
- def enrichment(self, name: str) -> Domain:
107
- if not self.has_enrichment(name):
108
- raise MissingEnrichmentException(name)
109
- return self.enrichment[name]
110
-
111
-
112
- class FormatInput(NamedTuple):
113
- content: List[Content]
114
- metadata: dict
115
- domains: Dict[str, Domain]
116
- enrichment: Dict[str, Domain]
117
-
118
- def has_content(self) -> bool:
119
- return len(self.content) > 0
120
-
121
- def content_at(self, index: int) -> Content:
122
- if len(self.content) < index + 1:
123
- raise ExpectedContentException(index, len(self.content))
124
- return self.content[index]
125
-
126
- def first_content(self):
127
- return self.content_at(0)
128
-
129
- def get_metadata(self, key: str):
130
- if key in self.metadata:
131
- return self.metadata[key]
132
- else:
133
- raise MissingMetadataException(key)
134
-
135
- def get_metadata_or_else(self, key: str, default: str) -> str:
136
- if key in self.metadata:
137
- return self.metadata[key]
138
- else:
139
- return default
140
-
141
- def has_domain(self, name: str) -> bool:
142
- return name in self.domains
143
-
144
- def domain(self, name: str) -> Domain:
145
- if not self.has_domain(name):
146
- raise MissingDomainException(name)
147
- return self.domains[name]
148
-
149
- def has_enrichment(self, name: str) -> bool:
150
- return name in self.enrichment
151
-
152
- def enrichment(self, name: str) -> Domain:
153
- if not self.has_enrichment(name):
154
- raise MissingEnrichmentException(name)
155
- return self.enrichment[name]
156
-
157
-
158
- class LoadInput(NamedTuple):
159
- content: List[Content]
160
- metadata: dict
161
-
162
27
  def has_content(self) -> bool:
163
- return len(self.content) > 0
164
-
165
- def content_at(self, index: int) -> Content:
166
- if len(self.content) < index + 1:
167
- raise ExpectedContentException(index, len(self.content))
168
- return self.content[index]
169
-
170
- def first_content(self):
171
- return self.content_at(0)
172
-
173
- def get_metadata(self, key: str):
174
- if key in self.metadata:
175
- return self.metadata[key]
176
- else:
177
- raise MissingMetadataException(key)
178
-
179
- def get_metadata_or_else(self, key: str, default: str) -> str:
180
- if key in self.metadata:
181
- return self.metadata[key]
182
- else:
183
- return default
28
+ return self.content is not None
184
29
 
185
30
 
186
31
  class TransformInput(NamedTuple):
@@ -195,6 +40,9 @@ class TransformInput(NamedTuple):
195
40
  raise ExpectedContentException(index, len(self.content))
196
41
  return self.content[index]
197
42
 
43
+ def content_named(self, name: str) -> Content:
44
+ return next((c for c in self.content if c.name == name), None)
45
+
198
46
  def first_content(self):
199
47
  return self.content_at(0)
200
48
 
@@ -209,8 +57,3 @@ class TransformInput(NamedTuple):
209
57
  return self.metadata[key]
210
58
  else:
211
59
  return default
212
-
213
-
214
- class ValidateInput(NamedTuple):
215
- content: Content
216
- metadata: dict
deltafi/logger.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -18,20 +18,32 @@
18
18
 
19
19
  import logging
20
20
  import sys
21
- from datetime import datetime
21
+ from datetime import datetime, UTC
22
22
 
23
23
  import json_logging
24
24
 
25
25
 
26
+ logger_map = {}
27
+
26
28
  def get_logger(name: str = None) -> logging.Logger:
29
+ logger_name = name
30
+ if logger_name is None:
31
+ logger_name = "root"
32
+
33
+ if logger_name in logger_map:
34
+ return logger_map[logger_name]
35
+
27
36
  logger = logging.getLogger(name)
28
- logger.setLevel(logging.DEBUG)
37
+ logger.setLevel(logging.INFO)
38
+ logger.handlers.clear()
29
39
  logger.addHandler(logging.StreamHandler(sys.stdout))
30
40
  logger.propagate = False
31
41
 
32
42
  if name is not None:
33
43
  logger = logging.LoggerAdapter(logger, dict(action=name))
34
44
 
45
+ logger_map[logger_name] = logger
46
+
35
47
  return logger
36
48
 
37
49
 
@@ -42,7 +54,7 @@ def _sanitize_log_msg(record):
42
54
  class JSONLogFormatter(json_logging.JSONLogFormatter):
43
55
 
44
56
  def _format_log_object(self, record, request_util):
45
- utcnow = datetime.utcnow()
57
+ utcnow = datetime.now(UTC)
46
58
 
47
59
  json_log_object = {
48
60
  'timestamp': json_logging.util.iso_time_format(utcnow),
deltafi/lookuptable.py ADDED
@@ -0,0 +1,292 @@
1
+ #
2
+ # DeltaFi - Data transformation and enrichment platform
3
+ #
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+ from abc import ABC, abstractmethod
20
+ from enum import Enum
21
+ import json
22
+ import os
23
+ from typing import List
24
+
25
+ import requests
26
+
27
+ from deltafi.types import PluginCoordinates
28
+
29
+
30
class LookupTable:
    """Definition of a lookup table: its name, columns, key columns and refresh settings.

    Instances convert to and from dicts that use camelCase keys (``keyColumns``,
    ``serviceBacked``, ...) via :meth:`json` and :meth:`from_dict`.
    """

    def __init__(self, name: str, columns: List[str], key_columns: List[str],
                 source_plugin: "PluginCoordinates" = None, service_backed: bool = True,
                 backing_service_active: bool = True, pull_through: bool = False,
                 refresh_duration: str = None, last_refresh: str = None):
        self.name = name
        self.source_plugin = source_plugin
        self.columns = columns
        self.key_columns = key_columns
        self.service_backed = service_backed
        self.backing_service_active = backing_service_active
        self.pull_through = pull_through
        self.refresh_duration = refresh_duration
        self.last_refresh = last_refresh

    @classmethod
    def from_dict(cls, lookup_table: dict):
        """Build a table from a dict with camelCase keys.

        Keys that are absent are passed through as ``None`` (the constructor defaults are
        not applied). ``sourcePlugin`` is stored exactly as found in the dict.
        """
        # Use cls so that subclasses get instances of their own type.
        return cls(name=lookup_table.get('name'), source_plugin=lookup_table.get('sourcePlugin'),
                   columns=lookup_table.get('columns'), key_columns=lookup_table.get('keyColumns'),
                   service_backed=lookup_table.get('serviceBacked'),
                   backing_service_active=lookup_table.get('backingServiceActive'),
                   pull_through=lookup_table.get('pullThrough'),
                   refresh_duration=lookup_table.get('refreshDuration'),
                   last_refresh=lookup_table.get('lastRefresh'))

    def json(self):
        """Return the table as a dict with camelCase keys.

        ``source_plugin`` is serialized through its own ``json()`` method, except when it
        is already a plain dict (as stored by :meth:`from_dict`), which is emitted as-is.
        """
        source_plugin = self.source_plugin
        if source_plugin is not None and not isinstance(source_plugin, dict):
            source_plugin = source_plugin.json()

        return {
            'name': self.name,
            'sourcePlugin': source_plugin,
            'columns': self.columns,
            'keyColumns': self.key_columns,
            'serviceBacked': self.service_backed,
            'backingServiceActive': self.backing_service_active,
            'pullThrough': self.pull_through,
            'refreshDuration': self.refresh_duration,
            'lastRefresh': self.last_refresh
        }
69
+
70
+
71
class LookupTableEvent:
    """Plain holder for the fields of a lookup table event."""

    def __init__(self, id: str, lookup_table_name: str, matching_column_values: dict, result_columns: List[str],
                 variables: dict):
        self.id, self.lookup_table_name = id, lookup_table_name
        self.matching_column_values, self.result_columns = matching_column_values, result_columns
        self.variables = variables

    @classmethod
    def create(cls, event: dict):
        """Build an event from a dict with camelCase keys; absent keys become ``None``."""
        field_values = {
            'id': event.get('id'),
            'lookup_table_name': event.get('lookupTableName'),
            'matching_column_values': event.get('matchingColumnValues'),
            'result_columns': event.get('resultColumns'),
            'variables': event.get('variables'),
        }
        return LookupTableEvent(**field_values)
85
+
86
+
87
class LookupTableEventResult:
    """Rows returned for a lookup table event, tagged with the event id and table name."""

    def __init__(self, lookup_table_event_id: str, lookup_table_name: str, rows: List[dict]):
        self.lookup_table_event_id, self.lookup_table_name = lookup_table_event_id, lookup_table_name
        self.rows = rows

    def json(self):
        """Return the result as a dict with camelCase keys."""
        return dict(
            lookupTableEventId=self.lookup_table_event_id,
            lookupTableName=self.lookup_table_name,
            rows=self.rows,
        )
99
+
100
+
101
+ class SortDirection(Enum):
102
+ ASC = "ASC"
103
+ DESC = "DESC"
104
+
105
+
106
class LookupOptions:
    """Optional settings for a lookup: match criteria, result columns, sorting and paging.

    Every option defaults to ``None``.
    """

    def __init__(self, matching_column_values: dict = None, result_columns: List[str] = None, sort_column: str = None,
                 sort_direction: SortDirection = None, offset: int = None, limit: int = None):
        # Matching and projection
        self.matching_column_values, self.result_columns = matching_column_values, result_columns
        # Ordering
        self.sort_column, self.sort_direction = sort_column, sort_direction
        # Paging
        self.offset, self.limit = offset, limit

    @classmethod
    def default_lookup_options(cls):
        """Return options with every field left at ``None``."""
        return LookupOptions()
119
+
120
+
121
class LookupResults:
    """Outcome of a lookup: a total count together with the list of result rows."""

    def __init__(self, total_count: int, results: List[dict]):
        self.total_count, self.results = total_count, results
125
+
126
+
127
+ class UploadFileType(Enum):
128
+ JSON = "application/json"
129
+ CSV = "text/csv"
130
+
131
+
132
class LookupTableClient:
    """HTTP client for lookup table operations.

    GraphQL operations are posted to ``<core>/api/v2/graphql`` and table uploads to
    ``<core>/api/v2/lookup/<table name>``, where ``<core>`` is taken from the ``CORE_URL``
    environment variable (default ``http://deltafi-core:8080``). Every request carries
    the fixed ``X-User-Permissions`` and ``X-User-Name`` headers set in ``__init__``.
    """

    def __init__(self):
        self.core_url = os.getenv('CORE_URL', 'http://deltafi-core:8080') + '/api/v2'
        self.common_headers = {
            'X-User-Permissions': 'Admin',
            'X-User-Name': 'deltafi-cli'
        }
        self.graphql_url = f"{self.core_url}/graphql"
        self.graphql_headers = self.common_headers.copy()
        self.graphql_headers['Content-Type'] = 'application/json'

    @staticmethod
    def _graphql_string(text) -> str:
        """Render ``text`` as a double-quoted string literal with quotes, backslashes and
        control characters escaped, so the value cannot break out of the literal."""
        return json.dumps(str(text), ensure_ascii=False)

    def _post_graphql(self, query: str, failure_message: str) -> dict:
        """Post a GraphQL document and return the ``data`` member of the response.

        Raises:
            RuntimeError: if the HTTP status is not OK, or the response carries no ``data``
                (GraphQL servers report request errors in the body, usually with HTTP 200).
        """
        response = requests.post(self.graphql_url, headers=self.graphql_headers, json={"query": query})
        if not response.ok:
            raise RuntimeError(f"{failure_message}: {response.text}")

        response_dict = json.loads(response.text)
        data = response_dict.get('data')
        if data is None:
            raise RuntimeError(f"{failure_message}: {response.text}")
        return data

    def create_lookup_table(self, lookup_table: LookupTable):
        """Create ``lookup_table`` through the ``createLookupTable`` mutation.

        Returns:
            The ``createLookupTable`` member of the response data (``success``, ``info``
            and ``errors`` are the selected fields).

        Raises:
            RuntimeError: if the request fails or returns no data.
        """
        if lookup_table.refresh_duration is None:
            refresh_duration_value = "null"
        else:
            refresh_duration_value = self._graphql_string(lookup_table.refresh_duration)

        mutation = f"""
            mutation createLookupTable {{
                createLookupTable(
                    lookupTableInput: {{name: {self._graphql_string(lookup_table.name)}, columns: [{quoted_string_list(lookup_table.columns)}],
                        keyColumns: [{quoted_string_list(lookup_table.key_columns)}], serviceBacked: {to_graphql_boolean(lookup_table.service_backed)},
                        backingServiceActive: {to_graphql_boolean(lookup_table.backing_service_active)}, pullThrough: {to_graphql_boolean(lookup_table.pull_through)},
                        refreshDuration: {refresh_duration_value}}}
                ) {{
                    success
                    info
                    errors
                }}
            }}"""

        data = self._post_graphql(mutation, f"Unable to create lookup table {lookup_table.name}")
        return data['createLookupTable']

    def get_lookup_tables(self):
        """Fetch all lookup tables through the ``getLookupTables`` query.

        Returns:
            A list of :class:`LookupTable` built from the response entries.

        Raises:
            RuntimeError: if the request fails or returns no data.
        """
        query = """
            query getLookupTables {
                getLookupTables {
                    name
                    columns
                    keyColumns
                    serviceBacked
                    backingServiceActive
                    pullThrough
                    refreshDuration
                    lastRefresh
                }
            }"""

        data = self._post_graphql(query, "Unable to get lookup tables")
        return [LookupTable.from_dict(lookup_table) for lookup_table in data['getLookupTables']]

    def lookup(self, lookup_table_name: str, lookup_options: LookupOptions):
        """Query rows of ``lookup_table_name``; only options that are not ``None`` are sent.

        Returns:
            A :class:`LookupResults` holding ``totalCount`` and one dict per returned row
            (column name -> value).

        Raises:
            RuntimeError: if the request fails or returns no data.
        """
        lookup_args = [f"lookupTableName: {self._graphql_string(lookup_table_name)}"]
        if lookup_options.matching_column_values is not None:
            lookup_args.append(f"matchingColumnValues: [{to_graphql(lookup_options.matching_column_values)}]")
        if lookup_options.result_columns is not None:
            lookup_args.append(f"resultColumns: [{quoted_string_list(lookup_options.result_columns)}]")
        if lookup_options.sort_column is not None:
            # NOTE(review): assumes sortColumn is a String argument in the schema, so it is
            # sent as a quoted literal - confirm against the core GraphQL schema.
            lookup_args.append(f"sortColumn: {self._graphql_string(lookup_options.sort_column)}")
        if lookup_options.sort_direction is not None:
            sort_direction = lookup_options.sort_direction
            # Formatting an Enum member yields "SortDirection.ASC"; the query needs its value.
            if isinstance(sort_direction, Enum):
                sort_direction = sort_direction.value
            lookup_args.append(f"sortDirection: {sort_direction}")
        if lookup_options.offset is not None:
            lookup_args.append(f"offset: {lookup_options.offset}")
        if lookup_options.limit is not None:
            lookup_args.append(f"limit: {lookup_options.limit}")
        joined_args = "\n                    ".join(lookup_args)

        # totalCount has to be selected explicitly: it is read from the response below and
        # a GraphQL response only contains the fields that were requested.
        query = f"""
            query lookup {{
                lookup(
                    {joined_args}
                ) {{
                    totalCount
                    rows {{
                        column
                        value
                    }}
                }}
            }}"""

        data = self._post_graphql(query, f"Unable to lookup from {lookup_table_name}")
        lookup_data = data['lookup']
        return LookupResults(lookup_data['totalCount'],
                             [to_dict(column_values) for column_values in lookup_data['rows']])

    def upload_table(self, lookup_table_name: str, file_type: UploadFileType, file_contents: str):
        """Upload ``file_contents`` as the content of ``lookup_table_name``.

        The ``Content-Type`` header is set to ``file_type.value``.

        Raises:
            RuntimeError: if the server answers with a non-OK status.
        """
        headers = self.common_headers.copy()
        headers['Content-Type'] = file_type.value
        # Encode text explicitly so the bytes on the wire are UTF-8 rather than whatever
        # the underlying HTTP stack would pick for a str body.
        body = file_contents.encode('utf-8') if isinstance(file_contents, str) else file_contents
        response = requests.post(f"{self.core_url}/lookup/{lookup_table_name}", headers=headers, data=body)
        if not response.ok:
            raise RuntimeError(f"Unable to upload table {lookup_table_name}: Server returned status code {response.status_code}: {response.text}")
233
+
234
+
235
def to_graphql(matching_column_values: dict):
    """Render a column -> values mapping as a comma-separated list of GraphQL objects.

    Each entry becomes ``{column: "<name>", value: ["<v1>", "<v2>", ...]}``. A value given
    as a single string is treated as a one-element list instead of being split into
    characters. An empty mapping yields an empty string.
    """
    entries = []
    for column, values in matching_column_values.items():
        if isinstance(values, str):
            values = [values]
        # json.dumps escapes quotes and backslashes so the name stays inside its literal.
        column_literal = json.dumps(str(column), ensure_ascii=False)
        entries.append(f"{{column: {column_literal}, value: [{quoted_string_list(values)}]}}")
    return ', '.join(entries)
240
+
241
+
242
def to_graphql_boolean(boolean):
    """Return ``'true'`` only for the ``True`` singleton; any other value gives ``'false'``."""
    return 'true' if boolean is True else 'false'
247
+
248
+
249
def quoted_string_list(strings: List[str]):
    """Join ``strings`` into a comma-separated sequence of double-quoted string literals.

    Quotes, backslashes and control characters inside each string are escaped, so a value
    cannot terminate its literal early. An empty input yields an empty string.

    Raises:
        TypeError: if an element is not a ``str``.
    """
    literals = []
    for s in strings:
        if not isinstance(s, str):
            raise TypeError(f"expected str, got {type(s).__name__}")
        literals.append(json.dumps(s, ensure_ascii=False))
    return ', '.join(literals)
251
+
252
+
253
def to_dict(column_values: List[dict]):
    """Collapse a list of ``{'column': ..., 'value': ...}`` entries into one dict.

    When a column appears more than once, the last entry wins.
    """
    return {entry.get('column'): entry.get('value') for entry in column_values}
258
+
259
+
260
class LookupTableSupplier(ABC):
    """Base class pairing a lookup table with the client used to upload its content.

    Subclasses implement :meth:`get_rows`.
    """

    def __init__(self, lookup_table_client: LookupTableClient, lookup_table: LookupTable):
        self.lookup_table_client, self.lookup_table = lookup_table_client, lookup_table

    @abstractmethod
    def get_rows(self, variables: dict, matching_column_value: dict = None, result_columns: List[str] = None):
        """Return the rows for this table; must be provided by subclasses."""

    def upload_table_of_type(self, upload_file_type: UploadFileType, file: str):
        """Send ``file`` to the client as the content of this supplier's table."""
        table_name = self.lookup_table.name
        self.lookup_table_client.upload_table(table_name, upload_file_type, file)

    def upload_table(self, variables: dict):
        """Serialize ``get_rows(variables)`` to JSON and upload it."""
        rows = self.get_rows(variables)
        payload = json.dumps(rows)
        self.upload_table_of_type(UploadFileType.JSON, payload)
274
+
275
+
276
class ResourceLookupTableSupplier(LookupTableSupplier, ABC):
    """Supplier whose table content comes from a file at ``path``.

    The file is uploaded as CSV when its name ends in ``.csv`` (any letter case) and as
    JSON otherwise.
    """

    def __init__(self, lookup_table_client: LookupTableClient, lookup_table: LookupTable, path: str):
        super().__init__(lookup_table_client, lookup_table)
        self.path = path

    def get_rows(self, variables: dict, matching_column_value: dict = None, result_columns: List[str] = None):
        """Upload the file and return an empty list; no rows are returned directly."""
        self.upload_table(variables)
        return []

    def upload_table(self, variables: dict):
        """Read the file at ``self.path`` and upload its text; ``variables`` is not used."""
        # Decode as UTF-8 explicitly instead of relying on the platform's locale encoding.
        with open(self.path, 'r', encoding='utf-8') as file:
            file_contents = file.read()

        # Compare case-insensitively so e.g. "TABLE.CSV" is not uploaded as JSON.
        if self.path.lower().endswith('.csv'):
            self.upload_table_of_type(UploadFileType.CSV, file_contents)
        else:
            self.upload_table_of_type(UploadFileType.JSON, file_contents)
deltafi/metric.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # DeltaFi - Data transformation and enrichment platform
3
3
  #
4
- # Copyright 2021-2023 DeltaFi Contributors <deltafi@deltafi.org>
4
+ # Copyright 2021-2025 DeltaFi Contributors <deltafi@deltafi.org>
5
5
  #
6
6
  # Licensed under the Apache License, Version 2.0 (the "License");
7
7
  # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ from typing import Dict, NamedTuple
22
22
  class Metric(NamedTuple):
23
23
  name: str
24
24
  value: int
25
- tags: Dict[str, str]
25
+ tags: Dict[str, str] = {}
26
26
 
27
27
  def json(self):
28
28
  return {