ipulse-shared-core-ftredge 2.6__tar.gz → 2.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ipulse-shared-core-ftredge might be problematic. Click here for more details.
- ipulse_shared_core_ftredge-2.7.1/PKG-INFO +13 -0
- ipulse_shared_core_ftredge-2.7.1/README.md +21 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/setup.py +6 -2
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/__init__.py +23 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/__init__.py +37 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_common_utils.py +107 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_data_eng.py +313 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_logging.py +108 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_module_fincore.py +72 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_modules.py +31 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums/enums_solution_providers.py +24 -0
- {ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/models → ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/enums}/pulse_enums.py +10 -46
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge/models/__init__.py +0 -1
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/models/organisation.py +71 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/models/resource_catalog_item.py +115 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge/models/user_profile.py +10 -9
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/models/user_profile_update.py +36 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge/models/user_status.py +21 -11
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/__init__.py +23 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/logs/__init__.py +2 -0
- {ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/models → ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/logs}/audit_log_firestore.py +1 -1
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/logs/context_log.py +210 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/logs/get_logger.py +103 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_cloud.py +53 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_cloud_gcp.py +442 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_cloud_gcp_with_collectors.py +166 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_cloud_with_collectors.py +27 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_collector_pipelinemon.py +356 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_common.py +180 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge/utils/utils_templates_and_schemas.py +151 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge.egg-info/PKG-INFO +13 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge.egg-info/SOURCES.txt +38 -0
- ipulse_shared_core_ftredge-2.7.1/src/ipulse_shared_core_ftredge.egg-info/requires.txt +6 -0
- ipulse_shared_core_ftredge-2.7.1/tests/test_utils_gcp.py +189 -0
- ipulse_shared_core_ftredge-2.6/PKG-INFO +0 -10
- ipulse_shared_core_ftredge-2.6/README.md +0 -2
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/__init__.py +0 -5
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/models/organisation.py +0 -65
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/models/resource_catalog_item.py +0 -189
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/models/user_profile_update.py +0 -18
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/tests/__init__.py +0 -0
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge/tests/test.py +0 -17
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge.egg-info/PKG-INFO +0 -10
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge.egg-info/SOURCES.txt +0 -21
- ipulse_shared_core_ftredge-2.6/src/ipulse_shared_core_ftredge.egg-info/requires.txt +0 -3
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/LICENCE +0 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/pyproject.toml +0 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/setup.cfg +0 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge/models/user_auth.py +0 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge.egg-info/dependency_links.txt +0 -0
- {ipulse_shared_core_ftredge-2.6 → ipulse_shared_core_ftredge-2.7.1}/src/ipulse_shared_core_ftredge.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: ipulse_shared_core_ftredge
|
|
3
|
+
Version: 2.7.1
|
|
4
|
+
Summary: Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.
|
|
5
|
+
Home-page: https://github.com/TheFutureEdge/ipulse_shared_core
|
|
6
|
+
Author: Russlan Ramdowar
|
|
7
|
+
License-File: LICENCE
|
|
8
|
+
Requires-Dist: pydantic[email]~=2.5
|
|
9
|
+
Requires-Dist: python-dateutil~=2.8
|
|
10
|
+
Requires-Dist: pytest~=7.1
|
|
11
|
+
Requires-Dist: google-cloud-logging~=3.10.0
|
|
12
|
+
Requires-Dist: google-cloud-error-reporting~=1.11.0
|
|
13
|
+
Requires-Dist: google-cloud-bigquery~=3.24.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# ipulse_shared_core
|
|
2
|
+
Shared Models like User, Organisation etc. Also includes shared enum_sets
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
### Enums
|
|
6
|
+
|
|
7
|
+
Contains majority of all Enums used in Pulse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Collectors i.e. Pipelinemon
|
|
12
|
+
|
|
13
|
+
Collectors are smart Objects which are added to long running functions or pipelines for which we want to collect an overall number of successes, notices, warnings or errors.
|
|
14
|
+
|
|
15
|
+
We can wait until the full pipeline is finished in order to write off a single Summary file from a Collector, or we can attach a logger to it, which will report major statuses along the journey — often the better choice. If a function crashes midway through, all in-memory logs would be lost, and it would be hard to investigate what has been persisted and has to be rolled back. This would require a lot of manual effort to recollect.
|
|
16
|
+
|
|
17
|
+
Pipelinemon, short for Pipeline Monitoring system, is a very powerful type of Collector which Russlan created specifically for Pulse Data Engineering pipelines.
|
|
18
|
+
|
|
19
|
+
Pipelinemon writes all observation logs to Google Cloud Logging, and you have to set up a Log Sink (Router) which will send Pipelinemon's observation logs to BigQuery.
|
|
20
|
+
|
|
21
|
+
A great thing about Pipelinemon is its "context"-keeping feature.
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
# pylint: disable=import-error
|
|
1
2
|
from setuptools import setup, find_packages
|
|
2
3
|
|
|
3
4
|
setup(
|
|
4
5
|
name='ipulse_shared_core_ftredge',
|
|
5
|
-
version='2.
|
|
6
|
+
version='2.7.1',
|
|
6
7
|
package_dir={'': 'src'}, # Specify the source directory
|
|
7
8
|
packages=find_packages(where='src'), # Look for packages in 'src'
|
|
8
9
|
install_requires=[
|
|
@@ -10,8 +11,11 @@ setup(
|
|
|
10
11
|
'pydantic[email]~=2.5',
|
|
11
12
|
'python-dateutil~=2.8',
|
|
12
13
|
'pytest~=7.1',
|
|
14
|
+
'google-cloud-logging~=3.10.0',
|
|
15
|
+
'google-cloud-error-reporting~=1.11.0',
|
|
16
|
+
'google-cloud-bigquery~=3.24.0'
|
|
13
17
|
],
|
|
14
18
|
author='Russlan Ramdowar',
|
|
15
|
-
description='Shared models for the Pulse platform project. Using AI for financial advisory and investment management.',
|
|
19
|
+
description='Shared Core models and Logger util for the Pulse platform project. Using AI for financial advisory and investment management.',
|
|
16
20
|
url='https://github.com/TheFutureEdge/ipulse_shared_core',
|
|
17
21
|
)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# pylint: disable=missing-module-docstring
|
|
2
|
+
from .models import ( UserAuth, UserProfile,
|
|
3
|
+
UserStatus, UserProfileUpdate)
|
|
4
|
+
|
|
5
|
+
from .enums import (TargetLogs,LogLevel, Status, Unit, Frequency,
|
|
6
|
+
Module, Domain, FinCoreCategory, FincCoreSubCategory,
|
|
7
|
+
FinCoreRecordsCategory, FinancialExchangeOrPublisher,
|
|
8
|
+
DataPrimaryCategory, DataState, DatasetScope,
|
|
9
|
+
DataSourceType,PipelineTriggerType,DataOperationType,
|
|
10
|
+
MatchConditionType, DuplicationHandling, DuplicationHandlingStatus,
|
|
11
|
+
CodingLanguage, ExecutionLocation, ExecutionComputeType,
|
|
12
|
+
CloudProvider,LoggingHandlers)
|
|
13
|
+
from .utils import (get_logger,
|
|
14
|
+
save_json_locally_extended,
|
|
15
|
+
write_json_to_cloud_storage_extended,
|
|
16
|
+
write_json_to_gcs_extended,
|
|
17
|
+
write_csv_to_gcs,
|
|
18
|
+
read_json_from_cloud_storage,
|
|
19
|
+
read_csv_from_gcs,
|
|
20
|
+
read_json_from_gcs,
|
|
21
|
+
check_format_against_schema_template,
|
|
22
|
+
create_bigquery_schema_from_json,
|
|
23
|
+
Pipelinemon, ContextLog)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
|
|
2
|
+
# pylint: disable=missing-module-docstring
|
|
3
|
+
# pylint: disable=missing-function-docstring
|
|
4
|
+
# pylint: disable=missing-class-docstring
|
|
5
|
+
|
|
6
|
+
from .enums_common_utils import (Status,
|
|
7
|
+
Unit,
|
|
8
|
+
Frequency)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
from .enums_modules import(Module,
|
|
12
|
+
Domain)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
from .enums_module_fincore import (FinCoreCategory,
|
|
16
|
+
FincCoreSubCategory,
|
|
17
|
+
FinCoreRecordsCategory,
|
|
18
|
+
FinancialExchangeOrPublisher)
|
|
19
|
+
|
|
20
|
+
from .enums_logging import (TargetLogs,
|
|
21
|
+
LogLevel,
|
|
22
|
+
LoggingHandlers)
|
|
23
|
+
|
|
24
|
+
from .enums_data_eng import (DataPrimaryCategory,
|
|
25
|
+
DataState,
|
|
26
|
+
DatasetScope,
|
|
27
|
+
DataSourceType,
|
|
28
|
+
PipelineTriggerType,
|
|
29
|
+
DataOperationType,
|
|
30
|
+
MatchConditionType,
|
|
31
|
+
DuplicationHandling,
|
|
32
|
+
DuplicationHandlingStatus,
|
|
33
|
+
CodingLanguage,
|
|
34
|
+
ExecutionLocation,
|
|
35
|
+
ExecutionComputeType)
|
|
36
|
+
|
|
37
|
+
from .enums_solution_providers import (CloudProvider)
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
|
|
2
|
+
# pylint: disable=missing-module-docstring
|
|
3
|
+
# pylint: disable=missing-function-docstring
|
|
4
|
+
# pylint: disable=missing-class-docstring
|
|
5
|
+
# pylint: disable=line-too-long
|
|
6
|
+
|
|
7
|
+
from enum import Enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Status(Enum):
    """Lifecycle states of an issue or work item, from creation to closure."""

    OPEN = "open"
    ACKNOWLEDGED = "acknowledged"
    ESCALATED = "escalated"
    IN_PROGRESS = "in_progress"
    IN_REVIEW = "in_review"
    RESOLVED = "resolved"
    IGNORED = "ignored"
    CANCELLED = "cancelled"
    CLOSED = "closed"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
23
|
+
|
|
24
|
+
### Exception during full exection, partially saved
|
|
25
|
+
# Exception during ensemble pipeline; modifications collected in local object , nothing persisted
|
|
26
|
+
# Exception during ensemble pipeline; modifications persisted , metadata failed
|
|
27
|
+
# Exception during ensemble pipeline; modifications persisted , metadata persisted
|
|
28
|
+
# Exception during ensemble pipeline; modifications persisted , metadata persisted
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Unit(Enum):
    """Measurement units used across Pulse datasets (currencies, volumes, areas, …)."""

    MIX = "MIX"

    # --- Currencies (ISO 4217 codes) ---
    USD = "USD"  # United States Dollar
    EUR = "EUR"  # Euro
    JPY = "JPY"  # Japanese Yen
    GBP = "GBP"  # British Pound Sterling
    AUD = "AUD"  # Australian Dollar
    CAD = "CAD"  # Canadian Dollar
    CHF = "CHF"  # Swiss Franc
    CNY = "CNY"  # Chinese Yuan Renminbi
    SEK = "SEK"  # Swedish Krona
    NZD = "NZD"  # New Zealand Dollar
    MXN = "MXN"  # Mexican Peso
    SGD = "SGD"  # Singapore Dollar
    HKD = "HKD"  # Hong Kong Dollar
    NOK = "NOK"  # Norwegian Krone
    KRW = "KRW"  # South Korean Won
    RUB = "RUB"  # Russian Ruble
    INR = "INR"  # Indian Rupee
    BRL = "BRL"  # Brazilian Real
    ZAR = "ZAR"  # South African Rand
    CURRENCY = "currency"  # Generic currency when a specific one is not needed

    # --- Stock market and investments ---
    SHARES = "shares"  # Number of shares
    PERCENT = "prcnt"  # Percentage, for rates and ratios
    BPS = "bps"        # Basis points, for interest rates and financial ratios

    # --- Volume and quantitative measures ---
    VOLUME = "volume"    # Trading volume in units
    MILLIONS = "mills"   # Millions, for large quantities or sums
    BILLIONS = "bills"   # Billions, for very large quantities or sums

    # --- Commodity-specific units ---
    BARRELS = "barrels"       # Oil and similar liquids
    TONNES = "tonnes"         # Bulk materials like metals or grains
    TROY_OUNCES = "troy_oz"   # Precious metals

    # --- Real estate and physical properties ---
    SQUARE_FEET = "sq_ft"   # Area measurement in real estate
    METER_SQUARE = "m2"     # Area measurement in real estate
    ACRES = "acres"         # Large plots of land

    # --- Miscellaneous ---
    UNITS = "units"              # Generic units when nothing specific fits
    COUNT = "count"              # Tallying items or events
    INDEX_POINTS = "index_pnts"  # Stock-market-style index points
    RATIO = "ratio"              # Various financial ratios

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
83
|
+
|
|
84
|
+
class Frequency(Enum):
    """Sampling/bar frequencies for time-series data (minutes to years)."""

    ONE_MIN = "1min"
    FIVE_MIN = "5min"
    FIFTEEN_MIN = "15min"
    THIRTY_MIN = "30min"
    ONE_H = "1h"
    TWO_H = "2h"
    SIX_H = "6h"
    TWELVE_H = "12h"
    FOUR_H = "4h"
    EOD = "eod"  # End of day
    ONE_D = "1d"
    TWO_D = "2d"
    THREE_D = "3d"
    ONE_W = "1w"
    ONE_M = "1m"
    TWO_M = "2m"
    THREE_M = "3m"
    SIX_M = "6m"
    ONE_Y = "1y"
    THREE_Y = "3y"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
# pylint: disable=missing-module-docstring
|
|
2
|
+
# pylint: disable=missing-function-docstring
|
|
3
|
+
# pylint: disable=missing-class-docstring
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DataPrimaryCategory(Enum):
    """Top-level classification of a dataset's nature."""

    SIMULATION = "simulation"  # Based on models and simulations
    HISTORIC = "historic"      # Historical data, usually accurate and complete
    REALTIME = "realtime"      # Real-time data; not always certain, can contain errors
    ANALYTICS = "analytics"    # Derived analytics/modelling; human-readable, vs. FEATURES
    FEATURES = "features"      # Feature data used for training models
    PREDICTIVE = "predictive"  # Predictions produced by models and simulations

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
17
|
+
|
|
18
|
+
class DataState(Enum):
    """Processing stage a dataset has reached in the pipeline."""

    RAW = "raw"
    FORMATTED = "formatted"
    CLEANED = "cleaned"
    PROCESSED = "processed"
    SIMULATED = "simulated"
    ANALYZED = "analyzed"
    VALIDATED = "validated"
    INVALID = "invalid"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
30
|
+
|
|
31
|
+
class DatasetScope(Enum):
    """How much of the underlying dataset an operation covers."""

    FULL = "full_dataset"
    INCREMENTAL = "incremental_dataset"
    PARTIAL = "partial_dataset"
    UNKNOWN = "unknown_dataset"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class DataSourceType(Enum):
    """Kind of system or artifact that data is read from / written to."""

    # --- General protocols ---
    API = "api"
    RPC = "rpc"
    GRPC = "grpc"
    WEBSITE = "website"

    # --- SQL databases / warehouses ---
    ORACLE = "oracle"
    POSTGRESQL = "postgresql"
    SQLSERVER = "sqlserver"
    MYSQL = "mysql"
    BIGQUERY = "bigquery"
    SNOWFLAKE = "snowflake"
    REDSHIFT = "redshift"
    ATHENA = "athena"

    # --- NoSQL databases ---
    MONGODB = "mongodb"
    REDIS = "redis"
    CASSANDRA = "cassandra"
    NEO4J = "neo4j"
    FIRESTORE = "firestore"
    DYNAMODB = "dynamodb"

    # --- NewSQL databases ---
    COCKROACHDB = "cockroachdb"
    SPANNER = "spanner"

    # --- Messaging ---
    MESSAGING_KAFKA = "messaging_kafka"
    MESSAGING_SQS = "messaging_sqs"
    MESSAGING_PUBSUB = "messaging_pubsub"

    # --- Real-time communication ---
    REALTIME_WEBSOCKET = "websocket"

    # --- Notifications ---
    NOTIFICATION_WEBHOOK = "webhook"

    # --- Storage ---
    LOCAL_STORAGE = "local_storage"
    INMEMORY = "inmemory"
    GCS = "gcs"
    S3 = "s3"
    AZURE_BLOB = "azure_blob"
    HDFS = "hdfs"

    # --- Files ---
    FILE = "file"
    FILE_CSV = "file_csv"
    FILE_EXCEL = "file_excel"
    FILE_JSON = "file_json"
    FILE_PARQUET = "file_parquet"
    FILE_ORC = "file_orc"
    FILE_AVRO = "file_avro"
    FILE_TEXT = "file_text"
    FILE_IMAGE = "file_image"
    FILE_VIDEO = "file_video"
    FILE_AUDIO = "file_audio"
    FILE_PDF = "file_pdf"
    FILE_WORD = "file_word"
    FILE_POWERPOINT = "file_powerpoint"
    FILE_HTML = "file_html"
    FILE_MARKDOWN = "file_markdown"
    FILE_XML = "file_xml"
    FILE_YAML = "file_yaml"
    FILE_TOML = "file_toml"
    FILE_OTHER = "file_other"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
105
|
+
|
|
106
|
+
class PipelineTriggerType(Enum):
    """What caused a pipeline run to start."""

    MANUAL = "manual"
    SCHEDULER = "scheduler"
    SCHEDULER_MAIN = "scheduler_main"
    SCHEDULER_FALLBACK = "scheduler_fallback"
    SCHEDULER_RETRY = "scheduler_retry"
    SCHEDULED_VERIFICATION = "scheduled_verification"
    EVENT_GCS_UPLOAD = "event_gcs_upload"
    EVENT_PUBSUB = "event_pubsub"
    ANOTHER_PIPELINE = "another_pipeline"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class DataOperationType(Enum):
    """Kind of data operation a pipeline step performs (read/transform/write/DDL).

    NOTE: the three BIGQUERY_WRITE_* members map to BigQuery's write
    dispositions (WRITE_APPEND / WRITE_TRUNCATE / WRITE_EMPTY); their
    comments below were previously shuffled and have been corrected.
    """

    # --- Read operations ---
    SOURCE = "source"    # Reading data from source
    QUERY = "query"      # For databases or systems that support queries
    SCAN = "scan"        # Reading all data sequentially (e.g., files)
    READ = "read"        # General read operations
    GET = "get"          # Getting a single record
    IMPORT = "import"

    # --- Transform operations ---
    TRANSFORM = "transform"
    PREPROCESS = "preprocess"
    ENRICH = "enrich"
    JOIN = "join"
    AGGREGATE = "aggregate"
    FILTER = "filter"
    SORT = "sort"
    GROUP = "group"

    # --- Write operations ---
    POST = "post"        # Creating new records
    PUT = "put"
    PATCH = "patch"
    WRITE = "write"
    WRITE_TO_FILE = "write_to_file"
    APPEND = "append"
    UPSERT = "upsert"
    INSERT = "insert"
    OVERWRITE = "overwrite"
    INCREMENT = "increment"
    UPDATE = "update"
    DELETE = "delete"
    EXPORT = "export"
    COPY = "copy"
    MERGE = "merge"                # Merging data; combines INSERT, UPDATE, DELETE operations
    MERGE_UPSERT = "merge_upsert"  # Merging data; combines INSERT, UPDATE, DELETE operations
    BIGQUERY_WRITE_APPEND = "bigquery_write_append"      # Appends rows to the existing table (BigQuery WRITE_APPEND)
    BIGQUERY_WRITE_TRUNCATE = "bigquery_write_truncate"  # Empties the table first, then writes (BigQuery WRITE_TRUNCATE)
    BIGQUERY_WRITE_EMPTY = "bigquery_write_empty"        # Writes only if the table is empty; fails otherwise (BigQuery WRITE_EMPTY)

    # --- Create operations ---
    CREATE_TABLE = "create_table"
    CREATE_DATABASE = "create_database"
    CREATE_COLLECTION = "create_collection"
    CREATE_INDEX = "create_index"
    CREATE_SCHEMA = "create_schema"
    CREATE_MODEL = "create_model"
    CREATE_VIEW = "create_view"

    # --- Alter operations ---
    ALTER_TABLE = "alter_table"
    ALTER_DATABASE = "alter_database"
    ALTER_COLLECTION = "alter_collection"
    ALTER_INDEX = "alter_index"
    ALTER_SCHEMA = "alter_schema"
    ALTER_MODEL = "alter_model"
    ALTER_VIEW = "alter_view"

    # --- Drop operations ---
    DROP_TABLE = "drop_table"
    DROP_DATABASE = "drop_database"
    DROP_COLLECTION = "drop_collection"
    DROP_INDEX = "drop_index"
    DROP_SCHEMA = "drop_schema"
    DROP_MODEL = "drop_model"
    DROP_VIEW = "drop_view"

    # --- Truncate operations ---
    TRUNCATE_TABLE = "truncate_table"
    TRUNCATE_COLLECTION = "truncate_collection"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
188
|
+
class MatchConditionType(Enum):
    """Predicate kinds used when matching records or values."""

    EXACT = "exact"
    PREFIX = "prefix"
    SUFFIX = "suffix"
    CONTAINS = "contains"
    REGEX = "regex"
    IN_RANGE = "in_range"
    NOT_IN_RANGE = "not_in_range"
    GREATER_THAN = "greater_than"
    LESS_THAN = "less_than"
    GREATER_THAN_OR_EQUAL = "greater_than_or_equal"
    LESS_THAN_OR_EQUAL = "less_than_or_equal"
    IN_LIST = "in_list"
    NOT_IN_LIST = "not_in_list"
    ON_FIELD_MATCH = "on_field_match"
    ON_FIELD_EQUAL = "on_field_equal"
    ON_FIELDS_EQUAL_TO = "on_fields_equal_to"
    ON_FIELDS_COMBINATION = "on_fields_combination"
    NOT_APPLICABLE = "not_applicable"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class DuplicationHandling(Enum):
    """Strategy to apply when a write encounters duplicate records."""

    RAISE_ERROR = "raise_error"
    OVERWRITE = "overwrite"
    INCREMENT = "increment"
    SKIP = "skip"
    SYSTEM_DEFAULT = "system_default"
    ALLOW = "allow"  # Applicable for databases allowing duplicates, e.g. BigQuery
    MERGE_DEFAULT = "merge_default"
    MERGE_PRESERVE_SOURCE_ON_DUPLICATES = "merge_preserve_source_on_dups"
    MERGE_PRESERVE_TARGET_ON_DUPLICATES = "merge_preserve_target_on_dups"
    MERGE_PRESERVE_BOTH_ON_DUPLICATES = "merge_preserve_both_on_dups"
    MERGE_RAISE_ERROR_ON_DUPLICATES = "merge_raise_error_on_dups"
    MERGE_CUSTOM = "merge_custom"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class DuplicationHandlingStatus(Enum):
    """Outcome recorded after a duplication-handling strategy was applied."""

    ALLOWED = "allowed"
    RAISED_ERROR = "raised_error"
    SYSTEM_DEFAULT = "system_default"
    OVERWRITTEN = "overwritten"
    SKIPPED = "skipped"
    INCREMENTED = "incremented"
    OPERATION_CANCELLED = "operation_cancelled"
    MERGED = "merged"
    MERGED_PRESERVED_SOURCE = "merged_preserved_source"
    MERGED_PRESERVED_TARGET = "merged_preserved_target"
    MERGED_PRESERVED_BOTH = "merged_preserved_both"
    MERGED_RAISED_ERROR = "merged_raised_error"
    MERGED_CUSTOM = "merged_custom"
    NO_DUPLICATES = "no_duplicates"
    UNKNOWN = "unknown"
    UNEXPECTED_ERROR = "unexpected_error"
    CONDITIONAL_ERROR = "conditional_error"
    NOT_APPLICABLE = "not_applicable"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
252
|
+
|
|
253
|
+
class CodingLanguage(Enum):
    """Programming language / runtime identifiers used in Pulse."""

    PYTHON = "python"
    NODEJS = "nodejs"
    JAVA = "java"
    JAVASCRIPT = "javascript"
    TYPESCRIPT = "typescript"
    REACTJS = "reactjs"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class ExecutionLocation(Enum):
    """Where a pipeline or function executes (local / GCP / AWS / Azure)."""

    # --- Local execution environments ---
    LOCAL_SCRIPT = "local_script"
    LOCAL_JUPYTER_NOTEBOOK = "local_jupyter_notebook"
    LOCAL_SERVER = "local_server"
    LOCAL_DOCKER = "local_docker"
    LOCAL_KUBERNETES = "local_kubernetes"

    # --- Local emulation of GCP runtimes ---
    LOCAL_GCP_CLOUD_FUNCTION = "local_gcp_cloud_function"
    LOCAL_GCP_CLOUD_RUN = "local_gcp_cloud_run"

    # --- GCP execution environments ---
    CLOUD_GCP_JUPYTER_NOTEBOOK = "cloud_gcp_jupyter_notebook"
    CLOUD_GCP_CLOUD_FUNCTION = "cloud_gcp_cloud_function"
    CLOUD_GCP_CLOUD_RUN = "cloud_gcp_cloud_run"
    CLOUD_GCP_COMPUTE_ENGINE = "cloud_gcp_compute_engine"
    CLOUD_GCP_DATAPROC = "cloud_gcp_dataproc"
    CLOUD_GCP_DATAFLOW = "cloud_gcp_dataflow"
    CLOUD_GCP_BIGQUERY = "cloud_gcp_bigquery"

    # --- AWS execution environments ---
    CLOUD_AWS_LAMBDA = "cloud_aws_lambda"
    CLOUD_AWS_EC2 = "cloud_aws_ec2"
    CLOUD_AWS_EMR = "cloud_aws_emr"
    CLOUD_AWS_GLUE = "cloud_aws_glue"
    CLOUD_AWS_ATHENA = "cloud_aws_athena"
    CLOUD_AWS_REDSHIFT = "cloud_aws_redshift"

    # --- Azure execution environments ---
    CLOUD_AZURE_FUNCTIONS = "cloud_azure_functions"
    CLOUD_AZURE_VIRTUAL_MACHINES = "cloud_azure_virtual_machines"
    CLOUD_AZURE_SYNAPSE_ANALYTICS = "cloud_azure_synapse_analytics"
    CLOUD_AZURE_DATA_FACTORY = "cloud_azure_data_factory"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
299
|
+
|
|
300
|
+
class ExecutionComputeType(Enum):
    """Hardware type the workload ran on (CPU / GPU / TPU, by vendor)."""

    CPU_INTEL = "cpu_intel"
    CPU_AMD = "cpu_amd"
    CPU_ARM = "cpu_arm"
    GPU_NVIDIA = "gpu_nvidia"
    GPU_AMD = "gpu_amd"
    GPU_INTEL = "gpu_intel"
    TPU_GOOGLE = "tpu_google"
    TPU_INTEL = "tpu_intel"
    TPU_AMD = "tpu_amd"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# pylint: disable=missing-module-docstring
|
|
2
|
+
# pylint: disable=missing-function-docstring
|
|
3
|
+
# pylint: disable=missing-class-docstring
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class LoggingHandlers(Enum):
    """Destinations a logger can forward records to (local or remote)."""

    NONE = "none"                  # No remote handler
    LOCAL_STREAM = "local_stream"  # Local stream handler
    GCP_CLOUD_LOGGING = "gcp_cloud_logging"
    GCP_ERROR_REPORTING = "gcp_error_reporting"
    GCP_FIREBASE = "gcp_firebase"
    AWS_CLOUD_WATCH = "aws_cloud_watch"
    AZURE_MONITOR = "azure_monitor"
    AZURE_APPLICATION_INSIGHTS = "azure_application_insights"
    IBM_LOG_ANALYTICS = "ibm_log_analytics"
    ALIBABA_LOG_SERVICE = "alibaba_log_service"
    LOGGLY = "loggly"
    DATADOG = "datadog"
    NEW_RELIC = "new_relic"
    SENTRY = "sentry"
    SUMOLOGIC = "sumologic"
    # --- Other ---
    SYSLOG = "syslog"  # For system logs
    CUSTOM = "custom"  # For a user-defined remote handler
    OTHER = "other"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
30
|
+
|
|
31
|
+
class TargetLogs(Enum):
    """Which severity families a log sink should receive."""

    MIXED = "mixed_logs"
    SUCCESSES = "success_logs"
    NOTICES = "notice_logs"
    SUCCESSES_AND_NOTICES = "succs_n_notc_logs"
    WARNINGS = "warning_logs"
    WARNINGS_AND_ERRORS = "warn_n_err_logs"
    ERRORS = "error_logs"

    def __str__(self) -> str:
        """Render as the bare string value, for logs and serialization."""
        return self.value
|
|
42
|
+
|
|
43
|
+
class LogLevel(Enum):
    """
    Standardized notice levels for data engineering pipelines,
    designed for easy analysis and identification of manual
    intervention needs.

    Fixes in this revision:
    - ``__str__`` previously returned ``self.value`` (an ``int``), so
      ``str(LogLevel.X)`` raised ``TypeError: __str__ returned non-string``;
      it now returns ``str(self.value)``.
    - Correctly-spelled aliases are provided for the misspelled members
      (``PERSISTNACE``/``PERSISTANCE`` -> ``PERSISTENCE``, ``ERORR_`` ->
      ``ERROR_``). The original names are kept so existing callers keep
      working; the aliases share the same value (Enum alias mechanism).
    """

    DEBUG = 10  # Detailed debug information (for development/troubleshooting)

    INFO = 100
    INFO_REMOTE_PERSISTNACE_COMPLETE = 101  # NOTE: misspelled; prefer the alias below
    INFO_REMOTE_PERSISTENCE_COMPLETE = 101  # alias of INFO_REMOTE_PERSISTNACE_COMPLETE
    INFO_REMOTE_UPDATE_COMPLETE = 102
    INFO_REMOTE_DELETE_COMPLETE = 103

    INFO_REMOTE_BULK_PERSISTNACE_COMPLETE = 111  # NOTE: misspelled; prefer the alias below
    INFO_REMOTE_BULK_PERSISTENCE_COMPLETE = 111  # alias
    INFO_REMOTE_BULK_UPDATE_COMPLETE = 112
    INFO_REMOTE_BULK_DELETE_COMPLETE = 113

    INFO_LOCAL_PERSISTNACE_COMPLETE = 121  # NOTE: misspelled; prefer the alias below
    INFO_LOCAL_PERSISTENCE_COMPLETE = 121  # alias

    SUCCESS = 201
    SUCCESS_WITH_NOTICES = 211
    SUCCESS_WITH_WARNINGS = 212

    NOTICE = 300  # e.g. same file or data already fully or partially exists
    NOTICE_ALREADY_EXISTS = 301    # Data already exists, no action required
    NOTICE_PARTIAL_EXISTS = 302    # Partial data exists, no action required
    NOTICE_ACTION_CANCELLED = 303  # Data processing cancelled, no action required

    # Warnings indicate potential issues that might require attention:
    WARNING = 400  # General warning, no immediate action required
    # WARNING_NO_ACTION = 401 # Minor issue or unexpected behavior, no immediate action required (can be logged frequently)
    WARNING_REVIEW_RECOMMENDED = 402  # Review recommended to prevent potential future issues
    WARNING_FIX_RECOMMENDED = 403     # Fix recommended to prevent potential future issues
    WARNING_FIX_REQUIRED = 404        # Action required, pipeline can likely continue

    ERROR = 500  # General error

    ERROR_EXCEPTION = 501
    ERROR_CUSTOM = 502

    ERROR_OPERATION_PARTIALLY_FAILED = 511  # Partial failure, manual intervention required
    ERROR_OPERATION_FAILED = 512            # Operation failed, manual intervention required
    ERORR_OPERATION_WITH_WARNINGS = 513           # NOTE: misspelled; prefer the alias below
    ERROR_OPERATION_WITH_WARNINGS = 513           # alias of ERORR_OPERATION_WITH_WARNINGS
    ERORR_OPERATION_WITH_ERRORS = 514             # NOTE: misspelled; prefer the alias below
    ERROR_OPERATION_WITH_ERRORS = 514             # alias
    ERORR_OPERATION_WITH_WARNINGS_OR_ERRORS = 515 # NOTE: misspelled; prefer the alias below
    ERROR_OPERATION_WITH_WARNINGS_OR_ERRORS = 515 # alias

    ERROR_PERSISTANCE_FAILED = 522  # NOTE: misspelled; prefer the alias below
    ERROR_PERSISTENCE_FAILED = 522  # alias
    ERROR_UPDATE_FAILED = 523       # Data update failed, manual intervention required
    ERROR_DELETE_FAILED = 524       # Data deletion failed, manual intervention required
    ERROR_PERSISTANCE_WITH_ERRORS = 525  # NOTE: misspelled; prefer the alias below
    ERROR_PERSISTENCE_WITH_ERRORS = 525  # alias
    ERROR_UPDATE_WITH_ERRORS = 526
    ERROR_DELETE_WITH_ERRORS = 527

    ERROR_THRESHOLD_REACHED = 551
    ERROR_PIPELINE_THRESHOLD_REACHED = 552
    ERROR_SUBTHRESHOLD_REACHED = 553
    ERROR_DATA_QUALITY_THRESHOLD_REACHED = 554
    ERROR_METADATA_QUALITY_THRESHOLD_REACHED = 555

    # Critical errors indicate severe failures requiring immediate attention:
    CRITICAL = 600                   # General critical error, requires immediate action
    CRITICAL_SYSTEM_FAILURE = 601    # System-level failure (e.g. infrastructure), requires immediate action

    UNKNOWN = 1001  # Unknown error, should not be used in normal operation

    def __str__(self) -> str:
        """Render the numeric level as a string (values are ints, unlike the other enums)."""
        return str(self.value)
|