logos-sdk 0.0.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ MIT License
2
+ Copyright (c) 2018 YOUR NAME
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+ The above copyright notice and this permission notice shall be included in all
10
+ copies or substantial portions of the Software.
11
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17
+ SOFTWARE.
@@ -0,0 +1,244 @@
1
+ Metadata-Version: 2.1
2
+ Name: logos-sdk
3
+ Version: 0.0.23
4
+ Summary: SDK for Logos platform
5
+ Home-page: https://bitbucket.org/databy/logos-sdk-pip/src/master/
6
+ Author: Databy.io
7
+ Author-email: admin@proficio.cz
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Operating System :: OS Independent
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: requests
13
+ Requires-Dist: google-auth
14
+ Requires-Dist: google-cloud-logging
15
+ Requires-Dist: google-cloud-bigquery
16
+ Requires-Dist: python-dotenv
17
+ Requires-Dist: google-api-python-client
18
+ Requires-Dist: httplib2
19
+ Requires-Dist: pandas
20
+ Requires-Dist: db-dtypes
21
+ Requires-Dist: numpy
22
+
23
+ # Logos Software Development Kit
24
+
25
+ Logos Software Development Kit (Logos SDK) is a private library encapsulating functionality shared by Logos Scripts - configurable scripts running as Cloud Functions, triggered by Cloud Scheduler, which are controlled and monitored via the Logos UI, as a part of the Logos Ecosystem.
26
+
27
+ # Functionality modules
28
+
29
+ ## Logging
30
+ Cloud Function Scripts within the Logos ecosystem have special requirements for logging. Apart from simple JSON messages, the logs need to also carry information on:
31
+
32
+ - trace (used for tracking runs of Cloud Functions)
33
+ - labels (fields used for internal Logos filtering)
34
+ - logger name
35
+
36
+ The standard Python `logging` library does not offer the possibility of capturing a combination of these values in a format parsable by Google Cloud Logging - this is only possible through the `google-cloud-logging` library. On the other hand, `google-cloud-logging` transmits all logs to Google Cloud Logging even when a script is run on the local machine (with the exception of running it within the `functions-framework` environment), which is something to be avoided when developing locally or testing on Bitbucket, as we do not want to waste our resources.
37
+
38
+ This module serves as a wrapper over the standard Python `logging` and `google-cloud-logging` libraries, with the default being set to `google-cloud-logging`. The switch is based on the presence of environment variables `DEVELOPMENT` or `TESTING`: if either of these is set within the venv environment, `logging` library is used instead.
39
+ If deploying to development instance in Cloud, we want the logs to be logged into Cloud, but not to clash with the production instance, therefore setting the environment variable `CLOUD_DEVELOPMENT` ensures that the logs are logged under name `logos-logging-development` instead of the standard `logos-logging`.
40
+
41
+ ```bash
42
+ export DEVELOPMENT=True
43
+ #or
44
+ export TESTING=True
45
+ #or
46
+ export CLOUD_DEVELOPMENT=True
47
+ ```
48
+
49
+ ### Usage
50
+
51
+ To set up the Logger, one can use the parsing function `setup_from_request`, which expects a Cloud Function Script trigger request in a usual format:
52
+
53
+ ```python
54
+ import functions_framework
55
+ import logos_sdk.logging as logos_logging
56
+
57
+ @functions_framework.http
58
+ def run(request):
59
+ logger, labels, settings, secrets = logos_logging.setup_from_request(request, "logos-logging")
60
+ ```
61
+ ```
62
+ EXPECTED_REQUEST_JSON_BODY = {
63
+ "settings": {},
64
+ "id": "",
65
+ "author": "",
66
+ "script": "",
67
+ "client": "",
68
+ "accesses": [
69
+ {
70
+ "account": {"id": "", "account_platform_id": ""},
71
+ "secret": {"id": "", "name": ""},
72
+ "platform": {"id": "", "short_name": ""}
73
+ },
74
+ ...
75
+ ],
76
+ }
77
+ ```
78
+
79
+ The trigger request can be also processed separately, in which case the logger is instantiated from the class itself:
80
+
81
+ ```python
82
+ LogosLogger(name="logos-logging", labels={}, trace="")
83
+ ```
84
+
85
+ ### Output
86
+ When the standard Python `logging` library is used, the output logs have the following structure:
87
+
88
+ ```python
89
+ Lowest possible severity: INFO
90
+
91
+ String output: SEVERITY:logger-name:message
92
+
93
+ LogEntry output:
94
+ LogEntry.name = "logos-logging"
95
+ LogEntry.msg = {"message": ""}
96
+ LogEntry.level = 40 | ...
97
+ LogEntry.levelname = INFO | ERROR | ...
98
+ LogEntry.json_fields = {
99
+ "logging.googleapis.com/trace": trace,
100
+ "logging.googleapis.com/labels": {
101
+ "log_type": RESULT | NOTIFICATION | DEBUG,
102
+ **labels,
103
+ },
104
+ }
105
+ ```
106
+
107
+ With `google-cloud-logging`, the output logs should have the following structure:
108
+
109
+ ```python
110
+ Lowest possible severity: INFO
111
+ {
112
+ "logName": "projects/logos-382010/logs/logos-logging",
113
+ "trace": "projects/logos-382010/traces/some-trace",
114
+ "severity": INFO | ERROR | ...,
115
+ "jsonPayload": { "message": "", ... },
116
+ "labels": {
117
+ "log_type": RESULT | NOTIFICATION | DEBUG,
118
+ "id": "Logos id of the job",
119
+ "author": "Logos id of the job author",
120
+ "client": "Logos id of the client",
121
+ "script": "Logos id of the script",
122
+ **platform_accounts,
123
+ }
124
+ }
125
+ ```
126
+
127
+ Depending on the `platform_accounts` the Cloud Function Script accesses during its run, `labels` might also contain the Logos platform `short_name` field as a key and the Logos `account_platform_id` as a value, for example:
128
+
129
+ ```python
130
+ {
131
+ "merchant-center": "xxxxxxx",
132
+ "google-ads": "xxx-xxx-xxxx",
133
+ }
134
+ ```
135
+
136
+ To sum it up, an example of the complete labels contents would be:
137
+
138
+ ```python
139
+ {
140
+ "labels": {
141
+ "log_type": "result",
142
+ "id": "0",
143
+ "author": "0",
144
+ "client": "0",
145
+ "script": "0",
146
+ "merchant-center": "0000000",
147
+ "google-ads": "000-000-0000"
148
+ }
149
+ }
150
+ ```
151
+
152
+ ## Services
153
+ This module serves as a wrapper over communicating with Logos Services. When `DEVELOPMENT`, `TESTING` or `CLOUD_DEVELOPMENT` environment variable is set
154
+
155
+ ```bash
156
+ export DEVELOPMENT=True
157
+ #or
158
+ export TESTING=True
159
+ #or
160
+ export CLOUD_DEVELOPMENT=True
161
+ ```
162
+
163
+ the URLs of development services (bound to the development branches in BitBucket) are used. On the other hand, if none of these is set, production URLs are called (used in the Cloud production environment).
164
+
165
+ # Installation as a dependency to Logos Scripts
166
+
167
+ ## Local environment
168
+
169
+ This library is hosted as a private package in Google Artifact Registry, only accessible to authorised users - to be able to include it as a pip dependency in a Logos Script requirements.txt file, one must ensure that `google-auth`, `keyring` and `keyrings.google-artifactregistry-auth` packages were pre-installed. These libraries are able to read the contents of `GOOGLE_APPLICATION_CREDENTIALS` environment variable and pull the information necessary for authenticating to Google Artifact Registry when fetching the `logos-sdk` package from the extra index URL.
170
+
171
+ To sum it up, the required steps for installing `logos-sdk` as a dependency are:
172
+
173
+ 1. In a Logos Script requirements.txt, put the following:
174
+ ```bash
175
+ --extra-index-url https://europe-west1-python.pkg.dev/logos-382010/logos-sdk/simple logos-sdk
176
+
177
+ logos-sdk==[desired version]
178
+ ```
179
+ 2. Then, in your terminal with the venv active, run the following:
180
+ ```bash
181
+ export GOOGLE_APPLICATION_CREDENTIALS=logos-382010-ee0fd6995649.json
182
+ pip3 install -r requirements.txt
183
+ export DEVELOPMENT=True
184
+ ```
185
+
186
+ To verify that the library was successfully installed, you can view the installed package information via:
187
+
188
+ ```bash
189
+ pip3 show -f logos-sdk
190
+ ```
191
+
192
+ ## BitBucket pipelines environment
193
+
194
+ Firstly, read the local environment setting. If "been there, done that", the setting for BitBucket pipeline testing is very similar. First, go to pipeline variables in the BitBucket UI and create a variable `GOOGLE_CREDENTIALS`, pasting the contents of the `logos-accessor` service account credentials file into it. Then your bitbucket-pipelines.yml testing step should look like this:
195
+
196
+ ```
197
+ pipelines:
198
+ default:
199
+ - step:
200
+ name: Test
201
+ caches:
202
+ - pip
203
+ script:
204
+ - echo $GOOGLE_CREDENTIALS > logos-382010-ee0fd6995649.json
205
+ - export GOOGLE_APPLICATION_CREDENTIALS=$PWD/logos-382010-ee0fd6995649.json
206
+ - export TESTING=True
207
+ - pip3 install pytest google-auth keyring keyrings.google-artifactregistry-auth
208
+ - if [ -f requirements.txt ]; then pip3 install -r requirements.txt; fi
209
+ - python3 -m pytest -v tests/* --junitxml=test-reports/report.xml
210
+ ```
211
+
212
+ ## Google Cloud environment
213
+ Firstly, read the local environment setting. If "been there, done that", your Dockerfile should contain the following steps:
214
+
215
+ ```
216
+ RUN apt-get update && apt-get install -y git
217
+ RUN pip3 install google-auth keyring keyrings.google-artifactregistry-auth
218
+ RUN pip3 install --no-cache-dir -r requirements.txt
219
+ ```
220
+
221
+ In your cloudbuild.yaml file, the build step should contain the `--network=cloudbuild` parameter, as this ensures that the `keyring` auth libraries have access to the necessary credentials directly from the Cloud Build environment (we no longer need to set `GOOGLE_APPLICATION_CREDENTIALS` environment variable):
222
+
223
+ ```
224
+ - name: 'gcr.io/cloud-builders/docker'
225
+ args: [ 'build', '--network=cloudbuild', '-t', 'gcr.io/logos-382010/merchant-control', '.' ]
226
+ ```
227
+
228
+ If you are deploying a development cloud instance, in Cloud Run settings, `CLOUD_DEVELOPMENT` needs to be set:
229
+
230
+ ```bash
231
+ export CLOUD_DEVELOPMENT=True
232
+ ```
233
+
234
+
235
+ # Development & versioning
236
+ This is a versioned package and a version number can only be pushed once into Google Artifact Registry. Development of new features/refactor/debug follow the naming convention of:
237
+
238
+ ```
239
+ "feature/[short name]"
240
+ "refactor/[short name]"
241
+ "debug/[short name]"
242
+ ```
243
+
244
+ Pull requests are directed to the current development branch, which always bears the number of the newest version, for example `development-0.0.2`. After adding a major feature, or a number of less significant refactors and hot-fixes, the current development branch is merged into master and deployed into Google Artifact Registry. The old development branch is then deleted and a new branch with the following version `development-0.0.3` will be created, and the first commit must include changing the version in the `setup.py` file.
@@ -0,0 +1,222 @@
1
+ # Logos Software Development Kit
2
+
3
+ Logos Software Development Kit (Logos SDK) is a private library encapsulating functionality shared by Logos Scripts - configurable scripts running as Cloud Functions, triggered by Cloud Scheduler, which are controlled and monitored via the Logos UI, as a part of the Logos Ecosystem.
4
+
5
+ # Functionality modules
6
+
7
+ ## Logging
8
+ Cloud Function Scripts within the Logos ecosystem have special requirements for logging. Apart from simple JSON messages, the logs need to also carry information on:
9
+
10
+ - trace (used for tracking runs of Cloud Functions)
11
+ - labels (fields used for internal Logos filtering)
12
+ - logger name
13
+
14
+ The standard Python `logging` library does not offer the possibility of capturing a combination of these values in a format parsable by Google Cloud Logging - this is only possible through the `google-cloud-logging` library. On the other hand, `google-cloud-logging` transmits all logs to Google Cloud Logging even when a script is run on the local machine (with the exception of running it within the `functions-framework` environment), which is something to be avoided when developing locally or testing on Bitbucket, as we do not want to waste our resources.
15
+
16
+ This module serves as a wrapper over the standard Python `logging` and `google-cloud-logging` libraries, with the default being set to `google-cloud-logging`. The switch is based on the presence of environment variables `DEVELOPMENT` or `TESTING`: if either of these is set within the venv environment, `logging` library is used instead.
17
+ If deploying to development instance in Cloud, we want the logs to be logged into Cloud, but not to clash with the production instance, therefore setting the environment variable `CLOUD_DEVELOPMENT` ensures that the logs are logged under name `logos-logging-development` instead of the standard `logos-logging`.
18
+
19
+ ```bash
20
+ export DEVELOPMENT=True
21
+ #or
22
+ export TESTING=True
23
+ #or
24
+ export CLOUD_DEVELOPMENT=True
25
+ ```
26
+
27
+ ### Usage
28
+
29
+ To set up the Logger, one can use the parsing function `setup_from_request`, which expects a Cloud Function Script trigger request in a usual format:
30
+
31
+ ```python
32
+ import functions_framework
33
+ import logos_sdk.logging as logos_logging
34
+
35
+ @functions_framework.http
36
+ def run(request):
37
+ logger, labels, settings, secrets = logos_logging.setup_from_request(request, "logos-logging")
38
+ ```
39
+ ```
40
+ EXPECTED_REQUEST_JSON_BODY = {
41
+ "settings": {},
42
+ "id": "",
43
+ "author": "",
44
+ "script": "",
45
+ "client": "",
46
+ "accesses": [
47
+ {
48
+ "account": {"id": "", "account_platform_id": ""},
49
+ "secret": {"id": "", "name": ""},
50
+ "platform": {"id": "", "short_name": ""}
51
+ },
52
+ ...
53
+ ],
54
+ }
55
+ ```
56
+
57
+ The trigger request can be also processed separately, in which case the logger is instantiated from the class itself:
58
+
59
+ ```python
60
+ LogosLogger(name="logos-logging", labels={}, trace="")
61
+ ```
62
+
63
+ ### Output
64
+ When the standard Python `logging` library is used, the output logs have the following structure:
65
+
66
+ ```python
67
+ Lowest possible severity: INFO
68
+
69
+ String output: SEVERITY:logger-name:message
70
+
71
+ LogEntry output:
72
+ LogEntry.name = "logos-logging"
73
+ LogEntry.msg = {"message": ""}
74
+ LogEntry.level = 40 | ...
75
+ LogEntry.levelname = INFO | ERROR | ...
76
+ LogEntry.json_fields = {
77
+ "logging.googleapis.com/trace": trace,
78
+ "logging.googleapis.com/labels": {
79
+ "log_type": RESULT | NOTIFICATION | DEBUG,
80
+ **labels,
81
+ },
82
+ }
83
+ ```
84
+
85
+ With `google-cloud-logging`, the output logs should have the following structure:
86
+
87
+ ```python
88
+ Lowest possible severity: INFO
89
+ {
90
+ "logName": "projects/logos-382010/logs/logos-logging",
91
+ "trace": "projects/logos-382010/traces/some-trace",
92
+ "severity": INFO | ERROR | ...,
93
+ "jsonPayload": { "message": "", ... },
94
+ "labels": {
95
+ "log_type": RESULT | NOTIFICATION | DEBUG,
96
+ "id": "Logos id of the job",
97
+ "author": "Logos id of the job author",
98
+ "client": "Logos id of the client",
99
+ "script": "Logos id of the script",
100
+ **platform_accounts,
101
+ }
102
+ }
103
+ ```
104
+
105
+ Depending on the `platform_accounts` the Cloud Function Script accesses during its run, `labels` might also contain the Logos platform `short_name` field as a key and the Logos `account_platform_id` as a value, for example:
106
+
107
+ ```python
108
+ {
109
+ "merchant-center": "xxxxxxx",
110
+ "google-ads": "xxx-xxx-xxxx",
111
+ }
112
+ ```
113
+
114
+ To sum it up, an example of the complete labels contents would be:
115
+
116
+ ```python
117
+ {
118
+ "labels": {
119
+ "log_type": "result",
120
+ "id": "0",
121
+ "author": "0",
122
+ "client": "0",
123
+ "script": "0",
124
+ "merchant-center": "0000000",
125
+ "google-ads": "000-000-0000"
126
+ }
127
+ }
128
+ ```
129
+
130
+ ## Services
131
+ This module serves as a wrapper over communicating with Logos Services. When `DEVELOPMENT`, `TESTING` or `CLOUD_DEVELOPMENT` environment variable is set
132
+
133
+ ```bash
134
+ export DEVELOPMENT=True
135
+ #or
136
+ export TESTING=True
137
+ #or
138
+ export CLOUD_DEVELOPMENT=True
139
+ ```
140
+
141
+ the URLs of development services (bound to the development branches in BitBucket) are used. On the other hand, if none of these is set, production URLs are called (used in the Cloud production environment).
142
+
143
+ # Installation as a dependency to Logos Scripts
144
+
145
+ ## Local environment
146
+
147
+ This library is hosted as a private package in Google Artifact Registry, only accessible to authorised users - to be able to include it as a pip dependency in a Logos Script requirements.txt file, one must ensure that `google-auth`, `keyring` and `keyrings.google-artifactregistry-auth` packages were pre-installed. These libraries are able to read the contents of `GOOGLE_APPLICATION_CREDENTIALS` environment variable and pull the information necessary for authenticating to Google Artifact Registry when fetching the `logos-sdk` package from the extra index URL.
148
+
149
+ To sum it up, the required steps for installing `logos-sdk` as a dependency are:
150
+
151
+ 1. In a Logos Script requirements.txt, put the following:
152
+ ```bash
153
+ --extra-index-url https://europe-west1-python.pkg.dev/logos-382010/logos-sdk/simple logos-sdk
154
+
155
+ logos-sdk==[desired version]
156
+ ```
157
+ 2. Then, in your terminal with the venv active, run the following:
158
+ ```bash
159
+ export GOOGLE_APPLICATION_CREDENTIALS=logos-382010-ee0fd6995649.json
160
+ pip3 install -r requirements.txt
161
+ export DEVELOPMENT=True
162
+ ```
163
+
164
+ To verify that the library was successfully installed, you can view the installed package information via:
165
+
166
+ ```bash
167
+ pip3 show -f logos-sdk
168
+ ```
169
+
170
+ ## BitBucket pipelines environment
171
+
172
+ Firstly, read the local environment setting. If "been there, done that", the setting for BitBucket pipeline testing is very similar. First, go to pipeline variables in the BitBucket UI and create a variable `GOOGLE_CREDENTIALS`, pasting the contents of the `logos-accessor` service account credentials file into it. Then your bitbucket-pipelines.yml testing step should look like this:
173
+
174
+ ```
175
+ pipelines:
176
+ default:
177
+ - step:
178
+ name: Test
179
+ caches:
180
+ - pip
181
+ script:
182
+ - echo $GOOGLE_CREDENTIALS > logos-382010-ee0fd6995649.json
183
+ - export GOOGLE_APPLICATION_CREDENTIALS=$PWD/logos-382010-ee0fd6995649.json
184
+ - export TESTING=True
185
+ - pip3 install pytest google-auth keyring keyrings.google-artifactregistry-auth
186
+ - if [ -f requirements.txt ]; then pip3 install -r requirements.txt; fi
187
+ - python3 -m pytest -v tests/* --junitxml=test-reports/report.xml
188
+ ```
189
+
190
+ ## Google Cloud environment
191
+ Firstly, read the local environment setting. If "been there, done that", your Dockerfile should contain the following steps:
192
+
193
+ ```
194
+ RUN apt-get update && apt-get install -y git
195
+ RUN pip3 install google-auth keyring keyrings.google-artifactregistry-auth
196
+ RUN pip3 install --no-cache-dir -r requirements.txt
197
+ ```
198
+
199
+ In your cloudbuild.yaml file, the build step should contain the `--network=cloudbuild` parameter, as this ensures that the `keyring` auth libraries have access to the necessary credentials directly from the Cloud Build environment (we no longer need to set `GOOGLE_APPLICATION_CREDENTIALS` environment variable):
200
+
201
+ ```
202
+ - name: 'gcr.io/cloud-builders/docker'
203
+ args: [ 'build', '--network=cloudbuild', '-t', 'gcr.io/logos-382010/merchant-control', '.' ]
204
+ ```
205
+
206
+ If you are deploying a development cloud instance, in Cloud Run settings, `CLOUD_DEVELOPMENT` needs to be set:
207
+
208
+ ```bash
209
+ export CLOUD_DEVELOPMENT=True
210
+ ```
211
+
212
+
213
+ # Development & versioning
214
+ This is a versioned package and a version number can only be pushed once into Google Artifact Registry. Development of new features/refactor/debug follow the naming convention of:
215
+
216
+ ```
217
+ "feature/[short name]"
218
+ "refactor/[short name]"
219
+ "debug/[short name]"
220
+ ```
221
+
222
+ Pull requests are directed to the current development branch, which always bears the number of the newest version, for example `development-0.0.2`. After adding a major feature, or a number of less significant refactors and hot-fixes, the current development branch is merged into master and deployed into Google Artifact Registry. The old development branch is then deleted and a new branch with the following version `development-0.0.3` will be created, and the first commit must include changing the version in the `setup.py` file.
@@ -0,0 +1,4 @@
1
# Symbolic names of the environments the SDK distinguishes between.
# NOTE(review): DEVELOPMENT, TESTING and CLOUD_DEVELOPMENT are spelled like
# the environment variables described in the README; how these constants are
# consumed is not visible from this module - confirm against the callers.
PRODUCTION = "PRODUCTION"
DEVELOPMENT = "DEVELOPMENT"
TESTING = "TESTING"
CLOUD_DEVELOPMENT = "CLOUD_DEVELOPMENT"
@@ -0,0 +1,152 @@
1
import os
import time
from datetime import datetime
from typing import List, Dict, Union, Optional

import google.auth.exceptions
import numpy as np
from dotenv import load_dotenv
from google.api_core.exceptions import Conflict, NotFound
from google.api_core.retry import Retry
from google.auth.exceptions import DefaultCredentialsError
from google.cloud import bigquery
from google.cloud.bigquery import Table
from google.cloud.logging import Client as LoggerClient

from logos_sdk.big_query import retry_on_not_found
19
+
20
+
21
class BigQueryException(Exception):
    """Error carrying the failure payload of a BigQuery operation.

    Attributes:
        messages: Whatever error payload the raiser supplied; kept verbatim
            so callers can inspect it.
    """

    def __init__(self, messages):
        # Forward the payload to Exception so that str(exc), exc.args,
        # tracebacks and pickling all include it instead of being empty.
        super().__init__(messages)
        self.messages = messages
24
+
25
+
26
class BigQuery:
    """Convenience wrapper around ``google.cloud.bigquery.Client``.

    The target project is taken from the ``PROJECT_ID`` environment variable
    (a ``.env`` file is loaded first). Table identifiers are always expanded
    to the fully qualified ``project.dataset.table`` form.
    """

    # Maximum number of rows sent in a single insert_rows call.
    BQ_ROWS_LIMIT = 10000
    _service = None

    def __init__(self):
        load_dotenv()
        self.project_id = os.environ.get("PROJECT_ID")
        self._service = bigquery.Client(project=self.project_id)
        try:
            self.logger = LoggerClient(_use_grpc=False).logger(name="logos-logging")
        except DefaultCredentialsError:
            # No usable credentials for Cloud Logging: run without a logger.
            self.logger = None

    def get_dataset(self, dataset_id: str):
        """Return the dataset identified by ``dataset_id``."""
        return self._service.get_dataset(dataset_id)

    def create_dataset(self, dataset_id: str):
        """Create and return the dataset identified by ``dataset_id``."""
        return self._service.create_dataset(dataset_id)

    def delete_dataset(self, dataset_id: str):
        """Delete the dataset; a missing dataset is not an error."""
        self._service.delete_dataset(dataset_id, not_found_ok=True)

    def _get_table_id_sql_format(self, dataset_id: str, table_id: str):
        """Return the fully qualified ``project.dataset.table`` identifier."""
        return f"{self.project_id}.{dataset_id}.{table_id}"

    def run_query(self, query: str) -> List[Dict]:
        """Run ``query`` and return its rows as a list of dicts.

        Missing values are normalised to ``None`` in the returned records.
        """
        df = self._service.query(query).result().to_dataframe().fillna(np.nan)
        return df.replace([np.nan], [None]).to_dict("records")

    def get_table(self, dataset_id: str, table_id: str) -> Table:
        """Return the table object; raises ``NotFound`` if it does not exist."""
        sql_format = self._get_table_id_sql_format(dataset_id, table_id)
        return self._service.get_table(sql_format)

    def insert_into_table(
        self, dataset_id: str, table_id: str, records: List[Dict]
    ) -> None:
        """Insert ``records`` into an existing table.

        Raises:
            BigQueryException: if BigQuery reports errors for the insert.
        """
        bq_table = self.get_table(dataset_id, table_id)
        self._insert_into_table(bq_table, records)

    def insert_create_table(
        self,
        dataset_id: str,
        table_id: str,
        records: List[Dict],
        schema_columns: List[Dict],
    ) -> None:
        """Insert ``records``, creating the table from ``schema_columns`` first
        when it does not exist yet.

        Raises:
            BigQueryException: if BigQuery reports errors for the insert.
        """
        bq_table = self.check_table_exists(dataset_id, table_id)
        if bq_table is None:
            # create_table returns False when the table appeared between the
            # existence check and the create call; fetch the real table then
            # instead of handing False to the insert.
            bq_table = self.create_table(
                dataset_id, table_id, schema_columns
            ) or self.get_table(dataset_id, table_id)

        self._insert_into_table(bq_table, records)

    def delete_table(self, dataset_id: str, table_id: str) -> None:
        """Delete the table; a missing table is not an error."""
        sql_format = self._get_table_id_sql_format(dataset_id, table_id)
        # not_found_ok replaces the separate existence check (one round trip
        # less, and no window between the check and the delete).
        self._service.delete_table(sql_format, not_found_ok=True)

    def check_table_exists(self, dataset_id: str, table_id: str) -> Optional[Table]:
        """Return the table if it exists, otherwise ``None``."""
        try:
            return self.get_table(dataset_id, table_id)
        except NotFound:
            return None

    def create_table(
        self, dataset_id: str, table_id: str, schema_columns: List[Dict]
    ) -> Union[bool, Table]:
        """Create a table with the given schema.

        Args:
            schema_columns: one dict per column with the keys ``name``,
                ``col_type`` and ``mode``.

        Returns:
            The created table, or ``False`` if the table already exists.
        """
        table_schema = [
            bigquery.schema.SchemaField(row["name"], row["col_type"], mode=row["mode"])
            for row in schema_columns
        ]
        try:
            sql_format = self._get_table_id_sql_format(dataset_id, table_id)
            table_object = bigquery.Table(sql_format, schema=table_schema)
            return self._service.create_table(table_object)
        except Conflict:
            return False

    def create_view(self, dataset_id, view_id, sql_string):
        """Create a view defined by ``sql_string``.

        Returns:
            The created view, or ``False`` if an object with that id already
            exists.
        """
        try:
            sql_format = self._get_table_id_sql_format(dataset_id, view_id)
            view = bigquery.Table(sql_format)
            view.view_query = sql_string
            return self._service.create_table(view)
        except Conflict:
            return False

    def get_table_last_modified_date(self, dataset_id: str, table_id: str) -> datetime:
        """Return the table's last-modified time as a naive datetime.

        Raises:
            NotFound: if the dataset's ``__TABLES__`` listing has no row for
                ``table_id``.
        """
        # table_id is passed as a query parameter rather than interpolated
        # into the SQL text. The dataset path is an identifier and cannot be
        # parameterised, so it is still formatted in.
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("table_id", "STRING", table_id)
            ]
        )
        result_frame = (
            self._service.query(
                "SELECT TIMESTAMP_MILLIS(last_modified_time) as time_stamp "
                f"FROM `{self.project_id}.{dataset_id}.__TABLES__` "
                "WHERE table_id = @table_id",
                job_config=job_config,
            )
            .result()
            .to_dataframe()
        )

        if result_frame.empty:
            raise NotFound("Table does not exist!")

        stamp = result_frame["time_stamp"].iloc[0]
        # Convert directly instead of round-tripping through str()/strptime:
        # the string form has no fractional part when the milliseconds are
        # zero, which a "%f" format cannot parse.
        if hasattr(stamp, "to_pydatetime"):
            stamp = stamp.to_pydatetime()
        # Drop the timezone so the result stays a naive datetime, as before.
        return stamp.replace(tzinfo=None)

    @retry_on_not_found
    def _insert_into_table(
        self, bq_table: Table, records: List[Dict], attempts: int
    ) -> None:
        """Insert ``records`` into ``bq_table``, in chunks of ``BQ_ROWS_LIMIT``
        rows when there are more than that.

        ``attempts`` is supplied by the ``retry_on_not_found`` decorator.

        NOTE(review): the decorator re-invokes this whole method, so a
        ``NotFound`` raised after some chunks were accepted would resend those
        chunks - confirm this is acceptable for callers.

        Raises:
            BigQueryException: if ``insert_rows`` returns errors.
        """
        if not records:
            # Nothing to send; avoid issuing an empty insert request.
            return

        if len(records) > self.BQ_ROWS_LIMIT:
            # Only keywords that google.api_core's Retry actually defines are
            # passed. The previous total/connect/backoff_factor/allowed_methods
            # are urllib3-style arguments that this class does not define.
            chunk_retry = Retry(multiplier=2.0)
            for index in range(0, len(records), self.BQ_ROWS_LIMIT):
                errors = self._service.insert_rows(
                    bq_table,
                    records[index : index + self.BQ_ROWS_LIMIT],
                    retry=chunk_retry,
                )
                if errors:
                    raise BigQueryException(errors)
                # Pause between consecutive chunk requests.
                time.sleep(2)
        else:
            errors = self._service.insert_rows(bq_table, records)
            if errors:
                raise BigQueryException(errors)
@@ -0,0 +1,22 @@
1
+ from functools import wraps
2
+ from google.api_core.exceptions import NotFound
3
+ import time
4
+
5
# Total number of calls made before a NotFound error is allowed to propagate.
MAX_NUMBER_OF_ATTEMPTS = 2


def retry_on_not_found(wrapped_function):
    """Retry the wrapped call when it raises ``NotFound``.

    Inserting into a newly created table often fails at first, probably
    because the API needs a few seconds before it sees the new table. The
    wrapped function is called up to ``MAX_NUMBER_OF_ATTEMPTS`` times, with a
    two-second pause between attempts, and receives the 1-based attempt number
    as the keyword argument ``attempts``.

    Returns:
        A wrapper that returns whatever the wrapped function returns.

    Raises:
        NotFound: re-raised from the last attempt, so that a call which never
            succeeded is not mistaken for a successful one.
    """

    @wraps(wrapped_function)
    def inner(*args, **kwargs):
        for attempt in range(1, MAX_NUMBER_OF_ATTEMPTS + 1):
            kwargs["attempts"] = attempt
            try:
                return wrapped_function(*args, **kwargs)
            except NotFound:
                if attempt == MAX_NUMBER_OF_ATTEMPTS:
                    # Out of attempts: surface the error instead of silently
                    # returning None.
                    raise
                # Give the API time to see the new table before retrying.
                time.sleep(2)

    return inner