garf-executors 0.2.0__tar.gz → 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {garf_executors-0.2.0 → garf_executors-1.1.3}/PKG-INFO +24 -7
- {garf_executors-0.2.0 → garf_executors-1.1.3}/README.md +8 -2
- garf_executors-1.1.3/garf/executors/__init__.py +25 -0
- garf_executors-1.1.3/garf/executors/api_executor.py +228 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/bq_executor.py +42 -32
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/config.py +4 -3
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/cli.py +61 -30
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/grpc_server.py +22 -9
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/server.py +80 -14
- garf_executors-1.1.3/garf/executors/entrypoints/tracer.py +82 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/execution_context.py +6 -5
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/executor.py +4 -4
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/fetchers.py +54 -5
- garf_executors-1.1.3/garf/executors/garf_pb2.py +51 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/garf_pb2_grpc.py +45 -2
- garf_executors-1.1.3/garf/executors/query_processor.py +79 -0
- garf_executors-0.2.0/garf_executors/__init__.py → garf_executors-1.1.3/garf/executors/setup.py +14 -16
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/sql_executor.py +24 -11
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/telemetry.py +1 -1
- garf_executors-1.1.3/garf/executors/workflows/__init__.py +0 -0
- garf_executors-1.1.3/garf/executors/workflows/gcp_workflow.yaml +49 -0
- garf_executors-1.1.3/garf/executors/workflows/workflow.py +164 -0
- garf_executors-1.1.3/garf/executors/workflows/workflow_runner.py +172 -0
- garf_executors-1.1.3/garf_executors/__init__.py +25 -0
- garf_executors-1.1.3/garf_executors/api_executor.py +25 -0
- garf_executors-1.1.3/garf_executors/bq_executor.py +25 -0
- garf_executors-1.1.3/garf_executors/config.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/__init__.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/cli.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/grcp_server.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/server.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/tracer.py +25 -0
- garf_executors-1.1.3/garf_executors/entrypoints/utils.py +25 -0
- garf_executors-1.1.3/garf_executors/exceptions.py +25 -0
- garf_executors-1.1.3/garf_executors/execution_context.py +25 -0
- garf_executors-1.1.3/garf_executors/executor.py +25 -0
- garf_executors-1.1.3/garf_executors/fetchers.py +25 -0
- garf_executors-1.1.3/garf_executors/sql_executor.py +25 -0
- garf_executors-1.1.3/garf_executors/telemetry.py +25 -0
- garf_executors-1.1.3/garf_executors/workflow.py +25 -0
- {garf_executors-0.2.0 → garf_executors-1.1.3}/garf_executors.egg-info/PKG-INFO +24 -7
- garf_executors-1.1.3/garf_executors.egg-info/SOURCES.txt +49 -0
- garf_executors-1.1.3/garf_executors.egg-info/entry_points.txt +2 -0
- {garf_executors-0.2.0 → garf_executors-1.1.3}/garf_executors.egg-info/requires.txt +17 -4
- {garf_executors-0.2.0 → garf_executors-1.1.3}/garf_executors.egg-info/top_level.txt +1 -0
- {garf_executors-0.2.0 → garf_executors-1.1.3}/pyproject.toml +27 -6
- garf_executors-0.2.0/garf_executors/api_executor.py +0 -125
- garf_executors-0.2.0/garf_executors/entrypoints/tracer.py +0 -42
- garf_executors-0.2.0/garf_executors/garf_pb2.py +0 -45
- garf_executors-0.2.0/garf_executors.egg-info/SOURCES.txt +0 -26
- garf_executors-0.2.0/garf_executors.egg-info/entry_points.txt +0 -2
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/__init__.py +0 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/utils.py +0 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/exceptions.py +0 -0
- {garf_executors-0.2.0 → garf_executors-1.1.3}/garf_executors.egg-info/dependency_links.txt +0 -0
- {garf_executors-0.2.0 → garf_executors-1.1.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: garf-executors
|
|
3
|
-
Version:
|
|
3
|
+
Version: 1.1.3
|
|
4
4
|
Summary: Executes queries against API and writes data to local/remote storage.
|
|
5
5
|
Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
|
|
6
6
|
License: Apache 2.0
|
|
@@ -17,25 +17,36 @@ Classifier: Operating System :: OS Independent
|
|
|
17
17
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
18
|
Requires-Python: >=3.9
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: garf-core
|
|
21
|
-
Requires-Dist: garf-io
|
|
20
|
+
Requires-Dist: garf-core>=1.0.0
|
|
21
|
+
Requires-Dist: garf-io>=1.0.0
|
|
22
22
|
Requires-Dist: pyyaml
|
|
23
23
|
Requires-Dist: pydantic
|
|
24
24
|
Requires-Dist: opentelemetry-api
|
|
25
25
|
Requires-Dist: opentelemetry-sdk
|
|
26
|
+
Requires-Dist: opentelemetry-exporter-otlp
|
|
26
27
|
Provides-Extra: bq
|
|
27
28
|
Requires-Dist: garf-io[bq]; extra == "bq"
|
|
28
29
|
Requires-Dist: pandas; extra == "bq"
|
|
29
30
|
Requires-Dist: google-cloud-logging; extra == "bq"
|
|
31
|
+
Requires-Dist: smart_open[gcs]; extra == "bq"
|
|
30
32
|
Provides-Extra: sql
|
|
31
33
|
Requires-Dist: garf-io[sqlalchemy]; extra == "sql"
|
|
32
34
|
Requires-Dist: pandas; extra == "sql"
|
|
35
|
+
Provides-Extra: gcp
|
|
36
|
+
Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
|
|
33
37
|
Provides-Extra: server
|
|
34
38
|
Requires-Dist: fastapi[standard]; extra == "server"
|
|
39
|
+
Requires-Dist: pydantic-settings; extra == "server"
|
|
35
40
|
Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
|
|
36
|
-
Requires-Dist:
|
|
41
|
+
Requires-Dist: typer; extra == "server"
|
|
42
|
+
Requires-Dist: grpcio-reflection; extra == "server"
|
|
43
|
+
Provides-Extra: tests
|
|
44
|
+
Requires-Dist: pytest; extra == "tests"
|
|
45
|
+
Requires-Dist: pytest-mock; extra == "tests"
|
|
46
|
+
Requires-Dist: pytest-xdist; extra == "tests"
|
|
47
|
+
Requires-Dist: pytest-grpc; extra == "tests"
|
|
37
48
|
Provides-Extra: all
|
|
38
|
-
Requires-Dist: garf-executors[bq,server,sql]; extra == "all"
|
|
49
|
+
Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"
|
|
39
50
|
|
|
40
51
|
# `garf-executors` - One stop-shop for interacting with Reporting APIs.
|
|
41
52
|
|
|
@@ -64,8 +75,14 @@ garf <QUERIES> --source <API_SOURCE> \
|
|
|
64
75
|
where
|
|
65
76
|
|
|
66
77
|
* `<QUERIES>`- local or remote path(s) to files with queries.
|
|
67
|
-
*
|
|
68
|
-
*
|
|
78
|
+
* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
|
|
79
|
+
* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
|
|
69
80
|
|
|
70
81
|
If your report fetcher requires additional parameters you can pass them via key value pairs under `--source.` argument, i.e.`--source.regionCode='US'` - to get data only from *US*.
|
|
71
82
|
> Concrete `--source` parameters are dependent on a particular report fetcher and should be looked up in a documentation for this fetcher.
|
|
83
|
+
|
|
84
|
+
## Documentation
|
|
85
|
+
|
|
86
|
+
Explore full documentation working with `garf-executors`
|
|
87
|
+
|
|
88
|
+
* [Documentation](https://google.github.io/garf/usage/executors/)
|
|
@@ -25,8 +25,14 @@ garf <QUERIES> --source <API_SOURCE> \
|
|
|
25
25
|
where
|
|
26
26
|
|
|
27
27
|
* `<QUERIES>`- local or remote path(s) to files with queries.
|
|
28
|
-
*
|
|
29
|
-
*
|
|
28
|
+
* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
|
|
29
|
+
* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
|
|
30
30
|
|
|
31
31
|
If your report fetcher requires additional parameters you can pass them via key value pairs under `--source.` argument, i.e.`--source.regionCode='US'` - to get data only from *US*.
|
|
32
32
|
> Concrete `--source` parameters are dependent on a particular report fetcher and should be looked up in a documentation for this fetcher.
|
|
33
|
+
|
|
34
|
+
## Documentation
|
|
35
|
+
|
|
36
|
+
Explore full documentation working with `garf-executors`
|
|
37
|
+
|
|
38
|
+
* [Documentation](https://google.github.io/garf/usage/executors/)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Copyright 2025 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Executors to fetch data from various APIs."""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
'ApiQueryExecutor',
|
|
22
|
+
'ApiExecutionContext',
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
__version__ = '1.1.3'
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 2024 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# https://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Module for executing Garf queries and writing them to local/remote.
|
|
15
|
+
|
|
16
|
+
ApiQueryExecutor performs fetching data from API in a form of
|
|
17
|
+
GarfReport and saving it to local/remote storage.
|
|
18
|
+
"""
|
|
19
|
+
# pylint: disable=C0330, g-bad-import-order, g-multiple-import
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import pathlib
|
|
25
|
+
|
|
26
|
+
from garf.core import report_fetcher, simulator
|
|
27
|
+
from garf.executors import (
|
|
28
|
+
exceptions,
|
|
29
|
+
execution_context,
|
|
30
|
+
executor,
|
|
31
|
+
fetchers,
|
|
32
|
+
query_processor,
|
|
33
|
+
)
|
|
34
|
+
from garf.executors.telemetry import tracer
|
|
35
|
+
from opentelemetry import metrics, trace
|
|
36
|
+
|
|
37
|
+
logger = logging.getLogger(__name__)
|
|
38
|
+
meter = metrics.get_meter('garf.executors')
|
|
39
|
+
|
|
40
|
+
api_counter = meter.create_counter(
|
|
41
|
+
'garf_api_execute_total',
|
|
42
|
+
unit='1',
|
|
43
|
+
description='Counts number of API executions',
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ApiExecutionContext(execution_context.ExecutionContext):
|
|
48
|
+
"""Common context for executing one or more queries."""
|
|
49
|
+
|
|
50
|
+
writer: str | list[str] = 'console'
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ApiQueryExecutor(executor.Executor):
|
|
54
|
+
"""Gets data from API and writes them to local/remote storage.
|
|
55
|
+
|
|
56
|
+
Attributes:
|
|
57
|
+
api_client: a client used for connecting to API.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
def __init__(
|
|
61
|
+
self,
|
|
62
|
+
fetcher: report_fetcher.ApiReportFetcher,
|
|
63
|
+
report_simulator: simulator.ApiReportSimulator | None = None,
|
|
64
|
+
) -> None:
|
|
65
|
+
"""Initializes ApiQueryExecutor.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
fetcher: Instantiated report fetcher.
|
|
69
|
+
report_simulator: Instantiated simulator.
|
|
70
|
+
"""
|
|
71
|
+
self.fetcher = fetcher
|
|
72
|
+
self.simulator = report_simulator
|
|
73
|
+
super().__init__(
|
|
74
|
+
preprocessors=self.fetcher.preprocessors,
|
|
75
|
+
postprocessors=self.fetcher.postprocessors,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
def from_fetcher_alias(
|
|
80
|
+
cls,
|
|
81
|
+
source: str,
|
|
82
|
+
fetcher_parameters: dict[str, str] | None = None,
|
|
83
|
+
enable_cache: bool = False,
|
|
84
|
+
cache_ttl_seconds: int = 3600,
|
|
85
|
+
) -> ApiQueryExecutor:
|
|
86
|
+
if not fetcher_parameters:
|
|
87
|
+
fetcher_parameters = {}
|
|
88
|
+
concrete_api_fetcher = fetchers.get_report_fetcher(source)
|
|
89
|
+
return ApiQueryExecutor(
|
|
90
|
+
fetcher=concrete_api_fetcher(
|
|
91
|
+
**fetcher_parameters,
|
|
92
|
+
enable_cache=enable_cache,
|
|
93
|
+
cache_ttl_seconds=cache_ttl_seconds,
|
|
94
|
+
)
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
@tracer.start_as_current_span('api.execute')
|
|
98
|
+
def execute(
|
|
99
|
+
self,
|
|
100
|
+
query: str,
|
|
101
|
+
title: str,
|
|
102
|
+
context: ApiExecutionContext,
|
|
103
|
+
) -> str:
|
|
104
|
+
"""Reads query, extract results and stores them in a specified location.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
query: Location of the query.
|
|
108
|
+
title: Name of the query.
|
|
109
|
+
context: Query execution context.
|
|
110
|
+
|
|
111
|
+
Returns:
|
|
112
|
+
Result of writing the report.
|
|
113
|
+
|
|
114
|
+
Raises:
|
|
115
|
+
GarfExecutorError: When failed to execute query.
|
|
116
|
+
"""
|
|
117
|
+
if self.simulator:
|
|
118
|
+
return self.simulate(query=query, title=title, context=context)
|
|
119
|
+
context = query_processor.process_gquery(context)
|
|
120
|
+
span = trace.get_current_span()
|
|
121
|
+
span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
|
|
122
|
+
span.set_attribute(
|
|
123
|
+
'api.client.class', self.fetcher.api_client.__class__.__name__
|
|
124
|
+
)
|
|
125
|
+
try:
|
|
126
|
+
span.set_attribute('query.title', title)
|
|
127
|
+
span.set_attribute('query.text', query)
|
|
128
|
+
logger.debug('starting query %s', query)
|
|
129
|
+
title = pathlib.Path(title).name.split('.')[0]
|
|
130
|
+
api_counter.add(
|
|
131
|
+
1, {'api.client.class': self.fetcher.api_client.__class__.__name__}
|
|
132
|
+
)
|
|
133
|
+
results = self.fetcher.fetch(
|
|
134
|
+
query_specification=query,
|
|
135
|
+
args=context.query_parameters,
|
|
136
|
+
title=title,
|
|
137
|
+
**context.fetcher_parameters,
|
|
138
|
+
)
|
|
139
|
+
writer_clients = context.writer_clients
|
|
140
|
+
if not writer_clients:
|
|
141
|
+
logger.warning('No writers configured, skipping write operation')
|
|
142
|
+
return None
|
|
143
|
+
writing_results = []
|
|
144
|
+
for writer_client in writer_clients:
|
|
145
|
+
logger.debug(
|
|
146
|
+
'Start writing data for query %s via %s writer',
|
|
147
|
+
title,
|
|
148
|
+
type(writer_client),
|
|
149
|
+
)
|
|
150
|
+
result = writer_client.write(results, title)
|
|
151
|
+
logger.debug(
|
|
152
|
+
'Finish writing data for query %s via %s writer',
|
|
153
|
+
title,
|
|
154
|
+
type(writer_client),
|
|
155
|
+
)
|
|
156
|
+
writing_results.append(result)
|
|
157
|
+
logger.info('%s executed successfully', title)
|
|
158
|
+
# Return the last writer's result for backward compatibility
|
|
159
|
+
return writing_results[-1] if writing_results else None
|
|
160
|
+
except Exception as e:
|
|
161
|
+
logger.error('%s generated an exception: %s', title, str(e))
|
|
162
|
+
raise exceptions.GarfExecutorError(
|
|
163
|
+
'%s generated an exception: %s', title, str(e)
|
|
164
|
+
) from e
|
|
165
|
+
|
|
166
|
+
@tracer.start_as_current_span('api.simulate')
|
|
167
|
+
def simulate(
|
|
168
|
+
self,
|
|
169
|
+
query: str,
|
|
170
|
+
title: str,
|
|
171
|
+
context: ApiExecutionContext,
|
|
172
|
+
) -> str:
|
|
173
|
+
"""Reads query, simulates results and stores them in a specified location.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
query: Location of the query.
|
|
177
|
+
title: Name of the query.
|
|
178
|
+
context: Query execution context.
|
|
179
|
+
|
|
180
|
+
Returns:
|
|
181
|
+
Result of writing the report.
|
|
182
|
+
|
|
183
|
+
Raises:
|
|
184
|
+
GarfExecutorError: When failed to execute query.
|
|
185
|
+
"""
|
|
186
|
+
context = query_processor.process_gquery(context)
|
|
187
|
+
span = trace.get_current_span()
|
|
188
|
+
span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
|
|
189
|
+
span.set_attribute(
|
|
190
|
+
'api.client.class', self.fetcher.api_client.__class__.__name__
|
|
191
|
+
)
|
|
192
|
+
try:
|
|
193
|
+
span.set_attribute('query.title', title)
|
|
194
|
+
span.set_attribute('query.text', query)
|
|
195
|
+
logger.debug('starting query %s', query)
|
|
196
|
+
title = pathlib.Path(title).name.split('.')[0]
|
|
197
|
+
results = self.simulator.simulate(
|
|
198
|
+
query_specification=query,
|
|
199
|
+
args=context.query_parameters,
|
|
200
|
+
title=title,
|
|
201
|
+
**context.fetcher_parameters,
|
|
202
|
+
)
|
|
203
|
+
writer_clients = context.writer_clients
|
|
204
|
+
if not writer_clients:
|
|
205
|
+
logger.warning('No writers configured, skipping write operation')
|
|
206
|
+
return None
|
|
207
|
+
writing_results = []
|
|
208
|
+
for writer_client in writer_clients:
|
|
209
|
+
logger.debug(
|
|
210
|
+
'Start writing data for query %s via %s writer',
|
|
211
|
+
title,
|
|
212
|
+
type(writer_client),
|
|
213
|
+
)
|
|
214
|
+
result = writer_client.write(results, title)
|
|
215
|
+
logger.debug(
|
|
216
|
+
'Finish writing data for query %s via %s writer',
|
|
217
|
+
title,
|
|
218
|
+
type(writer_client),
|
|
219
|
+
)
|
|
220
|
+
writing_results.append(result)
|
|
221
|
+
logger.info('%s executed successfully', title)
|
|
222
|
+
# Return the last writer's result for backward compatibility
|
|
223
|
+
return writing_results[-1] if writing_results else None
|
|
224
|
+
except Exception as e:
|
|
225
|
+
logger.error('%s generated an exception: %s', title, str(e))
|
|
226
|
+
raise exceptions.GarfExecutorError(
|
|
227
|
+
'%s generated an exception: %s', title, str(e)
|
|
228
|
+
) from e
|
|
@@ -28,11 +28,11 @@ except ImportError as e:
|
|
|
28
28
|
|
|
29
29
|
import logging
|
|
30
30
|
|
|
31
|
-
from
|
|
31
|
+
from garf.core import query_editor, report
|
|
32
|
+
from garf.executors import exceptions, execution_context, executor
|
|
33
|
+
from garf.executors.telemetry import tracer
|
|
32
34
|
from google.cloud import exceptions as google_cloud_exceptions
|
|
33
|
-
|
|
34
|
-
from garf_executors import exceptions, execution_context, executor
|
|
35
|
-
from garf_executors.telemetry import tracer
|
|
35
|
+
from opentelemetry import trace
|
|
36
36
|
|
|
37
37
|
logger = logging.getLogger(__name__)
|
|
38
38
|
|
|
@@ -54,6 +54,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
54
54
|
self,
|
|
55
55
|
project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
|
|
56
56
|
location: str | None = None,
|
|
57
|
+
**kwargs: str,
|
|
57
58
|
) -> None:
|
|
58
59
|
"""Initializes BigQueryExecutor.
|
|
59
60
|
|
|
@@ -68,6 +69,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
68
69
|
)
|
|
69
70
|
self.project_id = project_id
|
|
70
71
|
self.location = location
|
|
72
|
+
super().__init__()
|
|
71
73
|
|
|
72
74
|
@property
|
|
73
75
|
def client(self) -> bigquery.Client:
|
|
@@ -93,41 +95,49 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
93
95
|
Returns:
|
|
94
96
|
Report with data if query returns some data otherwise empty Report.
|
|
95
97
|
"""
|
|
98
|
+
span = trace.get_current_span()
|
|
99
|
+
span.set_attribute('query.title', title)
|
|
100
|
+
span.set_attribute('query.text', query)
|
|
101
|
+
logger.info('Executing script: %s', title)
|
|
96
102
|
query_text = self.replace_params_template(query, context.query_parameters)
|
|
97
103
|
self.create_datasets(context.query_parameters.macro)
|
|
98
104
|
job = self.client.query(query_text)
|
|
99
105
|
try:
|
|
100
106
|
result = job.result()
|
|
107
|
+
except google_cloud_exceptions.GoogleCloudError as e:
|
|
108
|
+
raise BigQueryExecutorError(
|
|
109
|
+
f'Failed to execute query {title}: Reason: {e}'
|
|
110
|
+
) from e
|
|
101
111
|
logger.debug('%s launched successfully', title)
|
|
102
|
-
|
|
103
|
-
|
|
112
|
+
if result.total_rows:
|
|
113
|
+
results = report.GarfReport.from_pandas(result.to_dataframe())
|
|
114
|
+
else:
|
|
115
|
+
results = report.GarfReport()
|
|
116
|
+
if context.writer and results:
|
|
117
|
+
writer_clients = context.writer_clients
|
|
118
|
+
if not writer_clients:
|
|
119
|
+
logger.warning('No writers configured, skipping write operation')
|
|
104
120
|
else:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
logger.info('%s executed successfully', title)
|
|
126
|
-
# Return the last writer's result for backward compatibility
|
|
127
|
-
return writing_results[-1] if writing_results else None
|
|
128
|
-
return results
|
|
129
|
-
except google_cloud_exceptions.GoogleCloudError as e:
|
|
130
|
-
raise BigQueryExecutorError(e) from e
|
|
121
|
+
writing_results = []
|
|
122
|
+
for writer_client in writer_clients:
|
|
123
|
+
logger.debug(
|
|
124
|
+
'Start writing data for query %s via %s writer',
|
|
125
|
+
title,
|
|
126
|
+
type(writer_client),
|
|
127
|
+
)
|
|
128
|
+
writing_result = writer_client.write(results, title)
|
|
129
|
+
logger.debug(
|
|
130
|
+
'Finish writing data for query %s via %s writer',
|
|
131
|
+
title,
|
|
132
|
+
type(writer_client),
|
|
133
|
+
)
|
|
134
|
+
writing_results.append(writing_result)
|
|
135
|
+
# Return the last writer's result for backward compatibility
|
|
136
|
+
logger.info('%s executed successfully', title)
|
|
137
|
+
return writing_results[-1] if writing_results else None
|
|
138
|
+
logger.info('%s executed successfully', title)
|
|
139
|
+
span.set_attribute('execute.num_results', len(results))
|
|
140
|
+
return results
|
|
131
141
|
|
|
132
142
|
@tracer.start_as_current_span('bq.create_datasets')
|
|
133
143
|
def create_datasets(self, macros: dict | None) -> None:
|
|
@@ -24,8 +24,7 @@ import pathlib
|
|
|
24
24
|
import pydantic
|
|
25
25
|
import smart_open
|
|
26
26
|
import yaml
|
|
27
|
-
|
|
28
|
-
from garf_executors.execution_context import ExecutionContext
|
|
27
|
+
from garf.executors.execution_context import ExecutionContext
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class Config(pydantic.BaseModel):
|
|
@@ -47,5 +46,7 @@ class Config(pydantic.BaseModel):
|
|
|
47
46
|
def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
|
|
48
47
|
"""Saves config to local or remote yaml file."""
|
|
49
48
|
with smart_open.open(path, 'w', encoding='utf-8') as f:
|
|
50
|
-
yaml.dump(
|
|
49
|
+
yaml.dump(
|
|
50
|
+
self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
|
|
51
|
+
)
|
|
51
52
|
return f'Config is saved to {str(path)}'
|
{garf_executors-0.2.0/garf_executors → garf_executors-1.1.3/garf/executors}/entrypoints/cli.py
RENAMED
|
@@ -21,17 +21,23 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import argparse
|
|
23
23
|
import logging
|
|
24
|
+
import pathlib
|
|
24
25
|
import sys
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
import
|
|
29
|
-
from
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
import garf.executors
|
|
28
|
+
from garf.executors import config, exceptions, setup
|
|
29
|
+
from garf.executors.entrypoints import utils
|
|
30
|
+
from garf.executors.entrypoints.tracer import (
|
|
31
|
+
initialize_meter,
|
|
32
|
+
initialize_tracer,
|
|
33
|
+
)
|
|
34
|
+
from garf.executors.telemetry import tracer
|
|
35
|
+
from garf.executors.workflows import workflow, workflow_runner
|
|
36
|
+
from garf.io import reader
|
|
37
|
+
from opentelemetry import trace
|
|
33
38
|
|
|
34
39
|
initialize_tracer()
|
|
40
|
+
meter_provider = initialize_meter()
|
|
35
41
|
|
|
36
42
|
|
|
37
43
|
@tracer.start_as_current_span('garf.entrypoints.cli')
|
|
@@ -39,6 +45,7 @@ def main():
|
|
|
39
45
|
parser = argparse.ArgumentParser()
|
|
40
46
|
parser.add_argument('query', nargs='*')
|
|
41
47
|
parser.add_argument('-c', '--config', dest='config', default=None)
|
|
48
|
+
parser.add_argument('-w', '--workflow', dest='workflow', default=None)
|
|
42
49
|
parser.add_argument('--source', dest='source', default=None)
|
|
43
50
|
parser.add_argument('--output', dest='output', default='console')
|
|
44
51
|
parser.add_argument('--input', dest='input', default='file')
|
|
@@ -51,6 +58,7 @@ def main():
|
|
|
51
58
|
parser.add_argument(
|
|
52
59
|
'--no-parallel-queries', dest='parallel_queries', action='store_false'
|
|
53
60
|
)
|
|
61
|
+
parser.add_argument('--simulate', dest='simulate', action='store_true')
|
|
54
62
|
parser.add_argument('--dry-run', dest='dry_run', action='store_true')
|
|
55
63
|
parser.add_argument('-v', '--version', dest='version', action='store_true')
|
|
56
64
|
parser.add_argument(
|
|
@@ -65,54 +73,76 @@ def main():
|
|
|
65
73
|
default=3600,
|
|
66
74
|
type=int,
|
|
67
75
|
)
|
|
76
|
+
parser.add_argument('--workflow-skip', dest='workflow_skip', default=None)
|
|
77
|
+
parser.add_argument(
|
|
78
|
+
'--workflow-include', dest='workflow_include', default=None
|
|
79
|
+
)
|
|
68
80
|
parser.set_defaults(parallel_queries=True)
|
|
81
|
+
parser.set_defaults(simulate=False)
|
|
69
82
|
parser.set_defaults(enable_cache=False)
|
|
70
83
|
parser.set_defaults(dry_run=False)
|
|
71
84
|
args, kwargs = parser.parse_known_args()
|
|
72
85
|
|
|
86
|
+
span = trace.get_current_span()
|
|
87
|
+
command_args = ' '.join(sys.argv[1:])
|
|
88
|
+
span.set_attribute('cli.command', f'garf {command_args}')
|
|
73
89
|
if args.version:
|
|
74
|
-
print(
|
|
90
|
+
print(garf.executors.__version__)
|
|
75
91
|
sys.exit()
|
|
76
92
|
logger = utils.init_logging(
|
|
77
93
|
loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
|
|
78
94
|
)
|
|
95
|
+
reader_client = reader.create_reader(args.input)
|
|
96
|
+
param_types = ['source', 'macro', 'template']
|
|
97
|
+
outputs = args.output.split(',')
|
|
98
|
+
extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(kwargs)
|
|
99
|
+
source_parameters = extra_parameters.get('source', {})
|
|
100
|
+
writer_parameters = {}
|
|
101
|
+
for output in outputs:
|
|
102
|
+
writer_parameters.update(extra_parameters.get(output))
|
|
103
|
+
|
|
104
|
+
context = garf.executors.api_executor.ApiExecutionContext(
|
|
105
|
+
query_parameters={
|
|
106
|
+
'macro': extra_parameters.get('macro'),
|
|
107
|
+
'template': extra_parameters.get('template'),
|
|
108
|
+
},
|
|
109
|
+
writer=outputs,
|
|
110
|
+
writer_parameters=writer_parameters,
|
|
111
|
+
fetcher_parameters=source_parameters,
|
|
112
|
+
)
|
|
113
|
+
if workflow_file := args.workflow:
|
|
114
|
+
wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
|
|
115
|
+
execution_workflow = workflow.Workflow.from_file(workflow_file, context)
|
|
116
|
+
workflow_skip = args.workflow_skip if args.workflow_skip else None
|
|
117
|
+
workflow_include = args.workflow_include if args.workflow_include else None
|
|
118
|
+
workflow_runner.WorkflowRunner(
|
|
119
|
+
execution_workflow=execution_workflow, wf_parent=wf_parent
|
|
120
|
+
).run(
|
|
121
|
+
enable_cache=args.enable_cache,
|
|
122
|
+
cache_ttl_seconds=args.cache_ttl_seconds,
|
|
123
|
+
selected_aliases=workflow_include,
|
|
124
|
+
skipped_aliases=workflow_skip,
|
|
125
|
+
)
|
|
126
|
+
meter_provider.shutdown()
|
|
127
|
+
sys.exit()
|
|
128
|
+
|
|
79
129
|
if not args.query:
|
|
80
130
|
logger.error('Please provide one or more queries to run')
|
|
81
131
|
raise exceptions.GarfExecutorError(
|
|
82
132
|
'Please provide one or more queries to run'
|
|
83
133
|
)
|
|
84
|
-
reader_client = reader.create_reader(args.input)
|
|
85
134
|
if config_file := args.config:
|
|
86
135
|
execution_config = config.Config.from_file(config_file)
|
|
87
136
|
if not (context := execution_config.sources.get(args.source)):
|
|
88
137
|
raise exceptions.GarfExecutorError(
|
|
89
138
|
f'No execution context found for source {args.source} in {config_file}'
|
|
90
139
|
)
|
|
91
|
-
|
|
92
|
-
param_types = ['source', 'macro', 'template']
|
|
93
|
-
outputs = args.output.split(',')
|
|
94
|
-
extra_parameters = utils.ParamsParser([*param_types, *outputs]).parse(
|
|
95
|
-
kwargs
|
|
96
|
-
)
|
|
97
|
-
source_parameters = extra_parameters.get('source', {})
|
|
98
|
-
writer_parameters = {}
|
|
99
|
-
for output in outputs:
|
|
100
|
-
writer_parameters.update(extra_parameters.get(output))
|
|
101
|
-
|
|
102
|
-
context = garf_executors.api_executor.ApiExecutionContext(
|
|
103
|
-
query_parameters={
|
|
104
|
-
'macro': extra_parameters.get('macro'),
|
|
105
|
-
'template': extra_parameters.get('template'),
|
|
106
|
-
},
|
|
107
|
-
writer=outputs,
|
|
108
|
-
writer_parameters=writer_parameters,
|
|
109
|
-
fetcher_parameters=source_parameters,
|
|
110
|
-
)
|
|
111
|
-
query_executor = garf_executors.setup_executor(
|
|
140
|
+
query_executor = setup.setup_executor(
|
|
112
141
|
source=args.source,
|
|
113
142
|
fetcher_parameters=context.fetcher_parameters,
|
|
114
143
|
enable_cache=args.enable_cache,
|
|
115
144
|
cache_ttl_seconds=args.cache_ttl_seconds,
|
|
145
|
+
simulate=args.simulate,
|
|
116
146
|
)
|
|
117
147
|
batch = {query: reader_client.read(query) for query in args.query}
|
|
118
148
|
if args.parallel_queries and len(args.query) > 1:
|
|
@@ -127,6 +157,7 @@ def main():
|
|
|
127
157
|
query=reader_client.read(query), title=query, context=context
|
|
128
158
|
)
|
|
129
159
|
logging.shutdown()
|
|
160
|
+
meter_provider.shutdown()
|
|
130
161
|
|
|
131
162
|
|
|
132
163
|
if __name__ == '__main__':
|
|
@@ -19,29 +19,42 @@ import logging
|
|
|
19
19
|
from concurrent import futures
|
|
20
20
|
|
|
21
21
|
import grpc
|
|
22
|
+
from garf.executors import execution_context, garf_pb2, garf_pb2_grpc, setup
|
|
23
|
+
from garf.executors.entrypoints.tracer import initialize_tracer
|
|
22
24
|
from google.protobuf.json_format import MessageToDict
|
|
23
25
|
from grpc_reflection.v1alpha import reflection
|
|
24
26
|
|
|
25
|
-
import garf_executors
|
|
26
|
-
from garf_executors import garf_pb2, garf_pb2_grpc
|
|
27
|
-
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
28
|
-
|
|
29
27
|
|
|
30
28
|
class GarfService(garf_pb2_grpc.GarfService):
|
|
31
29
|
def Execute(self, request, context):
|
|
32
|
-
query_executor =
|
|
30
|
+
query_executor = setup.setup_executor(
|
|
33
31
|
request.source, request.context.fetcher_parameters
|
|
34
32
|
)
|
|
35
|
-
execution_context = garf_executors.execution_context.ExecutionContext(
|
|
36
|
-
**MessageToDict(request.context, preserving_proto_field_name=True)
|
|
37
|
-
)
|
|
38
33
|
result = query_executor.execute(
|
|
39
34
|
query=request.query,
|
|
40
35
|
title=request.title,
|
|
41
|
-
context=execution_context
|
|
36
|
+
context=execution_context.ExecutionContext(
|
|
37
|
+
**MessageToDict(request.context, preserving_proto_field_name=True)
|
|
38
|
+
),
|
|
42
39
|
)
|
|
43
40
|
return garf_pb2.ExecuteResponse(results=[result])
|
|
44
41
|
|
|
42
|
+
def Fetch(self, request, context):
|
|
43
|
+
query_executor = setup.setup_executor(
|
|
44
|
+
request.source, request.context.fetcher_parameters
|
|
45
|
+
)
|
|
46
|
+
query_args = execution_context.ExecutionContext(
|
|
47
|
+
**MessageToDict(request.context, preserving_proto_field_name=True)
|
|
48
|
+
).query_parameters
|
|
49
|
+
result = query_executor.fetcher.fetch(
|
|
50
|
+
query_specification=request.query,
|
|
51
|
+
title=request.title,
|
|
52
|
+
args=query_args,
|
|
53
|
+
)
|
|
54
|
+
return garf_pb2.FetchResponse(
|
|
55
|
+
columns=result.column_names, rows=result.to_list(row_type='dict')
|
|
56
|
+
)
|
|
57
|
+
|
|
45
58
|
|
|
46
59
|
if __name__ == '__main__':
|
|
47
60
|
parser = argparse.ArgumentParser()
|