garf-executors 0.1.7__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {garf_executors-0.1.7 → garf_executors-1.0.7}/PKG-INFO +23 -7
- {garf_executors-0.1.7 → garf_executors-1.0.7}/README.md +8 -2
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/__init__.py +7 -7
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/api_executor.py +36 -11
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/bq_executor.py +42 -32
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/config.py +4 -3
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/cli.py +83 -35
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/grpc_server.py +21 -6
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/server.py +21 -12
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/tracer.py +20 -5
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/execution_context.py +6 -5
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/executor.py +41 -4
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/fetchers.py +7 -5
- garf_executors-1.0.7/garf/executors/garf_pb2.py +51 -0
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/garf_pb2_grpc.py +45 -2
- garf_executors-1.0.7/garf/executors/query_processor.py +77 -0
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/sql_executor.py +24 -11
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/telemetry.py +1 -1
- garf_executors-1.0.7/garf/executors/workflow.py +109 -0
- garf_executors-1.0.7/garf_executors/__init__.py +25 -0
- garf_executors-1.0.7/garf_executors/api_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/bq_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/config.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/__init__.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/cli.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/grcp_server.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/server.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/tracer.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/utils.py +25 -0
- garf_executors-1.0.7/garf_executors/exceptions.py +25 -0
- garf_executors-1.0.7/garf_executors/execution_context.py +25 -0
- garf_executors-1.0.7/garf_executors/executor.py +25 -0
- garf_executors-1.0.7/garf_executors/fetchers.py +25 -0
- garf_executors-1.0.7/garf_executors/sql_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/telemetry.py +25 -0
- garf_executors-1.0.7/garf_executors/workflow.py +25 -0
- {garf_executors-0.1.7 → garf_executors-1.0.7}/garf_executors.egg-info/PKG-INFO +23 -7
- garf_executors-1.0.7/garf_executors.egg-info/SOURCES.txt +45 -0
- garf_executors-1.0.7/garf_executors.egg-info/entry_points.txt +2 -0
- {garf_executors-0.1.7 → garf_executors-1.0.7}/garf_executors.egg-info/requires.txt +16 -4
- {garf_executors-0.1.7 → garf_executors-1.0.7}/garf_executors.egg-info/top_level.txt +1 -0
- {garf_executors-0.1.7 → garf_executors-1.0.7}/pyproject.toml +22 -6
- garf_executors-0.1.7/garf_executors/garf_pb2.py +0 -45
- garf_executors-0.1.7/garf_executors.egg-info/SOURCES.txt +0 -26
- garf_executors-0.1.7/garf_executors.egg-info/entry_points.txt +0 -2
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/__init__.py +0 -0
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/utils.py +0 -0
- {garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/exceptions.py +0 -0
- {garf_executors-0.1.7 → garf_executors-1.0.7}/garf_executors.egg-info/dependency_links.txt +0 -0
- {garf_executors-0.1.7 → garf_executors-1.0.7}/setup.cfg +0 -0
{garf_executors-0.1.7 → garf_executors-1.0.7}/PKG-INFO
RENAMED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: garf-executors
-Version: 0.1.7
+Version: 1.0.7
 Summary: Executes queries against API and writes data to local/remote storage.
 Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
 License: Apache 2.0
@@ -17,25 +17,35 @@ Classifier: Operating System :: OS Independent
 Classifier: License :: OSI Approved :: Apache Software License
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: garf-core
-Requires-Dist: garf-io
+Requires-Dist: garf-core>=1.0.0
+Requires-Dist: garf-io>=1.0.0
 Requires-Dist: pyyaml
 Requires-Dist: pydantic
 Requires-Dist: opentelemetry-api
 Requires-Dist: opentelemetry-sdk
+Requires-Dist: opentelemetry-exporter-otlp
 Provides-Extra: bq
 Requires-Dist: garf-io[bq]; extra == "bq"
 Requires-Dist: pandas; extra == "bq"
 Requires-Dist: google-cloud-logging; extra == "bq"
+Requires-Dist: smart_open[gcs]; extra == "bq"
 Provides-Extra: sql
 Requires-Dist: garf-io[sqlalchemy]; extra == "sql"
 Requires-Dist: pandas; extra == "sql"
+Provides-Extra: gcp
+Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
 Provides-Extra: server
 Requires-Dist: fastapi[standard]; extra == "server"
 Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
-Requires-Dist: …
+Requires-Dist: typer; extra == "server"
+Requires-Dist: grpcio-reflection; extra == "server"
+Provides-Extra: tests
+Requires-Dist: pytest; extra == "tests"
+Requires-Dist: pytest-mock; extra == "tests"
+Requires-Dist: pytest-xdist; extra == "tests"
+Requires-Dist: pytest-grpc; extra == "tests"
 Provides-Extra: all
-Requires-Dist: garf-executors[bq,server,sql]; extra == "all"
+Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"
 
 # `garf-executors` - One stop-shop for interacting with Reporting APIs.
 
@@ -64,8 +74,14 @@ garf <QUERIES> --source <API_SOURCE> \
 where
 
 * `<QUERIES>`- local or remote path(s) to files with queries.
-* …
-* …
+* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
+* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
 
 If your report fetcher requires additional parameters you can pass them via key value pairs under `--source.` argument, i.e.`--source.regionCode='US'` - to get data only from *US*.
 > Concrete `--source` parameters are dependent on a particular report fetcher and should be looked up in a documentation for this fetcher.
+
+## Documentation
+
+Explore full documentation working with `garf-executors`
+
+* [Documentation](https://google.github.io/garf/usage/executors/)
```
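The metadata changes above introduce a new `gcp` extra (pulling in `opentelemetry-exporter-gcp-trace`), a `tests` extra, and fold `gcp` into `all`. A minimal sketch of installing 1.0.7 with the new extras, using only names that appear in the `Provides-Extra`/`Requires-Dist` lines:

```bash
# New gcp extra adds the Cloud Trace exporter:
pip install "garf-executors[gcp]==1.0.7"

# "all" now expands to bq, gcp, server, sql:
pip install "garf-executors[all]==1.0.7"
```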
{garf_executors-0.1.7 → garf_executors-1.0.7}/README.md
RENAMED

```diff
@@ -25,8 +25,14 @@ garf <QUERIES> --source <API_SOURCE> \
 where
 
 * `<QUERIES>`- local or remote path(s) to files with queries.
-* …
-* …
+* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
+* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
 
 If your report fetcher requires additional parameters you can pass them via key value pairs under `--source.` argument, i.e.`--source.regionCode='US'` - to get data only from *US*.
 > Concrete `--source` parameters are dependent on a particular report fetcher and should be looked up in a documentation for this fetcher.
+
+## Documentation
+
+Explore full documentation working with `garf-executors`
+
+* [Documentation](https://google.github.io/garf/usage/executors/)
```
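For illustration, here is a hypothetical invocation matching the usage described above; the query file and `fake-source` alias are placeholders, while `--output` and the `--source.regionCode` pattern come straight from the README:

```bash
garf queries/campaigns.sql \
  --source fake-source \
  --output csv \
  --source.regionCode='US'
```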
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/__init__.py
RENAMED

```diff
@@ -17,9 +17,9 @@ from __future__ import annotations
 
 import importlib
 
-from …
-from …
-from …
+from garf.executors import executor, fetchers
+from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
+from garf.executors.telemetry import tracer
 
 
 @tracer.start_as_current_span('setup_executor')
@@ -31,10 +31,10 @@ def setup_executor(
 ) -> type[executor.Executor]:
   """Initializes executors based on a source and parameters."""
   if source == 'bq':
-    bq_executor = importlib.import_module('…
+    bq_executor = importlib.import_module('garf.executors.bq_executor')
     query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
   elif source == 'sqldb':
-    sql_executor = importlib.import_module('…
+    sql_executor = importlib.import_module('garf.executors.sql_executor')
     query_executor = (
       sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
         fetcher_parameters.get('connection_string')
@@ -43,7 +43,7 @@ def setup_executor(
   else:
     concrete_api_fetcher = fetchers.get_report_fetcher(source)
     query_executor = ApiQueryExecutor(
-      concrete_api_fetcher(
+      fetcher=concrete_api_fetcher(
        **fetcher_parameters,
        enable_cache=enable_cache,
        cache_ttl_seconds=cache_ttl_seconds,
@@ -57,4 +57,4 @@ __all__ = [
   'ApiExecutionContext',
 ]
 
-__version__ = '0.1.7'
+__version__ = '1.0.7'
```
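The package namespace moves from `garf_executors` to `garf.executors`; the new top-level `garf_executors/*` files in the listing (+25 lines each) appear to be thin compatibility stubs. A minimal sketch of the new entry point, using only the `setup_executor` keyword arguments visible in this and the cli.py hunks; the parameter values are illustrative:

```python
import garf.executors

# Builds a BigQueryExecutor for source='bq'; any other alias is resolved
# through fetchers.get_report_fetcher(), as in the hunk above.
query_executor = garf.executors.setup_executor(
  source='bq',
  fetcher_parameters={'project_id': 'my-project'},  # illustrative value
  enable_cache=False,
  cache_ttl_seconds=3600,
)
```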
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/api_executor.py
RENAMED

```diff
@@ -21,13 +21,19 @@ GarfReport and saving it to local/remote storage.
 from __future__ import annotations
 
 import logging
-
-
+import pathlib
+
+from garf.core import report_fetcher
+from garf.executors import (
+  exceptions,
+  execution_context,
+  executor,
+  fetchers,
+  query_processor,
+)
+from garf.executors.telemetry import tracer
 from opentelemetry import trace
 
-from garf_executors import exceptions, execution_context, executor, fetchers
-from garf_executors.telemetry import tracer
-
 logger = logging.getLogger(__name__)
 
 
@@ -51,15 +57,29 @@ class ApiQueryExecutor(executor.Executor):
       fetcher: Instantiated report fetcher.
     """
     self.fetcher = fetcher
+    super().__init__(
+      preprocessors=self.fetcher.preprocessors,
+      postprocessors=self.fetcher.postprocessors,
+    )
 
   @classmethod
   def from_fetcher_alias(
-    cls,
+    cls,
+    source: str,
+    fetcher_parameters: dict[str, str] | None = None,
+    enable_cache: bool = False,
+    cache_ttl_seconds: int = 3600,
   ) -> ApiQueryExecutor:
     if not fetcher_parameters:
       fetcher_parameters = {}
     concrete_api_fetcher = fetchers.get_report_fetcher(source)
-    return ApiQueryExecutor(
+    return ApiQueryExecutor(
+      fetcher=concrete_api_fetcher(
+        **fetcher_parameters,
+        enable_cache=enable_cache,
+        cache_ttl_seconds=cache_ttl_seconds,
+      )
+    )
 
   @tracer.start_as_current_span('api.execute')
   def execute(
@@ -81,16 +101,21 @@ class ApiQueryExecutor(executor.Executor):
     Raises:
       GarfExecutorError: When failed to execute query.
     """
+    context = query_processor.process_gquery(context)
     span = trace.get_current_span()
-    span.set_attribute('fetcher', self.fetcher.__class__.__name__)
-    span.set_attribute(…
+    span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
+    span.set_attribute(
+      'api.client.class', self.fetcher.api_client.__class__.__name__
+    )
     try:
-      span.set_attribute('…
-      span.set_attribute('…
+      span.set_attribute('query.title', title)
+      span.set_attribute('query.text', query)
       logger.debug('starting query %s', query)
+      title = pathlib.Path(title).name.split('.')[0]
       results = self.fetcher.fetch(
         query_specification=query,
         args=context.query_parameters,
+        title=title,
         **context.fetcher_parameters,
       )
       writer_clients = context.writer_clients
```
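Two behavioral changes are visible here: `from_fetcher_alias` now takes explicit caching arguments, and `execute` trims the query title to its file stem before fetching. A sketch assuming `'fake-api'` is a registered fetcher alias (it is a placeholder):

```python
import pathlib

from garf.executors.api_executor import ApiQueryExecutor

# Keyword arguments mirror the new from_fetcher_alias signature above.
executor = ApiQueryExecutor.from_fetcher_alias(
  source='fake-api',  # placeholder alias
  fetcher_parameters={},
  enable_cache=True,
  cache_ttl_seconds=3600,
)

# execute() now normalizes titles the same way as this expression:
assert pathlib.Path('queries/campaigns.sql').name.split('.')[0] == 'campaigns'
```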
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/bq_executor.py
RENAMED

```diff
@@ -28,11 +28,11 @@ except ImportError as e:
 
 import logging
 
-from …
+from garf.core import query_editor, report
+from garf.executors import exceptions, execution_context, executor
+from garf.executors.telemetry import tracer
 from google.cloud import exceptions as google_cloud_exceptions
-
-from garf_executors import exceptions, execution_context, executor
-from garf_executors.telemetry import tracer
+from opentelemetry import trace
 
 logger = logging.getLogger(__name__)
 
@@ -54,6 +54,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
     self,
     project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
     location: str | None = None,
+    **kwargs: str,
   ) -> None:
     """Initializes BigQueryExecutor.
 
@@ -68,6 +69,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
     )
     self.project_id = project_id
     self.location = location
+    super().__init__()
 
   @property
   def client(self) -> bigquery.Client:
@@ -93,41 +95,49 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
     Returns:
       Report with data if query returns some data otherwise empty Report.
     """
+    span = trace.get_current_span()
+    span.set_attribute('query.title', title)
+    span.set_attribute('query.text', query)
+    logger.info('Executing script: %s', title)
     query_text = self.replace_params_template(query, context.query_parameters)
     self.create_datasets(context.query_parameters.macro)
     job = self.client.query(query_text)
     try:
       result = job.result()
+    except google_cloud_exceptions.GoogleCloudError as e:
+      raise BigQueryExecutorError(
+        f'Failed to execute query {title}: Reason: {e}'
+      ) from e
     logger.debug('%s launched successfully', title)
-… (removed lines 102-103 not preserved in this diff view)
+    if result.total_rows:
+      results = report.GarfReport.from_pandas(result.to_dataframe())
+    else:
+      results = report.GarfReport()
+    if context.writer and results:
+      writer_clients = context.writer_clients
+      if not writer_clients:
+        logger.warning('No writers configured, skipping write operation')
       else:
-… (removed lines 105-124 not preserved in this diff view)
-        logger.info('%s executed successfully', title)
-        # Return the last writer's result for backward compatibility
-        return writing_results[-1] if writing_results else None
-      return results
-    except google_cloud_exceptions.GoogleCloudError as e:
-      raise BigQueryExecutorError(e) from e
+        writing_results = []
+        for writer_client in writer_clients:
+          logger.debug(
+            'Start writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_result = writer_client.write(results, title)
+          logger.debug(
+            'Finish writing data for query %s via %s writer',
+            title,
+            type(writer_client),
+          )
+          writing_results.append(writing_result)
+        # Return the last writer's result for backward compatibility
+        logger.info('%s executed successfully', title)
+        return writing_results[-1] if writing_results else None
+    logger.info('%s executed successfully', title)
+    span.set_attribute('execute.num_results', len(results))
+    return results
 
   @tracer.start_as_current_span('bq.create_datasets')
   def create_datasets(self, macros: dict | None) -> None:
```
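Failures from BigQuery are now wrapped immediately around `job.result()` as `BigQueryExecutorError` with the query title in the message, and the constructor tolerates unknown keyword arguments. A hedged construction sketch (values are placeholders):

```python
from garf.executors import bq_executor

# Unknown keyword arguments are absorbed by the new **kwargs parameter
# instead of raising TypeError; project/location values are placeholders.
executor = bq_executor.BigQueryExecutor(
  project_id='my-project',
  location='US',
  legacy_option='ignored',
)
```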
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/config.py
RENAMED

```diff
@@ -24,8 +24,7 @@ import pathlib
 import pydantic
 import smart_open
 import yaml
-
-from garf_executors.execution_context import ExecutionContext
+from garf.executors.execution_context import ExecutionContext
 
 
 class Config(pydantic.BaseModel):
@@ -47,5 +46,7 @@ class Config(pydantic.BaseModel):
   def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
     """Saves config to local or remote yaml file."""
     with smart_open.open(path, 'w', encoding='utf-8') as f:
-      yaml.dump(…
+      yaml.dump(
+        self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
+      )
     return f'Config is saved to {str(path)}'
```
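`Config.save` now dumps only the `sources` section, with `None` values stripped. A round-trip sketch built from the two methods shown in the diffs (`from_file` here, `sources.get(...)` in cli.py below); the file names are illustrative:

```python
from garf.executors import config

# save() writes model_dump(exclude_none=True).get('sources') as YAML.
execution_config = config.Config.from_file('config.yaml')
print(execution_config.save('config_copy.yaml'))
# -> Config is saved to config_copy.yaml
```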
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/cli.py
RENAMED
```diff
@@ -21,15 +21,17 @@ from __future__ import annotations
 
 import argparse
 import logging
+import pathlib
+import re
 import sys
 
-
-
-import …
-from …
-from …
-from …
-from …
+import garf.executors
+from garf.executors import config, exceptions, workflow
+from garf.executors.entrypoints import utils
+from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.executors.telemetry import tracer
+from garf.io import reader
+from opentelemetry import trace
 
 initialize_tracer()
 
@@ -39,6 +41,7 @@ def main():
   parser = argparse.ArgumentParser()
   parser.add_argument('query', nargs='*')
   parser.add_argument('-c', '--config', dest='config', default=None)
+  parser.add_argument('-w', '--workflow', dest='workflow', default=None)
   parser.add_argument('--source', dest='source', default=None)
   parser.add_argument('--output', dest='output', default='console')
   parser.add_argument('--input', dest='input', default='file')
@@ -70,32 +73,76 @@ def main():
   parser.set_defaults(dry_run=False)
   args, kwargs = parser.parse_known_args()
 
+  span = trace.get_current_span()
+  command_args = ' '.join(sys.argv[1:])
+  span.set_attribute('cli.command', f'garf {command_args}')
   if args.version:
-    print(…
+    print(garf.executors.__version__)
     sys.exit()
   logger = utils.init_logging(
     loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
   )
+  reader_client = reader.create_reader(args.input)
+  if workflow_file := args.workflow:
+    wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
+    execution_workflow = workflow.Workflow.from_file(workflow_file)
+    for i, step in enumerate(execution_workflow.steps, 1):
+      step_span_name = f'{i}-{step.fetcher}'
+      if step.alias:
+        step_span_name = f'{step_span_name}-{step.alias}'
+      with tracer.start_as_current_span(step_span_name):
+        query_executor = garf.executors.setup_executor(
+          source=step.fetcher,
+          fetcher_parameters=step.fetcher_parameters,
+          enable_cache=args.enable_cache,
+          cache_ttl_seconds=args.cache_ttl_seconds,
+        )
+        batch = {}
+        if not (queries := step.queries):
+          logger.error('Please provide one or more queries to run')
+          raise exceptions.GarfExecutorError(
+            'Please provide one or more queries to run'
+          )
+        for query in queries:
+          if isinstance(query, garf.executors.workflow.QueryPath):
+            if re.match(
+              '^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)', query.path
+            ):
+              batch[query.path] = reader_client.read(query.path)
+            else:
+              query_path = wf_parent / pathlib.Path(query.path)
+              if not query_path.exists():
+                raise workflow.GarfWorkflowError(
+                  f'Query: {query_path} not found'
+                )
+              batch[query.path] = reader_client.read(query_path)
+          elif isinstance(query, garf.executors.workflow.QueryFolder):
+            query_path = wf_parent / pathlib.Path(query.folder)
+            if not query_path.exists():
+              raise workflow.GarfWorkflowError(
+                f'Folder: {query_path} not found'
+              )
+            for p in query_path.rglob('*'):
+              if p.suffix == '.sql':
+                batch[p.stem] = reader_client.read(p)
+          else:
+            batch[query.query.title] = query.query.text
+        query_executor.execute_batch(
+          batch, step.context, args.parallel_threshold
+        )
+    sys.exit()
+
   if not args.query:
     logger.error('Please provide one or more queries to run')
     raise exceptions.GarfExecutorError(
       'Please provide one or more queries to run'
     )
-  reader_client = reader.create_reader(args.input)
   if config_file := args.config:
     execution_config = config.Config.from_file(config_file)
     if not (context := execution_config.sources.get(args.source)):
       raise exceptions.GarfExecutorError(
         f'No execution context found for source {args.source} in {config_file}'
       )
-    query_executor = garf_executors.setup_executor(
-      source=args.source,
-      fetcher_parameters=context.fetcher_parameters,
-      enable_cache=args.enable_cache,
-      cache_ttl_seconds=args.cache_ttl_seconds,
-    )
-    batch = {query: reader_client.read(query) for query in args.query}
-    query_executor.execute_batch(batch, context, args.parallel_threshold)
   else:
     param_types = ['source', 'macro', 'template']
     outputs = args.output.split(',')
@@ -107,7 +154,7 @@ def main():
     for output in outputs:
       writer_parameters.update(extra_parameters.get(output))
 
-    context = …
+    context = garf.executors.api_executor.ApiExecutionContext(
       query_parameters={
         'macro': extra_parameters.get('macro'),
         'template': extra_parameters.get('template'),
@@ -116,23 +163,24 @@ def main():
       writer_parameters=writer_parameters,
       fetcher_parameters=source_parameters,
     )
-… (removed lines 119-134 not preserved in this diff view)
-    )
+  query_executor = garf.executors.setup_executor(
+    source=args.source,
+    fetcher_parameters=context.fetcher_parameters,
+    enable_cache=args.enable_cache,
+    cache_ttl_seconds=args.cache_ttl_seconds,
+  )
+  batch = {query: reader_client.read(query) for query in args.query}
+  if args.parallel_queries and len(args.query) > 1:
+    logger.info('Running queries in parallel')
+    batch = {query: reader_client.read(query) for query in args.query}
+    query_executor.execute_batch(batch, context, args.parallel_threshold)
+  else:
+    if len(args.query) > 1:
+      logger.info('Running queries sequentially')
+    for query in args.query:
+      query_executor.execute(
+        query=reader_client.read(query), title=query, context=context
+      )
   logging.shutdown()
 
 
```
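The CLI gains a `-w/--workflow` mode that resolves each step's queries (inline text, paths, remote URLs, or whole folders of `*.sql` files) relative to the workflow file and exits afterwards; without it, multiple queries now run sequentially unless parallel execution is requested. Hypothetical invocations (file names and source alias are placeholders):

```bash
# Run a multi-step workflow; query paths resolve relative to the file.
garf -w workflow.yaml

# Classic mode: several queries against one source, written to console.
garf queries/campaigns.sql queries/adgroups.sql \
  --source fake-source --output console
```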
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/grpc_server.py
RENAMED

```diff
@@ -18,21 +18,20 @@ import argparse
 import logging
 from concurrent import futures
 
+import garf.executors
 import grpc
+from garf.executors import garf_pb2, garf_pb2_grpc
+from garf.executors.entrypoints.tracer import initialize_tracer
 from google.protobuf.json_format import MessageToDict
 from grpc_reflection.v1alpha import reflection
 
-import garf_executors
-from garf_executors import garf_pb2, garf_pb2_grpc
-from garf_executors.entrypoints.tracer import initialize_tracer
-
 
 class GarfService(garf_pb2_grpc.GarfService):
   def Execute(self, request, context):
-    query_executor = …
+    query_executor = garf.executors.setup_executor(
       request.source, request.context.fetcher_parameters
     )
-    execution_context = …
+    execution_context = garf.executors.execution_context.ExecutionContext(
       **MessageToDict(request.context, preserving_proto_field_name=True)
     )
     result = query_executor.execute(
@@ -42,6 +41,22 @@ class GarfService(garf_pb2_grpc.GarfService):
     )
     return garf_pb2.ExecuteResponse(results=[result])
 
+  def Fetch(self, request, context):
+    query_executor = garf.executors.setup_executor(
+      request.source, request.context.fetcher_parameters
+    )
+    execution_context = garf.executors.execution_context.ExecutionContext(
+      **MessageToDict(request.context, preserving_proto_field_name=True)
+    )
+    result = query_executor.fetcher.fetch(
+      query_specification=request.query,
+      title=request.title,
+      args=execution_context.query_parameters,
+    )
+    return garf_pb2.FetchResponse(
+      columns=result.column_names, rows=result.to_list(row_type='dict')
+    )
+
 
 if __name__ == '__main__':
   parser = argparse.ArgumentParser()
```
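The service gains a `Fetch` RPC that returns raw columns and rows instead of writing results. A client sketch under explicit assumptions: the generated stub follows standard gRPC naming (`GarfServiceStub`), the request message is `FetchRequest`, and it carries the `source`, `query`, and `title` fields dereferenced above; none of these names are confirmed by the diff, only the `FetchResponse` fields are:

```python
import grpc

from garf.executors import garf_pb2, garf_pb2_grpc

channel = grpc.insecure_channel('localhost:50051')  # address is illustrative
stub = garf_pb2_grpc.GarfServiceStub(channel)  # assumed generated name

# FetchRequest and its fields are assumptions inferred from request.* usage.
response = stub.Fetch(
  garf_pb2.FetchRequest(source='fake-api', query='SELECT ...', title='demo')
)
print(response.columns, response.rows)
```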
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/server.py
RENAMED
```diff
@@ -17,18 +17,20 @@
 from typing import Optional, Union
 
 import fastapi
+import garf.executors
 import pydantic
+import typer
 import uvicorn
-from …
+from garf.executors import exceptions
+from garf.executors.entrypoints.tracer import initialize_tracer
+from garf.io import reader
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
-
-import garf_executors
-from garf_executors import exceptions
-from garf_executors.entrypoints.tracer import initialize_tracer
+from typing_extensions import Annotated
 
 initialize_tracer()
 app = fastapi.FastAPI()
 FastAPIInstrumentor.instrument_app(app)
+typer_app = typer.Typer()
 
 
 class ApiExecutorRequest(pydantic.BaseModel):
@@ -46,7 +48,7 @@ class ApiExecutorRequest(pydantic.BaseModel):
   title: Optional[str] = None
   query: Optional[str] = None
   query_path: Optional[Union[str, list[str]]] = None
-  context: …
+  context: garf.executors.api_executor.ApiExecutionContext
 
   @pydantic.model_validator(mode='after')
   def check_query_specified(self):
@@ -75,18 +77,18 @@ class ApiExecutorResponse(pydantic.BaseModel):
 
 @app.get('/api/version')
 async def version() -> str:
-  return …
+  return garf.executors.__version__
 
 
 @app.get('/api/fetchers')
 async def get_fetchers() -> list[str]:
   """Shows all available API sources."""
-  return list(…
+  return list(garf.executors.fetchers.find_fetchers())
 
 
 @app.post('/api/execute')
-async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = …
+def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
+  query_executor = garf.executors.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   result = query_executor.execute(request.query, request.title, request.context)
@@ -95,7 +97,7 @@ async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
 
 @app.post('/api/execute:batch')
 def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
-  query_executor = …
+  query_executor = garf.executors.setup_executor(
     request.source, request.context.fetcher_parameters
   )
   reader_client = reader.FileReader()
@@ -104,5 +106,12 @@ def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
   return ApiExecutorResponse(results=results)
 
 
+@typer_app.command()
+def main(
+  port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
+):
+  uvicorn.run(app, port=port)
+
+
 if __name__ == '__main__':
-  …
+  typer_app()
```
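The FastAPI app is now started through a Typer command with a `--port` option defaulting to 8000. A quick smoke test against two endpoints defined above, assuming the server is running locally on the default port:

```bash
# Returns garf.executors.__version__
curl http://localhost:8000/api/version

# Lists available fetcher aliases
curl http://localhost:8000/api/fetchers
```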
{garf_executors-0.1.7/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/tracer.py
RENAMED
```diff
@@ -1,4 +1,4 @@
-# Copyright …
+# Copyright 2026 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,8 +35,23 @@ def initialize_tracer():
   tracer_provider = TracerProvider(resource=resource)
 
   if otel_endpoint := os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
-… (removed lines 38-41 not preserved in this diff view)
+    if gcp_project_id := os.getenv('OTEL_EXPORTER_GCP_PROJECT_ID'):
+      try:
+        from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
+      except ImportError as e:
+        raise ImportError(
+          'Please install garf-executors with GCP support '
+          '- `pip install garf-executors[gcp]`'
+        ) from e
+
+      cloud_span_processor = BatchSpanProcessor(
+        CloudTraceSpanExporter(project_id=gcp_project_id)
+      )
+      tracer_provider.add_span_processor(cloud_span_processor)
+    else:
+      otlp_processor = BatchSpanProcessor(
+        OTLPSpanExporter(endpoint=otel_endpoint, insecure=True)
+      )
+      tracer_provider.add_span_processor(otlp_processor)
+
   trace.set_tracer_provider(tracer_provider)
```