garf-executors 0.2.0__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {garf_executors-0.2.0 → garf_executors-1.0.7}/PKG-INFO +23 -7
- {garf_executors-0.2.0 → garf_executors-1.0.7}/README.md +8 -2
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/__init__.py +6 -6
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/api_executor.py +32 -11
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/bq_executor.py +42 -32
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/config.py +4 -3
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/cli.py +66 -11
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/grpc_server.py +21 -6
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/server.py +21 -12
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/tracer.py +20 -5
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/execution_context.py +6 -5
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/executor.py +4 -4
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/fetchers.py +7 -5
- garf_executors-1.0.7/garf/executors/garf_pb2.py +51 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/garf_pb2_grpc.py +45 -2
- garf_executors-1.0.7/garf/executors/query_processor.py +77 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/sql_executor.py +24 -11
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/telemetry.py +1 -1
- garf_executors-1.0.7/garf/executors/workflow.py +109 -0
- garf_executors-1.0.7/garf_executors/__init__.py +25 -0
- garf_executors-1.0.7/garf_executors/api_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/bq_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/config.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/__init__.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/cli.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/grcp_server.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/server.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/tracer.py +25 -0
- garf_executors-1.0.7/garf_executors/entrypoints/utils.py +25 -0
- garf_executors-1.0.7/garf_executors/exceptions.py +25 -0
- garf_executors-1.0.7/garf_executors/execution_context.py +25 -0
- garf_executors-1.0.7/garf_executors/executor.py +25 -0
- garf_executors-1.0.7/garf_executors/fetchers.py +25 -0
- garf_executors-1.0.7/garf_executors/sql_executor.py +25 -0
- garf_executors-1.0.7/garf_executors/telemetry.py +25 -0
- garf_executors-1.0.7/garf_executors/workflow.py +25 -0
- {garf_executors-0.2.0 → garf_executors-1.0.7}/garf_executors.egg-info/PKG-INFO +23 -7
- garf_executors-1.0.7/garf_executors.egg-info/SOURCES.txt +45 -0
- garf_executors-1.0.7/garf_executors.egg-info/entry_points.txt +2 -0
- {garf_executors-0.2.0 → garf_executors-1.0.7}/garf_executors.egg-info/requires.txt +16 -4
- {garf_executors-0.2.0 → garf_executors-1.0.7}/garf_executors.egg-info/top_level.txt +1 -0
- {garf_executors-0.2.0 → garf_executors-1.0.7}/pyproject.toml +22 -6
- garf_executors-0.2.0/garf_executors/garf_pb2.py +0 -45
- garf_executors-0.2.0/garf_executors.egg-info/SOURCES.txt +0 -26
- garf_executors-0.2.0/garf_executors.egg-info/entry_points.txt +0 -2
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/__init__.py +0 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/utils.py +0 -0
- {garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/exceptions.py +0 -0
- {garf_executors-0.2.0 → garf_executors-1.0.7}/garf_executors.egg-info/dependency_links.txt +0 -0
- {garf_executors-0.2.0 → garf_executors-1.0.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: garf-executors
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.7
|
|
4
4
|
Summary: Executes queries against API and writes data to local/remote storage.
|
|
5
5
|
Author-email: "Google Inc. (gTech gPS CSE team)" <no-reply@google.com>, Andrei Markin <andrey.markin.ppc@gmail.com>
|
|
6
6
|
License: Apache 2.0
|
|
@@ -17,25 +17,35 @@ Classifier: Operating System :: OS Independent
|
|
|
17
17
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
18
18
|
Requires-Python: >=3.9
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
|
-
Requires-Dist: garf-core
|
|
21
|
-
Requires-Dist: garf-io
|
|
20
|
+
Requires-Dist: garf-core>=1.0.0
|
|
21
|
+
Requires-Dist: garf-io>=1.0.0
|
|
22
22
|
Requires-Dist: pyyaml
|
|
23
23
|
Requires-Dist: pydantic
|
|
24
24
|
Requires-Dist: opentelemetry-api
|
|
25
25
|
Requires-Dist: opentelemetry-sdk
|
|
26
|
+
Requires-Dist: opentelemetry-exporter-otlp
|
|
26
27
|
Provides-Extra: bq
|
|
27
28
|
Requires-Dist: garf-io[bq]; extra == "bq"
|
|
28
29
|
Requires-Dist: pandas; extra == "bq"
|
|
29
30
|
Requires-Dist: google-cloud-logging; extra == "bq"
|
|
31
|
+
Requires-Dist: smart_open[gcs]; extra == "bq"
|
|
30
32
|
Provides-Extra: sql
|
|
31
33
|
Requires-Dist: garf-io[sqlalchemy]; extra == "sql"
|
|
32
34
|
Requires-Dist: pandas; extra == "sql"
|
|
35
|
+
Provides-Extra: gcp
|
|
36
|
+
Requires-Dist: opentelemetry-exporter-gcp-trace; extra == "gcp"
|
|
33
37
|
Provides-Extra: server
|
|
34
38
|
Requires-Dist: fastapi[standard]; extra == "server"
|
|
35
39
|
Requires-Dist: opentelemetry-instrumentation-fastapi; extra == "server"
|
|
36
|
-
Requires-Dist:
|
|
40
|
+
Requires-Dist: typer; extra == "server"
|
|
41
|
+
Requires-Dist: grpcio-reflection; extra == "server"
|
|
42
|
+
Provides-Extra: tests
|
|
43
|
+
Requires-Dist: pytest; extra == "tests"
|
|
44
|
+
Requires-Dist: pytest-mock; extra == "tests"
|
|
45
|
+
Requires-Dist: pytest-xdist; extra == "tests"
|
|
46
|
+
Requires-Dist: pytest-grpc; extra == "tests"
|
|
37
47
|
Provides-Extra: all
|
|
38
|
-
Requires-Dist: garf-executors[bq,server,sql]; extra == "all"
|
|
48
|
+
Requires-Dist: garf-executors[bq,gcp,server,sql]; extra == "all"
|
|
39
49
|
|
|
40
50
|
# `garf-executors` - One stop-shop for interacting with Reporting APIs.
|
|
41
51
|
|
|
@@ -64,8 +74,14 @@ garf <QUERIES> --source <API_SOURCE> \
|
|
|
64
74
|
where
|
|
65
75
|
|
|
66
76
|
* `<QUERIES>`- local or remote path(s) to files with queries.
|
|
67
|
-
*
|
|
68
|
-
*
|
|
77
|
+
* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
|
|
78
|
+
* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
|
|
69
79
|
|
|
70
80
|
If your report fetcher requires additional parameters, you can pass them as key-value pairs under the `--source.` argument, e.g. `--source.regionCode='US'` - to get data only from *US*.
|
|
71
81
|
> Concrete `--source` parameters depend on the particular report fetcher and should be looked up in the documentation for that fetcher.
|
|
82
|
+
|
|
83
|
+
## Documentation
|
|
84
|
+
|
|
85
|
+
Explore the full documentation on working with `garf-executors`:
|
|
86
|
+
|
|
87
|
+
* [Documentation](https://google.github.io/garf/usage/executors/)
|
|
@@ -25,8 +25,14 @@ garf <QUERIES> --source <API_SOURCE> \
|
|
|
25
25
|
where
|
|
26
26
|
|
|
27
27
|
* `<QUERIES>`- local or remote path(s) to files with queries.
|
|
28
|
-
*
|
|
29
|
-
*
|
|
28
|
+
* `source`- type of API to use. Based on that the appropriate report fetcher will be initialized. Explore supported APIs [here](https://google.github.io/garf/fetchers/overview/)
|
|
29
|
+
* `output` - output supported by [`garf-io` library](https://google.github.io/garf/usage/writers/).
|
|
30
30
|
|
|
31
31
|
If your report fetcher requires additional parameters, you can pass them as key-value pairs under the `--source.` argument, e.g. `--source.regionCode='US'` - to get data only from *US*.
|
|
32
32
|
> Concrete `--source` parameters depend on the particular report fetcher and should be looked up in the documentation for that fetcher.
|
|
33
|
+
|
|
34
|
+
## Documentation
|
|
35
|
+
|
|
36
|
+
Explore the full documentation on working with `garf-executors`:
|
|
37
|
+
|
|
38
|
+
* [Documentation](https://google.github.io/garf/usage/executors/)
|
|
@@ -17,9 +17,9 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import importlib
|
|
19
19
|
|
|
20
|
-
from
|
|
21
|
-
from
|
|
22
|
-
from
|
|
20
|
+
from garf.executors import executor, fetchers
|
|
21
|
+
from garf.executors.api_executor import ApiExecutionContext, ApiQueryExecutor
|
|
22
|
+
from garf.executors.telemetry import tracer
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
@tracer.start_as_current_span('setup_executor')
|
|
@@ -31,10 +31,10 @@ def setup_executor(
|
|
|
31
31
|
) -> type[executor.Executor]:
|
|
32
32
|
"""Initializes executors based on a source and parameters."""
|
|
33
33
|
if source == 'bq':
|
|
34
|
-
bq_executor = importlib.import_module('
|
|
34
|
+
bq_executor = importlib.import_module('garf.executors.bq_executor')
|
|
35
35
|
query_executor = bq_executor.BigQueryExecutor(**fetcher_parameters)
|
|
36
36
|
elif source == 'sqldb':
|
|
37
|
-
sql_executor = importlib.import_module('
|
|
37
|
+
sql_executor = importlib.import_module('garf.executors.sql_executor')
|
|
38
38
|
query_executor = (
|
|
39
39
|
sql_executor.SqlAlchemyQueryExecutor.from_connection_string(
|
|
40
40
|
fetcher_parameters.get('connection_string')
|
|
@@ -57,4 +57,4 @@ __all__ = [
|
|
|
57
57
|
'ApiExecutionContext',
|
|
58
58
|
]
|
|
59
59
|
|
|
60
|
-
__version__ = '0.
|
|
60
|
+
__version__ = '1.0.7'
|
|
@@ -21,13 +21,19 @@ GarfReport and saving it to local/remote storage.
|
|
|
21
21
|
from __future__ import annotations
|
|
22
22
|
|
|
23
23
|
import logging
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
import pathlib
|
|
25
|
+
|
|
26
|
+
from garf.core import report_fetcher
|
|
27
|
+
from garf.executors import (
|
|
28
|
+
exceptions,
|
|
29
|
+
execution_context,
|
|
30
|
+
executor,
|
|
31
|
+
fetchers,
|
|
32
|
+
query_processor,
|
|
33
|
+
)
|
|
34
|
+
from garf.executors.telemetry import tracer
|
|
26
35
|
from opentelemetry import trace
|
|
27
36
|
|
|
28
|
-
from garf_executors import exceptions, execution_context, executor, fetchers
|
|
29
|
-
from garf_executors.telemetry import tracer
|
|
30
|
-
|
|
31
37
|
logger = logging.getLogger(__name__)
|
|
32
38
|
|
|
33
39
|
|
|
@@ -58,12 +64,22 @@ class ApiQueryExecutor(executor.Executor):
|
|
|
58
64
|
|
|
59
65
|
@classmethod
|
|
60
66
|
def from_fetcher_alias(
|
|
61
|
-
cls,
|
|
67
|
+
cls,
|
|
68
|
+
source: str,
|
|
69
|
+
fetcher_parameters: dict[str, str] | None = None,
|
|
70
|
+
enable_cache: bool = False,
|
|
71
|
+
cache_ttl_seconds: int = 3600,
|
|
62
72
|
) -> ApiQueryExecutor:
|
|
63
73
|
if not fetcher_parameters:
|
|
64
74
|
fetcher_parameters = {}
|
|
65
75
|
concrete_api_fetcher = fetchers.get_report_fetcher(source)
|
|
66
|
-
return ApiQueryExecutor(
|
|
76
|
+
return ApiQueryExecutor(
|
|
77
|
+
fetcher=concrete_api_fetcher(
|
|
78
|
+
**fetcher_parameters,
|
|
79
|
+
enable_cache=enable_cache,
|
|
80
|
+
cache_ttl_seconds=cache_ttl_seconds,
|
|
81
|
+
)
|
|
82
|
+
)
|
|
67
83
|
|
|
68
84
|
@tracer.start_as_current_span('api.execute')
|
|
69
85
|
def execute(
|
|
@@ -85,16 +101,21 @@ class ApiQueryExecutor(executor.Executor):
|
|
|
85
101
|
Raises:
|
|
86
102
|
GarfExecutorError: When failed to execute query.
|
|
87
103
|
"""
|
|
104
|
+
context = query_processor.process_gquery(context)
|
|
88
105
|
span = trace.get_current_span()
|
|
89
|
-
span.set_attribute('fetcher', self.fetcher.__class__.__name__)
|
|
90
|
-
span.set_attribute(
|
|
106
|
+
span.set_attribute('fetcher.class', self.fetcher.__class__.__name__)
|
|
107
|
+
span.set_attribute(
|
|
108
|
+
'api.client.class', self.fetcher.api_client.__class__.__name__
|
|
109
|
+
)
|
|
91
110
|
try:
|
|
92
|
-
span.set_attribute('
|
|
93
|
-
span.set_attribute('
|
|
111
|
+
span.set_attribute('query.title', title)
|
|
112
|
+
span.set_attribute('query.text', query)
|
|
94
113
|
logger.debug('starting query %s', query)
|
|
114
|
+
title = pathlib.Path(title).name.split('.')[0]
|
|
95
115
|
results = self.fetcher.fetch(
|
|
96
116
|
query_specification=query,
|
|
97
117
|
args=context.query_parameters,
|
|
118
|
+
title=title,
|
|
98
119
|
**context.fetcher_parameters,
|
|
99
120
|
)
|
|
100
121
|
writer_clients = context.writer_clients
|
|
@@ -28,11 +28,11 @@ except ImportError as e:
|
|
|
28
28
|
|
|
29
29
|
import logging
|
|
30
30
|
|
|
31
|
-
from
|
|
31
|
+
from garf.core import query_editor, report
|
|
32
|
+
from garf.executors import exceptions, execution_context, executor
|
|
33
|
+
from garf.executors.telemetry import tracer
|
|
32
34
|
from google.cloud import exceptions as google_cloud_exceptions
|
|
33
|
-
|
|
34
|
-
from garf_executors import exceptions, execution_context, executor
|
|
35
|
-
from garf_executors.telemetry import tracer
|
|
35
|
+
from opentelemetry import trace
|
|
36
36
|
|
|
37
37
|
logger = logging.getLogger(__name__)
|
|
38
38
|
|
|
@@ -54,6 +54,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
54
54
|
self,
|
|
55
55
|
project_id: str | None = os.getenv('GOOGLE_CLOUD_PROJECT'),
|
|
56
56
|
location: str | None = None,
|
|
57
|
+
**kwargs: str,
|
|
57
58
|
) -> None:
|
|
58
59
|
"""Initializes BigQueryExecutor.
|
|
59
60
|
|
|
@@ -68,6 +69,7 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
68
69
|
)
|
|
69
70
|
self.project_id = project_id
|
|
70
71
|
self.location = location
|
|
72
|
+
super().__init__()
|
|
71
73
|
|
|
72
74
|
@property
|
|
73
75
|
def client(self) -> bigquery.Client:
|
|
@@ -93,41 +95,49 @@ class BigQueryExecutor(executor.Executor, query_editor.TemplateProcessorMixin):
|
|
|
93
95
|
Returns:
|
|
94
96
|
Report with data if query returns some data otherwise empty Report.
|
|
95
97
|
"""
|
|
98
|
+
span = trace.get_current_span()
|
|
99
|
+
span.set_attribute('query.title', title)
|
|
100
|
+
span.set_attribute('query.text', query)
|
|
101
|
+
logger.info('Executing script: %s', title)
|
|
96
102
|
query_text = self.replace_params_template(query, context.query_parameters)
|
|
97
103
|
self.create_datasets(context.query_parameters.macro)
|
|
98
104
|
job = self.client.query(query_text)
|
|
99
105
|
try:
|
|
100
106
|
result = job.result()
|
|
107
|
+
except google_cloud_exceptions.GoogleCloudError as e:
|
|
108
|
+
raise BigQueryExecutorError(
|
|
109
|
+
f'Failed to execute query {title}: Reason: {e}'
|
|
110
|
+
) from e
|
|
101
111
|
logger.debug('%s launched successfully', title)
|
|
102
|
-
|
|
103
|
-
|
|
112
|
+
if result.total_rows:
|
|
113
|
+
results = report.GarfReport.from_pandas(result.to_dataframe())
|
|
114
|
+
else:
|
|
115
|
+
results = report.GarfReport()
|
|
116
|
+
if context.writer and results:
|
|
117
|
+
writer_clients = context.writer_clients
|
|
118
|
+
if not writer_clients:
|
|
119
|
+
logger.warning('No writers configured, skipping write operation')
|
|
104
120
|
else:
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
logger.info('%s executed successfully', title)
|
|
126
|
-
# Return the last writer's result for backward compatibility
|
|
127
|
-
return writing_results[-1] if writing_results else None
|
|
128
|
-
return results
|
|
129
|
-
except google_cloud_exceptions.GoogleCloudError as e:
|
|
130
|
-
raise BigQueryExecutorError(e) from e
|
|
121
|
+
writing_results = []
|
|
122
|
+
for writer_client in writer_clients:
|
|
123
|
+
logger.debug(
|
|
124
|
+
'Start writing data for query %s via %s writer',
|
|
125
|
+
title,
|
|
126
|
+
type(writer_client),
|
|
127
|
+
)
|
|
128
|
+
writing_result = writer_client.write(results, title)
|
|
129
|
+
logger.debug(
|
|
130
|
+
'Finish writing data for query %s via %s writer',
|
|
131
|
+
title,
|
|
132
|
+
type(writer_client),
|
|
133
|
+
)
|
|
134
|
+
writing_results.append(writing_result)
|
|
135
|
+
# Return the last writer's result for backward compatibility
|
|
136
|
+
logger.info('%s executed successfully', title)
|
|
137
|
+
return writing_results[-1] if writing_results else None
|
|
138
|
+
logger.info('%s executed successfully', title)
|
|
139
|
+
span.set_attribute('execute.num_results', len(results))
|
|
140
|
+
return results
|
|
131
141
|
|
|
132
142
|
@tracer.start_as_current_span('bq.create_datasets')
|
|
133
143
|
def create_datasets(self, macros: dict | None) -> None:
|
|
@@ -24,8 +24,7 @@ import pathlib
|
|
|
24
24
|
import pydantic
|
|
25
25
|
import smart_open
|
|
26
26
|
import yaml
|
|
27
|
-
|
|
28
|
-
from garf_executors.execution_context import ExecutionContext
|
|
27
|
+
from garf.executors.execution_context import ExecutionContext
|
|
29
28
|
|
|
30
29
|
|
|
31
30
|
class Config(pydantic.BaseModel):
|
|
@@ -47,5 +46,7 @@ class Config(pydantic.BaseModel):
|
|
|
47
46
|
def save(self, path: str | pathlib.Path | os.PathLike[str]) -> str:
|
|
48
47
|
"""Saves config to local or remote yaml file."""
|
|
49
48
|
with smart_open.open(path, 'w', encoding='utf-8') as f:
|
|
50
|
-
yaml.dump(
|
|
49
|
+
yaml.dump(
|
|
50
|
+
self.model_dump(exclude_none=True).get('sources'), f, encoding='utf-8'
|
|
51
|
+
)
|
|
51
52
|
return f'Config is saved to {str(path)}'
|
{garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/cli.py
RENAMED
|
@@ -21,15 +21,17 @@ from __future__ import annotations
|
|
|
21
21
|
|
|
22
22
|
import argparse
|
|
23
23
|
import logging
|
|
24
|
+
import pathlib
|
|
25
|
+
import re
|
|
24
26
|
import sys
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
import
|
|
29
|
-
from
|
|
30
|
-
from
|
|
31
|
-
from
|
|
32
|
-
from
|
|
28
|
+
import garf.executors
|
|
29
|
+
from garf.executors import config, exceptions, workflow
|
|
30
|
+
from garf.executors.entrypoints import utils
|
|
31
|
+
from garf.executors.entrypoints.tracer import initialize_tracer
|
|
32
|
+
from garf.executors.telemetry import tracer
|
|
33
|
+
from garf.io import reader
|
|
34
|
+
from opentelemetry import trace
|
|
33
35
|
|
|
34
36
|
initialize_tracer()
|
|
35
37
|
|
|
@@ -39,6 +41,7 @@ def main():
|
|
|
39
41
|
parser = argparse.ArgumentParser()
|
|
40
42
|
parser.add_argument('query', nargs='*')
|
|
41
43
|
parser.add_argument('-c', '--config', dest='config', default=None)
|
|
44
|
+
parser.add_argument('-w', '--workflow', dest='workflow', default=None)
|
|
42
45
|
parser.add_argument('--source', dest='source', default=None)
|
|
43
46
|
parser.add_argument('--output', dest='output', default='console')
|
|
44
47
|
parser.add_argument('--input', dest='input', default='file')
|
|
@@ -70,18 +73,70 @@ def main():
|
|
|
70
73
|
parser.set_defaults(dry_run=False)
|
|
71
74
|
args, kwargs = parser.parse_known_args()
|
|
72
75
|
|
|
76
|
+
span = trace.get_current_span()
|
|
77
|
+
command_args = ' '.join(sys.argv[1:])
|
|
78
|
+
span.set_attribute('cli.command', f'garf {command_args}')
|
|
73
79
|
if args.version:
|
|
74
|
-
print(
|
|
80
|
+
print(garf.executors.__version__)
|
|
75
81
|
sys.exit()
|
|
76
82
|
logger = utils.init_logging(
|
|
77
83
|
loglevel=args.loglevel.upper(), logger_type=args.logger, name=args.log_name
|
|
78
84
|
)
|
|
85
|
+
reader_client = reader.create_reader(args.input)
|
|
86
|
+
if workflow_file := args.workflow:
|
|
87
|
+
wf_parent = pathlib.Path.cwd() / pathlib.Path(workflow_file).parent
|
|
88
|
+
execution_workflow = workflow.Workflow.from_file(workflow_file)
|
|
89
|
+
for i, step in enumerate(execution_workflow.steps, 1):
|
|
90
|
+
step_span_name = f'{i}-{step.fetcher}'
|
|
91
|
+
if step.alias:
|
|
92
|
+
step_span_name = f'{step_span_name}-{step.alias}'
|
|
93
|
+
with tracer.start_as_current_span(step_span_name):
|
|
94
|
+
query_executor = garf.executors.setup_executor(
|
|
95
|
+
source=step.fetcher,
|
|
96
|
+
fetcher_parameters=step.fetcher_parameters,
|
|
97
|
+
enable_cache=args.enable_cache,
|
|
98
|
+
cache_ttl_seconds=args.cache_ttl_seconds,
|
|
99
|
+
)
|
|
100
|
+
batch = {}
|
|
101
|
+
if not (queries := step.queries):
|
|
102
|
+
logger.error('Please provide one or more queries to run')
|
|
103
|
+
raise exceptions.GarfExecutorError(
|
|
104
|
+
'Please provide one or more queries to run'
|
|
105
|
+
)
|
|
106
|
+
for query in queries:
|
|
107
|
+
if isinstance(query, garf.executors.workflow.QueryPath):
|
|
108
|
+
if re.match(
|
|
109
|
+
'^(http|gs|s3|aruze|hdfs|webhdfs|ssh|scp|sftp)', query.path
|
|
110
|
+
):
|
|
111
|
+
batch[query.path] = reader_client.read(query.path)
|
|
112
|
+
else:
|
|
113
|
+
query_path = wf_parent / pathlib.Path(query.path)
|
|
114
|
+
if not query_path.exists():
|
|
115
|
+
raise workflow.GarfWorkflowError(
|
|
116
|
+
f'Query: {query_path} not found'
|
|
117
|
+
)
|
|
118
|
+
batch[query.path] = reader_client.read(query_path)
|
|
119
|
+
elif isinstance(query, garf.executors.workflow.QueryFolder):
|
|
120
|
+
query_path = wf_parent / pathlib.Path(query.folder)
|
|
121
|
+
if not query_path.exists():
|
|
122
|
+
raise workflow.GarfWorkflowError(
|
|
123
|
+
f'Folder: {query_path} not found'
|
|
124
|
+
)
|
|
125
|
+
for p in query_path.rglob('*'):
|
|
126
|
+
if p.suffix == '.sql':
|
|
127
|
+
batch[p.stem] = reader_client.read(p)
|
|
128
|
+
else:
|
|
129
|
+
batch[query.query.title] = query.query.text
|
|
130
|
+
query_executor.execute_batch(
|
|
131
|
+
batch, step.context, args.parallel_threshold
|
|
132
|
+
)
|
|
133
|
+
sys.exit()
|
|
134
|
+
|
|
79
135
|
if not args.query:
|
|
80
136
|
logger.error('Please provide one or more queries to run')
|
|
81
137
|
raise exceptions.GarfExecutorError(
|
|
82
138
|
'Please provide one or more queries to run'
|
|
83
139
|
)
|
|
84
|
-
reader_client = reader.create_reader(args.input)
|
|
85
140
|
if config_file := args.config:
|
|
86
141
|
execution_config = config.Config.from_file(config_file)
|
|
87
142
|
if not (context := execution_config.sources.get(args.source)):
|
|
@@ -99,7 +154,7 @@ def main():
|
|
|
99
154
|
for output in outputs:
|
|
100
155
|
writer_parameters.update(extra_parameters.get(output))
|
|
101
156
|
|
|
102
|
-
context =
|
|
157
|
+
context = garf.executors.api_executor.ApiExecutionContext(
|
|
103
158
|
query_parameters={
|
|
104
159
|
'macro': extra_parameters.get('macro'),
|
|
105
160
|
'template': extra_parameters.get('template'),
|
|
@@ -108,7 +163,7 @@ def main():
|
|
|
108
163
|
writer_parameters=writer_parameters,
|
|
109
164
|
fetcher_parameters=source_parameters,
|
|
110
165
|
)
|
|
111
|
-
query_executor =
|
|
166
|
+
query_executor = garf.executors.setup_executor(
|
|
112
167
|
source=args.source,
|
|
113
168
|
fetcher_parameters=context.fetcher_parameters,
|
|
114
169
|
enable_cache=args.enable_cache,
|
|
@@ -18,21 +18,20 @@ import argparse
|
|
|
18
18
|
import logging
|
|
19
19
|
from concurrent import futures
|
|
20
20
|
|
|
21
|
+
import garf.executors
|
|
21
22
|
import grpc
|
|
23
|
+
from garf.executors import garf_pb2, garf_pb2_grpc
|
|
24
|
+
from garf.executors.entrypoints.tracer import initialize_tracer
|
|
22
25
|
from google.protobuf.json_format import MessageToDict
|
|
23
26
|
from grpc_reflection.v1alpha import reflection
|
|
24
27
|
|
|
25
|
-
import garf_executors
|
|
26
|
-
from garf_executors import garf_pb2, garf_pb2_grpc
|
|
27
|
-
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
28
|
-
|
|
29
28
|
|
|
30
29
|
class GarfService(garf_pb2_grpc.GarfService):
|
|
31
30
|
def Execute(self, request, context):
|
|
32
|
-
query_executor =
|
|
31
|
+
query_executor = garf.executors.setup_executor(
|
|
33
32
|
request.source, request.context.fetcher_parameters
|
|
34
33
|
)
|
|
35
|
-
execution_context =
|
|
34
|
+
execution_context = garf.executors.execution_context.ExecutionContext(
|
|
36
35
|
**MessageToDict(request.context, preserving_proto_field_name=True)
|
|
37
36
|
)
|
|
38
37
|
result = query_executor.execute(
|
|
@@ -42,6 +41,22 @@ class GarfService(garf_pb2_grpc.GarfService):
|
|
|
42
41
|
)
|
|
43
42
|
return garf_pb2.ExecuteResponse(results=[result])
|
|
44
43
|
|
|
44
|
+
def Fetch(self, request, context):
|
|
45
|
+
query_executor = garf.executors.setup_executor(
|
|
46
|
+
request.source, request.context.fetcher_parameters
|
|
47
|
+
)
|
|
48
|
+
execution_context = garf.executors.execution_context.ExecutionContext(
|
|
49
|
+
**MessageToDict(request.context, preserving_proto_field_name=True)
|
|
50
|
+
)
|
|
51
|
+
result = query_executor.fetcher.fetch(
|
|
52
|
+
query_specification=request.query,
|
|
53
|
+
title=request.title,
|
|
54
|
+
args=execution_context.query_parameters,
|
|
55
|
+
)
|
|
56
|
+
return garf_pb2.FetchResponse(
|
|
57
|
+
columns=result.column_names, rows=result.to_list(row_type='dict')
|
|
58
|
+
)
|
|
59
|
+
|
|
45
60
|
|
|
46
61
|
if __name__ == '__main__':
|
|
47
62
|
parser = argparse.ArgumentParser()
|
{garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/server.py
RENAMED
|
@@ -17,18 +17,20 @@
|
|
|
17
17
|
from typing import Optional, Union
|
|
18
18
|
|
|
19
19
|
import fastapi
|
|
20
|
+
import garf.executors
|
|
20
21
|
import pydantic
|
|
22
|
+
import typer
|
|
21
23
|
import uvicorn
|
|
22
|
-
from
|
|
24
|
+
from garf.executors import exceptions
|
|
25
|
+
from garf.executors.entrypoints.tracer import initialize_tracer
|
|
26
|
+
from garf.io import reader
|
|
23
27
|
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
|
24
|
-
|
|
25
|
-
import garf_executors
|
|
26
|
-
from garf_executors import exceptions
|
|
27
|
-
from garf_executors.entrypoints.tracer import initialize_tracer
|
|
28
|
+
from typing_extensions import Annotated
|
|
28
29
|
|
|
29
30
|
initialize_tracer()
|
|
30
31
|
app = fastapi.FastAPI()
|
|
31
32
|
FastAPIInstrumentor.instrument_app(app)
|
|
33
|
+
typer_app = typer.Typer()
|
|
32
34
|
|
|
33
35
|
|
|
34
36
|
class ApiExecutorRequest(pydantic.BaseModel):
|
|
@@ -46,7 +48,7 @@ class ApiExecutorRequest(pydantic.BaseModel):
|
|
|
46
48
|
title: Optional[str] = None
|
|
47
49
|
query: Optional[str] = None
|
|
48
50
|
query_path: Optional[Union[str, list[str]]] = None
|
|
49
|
-
context:
|
|
51
|
+
context: garf.executors.api_executor.ApiExecutionContext
|
|
50
52
|
|
|
51
53
|
@pydantic.model_validator(mode='after')
|
|
52
54
|
def check_query_specified(self):
|
|
@@ -75,18 +77,18 @@ class ApiExecutorResponse(pydantic.BaseModel):
|
|
|
75
77
|
|
|
76
78
|
@app.get('/api/version')
|
|
77
79
|
async def version() -> str:
|
|
78
|
-
return
|
|
80
|
+
return garf.executors.__version__
|
|
79
81
|
|
|
80
82
|
|
|
81
83
|
@app.get('/api/fetchers')
|
|
82
84
|
async def get_fetchers() -> list[str]:
|
|
83
85
|
"""Shows all available API sources."""
|
|
84
|
-
return list(
|
|
86
|
+
return list(garf.executors.fetchers.find_fetchers())
|
|
85
87
|
|
|
86
88
|
|
|
87
89
|
@app.post('/api/execute')
|
|
88
|
-
|
|
89
|
-
query_executor =
|
|
90
|
+
def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
91
|
+
query_executor = garf.executors.setup_executor(
|
|
90
92
|
request.source, request.context.fetcher_parameters
|
|
91
93
|
)
|
|
92
94
|
result = query_executor.execute(request.query, request.title, request.context)
|
|
@@ -95,7 +97,7 @@ async def execute(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
|
95
97
|
|
|
96
98
|
@app.post('/api/execute:batch')
|
|
97
99
|
def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
98
|
-
query_executor =
|
|
100
|
+
query_executor = garf.executors.setup_executor(
|
|
99
101
|
request.source, request.context.fetcher_parameters
|
|
100
102
|
)
|
|
101
103
|
reader_client = reader.FileReader()
|
|
@@ -104,5 +106,12 @@ def execute_batch(request: ApiExecutorRequest) -> ApiExecutorResponse:
|
|
|
104
106
|
return ApiExecutorResponse(results=results)
|
|
105
107
|
|
|
106
108
|
|
|
109
|
+
@typer_app.command()
|
|
110
|
+
def main(
|
|
111
|
+
port: Annotated[int, typer.Option(help='Port to start the server')] = 8000,
|
|
112
|
+
):
|
|
113
|
+
uvicorn.run(app, port=port)
|
|
114
|
+
|
|
115
|
+
|
|
107
116
|
if __name__ == '__main__':
|
|
108
|
-
|
|
117
|
+
typer_app()
|
{garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/entrypoints/tracer.py
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2026 Google LLC
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -35,8 +35,23 @@ def initialize_tracer():
|
|
|
35
35
|
tracer_provider = TracerProvider(resource=resource)
|
|
36
36
|
|
|
37
37
|
if otel_endpoint := os.getenv('OTEL_EXPORTER_OTLP_ENDPOINT'):
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
38
|
+
if gcp_project_id := os.getenv('OTEL_EXPORTER_GCP_PROJECT_ID'):
|
|
39
|
+
try:
|
|
40
|
+
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
|
|
41
|
+
except ImportError as e:
|
|
42
|
+
raise ImportError(
|
|
43
|
+
'Please install garf-executors with GCP support '
|
|
44
|
+
'- `pip install garf-executors[gcp]`'
|
|
45
|
+
) from e
|
|
46
|
+
|
|
47
|
+
cloud_span_processor = BatchSpanProcessor(
|
|
48
|
+
CloudTraceSpanExporter(project_id=gcp_project_id)
|
|
49
|
+
)
|
|
50
|
+
tracer_provider.add_span_processor(cloud_span_processor)
|
|
51
|
+
else:
|
|
52
|
+
otlp_processor = BatchSpanProcessor(
|
|
53
|
+
OTLPSpanExporter(endpoint=otel_endpoint, insecure=True)
|
|
54
|
+
)
|
|
55
|
+
tracer_provider.add_span_processor(otlp_processor)
|
|
56
|
+
|
|
42
57
|
trace.set_tracer_provider(tracer_provider)
|
{garf_executors-0.2.0/garf_executors → garf_executors-1.0.7/garf/executors}/execution_context.py
RENAMED
|
@@ -20,13 +20,14 @@ from __future__ import annotations
|
|
|
20
20
|
|
|
21
21
|
import os
|
|
22
22
|
import pathlib
|
|
23
|
+
from typing import Any
|
|
23
24
|
|
|
24
25
|
import pydantic
|
|
25
26
|
import smart_open
|
|
26
27
|
import yaml
|
|
27
|
-
from
|
|
28
|
-
from
|
|
29
|
-
from
|
|
28
|
+
from garf.core import query_editor
|
|
29
|
+
from garf.io import writer
|
|
30
|
+
from garf.io.writers import abs_writer
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
class ExecutionContext(pydantic.BaseModel):
|
|
@@ -42,8 +43,8 @@ class ExecutionContext(pydantic.BaseModel):
|
|
|
42
43
|
query_parameters: query_editor.GarfQueryParameters | None = pydantic.Field(
|
|
43
44
|
default_factory=dict
|
|
44
45
|
)
|
|
45
|
-
fetcher_parameters: dict[str,
|
|
46
|
-
|
|
46
|
+
fetcher_parameters: dict[str, Any] | None = pydantic.Field(
|
|
47
|
+
default_factory=dict
|
|
47
48
|
)
|
|
48
49
|
writer: str | list[str] | None = None
|
|
49
50
|
writer_parameters: dict[str, str] | None = pydantic.Field(
|
|
@@ -18,12 +18,11 @@ import asyncio
|
|
|
18
18
|
import inspect
|
|
19
19
|
from typing import Optional
|
|
20
20
|
|
|
21
|
-
from
|
|
21
|
+
from garf.core import report_fetcher
|
|
22
|
+
from garf.executors import execution_context, query_processor
|
|
23
|
+
from garf.executors.telemetry import tracer
|
|
22
24
|
from opentelemetry import trace
|
|
23
25
|
|
|
24
|
-
from garf_executors import execution_context
|
|
25
|
-
from garf_executors.telemetry import tracer
|
|
26
|
-
|
|
27
26
|
|
|
28
27
|
class Executor:
|
|
29
28
|
"""Defines common functionality between executors."""
|
|
@@ -113,6 +112,7 @@ def _handle_processors(
|
|
|
113
112
|
processors: dict[str, report_fetcher.Processor],
|
|
114
113
|
context: execution_context.ExecutionContext,
|
|
115
114
|
) -> None:
|
|
115
|
+
context = query_processor.process_gquery(context)
|
|
116
116
|
for k, processor in processors.items():
|
|
117
117
|
processor_signature = list(inspect.signature(processor).parameters.keys())
|
|
118
118
|
if k in context.fetcher_parameters:
|