awx_zipline_ai-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. agent/__init__.py +1 -0
  2. agent/constants.py +15 -0
  3. agent/ttypes.py +1684 -0
  4. ai/__init__.py +0 -0
  5. ai/chronon/__init__.py +0 -0
  6. ai/chronon/airflow_helpers.py +251 -0
  7. ai/chronon/api/__init__.py +1 -0
  8. ai/chronon/api/common/__init__.py +1 -0
  9. ai/chronon/api/common/constants.py +15 -0
  10. ai/chronon/api/common/ttypes.py +1844 -0
  11. ai/chronon/api/constants.py +15 -0
  12. ai/chronon/api/ttypes.py +3624 -0
  13. ai/chronon/cli/compile/column_hashing.py +313 -0
  14. ai/chronon/cli/compile/compile_context.py +177 -0
  15. ai/chronon/cli/compile/compiler.py +160 -0
  16. ai/chronon/cli/compile/conf_validator.py +590 -0
  17. ai/chronon/cli/compile/display/class_tracker.py +112 -0
  18. ai/chronon/cli/compile/display/compile_status.py +95 -0
  19. ai/chronon/cli/compile/display/compiled_obj.py +12 -0
  20. ai/chronon/cli/compile/display/console.py +3 -0
  21. ai/chronon/cli/compile/display/diff_result.py +46 -0
  22. ai/chronon/cli/compile/fill_templates.py +40 -0
  23. ai/chronon/cli/compile/parse_configs.py +141 -0
  24. ai/chronon/cli/compile/parse_teams.py +238 -0
  25. ai/chronon/cli/compile/serializer.py +115 -0
  26. ai/chronon/cli/git_utils.py +156 -0
  27. ai/chronon/cli/logger.py +61 -0
  28. ai/chronon/constants.py +3 -0
  29. ai/chronon/eval/__init__.py +122 -0
  30. ai/chronon/eval/query_parsing.py +19 -0
  31. ai/chronon/eval/sample_tables.py +100 -0
  32. ai/chronon/eval/table_scan.py +186 -0
  33. ai/chronon/fetcher/__init__.py +1 -0
  34. ai/chronon/fetcher/constants.py +15 -0
  35. ai/chronon/fetcher/ttypes.py +127 -0
  36. ai/chronon/group_by.py +692 -0
  37. ai/chronon/hub/__init__.py +1 -0
  38. ai/chronon/hub/constants.py +15 -0
  39. ai/chronon/hub/ttypes.py +1228 -0
  40. ai/chronon/join.py +566 -0
  41. ai/chronon/logger.py +24 -0
  42. ai/chronon/model.py +35 -0
  43. ai/chronon/observability/__init__.py +1 -0
  44. ai/chronon/observability/constants.py +15 -0
  45. ai/chronon/observability/ttypes.py +2192 -0
  46. ai/chronon/orchestration/__init__.py +1 -0
  47. ai/chronon/orchestration/constants.py +15 -0
  48. ai/chronon/orchestration/ttypes.py +4406 -0
  49. ai/chronon/planner/__init__.py +1 -0
  50. ai/chronon/planner/constants.py +15 -0
  51. ai/chronon/planner/ttypes.py +1686 -0
  52. ai/chronon/query.py +126 -0
  53. ai/chronon/repo/__init__.py +40 -0
  54. ai/chronon/repo/aws.py +298 -0
  55. ai/chronon/repo/cluster.py +65 -0
  56. ai/chronon/repo/compile.py +56 -0
  57. ai/chronon/repo/constants.py +164 -0
  58. ai/chronon/repo/default_runner.py +291 -0
  59. ai/chronon/repo/explore.py +421 -0
  60. ai/chronon/repo/extract_objects.py +137 -0
  61. ai/chronon/repo/gcp.py +585 -0
  62. ai/chronon/repo/gitpython_utils.py +14 -0
  63. ai/chronon/repo/hub_runner.py +171 -0
  64. ai/chronon/repo/hub_uploader.py +108 -0
  65. ai/chronon/repo/init.py +53 -0
  66. ai/chronon/repo/join_backfill.py +105 -0
  67. ai/chronon/repo/run.py +293 -0
  68. ai/chronon/repo/serializer.py +141 -0
  69. ai/chronon/repo/team_json_utils.py +46 -0
  70. ai/chronon/repo/utils.py +472 -0
  71. ai/chronon/repo/zipline.py +51 -0
  72. ai/chronon/repo/zipline_hub.py +105 -0
  73. ai/chronon/resources/gcp/README.md +174 -0
  74. ai/chronon/resources/gcp/group_bys/test/__init__.py +0 -0
  75. ai/chronon/resources/gcp/group_bys/test/data.py +34 -0
  76. ai/chronon/resources/gcp/joins/test/__init__.py +0 -0
  77. ai/chronon/resources/gcp/joins/test/data.py +30 -0
  78. ai/chronon/resources/gcp/sources/test/__init__.py +0 -0
  79. ai/chronon/resources/gcp/sources/test/data.py +23 -0
  80. ai/chronon/resources/gcp/teams.py +70 -0
  81. ai/chronon/resources/gcp/zipline-cli-install.sh +54 -0
  82. ai/chronon/source.py +88 -0
  83. ai/chronon/staging_query.py +185 -0
  84. ai/chronon/types.py +57 -0
  85. ai/chronon/utils.py +557 -0
  86. ai/chronon/windows.py +50 -0
  87. awx_zipline_ai-0.2.0.dist-info/METADATA +173 -0
  88. awx_zipline_ai-0.2.0.dist-info/RECORD +93 -0
  89. awx_zipline_ai-0.2.0.dist-info/WHEEL +5 -0
  90. awx_zipline_ai-0.2.0.dist-info/entry_points.txt +2 -0
  91. awx_zipline_ai-0.2.0.dist-info/licenses/LICENSE +202 -0
  92. awx_zipline_ai-0.2.0.dist-info/top_level.txt +3 -0
  93. jars/__init__.py +0 -0
ai/chronon/repo/constants.py
@@ -0,0 +1,164 @@
+ from enum import Enum
+
+
+ class RunMode(str, Enum):
+     def __str__(self):
+         return self.value
+
+     BACKFILL = "backfill"
+     BACKFILL_LEFT = "backfill-left"
+     BACKFILL_FINAL = "backfill-final"
+     DEPLOY = "deploy"
+     UPLOAD = "upload"
+     UPLOAD_TO_KV = "upload-to-kv"
+     STATS_SUMMARY = "stats-summary"
+     LOG_SUMMARY = "log-summary"
+     ANALYZE = "analyze"
+     STREAMING = "streaming"
+     METADATA_UPLOAD = "metadata-upload"
+     FETCH = "fetch"
+     CONSISTENCY_METRICS_COMPUTE = "consistency-metrics-compute"
+     BUILD_COMPARISON_TABLE = "build-comparison-table"
+     COMPARE = "compare"
+     LOCAL_STREAMING = "local-streaming"
+     LOG_FLATTENER = "log-flattener"
+     METADATA_EXPORT = "metadata-export"
+     LABEL_JOIN = "label-join"
+     STREAMING_CLIENT = "streaming-client"
+     SOURCE_JOB = "source-job"
+     JOIN_PART_JOB = "join-part-job"
+     MERGE_JOB = "merge-job"
+     METASTORE = "metastore"
+     INFO = "info"
+
+
+ ONLINE_ARGS = "--online-jar={online_jar} --online-class={online_class} "
+ OFFLINE_ARGS = "--conf-path={conf_path} --end-date={ds} "
+ ONLINE_WRITE_ARGS = "--conf-path={conf_path} " + ONLINE_ARGS
+
+ ONLINE_OFFLINE_WRITE_ARGS = OFFLINE_ARGS + ONLINE_ARGS
+ ONLINE_MODES = [
+     RunMode.STREAMING,
+     RunMode.METADATA_UPLOAD,
+     RunMode.FETCH,
+     RunMode.LOCAL_STREAMING,
+     RunMode.STREAMING_CLIENT,
+ ]
+ SPARK_MODES = [
+     RunMode.BACKFILL,
+     RunMode.BACKFILL_LEFT,
+     RunMode.BACKFILL_FINAL,
+     RunMode.UPLOAD,
+     RunMode.UPLOAD_TO_KV,
+     RunMode.STREAMING,
+     RunMode.STREAMING_CLIENT,
+     RunMode.CONSISTENCY_METRICS_COMPUTE,
+     RunMode.BUILD_COMPARISON_TABLE,
+     RunMode.COMPARE,
+     RunMode.ANALYZE,
+     RunMode.STATS_SUMMARY,
+     RunMode.LOG_SUMMARY,
+     RunMode.LOG_FLATTENER,
+     RunMode.METADATA_EXPORT,
+     RunMode.LABEL_JOIN,
+     RunMode.SOURCE_JOB,
+     RunMode.JOIN_PART_JOB,
+     RunMode.MERGE_JOB,
+ ]
+ MODES_USING_EMBEDDED = [
+     RunMode.METADATA_UPLOAD,
+     RunMode.FETCH,
+     RunMode.LOCAL_STREAMING,
+ ]
+
+ # Constants for supporting multiple spark versions.
+ SUPPORTED_SPARK = ["2.4.0", "3.1.1", "3.2.1", "3.5.1"]
+ SCALA_VERSION_FOR_SPARK = {
+     "2.4.0": "2.11",
+     "3.1.1": "2.12",
+     "3.2.1": "2.13",
+     "3.5.1": "2.12",
+ }
+
+ MODE_ARGS = {
+     RunMode.BACKFILL: OFFLINE_ARGS,
+     RunMode.BACKFILL_LEFT: OFFLINE_ARGS,
+     RunMode.BACKFILL_FINAL: OFFLINE_ARGS,
+     RunMode.UPLOAD: OFFLINE_ARGS,
+     RunMode.UPLOAD_TO_KV: ONLINE_OFFLINE_WRITE_ARGS,
+     RunMode.STATS_SUMMARY: OFFLINE_ARGS,
+     RunMode.LOG_SUMMARY: OFFLINE_ARGS,
+     RunMode.ANALYZE: OFFLINE_ARGS,
+     RunMode.STREAMING: ONLINE_WRITE_ARGS,
+     RunMode.METADATA_UPLOAD: ONLINE_WRITE_ARGS,
+     RunMode.FETCH: ONLINE_ARGS,
+     RunMode.CONSISTENCY_METRICS_COMPUTE: OFFLINE_ARGS,
+     RunMode.BUILD_COMPARISON_TABLE: OFFLINE_ARGS,
+     RunMode.COMPARE: OFFLINE_ARGS,
+     RunMode.LOCAL_STREAMING: ONLINE_WRITE_ARGS + " -d",
+     RunMode.LOG_FLATTENER: OFFLINE_ARGS,
+     RunMode.METADATA_EXPORT: OFFLINE_ARGS,
+     RunMode.LABEL_JOIN: OFFLINE_ARGS,
+     RunMode.STREAMING_CLIENT: ONLINE_WRITE_ARGS,
+     RunMode.SOURCE_JOB: OFFLINE_ARGS,
+     RunMode.JOIN_PART_JOB: OFFLINE_ARGS,
+     RunMode.MERGE_JOB: OFFLINE_ARGS,
+     RunMode.METASTORE: "",  # purposely left blank. we'll handle this specifically
+     RunMode.INFO: "",
+ }
+
+ ROUTES = {
+     "group_bys": {
+         RunMode.UPLOAD: "group-by-upload",
+         RunMode.UPLOAD_TO_KV: "group-by-upload-bulk-load",
+         RunMode.BACKFILL: "group-by-backfill",
+         RunMode.STREAMING: "group-by-streaming",
+         RunMode.METADATA_UPLOAD: "metadata-upload",
+         RunMode.LOCAL_STREAMING: "group-by-streaming",
+         RunMode.FETCH: "fetch",
+         RunMode.ANALYZE: "analyze",
+         RunMode.METADATA_EXPORT: "metadata-export",
+         RunMode.STREAMING_CLIENT: "group-by-streaming",
+     },
+     "joins": {
+         RunMode.BACKFILL: "join",
+         RunMode.BACKFILL_LEFT: "join-left",
+         RunMode.BACKFILL_FINAL: "join-final",
+         RunMode.METADATA_UPLOAD: "metadata-upload",
+         RunMode.FETCH: "fetch",
+         RunMode.CONSISTENCY_METRICS_COMPUTE: "consistency-metrics-compute",
+         RunMode.BUILD_COMPARISON_TABLE: "build-comparison-table",
+         RunMode.COMPARE: "compare-join-query",
+         RunMode.STATS_SUMMARY: "stats-summary",
+         RunMode.LOG_SUMMARY: "log-summary",
+         RunMode.ANALYZE: "analyze",
+         RunMode.LOG_FLATTENER: "log-flattener",
+         RunMode.METADATA_EXPORT: "metadata-export",
+         RunMode.LABEL_JOIN: "label-join",
+         RunMode.SOURCE_JOB: "source-job",
+         RunMode.JOIN_PART_JOB: "join-part-job",
+         RunMode.MERGE_JOB: "merge-job",
+     },
+     "staging_queries": {
+         RunMode.BACKFILL: "staging-query-backfill",
+         RunMode.METADATA_EXPORT: "metadata-export",
+     },
+ }
+
+ UNIVERSAL_ROUTES = ["info"]
+
+ APP_NAME_TEMPLATE = "chronon_{conf_type}_{mode}_{context}_{name}"
+ RENDER_INFO_DEFAULT_SCRIPT = "scripts/render_info.py"
+
+ ZIPLINE_DIRECTORY = "/tmp/zipline"
+
+ CLOUD_PROVIDER_KEYWORD = "CLOUD_PROVIDER"
+
+ # cloud provider
+ AWS = "AWS"
+ GCP = "GCP"
+
+ # arg keywords
+ ONLINE_CLASS_ARG = "online_class"
+ ONLINE_JAR_ARG = "online_jar"
+ ONLINE_ARGS = "online_args"
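
MODE_ARGS and ROUTES above work together: MODE_ARGS supplies the argument template for a run mode, while ROUTES maps a conf type plus mode to the Driver subcommand. A minimal sketch of that lookup, assuming a hypothetical join conf path and date:

# Minimal sketch (not part of the package): resolve the subcommand and
# argument template for backfilling a join. The conf path and date are
# hypothetical placeholders.
from ai.chronon.repo.constants import MODE_ARGS, ROUTES, RunMode

mode = RunMode.BACKFILL
subcommand = ROUTES["joins"][mode]  # -> "join"
driver_args = MODE_ARGS[mode].format(
    conf_path="production/joins/team/example.v1",  # hypothetical conf
    ds="2025-01-01",
)
# default_runner.py below splices subcommand + driver_args into a spark-submit command.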
ai/chronon/repo/default_runner.py
@@ -0,0 +1,291 @@
+ import json
+ import logging
+ import multiprocessing
+ import os
+
+ from ai.chronon.repo import utils
+ from ai.chronon.repo.constants import (
+     MODE_ARGS,
+     ONLINE_ARGS,
+     ONLINE_CLASS_ARG,
+     ONLINE_JAR_ARG,
+     ONLINE_MODES,
+     ROUTES,
+     SPARK_MODES,
+     UNIVERSAL_ROUTES,
+     RunMode,
+ )
+
+
+ class Runner:
+     def __init__(self, args, jar_path):
+         self.repo = args["repo"]
+         self.conf = args["conf"]
+         self.local_abs_conf_path = os.path.realpath(os.path.join(self.repo, self.conf))
+         self.sub_help = args["sub_help"]
+         self.mode = args["mode"]
+         self.online_jar = args.get(ONLINE_JAR_ARG)
+         self.online_class = args.get(ONLINE_CLASS_ARG)
+         self.online_args = args.get(ONLINE_ARGS)
+
+         self.conf_type = (args.get("conf_type") or "").replace(
+             "-", "_"
+         )  # in case user sets dash instead of underscore
+
+         # streaming flink
+         self.conf_metadata_name = utils.get_metadata_name_from_conf(self.repo, self.conf)
+         self.kafka_bootstrap = args.get("kafka_bootstrap")
+         self.latest_savepoint = args.get("latest_savepoint")
+         self.custom_savepoint = args.get("custom_savepoint")
+         self.no_savepoint = args.get("no_savepoint")
+         self.version_check = args.get("version_check")
+         self.additional_jars = args.get("additional_jars")
+
+         flink_state_uri = args.get("flink_state_uri")
+         if flink_state_uri:
+             self.streaming_manifest_path = os.path.join(flink_state_uri, "manifests")
+             self.streaming_checkpoint_path = os.path.join(flink_state_uri, "checkpoints")
+
+         self.mock_source = args.get("mock_source")
+
+         self.validate = args.get("validate")
+         self.validate_rows = args.get("validate_rows")
+         self.enable_debug = args.get("enable_debug")
+         self.uploader = args.get("uploader")
+
+         valid_jar = args["online_jar"] and os.path.exists(args["online_jar"])
+
+         # fetch online jar if necessary
+         if (
+             (self.mode in ONLINE_MODES)
+             and (not args["sub_help"])
+             and not valid_jar
+             and (args.get("online_jar_fetch"))
+         ):
+             print("Downloading online_jar")
+             self.online_jar = utils.check_output(
+                 "{}".format(args["online_jar_fetch"])
+             ).decode("utf-8")
+             os.environ["CHRONON_ONLINE_JAR"] = self.online_jar
+             print("Downloaded jar to {}".format(self.online_jar))
+
+         if (self.conf
+                 and (self.mode != "metastore")):  # TODO: don't check for metastore
+             try:
+                 self.context, self.conf_type, self.team, _ = self.conf.split("/")[-4:]
+             except Exception as e:
+                 logging.error(
+                     "Invalid conf path: {}, please ensure to supply the relative path to zipline/ folder".format(
+                         self.conf
+                     )
+                 )
+                 raise e
+             possible_modes = list(ROUTES[self.conf_type].keys()) + UNIVERSAL_ROUTES
+             assert (
+                 args["mode"] in possible_modes
+             ), "Invalid mode:{} for conf:{} of type:{}, please choose from {}".format(
+                 args["mode"], self.conf, self.conf_type, possible_modes
+             )
+
+         self.ds = args["end_ds"] if "end_ds" in args and args["end_ds"] else args["ds"]
+         self.start_ds = (
+             args["start_ds"] if "start_ds" in args and args["start_ds"] else None
+         )
+         self.parallelism = (
+             int(args["parallelism"])
+             if "parallelism" in args and args["parallelism"]
+             else 1
+         )
+         self.jar_path = jar_path
+
+         self.args = args["args"] if args["args"] else ""
+         self.app_name = args["app_name"]
+         if self.mode == "streaming":
+             self.spark_submit = args["spark_streaming_submit_path"]
+         elif self.mode == "info":
+             assert os.path.exists(
+                 args["render_info"]
+             ), "Invalid path for the render info script: {}".format(args["render_info"])
+             self.render_info = args["render_info"]
+         else:
+             self.spark_submit = args["spark_submit_path"]
+         self.list_apps_cmd = args["list_apps"]
+
+         self.disable_cloud_logging = args.get("disable_cloud_logging")
+
+
+     def run_spark_streaming(self):
+         # streaming mode
+         self.app_name = self.app_name.replace(
+             "_streaming-client_", "_streaming_"
+         )  # If the job is running cluster mode we want to kill it.
+         print(
+             "Checking to see if a streaming job by the name {} already exists".format(
+                 self.app_name
+             )
+         )
+         running_apps = (
+             utils.check_output("{}".format(self.list_apps_cmd))
+             .decode("utf-8")
+             .split("\n")
+         )
+         running_app_map = {}
+         for app in running_apps:
+             try:
+                 app_json = json.loads(app.strip())
+                 app_name = app_json["app_name"].strip()
+                 if app_name not in running_app_map:
+                     running_app_map[app_name] = []
+                 running_app_map[app_name].append(app_json)
+             except Exception as ex:
+                 print("failed to process line into app: " + app)
+                 print(ex)
+
+         filtered_apps = running_app_map.get(self.app_name, [])
+         if len(filtered_apps) > 0:
+             print(
+                 "Found running apps by the name {} in \n{}\n".format(
+                     self.app_name,
+                     "\n".join([str(app) for app in filtered_apps]),
+                 )
+             )
+             if self.mode == "streaming":
+                 assert (
+                     len(filtered_apps) == 1
+                 ), "More than one found, please kill them all"
+                 print("All good. No need to start a new app.")
+                 return
+             elif self.mode == "streaming-client":
+                 raise RuntimeError(
+                     "Attempting to submit an application in client mode, but there's already"
+                     " an existing one running."
+                 )
+         command = (
+             "bash {script} --class ai.chronon.spark.Driver {jar} {subcommand} {args} {additional_args}"
+         ).format(
+             script=self.spark_submit,
+             jar=self.jar_path,
+             subcommand=ROUTES[self.conf_type][self.mode],
+             args=self._gen_final_args(),
+             additional_args=os.environ.get("CHRONON_CONFIG_ADDITIONAL_ARGS", ""),
+         )
+         return command
+
+     def run(self):
+         command_list = []
+         if self.mode == "info":
+             command_list.append(
+                 "python3 {script} --conf {conf} --ds {ds} --repo {repo}".format(
+                     script=self.render_info, conf=self.conf, ds=self.ds, repo=self.repo
+                 )
+             )
+         elif self.sub_help or (self.mode not in SPARK_MODES):
+             if self.mode == "fetch":
+                 entrypoint = "ai.chronon.online.fetcher.FetcherMain"
+             else:
+                 entrypoint = "ai.chronon.spark.Driver"
+             command_list.append(
+                 "java -cp {jar} {entrypoint} {subcommand} {args}".format(
+                     jar=self.jar_path,
+                     entrypoint=entrypoint,
+                     args="--help" if self.sub_help else self._gen_final_args(),
+                     subcommand=ROUTES[self.conf_type][self.mode],
+                 )
+             )
+         else:
+             if self.mode in ["streaming", "streaming-client"]:
+                 # streaming mode
+                 command = self.run_spark_streaming()
+                 command_list.append(command)
+             else:
+                 if self.parallelism > 1:
+                     assert self.start_ds is not None and self.ds is not None, (
+                         "To use parallelism, please specify --start-ds and --end-ds to "
+                         "break down into multiple backfill jobs"
+                     )
+                     date_ranges = utils.split_date_range(
+                         self.start_ds, self.ds, self.parallelism
+                     )
+                     for start_ds, end_ds in date_ranges:
+                         command = (
+                             "bash {script} --class ai.chronon.spark.Driver "
+                             + "{jar} {subcommand} {args} {additional_args}"
+                         ).format(
+                             script=self.spark_submit,
+                             jar=self.jar_path,
+                             subcommand=ROUTES[self.conf_type][self.mode],
+                             args=self._gen_final_args(start_ds=start_ds, end_ds=end_ds),
+                             additional_args=os.environ.get(
+                                 "CHRONON_CONFIG_ADDITIONAL_ARGS", ""
+                             ),
+                         )
+                         command_list.append(command)
+                 else:
+                     command = (
+                         "bash {script} --class ai.chronon.spark.Driver "
+                         + "{jar} {subcommand} {args} {additional_args}"
+                     ).format(
+                         script=self.spark_submit,
+                         jar=self.jar_path,
+                         subcommand=ROUTES[self.conf_type][self.mode],
+                         args=self._gen_final_args(self.start_ds),
+                         additional_args=os.environ.get(
+                             "CHRONON_CONFIG_ADDITIONAL_ARGS", ""
+                         ),
+                     )
+                     command_list.append(command)
+
+         if len(command_list) > 1:
+             # parallel backfill mode
+             with multiprocessing.Pool(processes=int(self.parallelism)) as pool:
+                 logging.info(
+                     "Running args list {} with pool size {}".format(
+                         command_list, self.parallelism
+                     )
+                 )
+                 pool.map(utils.check_call, command_list)
+         elif len(command_list) == 1:
+             utils.check_call(command_list[0])
+
+     def _gen_final_args(
+         self, start_ds=None, end_ds=None, override_conf_path=None, **kwargs
+     ):
+         base_args = MODE_ARGS.get(self.mode).format(
+             conf_path=override_conf_path if override_conf_path else self.conf,
+             ds=end_ds if end_ds else self.ds,
+             online_jar=self.online_jar,
+             online_class=self.online_class,
+         )
+
+         submitter_args = []
+
+         if self.conf_type:
+             submitter_args.append(f"--conf-type={self.conf_type}")
+
+         if self.uploader:
+             submitter_args.append(f"--uploader={self.uploader}")
+
+         if self.additional_jars:
+             submitter_args.append(f"--additional-jars={self.additional_jars}")
+
+         if self.mode != RunMode.FETCH:
+             submitter_args.append(" --local-conf-path={conf}".format(
+                 conf=self.local_abs_conf_path
+             ))
+             submitter_args.append(" --original-mode={mode}".format(mode=self.mode))
+
+         override_start_partition_arg = (
+             "--start-partition-override=" + start_ds if start_ds else ""
+         )
+
+         additional_args = " ".join(
+             f"--{key.replace('_', '-')}={value}"
+             for key, value in kwargs.items()
+             if value
+         )
+
+         final_args = " ".join(
+             [base_args, str(self.args), override_start_partition_arg, ' '.join(submitter_args), additional_args]
+         )
+
+         return final_args
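
For orientation, a rough sketch of constructing this Runner directly for a single-day join backfill; the paths and the args dict below are hypothetical placeholders that simply mirror the keys __init__ reads above, and in practice the CLI assembles them for you.

# Hypothetical usage sketch (not from the package): drive Runner directly
# for a one-day join backfill. Every path and value below is a placeholder.
from ai.chronon.repo.default_runner import Runner

args = {
    "repo": "/path/to/zipline",                       # hypothetical repo root
    "conf": "production/joins/team/example.v1",       # hypothetical conf: context/type/team/name
    "sub_help": False,
    "mode": "backfill",
    "online_jar": None,
    "conf_type": "joins",
    "ds": "2025-01-01",
    "end_ds": None,
    "start_ds": None,
    "parallelism": None,
    "args": "",
    "app_name": "chronon_joins_backfill_example",
    "spark_submit_path": "/path/to/spark-submit.sh",  # hypothetical submit wrapper
    "list_apps": "echo",                              # hypothetical list-apps command
}
Runner(args, jar_path="/path/to/chronon-spark.jar").run()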