google-cloud-dataproc 0.10.0 → 1.0.0

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -2
  3. data/AUTHENTICATION.md +51 -59
  4. data/LICENSE.md +203 -0
  5. data/MIGRATING.md +310 -0
  6. data/README.md +33 -44
  7. data/lib/{google/cloud/dataproc/v1/doc/google/protobuf/empty.rb → google-cloud-dataproc.rb} +4 -14
  8. data/lib/google/cloud/dataproc.rb +167 -305
  9. data/lib/google/cloud/dataproc/version.rb +6 -2
  10. metadata +97 -100
  11. data/LICENSE +0 -201
  12. data/lib/google/cloud/dataproc/v1.rb +0 -371
  13. data/lib/google/cloud/dataproc/v1/autoscaling_policies_pb.rb +0 -80
  14. data/lib/google/cloud/dataproc/v1/autoscaling_policies_services_pb.rb +0 -59
  15. data/lib/google/cloud/dataproc/v1/autoscaling_policy_service_client.rb +0 -494
  16. data/lib/google/cloud/dataproc/v1/autoscaling_policy_service_client_config.json +0 -51
  17. data/lib/google/cloud/dataproc/v1/cluster_controller_client.rb +0 -824
  18. data/lib/google/cloud/dataproc/v1/cluster_controller_client_config.json +0 -59
  19. data/lib/google/cloud/dataproc/v1/clusters_pb.rb +0 -234
  20. data/lib/google/cloud/dataproc/v1/clusters_services_pb.rb +0 -69
  21. data/lib/google/cloud/dataproc/v1/credentials.rb +0 -41
  22. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/autoscaling_policies.rb +0 -238
  23. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/clusters.rb +0 -819
  24. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/jobs.rb +0 -759
  25. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/workflow_templates.rb +0 -566
  26. data/lib/google/cloud/dataproc/v1/doc/google/longrunning/operations.rb +0 -51
  27. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/any.rb +0 -131
  28. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/duration.rb +0 -91
  29. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/field_mask.rb +0 -222
  30. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/timestamp.rb +0 -113
  31. data/lib/google/cloud/dataproc/v1/doc/google/rpc/status.rb +0 -39
  32. data/lib/google/cloud/dataproc/v1/job_controller_client.rb +0 -592
  33. data/lib/google/cloud/dataproc/v1/job_controller_client_config.json +0 -59
  34. data/lib/google/cloud/dataproc/v1/jobs_pb.rb +0 -273
  35. data/lib/google/cloud/dataproc/v1/jobs_services_pb.rb +0 -61
  36. data/lib/google/cloud/dataproc/v1/operations_pb.rb +0 -45
  37. data/lib/google/cloud/dataproc/v1/shared_pb.rb +0 -26
  38. data/lib/google/cloud/dataproc/v1/workflow_template_service_client.rb +0 -770
  39. data/lib/google/cloud/dataproc/v1/workflow_template_service_client_config.json +0 -64
  40. data/lib/google/cloud/dataproc/v1/workflow_templates_pb.rb +0 -184
  41. data/lib/google/cloud/dataproc/v1/workflow_templates_services_pb.rb +0 -105
  42. data/lib/google/cloud/dataproc/v1beta2.rb +0 -371
  43. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policies_pb.rb +0 -80
  44. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policies_services_pb.rb +0 -59
  45. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policy_service_client.rb +0 -494
  46. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policy_service_client_config.json +0 -51
  47. data/lib/google/cloud/dataproc/v1beta2/cluster_controller_client.rb +0 -833
  48. data/lib/google/cloud/dataproc/v1beta2/cluster_controller_client_config.json +0 -59
  49. data/lib/google/cloud/dataproc/v1beta2/clusters_pb.rb +0 -241
  50. data/lib/google/cloud/dataproc/v1beta2/clusters_services_pb.rb +0 -69
  51. data/lib/google/cloud/dataproc/v1beta2/credentials.rb +0 -41
  52. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/autoscaling_policies.rb +0 -238
  53. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/clusters.rb +0 -841
  54. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/jobs.rb +0 -728
  55. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/workflow_templates.rb +0 -579
  56. data/lib/google/cloud/dataproc/v1beta2/doc/google/longrunning/operations.rb +0 -51
  57. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/any.rb +0 -131
  58. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/duration.rb +0 -91
  59. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/empty.rb +0 -29
  60. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/field_mask.rb +0 -222
  61. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/timestamp.rb +0 -113
  62. data/lib/google/cloud/dataproc/v1beta2/doc/google/rpc/status.rb +0 -39
  63. data/lib/google/cloud/dataproc/v1beta2/job_controller_client.rb +0 -592
  64. data/lib/google/cloud/dataproc/v1beta2/job_controller_client_config.json +0 -59
  65. data/lib/google/cloud/dataproc/v1beta2/jobs_pb.rb +0 -261
  66. data/lib/google/cloud/dataproc/v1beta2/jobs_services_pb.rb +0 -61
  67. data/lib/google/cloud/dataproc/v1beta2/operations_pb.rb +0 -44
  68. data/lib/google/cloud/dataproc/v1beta2/shared_pb.rb +0 -30
  69. data/lib/google/cloud/dataproc/v1beta2/workflow_template_service_client.rb +0 -778
  70. data/lib/google/cloud/dataproc/v1beta2/workflow_template_service_client_config.json +0 -64
  71. data/lib/google/cloud/dataproc/v1beta2/workflow_templates_pb.rb +0 -186
  72. data/lib/google/cloud/dataproc/v1beta2/workflow_templates_services_pb.rb +0 -105
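The headline change in this 0.10.0 → 1.0.0 bump: every hand-written `v1` and `v1beta2` client file is deleted, the gem becomes a thin wrapper whose versioned clients ship in the separate `google-cloud-dataproc-v1` and `google-cloud-dataproc-v1beta2` gems, and the new MIGRATING.md documents the move. A minimal before/after sketch of the entry-point change, assuming the standard google-cloud-ruby migration pattern (the exact old factory name, project, and region values are illustrative, not taken from this diff):

```ruby
require "google/cloud/dataproc"

# 0.10.0-era usage (approximate; these are the versioned client classes
# removed in the file list above):
# client = Google::Cloud::Dataproc::ClusterController.new(version: :v1)
# client.list_clusters("my-project", "us-central1")

# 1.0.0 usage: a factory method on the wrapper gem returns the client
# implemented in google-cloud-dataproc-v1; requests take keyword arguments.
client = Google::Cloud::Dataproc.cluster_controller
client.list_clusters(project_id: "my-project", region: "us-central1").each do |cluster|
  puts cluster.cluster_name
end
```

The one hunk reproduced below is the deleted jobs documentation file (entry 24 above).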
--- a/data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/jobs.rb
+++ /dev/null
@@ -1,759 +0,0 @@
- # Copyright 2020 Google LLC
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- module Google
-   module Cloud
-     module Dataproc
-       module V1
-         # The runtime logging config of the job.
-         # @!attribute [rw] driver_log_levels
-         #   @return [Hash{String => Google::Cloud::Dataproc::V1::LoggingConfig::Level}]
-         #     The per-package log levels for the driver. This may include
-         #     "root" package name to configure rootLogger.
-         #     Examples:
-         #       'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
-         class LoggingConfig
-           # The Log4j level for job execution. When running an
-           # [Apache Hive](http://hive.apache.org/) job, Cloud
-           # Dataproc configures the Hive client to an equivalent verbosity level.
-           module Level
-             # Level is unspecified. Use default level for log4j.
-             LEVEL_UNSPECIFIED = 0
-
-             # Use ALL level for log4j.
-             ALL = 1
-
-             # Use TRACE level for log4j.
-             TRACE = 2
-
-             # Use DEBUG level for log4j.
-             DEBUG = 3
-
-             # Use INFO level for log4j.
-             INFO = 4
-
-             # Use WARN level for log4j.
-             WARN = 5
-
-             # Use ERROR level for log4j.
-             ERROR = 6
-
-             # Use FATAL level for log4j.
-             FATAL = 7
-
-             # Turn off log4j.
-             OFF = 8
-           end
-         end
-
-         # A Dataproc job for running
-         # [Apache Hadoop
-         # MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
-         # jobs on [Apache Hadoop
-         # YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
-         # @!attribute [rw] main_jar_file_uri
-         #   @return [String]
-         #     The HCFS URI of the jar file containing the main class.
-         #     Examples:
-         #       'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
-         #       'hdfs:/tmp/test-samples/custom-wordcount.jar'
-         #       'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
-         # @!attribute [rw] main_class
-         #   @return [String]
-         #     The name of the driver's main class. The jar file containing the class
-         #     must be in the default CLASSPATH or specified in `jar_file_uris`.
-         # @!attribute [rw] args
-         #   @return [Array<String>]
-         #     Optional. The arguments to pass to the driver. Do not
-         #     include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
-         #     job properties, since a collision may occur that causes an incorrect job
-         #     submission.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. Jar file URIs to add to the CLASSPATHs of the
-         #     Hadoop driver and tasks.
-         # @!attribute [rw] file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
-         #     to the working directory of Hadoop drivers and distributed tasks. Useful
-         #     for naively parallel tasks.
-         # @!attribute [rw] archive_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of archives to be extracted in the working directory of
-         #     Hadoop drivers and tasks. Supported file types:
-         #     .jar, .tar, .tar.gz, .tgz, or .zip.
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure Hadoop.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in /etc/hadoop/conf/*-site and
-         #     classes in user code.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class HadoopJob; end
-
-         # A Dataproc job for running [Apache Spark](http://spark.apache.org/)
-         # applications on YARN.
-         # @!attribute [rw] main_jar_file_uri
-         #   @return [String]
-         #     The HCFS URI of the jar file that contains the main class.
-         # @!attribute [rw] main_class
-         #   @return [String]
-         #     The name of the driver's main class. The jar file that contains the class
-         #     must be in the default CLASSPATH or specified in `jar_file_uris`.
-         # @!attribute [rw] args
-         #   @return [Array<String>]
-         #     Optional. The arguments to pass to the driver. Do not include arguments,
-         #     such as `--conf`, that can be set as job properties, since a collision may
-         #     occur that causes an incorrect job submission.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
-         #     Spark driver and tasks.
-         # @!attribute [rw] file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of files to be copied to the working directory of
-         #     Spark drivers and distributed tasks. Useful for naively parallel tasks.
-         # @!attribute [rw] archive_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of archives to be extracted in the working directory
-         #     of Spark drivers and tasks. Supported file types:
-         #     .jar, .tar, .tar.gz, .tgz, and .zip.
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure Spark.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in
-         #     /etc/spark/conf/spark-defaults.conf and classes in user code.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class SparkJob; end
-
-         # A Dataproc job for running
-         # [Apache
-         # PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
-         # applications on YARN.
-         # @!attribute [rw] main_python_file_uri
-         #   @return [String]
-         #     Required. The HCFS URI of the main Python file to use as the driver. Must
-         #     be a .py file.
-         # @!attribute [rw] args
-         #   @return [Array<String>]
-         #     Optional. The arguments to pass to the driver. Do not include arguments,
-         #     such as `--conf`, that can be set as job properties, since a collision may
-         #     occur that causes an incorrect job submission.
-         # @!attribute [rw] python_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS file URIs of Python files to pass to the PySpark
-         #     framework. Supported file types: .py, .egg, and .zip.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
-         #     Python driver and tasks.
-         # @!attribute [rw] file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of files to be copied to the working directory of
-         #     Python drivers and distributed tasks. Useful for naively parallel tasks.
-         # @!attribute [rw] archive_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of archives to be extracted in the working directory of
-         #     .jar, .tar, .tar.gz, .tgz, and .zip.
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure PySpark.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in
-         #     /etc/spark/conf/spark-defaults.conf and classes in user code.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class PySparkJob; end
-
-         # A list of queries to run on a cluster.
-         # @!attribute [rw] queries
-         #   @return [Array<String>]
-         #     Required. The queries to execute. You do not need to terminate a query
-         #     with a semicolon. Multiple queries can be specified in one string
-         #     by separating each with a semicolon. Here is an example of an Cloud
-         #     Dataproc API snippet that uses a QueryList to specify a HiveJob:
-         #
-         #         "hiveJob": {
-         #           "queryList": {
-         #             "queries": [
-         #               "query1",
-         #               "query2",
-         #               "query3;query4",
-         #             ]
-         #           }
-         #         }
-         class QueryList; end
-
-         # A Dataproc job for running [Apache Hive](https://hive.apache.org/)
-         # queries on YARN.
-         # @!attribute [rw] query_file_uri
-         #   @return [String]
-         #     The HCFS URI of the script that contains Hive queries.
-         # @!attribute [rw] query_list
-         #   @return [Google::Cloud::Dataproc::V1::QueryList]
-         #     A list of queries.
-         # @!attribute [rw] continue_on_failure
-         #   @return [true, false]
-         #     Optional. Whether to continue executing queries if a query fails.
-         #     The default value is `false`. Setting to `true` can be useful when
-         #     executing independent parallel queries.
-         # @!attribute [rw] script_variables
-         #   @return [Hash{String => String}]
-         #     Optional. Mapping of query variable names to values (equivalent to the
-         #     Hive command: `SET name="value";`).
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names and values, used to configure Hive.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
-         #     /etc/hive/conf/hive-site.xml, and classes in user code.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of jar files to add to the CLASSPATH of the
-         #     Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
-         #     and UDFs.
-         class HiveJob; end
-
-         # A Dataproc job for running [Apache Spark
-         # SQL](http://spark.apache.org/sql/) queries.
-         # @!attribute [rw] query_file_uri
-         #   @return [String]
-         #     The HCFS URI of the script that contains SQL queries.
-         # @!attribute [rw] query_list
-         #   @return [Google::Cloud::Dataproc::V1::QueryList]
-         #     A list of queries.
-         # @!attribute [rw] script_variables
-         #   @return [Hash{String => String}]
-         #     Optional. Mapping of query variable names to values (equivalent to the
-         #     Spark SQL command: SET `name="value";`).
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure
-         #     Spark SQL's SparkConf. Properties that conflict with values set by the
-         #     Dataproc API may be overwritten.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class SparkSqlJob; end
-
-         # A Dataproc job for running [Apache Pig](https://pig.apache.org/)
-         # queries on YARN.
-         # @!attribute [rw] query_file_uri
-         #   @return [String]
-         #     The HCFS URI of the script that contains the Pig queries.
-         # @!attribute [rw] query_list
-         #   @return [Google::Cloud::Dataproc::V1::QueryList]
-         #     A list of queries.
-         # @!attribute [rw] continue_on_failure
-         #   @return [true, false]
-         #     Optional. Whether to continue executing queries if a query fails.
-         #     The default value is `false`. Setting to `true` can be useful when
-         #     executing independent parallel queries.
-         # @!attribute [rw] script_variables
-         #   @return [Hash{String => String}]
-         #     Optional. Mapping of query variable names to values (equivalent to the Pig
-         #     command: `name=[value]`).
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure Pig.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
-         #     /etc/pig/conf/pig.properties, and classes in user code.
-         # @!attribute [rw] jar_file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of jar files to add to the CLASSPATH of
-         #     the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class PigJob; end
-
-         # A Dataproc job for running
-         # [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
-         # applications on YARN.
-         # @!attribute [rw] main_r_file_uri
-         #   @return [String]
-         #     Required. The HCFS URI of the main R file to use as the driver.
-         #     Must be a .R file.
-         # @!attribute [rw] args
-         #   @return [Array<String>]
-         #     Optional. The arguments to pass to the driver. Do not include arguments,
-         #     such as `--conf`, that can be set as job properties, since a collision may
-         #     occur that causes an incorrect job submission.
-         # @!attribute [rw] file_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of files to be copied to the working directory of
-         #     R drivers and distributed tasks. Useful for naively parallel tasks.
-         # @!attribute [rw] archive_uris
-         #   @return [Array<String>]
-         #     Optional. HCFS URIs of archives to be extracted in the working directory of
-         #     Spark drivers and tasks. Supported file types:
-         #     .jar, .tar, .tar.gz, .tgz, and .zip.
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values, used to configure SparkR.
-         #     Properties that conflict with values set by the Dataproc API may be
-         #     overwritten. Can include properties set in
-         #     /etc/spark/conf/spark-defaults.conf and classes in user code.
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class SparkRJob; end
-
-         # A Dataproc job for running [Presto](https://prestosql.io/) queries
-         # @!attribute [rw] query_file_uri
-         #   @return [String]
-         #     The HCFS URI of the script that contains SQL queries.
-         # @!attribute [rw] query_list
-         #   @return [Google::Cloud::Dataproc::V1::QueryList]
-         #     A list of queries.
-         # @!attribute [rw] continue_on_failure
-         #   @return [true, false]
-         #     Optional. Whether to continue executing queries if a query fails.
-         #     The default value is `false`. Setting to `true` can be useful when
-         #     executing independent parallel queries.
-         # @!attribute [rw] output_format
-         #   @return [String]
-         #     Optional. The format in which query output will be displayed. See the
-         #     Presto documentation for supported output formats
-         # @!attribute [rw] client_tags
-         #   @return [Array<String>]
-         #     Optional. Presto client tags to attach to this query
-         # @!attribute [rw] properties
-         #   @return [Hash{String => String}]
-         #     Optional. A mapping of property names to values. Used to set Presto
-         #     [session properties](https://prestodb.io/docs/current/sql/set-session.html)
-         #     Equivalent to using the --session flag in the Presto CLI
-         # @!attribute [rw] logging_config
-         #   @return [Google::Cloud::Dataproc::V1::LoggingConfig]
-         #     Optional. The runtime log config for job execution.
-         class PrestoJob; end
-
-         # Dataproc job config.
-         # @!attribute [rw] cluster_name
-         #   @return [String]
-         #     Required. The name of the cluster where the job will be submitted.
-         # @!attribute [rw] cluster_uuid
-         #   @return [String]
-         #     Output only. A cluster UUID generated by the Dataproc service when
-         #     the job is submitted.
-         class JobPlacement; end
-
-         # Dataproc job status.
-         # @!attribute [rw] state
-         #   @return [Google::Cloud::Dataproc::V1::JobStatus::State]
-         #     Output only. A state message specifying the overall job state.
-         # @!attribute [rw] details
-         #   @return [String]
-         #     Optional. Output only. Job state details, such as an error
-         #     description if the state is <code>ERROR</code>.
-         # @!attribute [rw] state_start_time
-         #   @return [Google::Protobuf::Timestamp]
-         #     Output only. The time when this state was entered.
-         # @!attribute [rw] substate
-         #   @return [Google::Cloud::Dataproc::V1::JobStatus::Substate]
-         #     Output only. Additional state information, which includes
-         #     status reported by the agent.
-         class JobStatus
-           # The job state.
-           module State
-             # The job state is unknown.
-             STATE_UNSPECIFIED = 0
-
-             # The job is pending; it has been submitted, but is not yet running.
-             PENDING = 1
-
-             # Job has been received by the service and completed initial setup;
-             # it will soon be submitted to the cluster.
-             SETUP_DONE = 8
-
-             # The job is running on the cluster.
-             RUNNING = 2
-
-             # A CancelJob request has been received, but is pending.
-             CANCEL_PENDING = 3
-
-             # Transient in-flight resources have been canceled, and the request to
-             # cancel the running job has been issued to the cluster.
-             CANCEL_STARTED = 7
-
-             # The job cancellation was successful.
-             CANCELLED = 4
-
-             # The job has completed successfully.
-             DONE = 5
-
-             # The job has completed, but encountered an error.
-             ERROR = 6
-
-             # Job attempt has failed. The detail field contains failure details for
-             # this attempt.
-             #
-             # Applies to restartable jobs only.
-             ATTEMPT_FAILURE = 9
-           end
-
-           # The job substate.
-           module Substate
-             # The job substate is unknown.
-             UNSPECIFIED = 0
-
-             # The Job is submitted to the agent.
-             #
-             # Applies to RUNNING state.
-             SUBMITTED = 1
-
-             # The Job has been received and is awaiting execution (it may be waiting
-             # for a condition to be met). See the "details" field for the reason for
-             # the delay.
-             #
-             # Applies to RUNNING state.
-             QUEUED = 2
-
-             # The agent-reported status is out of date, which may be caused by a
-             # loss of communication between the agent and Dataproc. If the
-             # agent does not send a timely update, the job will fail.
-             #
-             # Applies to RUNNING state.
-             STALE_STATUS = 3
-           end
-         end
-
-         # Encapsulates the full scoping used to reference a job.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] job_id
-         #   @return [String]
-         #     Optional. The job ID, which must be unique within the project.
-         #
-         #     The ID must contain only letters (a-z, A-Z), numbers (0-9),
-         #     underscores (_), or hyphens (-). The maximum length is 100 characters.
-         #
-         #     If not specified by the caller, the job ID will be provided by the server.
-         class JobReference; end
-
-         # A YARN application created by a job. Application information is a subset of
-         # <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
-         #
-         # **Beta Feature**: This report is available for testing purposes only. It may
-         # be changed before final release.
-         # @!attribute [rw] name
-         #   @return [String]
-         #     Required. The application name.
-         # @!attribute [rw] state
-         #   @return [Google::Cloud::Dataproc::V1::YarnApplication::State]
-         #     Required. The application state.
-         # @!attribute [rw] progress
-         #   @return [Float]
-         #     Required. The numerical progress of the application, from 1 to 100.
-         # @!attribute [rw] tracking_url
-         #   @return [String]
-         #     Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
-         #     TimelineServer that provides application-specific information. The URL uses
-         #     the internal hostname, and requires a proxy server for resolution and,
-         #     possibly, access.
-         class YarnApplication
-           # The application state, corresponding to
-           # <code>YarnProtos.YarnApplicationStateProto</code>.
-           module State
-             # Status is unspecified.
-             STATE_UNSPECIFIED = 0
-
-             # Status is NEW.
-             NEW = 1
-
-             # Status is NEW_SAVING.
-             NEW_SAVING = 2
-
-             # Status is SUBMITTED.
-             SUBMITTED = 3
-
-             # Status is ACCEPTED.
-             ACCEPTED = 4
-
-             # Status is RUNNING.
-             RUNNING = 5
-
-             # Status is FINISHED.
-             FINISHED = 6
-
-             # Status is FAILED.
-             FAILED = 7
-
-             # Status is KILLED.
-             KILLED = 8
-           end
-         end
-
-         # A Dataproc job resource.
-         # @!attribute [rw] reference
-         #   @return [Google::Cloud::Dataproc::V1::JobReference]
-         #     Optional. The fully qualified reference to the job, which can be used to
-         #     obtain the equivalent REST path of the job resource. If this property
-         #     is not specified when a job is created, the server generates a
-         #     <code>job_id</code>.
-         # @!attribute [rw] placement
-         #   @return [Google::Cloud::Dataproc::V1::JobPlacement]
-         #     Required. Job information, including how, when, and where to
-         #     run the job.
-         # @!attribute [rw] hadoop_job
-         #   @return [Google::Cloud::Dataproc::V1::HadoopJob]
-         #     Optional. Job is a Hadoop job.
-         # @!attribute [rw] spark_job
-         #   @return [Google::Cloud::Dataproc::V1::SparkJob]
-         #     Optional. Job is a Spark job.
-         # @!attribute [rw] pyspark_job
-         #   @return [Google::Cloud::Dataproc::V1::PySparkJob]
-         #     Optional. Job is a PySpark job.
-         # @!attribute [rw] hive_job
-         #   @return [Google::Cloud::Dataproc::V1::HiveJob]
-         #     Optional. Job is a Hive job.
-         # @!attribute [rw] pig_job
-         #   @return [Google::Cloud::Dataproc::V1::PigJob]
-         #     Optional. Job is a Pig job.
-         # @!attribute [rw] spark_r_job
-         #   @return [Google::Cloud::Dataproc::V1::SparkRJob]
-         #     Optional. Job is a SparkR job.
-         # @!attribute [rw] spark_sql_job
-         #   @return [Google::Cloud::Dataproc::V1::SparkSqlJob]
-         #     Optional. Job is a SparkSql job.
-         # @!attribute [rw] presto_job
-         #   @return [Google::Cloud::Dataproc::V1::PrestoJob]
-         #     Optional. Job is a Presto job.
-         # @!attribute [rw] status
-         #   @return [Google::Cloud::Dataproc::V1::JobStatus]
-         #     Output only. The job status. Additional application-specific
-         #     status information may be contained in the <code>type_job</code>
-         #     and <code>yarn_applications</code> fields.
-         # @!attribute [rw] status_history
-         #   @return [Array<Google::Cloud::Dataproc::V1::JobStatus>]
-         #     Output only. The previous job status.
-         # @!attribute [rw] yarn_applications
-         #   @return [Array<Google::Cloud::Dataproc::V1::YarnApplication>]
-         #     Output only. The collection of YARN applications spun up by this job.
-         #
-         #     **Beta** Feature: This report is available for testing purposes only. It
-         #     may be changed before final release.
-         # @!attribute [rw] driver_output_resource_uri
-         #   @return [String]
-         #     Output only. A URI pointing to the location of the stdout of the job's
-         #     driver program.
-         # @!attribute [rw] driver_control_files_uri
-         #   @return [String]
-         #     Output only. If present, the location of miscellaneous control files
-         #     which may be used as part of job setup and handling. If not present,
-         #     control files may be placed in the same location as `driver_output_uri`.
-         # @!attribute [rw] labels
-         #   @return [Hash{String => String}]
-         #     Optional. The labels to associate with this job.
-         #     Label **keys** must contain 1 to 63 characters, and must conform to
-         #     [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
-         #     Label **values** may be empty, but, if present, must contain 1 to 63
-         #     characters, and must conform to [RFC
-         #     1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
-         #     associated with a job.
-         # @!attribute [rw] scheduling
-         #   @return [Google::Cloud::Dataproc::V1::JobScheduling]
-         #     Optional. Job scheduling configuration.
-         # @!attribute [rw] job_uuid
-         #   @return [String]
-         #     Output only. A UUID that uniquely identifies a job within the project
-         #     over time. This is in contrast to a user-settable reference.job_id that
-         #     may be reused over time.
-         class Job; end
-
-         # Job scheduling options.
-         # @!attribute [rw] max_failures_per_hour
-         #   @return [Integer]
-         #     Optional. Maximum number of times per hour a driver may be restarted as
-         #     a result of driver terminating with non-zero code before job is
-         #     reported failed.
-         #
-         #     A job may be reported as thrashing if driver exits with non-zero code
-         #     4 times within 10 minute window.
-         #
-         #     Maximum value is 10.
-         class JobScheduling; end
-
-         # A request to submit a job.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] job
-         #   @return [Google::Cloud::Dataproc::V1::Job]
-         #     Required. The job resource.
-         # @!attribute [rw] request_id
-         #   @return [String]
-         #     Optional. A unique id used to identify the request. If the server
-         #     receives two {Google::Cloud::Dataproc::V1::SubmitJobRequest SubmitJobRequest} requests with the same
-         #     id, then the second request will be ignored and the
-         #     first {Google::Cloud::Dataproc::V1::Job Job} created and stored in the backend
-         #     is returned.
-         #
-         #     It is recommended to always set this value to a
-         #     [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
-         #
-         #     The id must contain only letters (a-z, A-Z), numbers (0-9),
-         #     underscores (_), and hyphens (-). The maximum length is 40 characters.
-         class SubmitJobRequest; end
-
-         # A request to get the resource representation for a job in a project.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] job_id
-         #   @return [String]
-         #     Required. The job ID.
-         class GetJobRequest; end
-
-         # A request to list jobs in a project.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] page_size
-         #   @return [Integer]
-         #     Optional. The number of results to return in each response.
-         # @!attribute [rw] page_token
-         #   @return [String]
-         #     Optional. The page token, returned by a previous call, to request the
-         #     next page of results.
-         # @!attribute [rw] cluster_name
-         #   @return [String]
-         #     Optional. If set, the returned jobs list includes only jobs that were
-         #     submitted to the named cluster.
-         # @!attribute [rw] job_state_matcher
-         #   @return [Google::Cloud::Dataproc::V1::ListJobsRequest::JobStateMatcher]
-         #     Optional. Specifies enumerated categories of jobs to list.
-         #     (default = match ALL jobs).
-         #
-         #     If `filter` is provided, `jobStateMatcher` will be ignored.
-         # @!attribute [rw] filter
-         #   @return [String]
-         #     Optional. A filter constraining the jobs to list. Filters are
-         #     case-sensitive and have the following syntax:
-         #
-         #         [field = value] AND [field [= value]] ...
-         #
-         #     where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label
-         #     key. **value** can be `*` to match all values.
-         #     `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
-         #     Only the logical `AND` operator is supported; space-separated items are
-         #     treated as having an implicit `AND` operator.
-         #
-         #     Example filter:
-         #
-         #         status.state = ACTIVE AND labels.env = staging AND labels.starred = *
-         class ListJobsRequest
-           # A matcher that specifies categories of job states.
-           module JobStateMatcher
-             # Match all jobs, regardless of state.
-             ALL = 0
-
-             # Only match jobs in non-terminal states: PENDING, RUNNING, or
-             # CANCEL_PENDING.
-             ACTIVE = 1
-
-             # Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
-             NON_ACTIVE = 2
-           end
-         end
-
-         # A request to update a job.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] job_id
-         #   @return [String]
-         #     Required. The job ID.
-         # @!attribute [rw] job
-         #   @return [Google::Cloud::Dataproc::V1::Job]
-         #     Required. The changes to the job.
-         # @!attribute [rw] update_mask
-         #   @return [Google::Protobuf::FieldMask]
-         #     Required. Specifies the path, relative to <code>Job</code>, of
-         #     the field to update. For example, to update the labels of a Job the
-         #     <code>update_mask</code> parameter would be specified as
-         #     <code>labels</code>, and the `PATCH` request body would specify the new
-         #     value. <strong>Note:</strong> Currently, <code>labels</code> is the only
-         #     field that can be updated.
-         class UpdateJobRequest; end
-
-         # A list of jobs in a project.
-         # @!attribute [rw] jobs
-         #   @return [Array<Google::Cloud::Dataproc::V1::Job>]
-         #     Output only. Jobs list.
-         # @!attribute [rw] next_page_token
-         #   @return [String]
-         #     Optional. This token is included in the response if there are more results
-         #     to fetch. To fetch additional results, provide this value as the
-         #     `page_token` in a subsequent <code>ListJobsRequest</code>.
-         class ListJobsResponse; end
-
-         # A request to cancel a job.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] job_id
-         #   @return [String]
-         #     Required. The job ID.
-         class CancelJobRequest; end
-
-         # A request to delete a job.
-         # @!attribute [rw] project_id
-         #   @return [String]
-         #     Required. The ID of the Google Cloud Platform project that the job
-         #     belongs to.
-         # @!attribute [rw] region
-         #   @return [String]
-         #     Required. The Dataproc region in which to handle the request.
-         # @!attribute [rw] job_id
-         #   @return [String]
-         #     Required. The job ID.
-         class DeleteJobRequest; end
-       end
-     end
-   end
- end
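The message shapes documented in the deleted file (Job, JobPlacement, HadoopJob, SubmitJobRequest, and so on) carry over unchanged to the rewritten client; only the entry point moves. As a hedged illustration, submitting the HadoopJob described above through the 1.0.0-style job controller might look like the following sketch (project, region, cluster, and bucket names are placeholders):

```ruby
require "google/cloud/dataproc"

# Factory method on the 1.0.0 wrapper gem; the client itself lives in
# the google-cloud-dataproc-v1 gem.
client = Google::Cloud::Dataproc.job_controller

# Request hashes mirror the SubmitJobRequest/Job fields documented above.
job = client.submit_job(
  project_id: "my-project",
  region:     "us-central1",
  job: {
    placement:  { cluster_name: "my-cluster" },           # required JobPlacement
    hadoop_job: {
      main_jar_file_uri: "gs://my-bucket/wordcount.jar",  # HCFS URI of the driver jar
      args:              ["gs://my-bucket/input", "gs://my-bucket/output"],
      logging_config:    { driver_log_levels: { "root" => :INFO } }
    }
  }
)

# The server fills in reference.job_id when the caller omits it.
puts "Submitted #{job.reference.job_id} (state: #{job.status.state})"
```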