google-cloud-dataproc 0.10.0 → 1.0.0

Files changed (72)
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -2
  3. data/AUTHENTICATION.md +51 -59
  4. data/LICENSE.md +203 -0
  5. data/MIGRATING.md +310 -0
  6. data/README.md +33 -44
  7. data/lib/{google/cloud/dataproc/v1/doc/google/protobuf/empty.rb → google-cloud-dataproc.rb} +4 -14
  8. data/lib/google/cloud/dataproc.rb +167 -305
  9. data/lib/google/cloud/dataproc/version.rb +6 -2
  10. metadata +97 -100
  11. data/LICENSE +0 -201
  12. data/lib/google/cloud/dataproc/v1.rb +0 -371
  13. data/lib/google/cloud/dataproc/v1/autoscaling_policies_pb.rb +0 -80
  14. data/lib/google/cloud/dataproc/v1/autoscaling_policies_services_pb.rb +0 -59
  15. data/lib/google/cloud/dataproc/v1/autoscaling_policy_service_client.rb +0 -494
  16. data/lib/google/cloud/dataproc/v1/autoscaling_policy_service_client_config.json +0 -51
  17. data/lib/google/cloud/dataproc/v1/cluster_controller_client.rb +0 -824
  18. data/lib/google/cloud/dataproc/v1/cluster_controller_client_config.json +0 -59
  19. data/lib/google/cloud/dataproc/v1/clusters_pb.rb +0 -234
  20. data/lib/google/cloud/dataproc/v1/clusters_services_pb.rb +0 -69
  21. data/lib/google/cloud/dataproc/v1/credentials.rb +0 -41
  22. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/autoscaling_policies.rb +0 -238
  23. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/clusters.rb +0 -819
  24. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/jobs.rb +0 -759
  25. data/lib/google/cloud/dataproc/v1/doc/google/cloud/dataproc/v1/workflow_templates.rb +0 -566
  26. data/lib/google/cloud/dataproc/v1/doc/google/longrunning/operations.rb +0 -51
  27. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/any.rb +0 -131
  28. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/duration.rb +0 -91
  29. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/field_mask.rb +0 -222
  30. data/lib/google/cloud/dataproc/v1/doc/google/protobuf/timestamp.rb +0 -113
  31. data/lib/google/cloud/dataproc/v1/doc/google/rpc/status.rb +0 -39
  32. data/lib/google/cloud/dataproc/v1/job_controller_client.rb +0 -592
  33. data/lib/google/cloud/dataproc/v1/job_controller_client_config.json +0 -59
  34. data/lib/google/cloud/dataproc/v1/jobs_pb.rb +0 -273
  35. data/lib/google/cloud/dataproc/v1/jobs_services_pb.rb +0 -61
  36. data/lib/google/cloud/dataproc/v1/operations_pb.rb +0 -45
  37. data/lib/google/cloud/dataproc/v1/shared_pb.rb +0 -26
  38. data/lib/google/cloud/dataproc/v1/workflow_template_service_client.rb +0 -770
  39. data/lib/google/cloud/dataproc/v1/workflow_template_service_client_config.json +0 -64
  40. data/lib/google/cloud/dataproc/v1/workflow_templates_pb.rb +0 -184
  41. data/lib/google/cloud/dataproc/v1/workflow_templates_services_pb.rb +0 -105
  42. data/lib/google/cloud/dataproc/v1beta2.rb +0 -371
  43. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policies_pb.rb +0 -80
  44. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policies_services_pb.rb +0 -59
  45. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policy_service_client.rb +0 -494
  46. data/lib/google/cloud/dataproc/v1beta2/autoscaling_policy_service_client_config.json +0 -51
  47. data/lib/google/cloud/dataproc/v1beta2/cluster_controller_client.rb +0 -833
  48. data/lib/google/cloud/dataproc/v1beta2/cluster_controller_client_config.json +0 -59
  49. data/lib/google/cloud/dataproc/v1beta2/clusters_pb.rb +0 -241
  50. data/lib/google/cloud/dataproc/v1beta2/clusters_services_pb.rb +0 -69
  51. data/lib/google/cloud/dataproc/v1beta2/credentials.rb +0 -41
  52. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/autoscaling_policies.rb +0 -238
  53. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/clusters.rb +0 -841
  54. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/jobs.rb +0 -728
  55. data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/workflow_templates.rb +0 -579
  56. data/lib/google/cloud/dataproc/v1beta2/doc/google/longrunning/operations.rb +0 -51
  57. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/any.rb +0 -131
  58. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/duration.rb +0 -91
  59. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/empty.rb +0 -29
  60. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/field_mask.rb +0 -222
  61. data/lib/google/cloud/dataproc/v1beta2/doc/google/protobuf/timestamp.rb +0 -113
  62. data/lib/google/cloud/dataproc/v1beta2/doc/google/rpc/status.rb +0 -39
  63. data/lib/google/cloud/dataproc/v1beta2/job_controller_client.rb +0 -592
  64. data/lib/google/cloud/dataproc/v1beta2/job_controller_client_config.json +0 -59
  65. data/lib/google/cloud/dataproc/v1beta2/jobs_pb.rb +0 -261
  66. data/lib/google/cloud/dataproc/v1beta2/jobs_services_pb.rb +0 -61
  67. data/lib/google/cloud/dataproc/v1beta2/operations_pb.rb +0 -44
  68. data/lib/google/cloud/dataproc/v1beta2/shared_pb.rb +0 -30
  69. data/lib/google/cloud/dataproc/v1beta2/workflow_template_service_client.rb +0 -778
  70. data/lib/google/cloud/dataproc/v1beta2/workflow_template_service_client_config.json +0 -64
  71. data/lib/google/cloud/dataproc/v1beta2/workflow_templates_pb.rb +0 -186
  72. data/lib/google/cloud/dataproc/v1beta2/workflow_templates_services_pb.rb +0 -105
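
The file list tells the migration story: the hand-rolled v1/v1beta2 GAPIC clients bundled in this gem are removed, the versioned clients move into their own gems, and this gem becomes a thin wrapper (`data/lib/google/cloud/dataproc.rb` shrinks from 305 to 167 lines) with a new MIGRATING.md describing the changes. Below is a minimal sketch of the post-migration call pattern, assuming the factory-method style documented in MIGRATING.md; the project and region values are placeholders, not part of this diff:

    # Gemfile: gem "google-cloud-dataproc", "~> 1.0"
    require "google/cloud/dataproc"

    # 1.0-style factory method returning a versioned client; request fields
    # are passed as keyword arguments instead of the old positional args.
    cluster_controller = Google::Cloud::Dataproc.cluster_controller

    clusters = cluster_controller.list_clusters project_id: "my-project",
                                                region:     "us-central1"
    clusters.each { |cluster| puts cluster.cluster_name }

Only one of the deleted files is expanded in the hunk that follows: `data/lib/google/cloud/dataproc/v1beta2/doc/google/cloud/dataproc/v1beta2/jobs.rb` (item 54, −728 lines), the YARD documentation for the v1beta2 job types.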
@@ -1,728 +0,0 @@
- # Copyright 2020 Google LLC
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # https://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- module Google
- module Cloud
- module Dataproc
- module V1beta2
- # The runtime logging config of the job.
- # @!attribute [rw] driver_log_levels
- # @return [Hash{String => Google::Cloud::Dataproc::V1beta2::LoggingConfig::Level}]
- # The per-package log levels for the driver. This may include
- # "root" package name to configure rootLogger.
- # Examples:
- # 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
- class LoggingConfig
- # The Log4j level for job execution. When running an
- # [Apache Hive](http://hive.apache.org/) job, Cloud
- # Dataproc configures the Hive client to an equivalent verbosity level.
- module Level
- # Level is unspecified. Use default level for log4j.
- LEVEL_UNSPECIFIED = 0
-
- # Use ALL level for log4j.
- ALL = 1
-
- # Use TRACE level for log4j.
- TRACE = 2
-
- # Use DEBUG level for log4j.
- DEBUG = 3
-
- # Use INFO level for log4j.
- INFO = 4
-
- # Use WARN level for log4j.
- WARN = 5
-
- # Use ERROR level for log4j.
- ERROR = 6
-
- # Use FATAL level for log4j.
- FATAL = 7
-
- # Turn off log4j.
- OFF = 8
- end
- end
-
- # A Dataproc job for running
- # [Apache Hadoop
- # MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
- # jobs on [Apache Hadoop
- # YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
- # @!attribute [rw] main_jar_file_uri
- # @return [String]
- # The HCFS URI of the jar file containing the main class.
- # Examples:
- # 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
- # 'hdfs:/tmp/test-samples/custom-wordcount.jar'
- # 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
- # @!attribute [rw] main_class
- # @return [String]
- # The name of the driver's main class. The jar file containing the class
- # must be in the default CLASSPATH or specified in `jar_file_uris`.
- # @!attribute [rw] args
- # @return [Array<String>]
- # Optional. The arguments to pass to the driver. Do not
- # include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
- # job properties, since a collision may occur that causes an incorrect job
- # submission.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. Jar file URIs to add to the CLASSPATHs of the
- # Hadoop driver and tasks.
- # @!attribute [rw] file_uris
- # @return [Array<String>]
- # Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
- # to the working directory of Hadoop drivers and distributed tasks. Useful
- # for naively parallel tasks.
- # @!attribute [rw] archive_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of archives to be extracted in the working directory of
- # Hadoop drivers and tasks. Supported file types:
- # .jar, .tar, .tar.gz, .tgz, or .zip.
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure Hadoop.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in /etc/hadoop/conf/*-site and
- # classes in user code.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class HadoopJob; end
-
- # A Dataproc job for running [Apache Spark](http://spark.apache.org/)
- # applications on YARN.
- # The specification of the main method to call to drive the job.
- # Specify either the jar file that contains the main class or the main class
- # name. To pass both a main jar and a main class in that jar, add the jar to
- # `CommonJob.jar_file_uris`, and then specify the main class name in
- # `main_class`.
- # @!attribute [rw] main_jar_file_uri
- # @return [String]
- # The HCFS URI of the jar file that contains the main class.
- # @!attribute [rw] main_class
- # @return [String]
- # The name of the driver's main class. The jar file that contains the class
- # must be in the default CLASSPATH or specified in `jar_file_uris`.
- # @!attribute [rw] args
- # @return [Array<String>]
- # Optional. The arguments to pass to the driver. Do not include arguments,
- # such as `--conf`, that can be set as job properties, since a collision may
- # occur that causes an incorrect job submission.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
- # Spark driver and tasks.
- # @!attribute [rw] file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of files to be copied to the working directory of
- # Spark drivers and distributed tasks. Useful for naively parallel tasks.
- # @!attribute [rw] archive_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of archives to be extracted in the working directory
- # of Spark drivers and tasks. Supported file types:
- # .jar, .tar, .tar.gz, .tgz, and .zip.
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure Spark.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in
- # /etc/spark/conf/spark-defaults.conf and classes in user code.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class SparkJob; end
-
- # A Dataproc job for running
- # [Apache
- # PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
- # applications on YARN.
- # @!attribute [rw] main_python_file_uri
- # @return [String]
- # Required. The HCFS URI of the main Python file to use as the driver. Must
- # be a .py file.
- # @!attribute [rw] args
- # @return [Array<String>]
- # Optional. The arguments to pass to the driver. Do not include arguments,
- # such as `--conf`, that can be set as job properties, since a collision may
- # occur that causes an incorrect job submission.
- # @!attribute [rw] python_file_uris
- # @return [Array<String>]
- # Optional. HCFS file URIs of Python files to pass to the PySpark
- # framework. Supported file types: .py, .egg, and .zip.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
- # Python driver and tasks.
- # @!attribute [rw] file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of files to be copied to the working directory of
- # Python drivers and distributed tasks. Useful for naively parallel tasks.
- # @!attribute [rw] archive_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of archives to be extracted in the working directory of
- # .jar, .tar, .tar.gz, .tgz, and .zip.
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure PySpark.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in
- # /etc/spark/conf/spark-defaults.conf and classes in user code.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class PySparkJob; end
-
- # A list of queries to run on a cluster.
- # @!attribute [rw] queries
- # @return [Array<String>]
- # Required. The queries to execute. You do not need to terminate a query
- # with a semicolon. Multiple queries can be specified in one string
- # by separating each with a semicolon. Here is an example of an Cloud
- # Dataproc API snippet that uses a QueryList to specify a HiveJob:
- #
- # "hiveJob": {
- # "queryList": {
- # "queries": [
- # "query1",
- # "query2",
- # "query3;query4",
- # ]
- # }
- # }
- class QueryList; end
-
- # A Dataproc job for running [Apache Hive](https://hive.apache.org/)
- # queries on YARN.
- # @!attribute [rw] query_file_uri
- # @return [String]
- # The HCFS URI of the script that contains Hive queries.
- # @!attribute [rw] query_list
- # @return [Google::Cloud::Dataproc::V1beta2::QueryList]
- # A list of queries.
- # @!attribute [rw] continue_on_failure
- # @return [true, false]
- # Optional. Whether to continue executing queries if a query fails.
- # The default value is `false`. Setting to `true` can be useful when
- # executing independent parallel queries.
- # @!attribute [rw] script_variables
- # @return [Hash{String => String}]
- # Optional. Mapping of query variable names to values (equivalent to the
- # Hive command: `SET name="value";`).
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names and values, used to configure Hive.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
- # /etc/hive/conf/hive-site.xml, and classes in user code.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of jar files to add to the CLASSPATH of the
- # Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
- # and UDFs.
- class HiveJob; end
-
- # A Dataproc job for running [Apache Spark
- # SQL](http://spark.apache.org/sql/) queries.
- # @!attribute [rw] query_file_uri
- # @return [String]
- # The HCFS URI of the script that contains SQL queries.
- # @!attribute [rw] query_list
- # @return [Google::Cloud::Dataproc::V1beta2::QueryList]
- # A list of queries.
- # @!attribute [rw] script_variables
- # @return [Hash{String => String}]
- # Optional. Mapping of query variable names to values (equivalent to the
- # Spark SQL command: SET `name="value";`).
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure
- # Spark SQL's SparkConf. Properties that conflict with values set by the
- # Dataproc API may be overwritten.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class SparkSqlJob; end
-
- # A Dataproc job for running [Apache Pig](https://pig.apache.org/)
- # queries on YARN.
- # @!attribute [rw] query_file_uri
- # @return [String]
- # The HCFS URI of the script that contains the Pig queries.
- # @!attribute [rw] query_list
- # @return [Google::Cloud::Dataproc::V1beta2::QueryList]
- # A list of queries.
- # @!attribute [rw] continue_on_failure
- # @return [true, false]
- # Optional. Whether to continue executing queries if a query fails.
- # The default value is `false`. Setting to `true` can be useful when
- # executing independent parallel queries.
- # @!attribute [rw] script_variables
- # @return [Hash{String => String}]
- # Optional. Mapping of query variable names to values (equivalent to the Pig
- # command: `name=[value]`).
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure Pig.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
- # /etc/pig/conf/pig.properties, and classes in user code.
- # @!attribute [rw] jar_file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of jar files to add to the CLASSPATH of
- # the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class PigJob; end
-
- # A Dataproc job for running
- # [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
- # applications on YARN.
- # @!attribute [rw] main_r_file_uri
- # @return [String]
- # Required. The HCFS URI of the main R file to use as the driver.
- # Must be a .R file.
- # @!attribute [rw] args
- # @return [Array<String>]
- # Optional. The arguments to pass to the driver. Do not include arguments,
- # such as `--conf`, that can be set as job properties, since a collision may
- # occur that causes an incorrect job submission.
- # @!attribute [rw] file_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of files to be copied to the working directory of
- # R drivers and distributed tasks. Useful for naively parallel tasks.
- # @!attribute [rw] archive_uris
- # @return [Array<String>]
- # Optional. HCFS URIs of archives to be extracted in the working directory of
- # Spark drivers and tasks. Supported file types:
- # .jar, .tar, .tar.gz, .tgz, and .zip.
- # @!attribute [rw] properties
- # @return [Hash{String => String}]
- # Optional. A mapping of property names to values, used to configure SparkR.
- # Properties that conflict with values set by the Dataproc API may be
- # overwritten. Can include properties set in
- # /etc/spark/conf/spark-defaults.conf and classes in user code.
- # @!attribute [rw] logging_config
- # @return [Google::Cloud::Dataproc::V1beta2::LoggingConfig]
- # Optional. The runtime log config for job execution.
- class SparkRJob; end
-
- # Dataproc job config.
- # @!attribute [rw] cluster_name
- # @return [String]
- # Required. The name of the cluster where the job will be submitted.
- # @!attribute [rw] cluster_uuid
- # @return [String]
- # Output only. A cluster UUID generated by the Dataproc service when
- # the job is submitted.
- class JobPlacement; end
-
- # Dataproc job status.
- # @!attribute [rw] state
- # @return [Google::Cloud::Dataproc::V1beta2::JobStatus::State]
- # Output only. A state message specifying the overall job state.
- # @!attribute [rw] details
- # @return [String]
- # Output only. Optional Job state details, such as an error
- # description if the state is <code>ERROR</code>.
- # @!attribute [rw] state_start_time
- # @return [Google::Protobuf::Timestamp]
- # Output only. The time when this state was entered.
- # @!attribute [rw] substate
- # @return [Google::Cloud::Dataproc::V1beta2::JobStatus::Substate]
- # Output only. Additional state information, which includes
- # status reported by the agent.
- class JobStatus
- # The job state.
- module State
- # The job state is unknown.
- STATE_UNSPECIFIED = 0
-
- # The job is pending; it has been submitted, but is not yet running.
- PENDING = 1
-
- # Job has been received by the service and completed initial setup;
- # it will soon be submitted to the cluster.
- SETUP_DONE = 8
-
- # The job is running on the cluster.
- RUNNING = 2
-
- # A CancelJob request has been received, but is pending.
- CANCEL_PENDING = 3
-
- # Transient in-flight resources have been canceled, and the request to
- # cancel the running job has been issued to the cluster.
- CANCEL_STARTED = 7
-
- # The job cancellation was successful.
- CANCELLED = 4
-
- # The job has completed successfully.
- DONE = 5
-
- # The job has completed, but encountered an error.
- ERROR = 6
-
- # Job attempt has failed. The detail field contains failure details for
- # this attempt.
- #
- # Applies to restartable jobs only.
- ATTEMPT_FAILURE = 9
- end
-
- # The job substate.
- module Substate
- # The job substate is unknown.
- UNSPECIFIED = 0
-
- # The Job is submitted to the agent.
- #
- # Applies to RUNNING state.
- SUBMITTED = 1
-
- # The Job has been received and is awaiting execution (it may be waiting
- # for a condition to be met). See the "details" field for the reason for
- # the delay.
- #
- # Applies to RUNNING state.
- QUEUED = 2
-
- # The agent-reported status is out of date, which may be caused by a
- # loss of communication between the agent and Dataproc. If the
- # agent does not send a timely update, the job will fail.
- #
- # Applies to RUNNING state.
- STALE_STATUS = 3
- end
- end
-
- # Encapsulates the full scoping used to reference a job.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] job_id
- # @return [String]
- # Optional. The job ID, which must be unique within the project.
- # The ID must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), or hyphens (-). The maximum length is 100 characters.
- #
- # If not specified by the caller, the job ID will be provided by the server.
- class JobReference; end
-
- # A YARN application created by a job. Application information is a subset of
- # <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
- #
- # **Beta Feature**: This report is available for testing purposes only. It may
- # be changed before final release.
- # @!attribute [rw] name
- # @return [String]
- # Output only. The application name.
- # @!attribute [rw] state
- # @return [Google::Cloud::Dataproc::V1beta2::YarnApplication::State]
- # Output only. The application state.
- # @!attribute [rw] progress
- # @return [Float]
- # Output only. The numerical progress of the application, from 1 to 100.
- # @!attribute [rw] tracking_url
- # @return [String]
- # Output only. The HTTP URL of the ApplicationMaster, HistoryServer, or
- # TimelineServer that provides application-specific information. The URL uses
- # the internal hostname, and requires a proxy server for resolution and,
- # possibly, access.
- class YarnApplication
- # The application state, corresponding to
- # <code>YarnProtos.YarnApplicationStateProto</code>.
- module State
- # Status is unspecified.
- STATE_UNSPECIFIED = 0
-
- # Status is NEW.
- NEW = 1
-
- # Status is NEW_SAVING.
- NEW_SAVING = 2
-
- # Status is SUBMITTED.
- SUBMITTED = 3
-
- # Status is ACCEPTED.
- ACCEPTED = 4
-
- # Status is RUNNING.
- RUNNING = 5
-
- # Status is FINISHED.
- FINISHED = 6
-
- # Status is FAILED.
- FAILED = 7
-
- # Status is KILLED.
- KILLED = 8
- end
- end
-
- # A Dataproc job resource.
- # @!attribute [rw] reference
- # @return [Google::Cloud::Dataproc::V1beta2::JobReference]
- # Optional. The fully qualified reference to the job, which can be used to
- # obtain the equivalent REST path of the job resource. If this property
- # is not specified when a job is created, the server generates a
- # <code>job_id</code>.
- # @!attribute [rw] placement
- # @return [Google::Cloud::Dataproc::V1beta2::JobPlacement]
- # Required. Job information, including how, when, and where to
- # run the job.
- # @!attribute [rw] hadoop_job
- # @return [Google::Cloud::Dataproc::V1beta2::HadoopJob]
- # @!attribute [rw] spark_job
- # @return [Google::Cloud::Dataproc::V1beta2::SparkJob]
- # @!attribute [rw] pyspark_job
- # @return [Google::Cloud::Dataproc::V1beta2::PySparkJob]
- # @!attribute [rw] hive_job
- # @return [Google::Cloud::Dataproc::V1beta2::HiveJob]
- # @!attribute [rw] pig_job
- # @return [Google::Cloud::Dataproc::V1beta2::PigJob]
- # @!attribute [rw] spark_r_job
- # @return [Google::Cloud::Dataproc::V1beta2::SparkRJob]
- # @!attribute [rw] spark_sql_job
- # @return [Google::Cloud::Dataproc::V1beta2::SparkSqlJob]
- # @!attribute [rw] status
- # @return [Google::Cloud::Dataproc::V1beta2::JobStatus]
- # Output only. The job status. Additional application-specific
- # status information may be contained in the <code>type_job</code>
- # and <code>yarn_applications</code> fields.
- # @!attribute [rw] status_history
- # @return [Array<Google::Cloud::Dataproc::V1beta2::JobStatus>]
- # Output only. The previous job status.
- # @!attribute [rw] yarn_applications
- # @return [Array<Google::Cloud::Dataproc::V1beta2::YarnApplication>]
- # Output only. The collection of YARN applications spun up by this job.
- #
- # **Beta** Feature: This report is available for testing purposes only. It
- # may be changed before final release.
- # @!attribute [rw] submitted_by
- # @return [String]
- # Output only. The email address of the user submitting the job. For jobs
- # submitted on the cluster, the address is <code>username@hostname</code>.
- # @!attribute [rw] driver_output_resource_uri
- # @return [String]
- # Output only. A URI pointing to the location of the stdout of the job's
- # driver program.
- # @!attribute [rw] driver_control_files_uri
- # @return [String]
- # Output only. If present, the location of miscellaneous control files
- # which may be used as part of job setup and handling. If not present,
- # control files may be placed in the same location as `driver_output_uri`.
- # @!attribute [rw] labels
- # @return [Hash{String => String}]
- # Optional. The labels to associate with this job.
- # Label **keys** must contain 1 to 63 characters, and must conform to
- # [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
- # Label **values** may be empty, but, if present, must contain 1 to 63
- # characters, and must conform to [RFC
- # 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
- # associated with a job.
- # @!attribute [rw] scheduling
- # @return [Google::Cloud::Dataproc::V1beta2::JobScheduling]
- # Optional. Job scheduling configuration.
- # @!attribute [rw] job_uuid
- # @return [String]
- # Output only. A UUID that uniquely identifies a job within the project
- # over time. This is in contrast to a user-settable reference.job_id that
- # may be reused over time.
- class Job; end
-
- # Job scheduling options.
- # @!attribute [rw] max_failures_per_hour
- # @return [Integer]
- # Optional. Maximum number of times per hour a driver may be restarted as
- # a result of driver terminating with non-zero code before job is
- # reported failed.
- #
- # A job may be reported as thrashing if driver exits with non-zero code
- # 4 times within 10 minute window.
- #
- # Maximum value is 10.
- class JobScheduling; end
-
- # A request to submit a job.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] job
- # @return [Google::Cloud::Dataproc::V1beta2::Job]
- # Required. The job resource.
- # @!attribute [rw] request_id
- # @return [String]
- # Optional. A unique id used to identify the request. If the server
- # receives two {Google::Cloud::Dataproc::V1beta2::SubmitJobRequest SubmitJobRequest} requests with the same
- # id, then the second request will be ignored and the
- # first {Google::Cloud::Dataproc::V1beta2::Job Job} created and stored in the backend
- # is returned.
- #
- # It is recommended to always set this value to a
- # [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
- #
- # The id must contain only letters (a-z, A-Z), numbers (0-9),
- # underscores (_), and hyphens (-). The maximum length is 40 characters.
- class SubmitJobRequest; end
-
- # A request to get the resource representation for a job in a project.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] job_id
- # @return [String]
- # Required. The job ID.
- class GetJobRequest; end
-
- # A request to list jobs in a project.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] page_size
- # @return [Integer]
- # Optional. The number of results to return in each response.
- # @!attribute [rw] page_token
- # @return [String]
- # Optional. The page token, returned by a previous call, to request the
- # next page of results.
- # @!attribute [rw] cluster_name
- # @return [String]
- # Optional. If set, the returned jobs list includes only jobs that were
- # submitted to the named cluster.
- # @!attribute [rw] job_state_matcher
- # @return [Google::Cloud::Dataproc::V1beta2::ListJobsRequest::JobStateMatcher]
- # Optional. Specifies enumerated categories of jobs to list.
- # (default = match ALL jobs).
- #
- # If `filter` is provided, `jobStateMatcher` will be ignored.
- # @!attribute [rw] filter
- # @return [String]
- # Optional. A filter constraining the jobs to list. Filters are
- # case-sensitive and have the following syntax:
- #
- # [field = value] AND [field [= value]] ...
- #
- # where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label
- # key. **value** can be `*` to match all values.
- # `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
- # Only the logical `AND` operator is supported; space-separated items are
- # treated as having an implicit `AND` operator.
- #
- # Example filter:
- #
- # status.state = ACTIVE AND labels.env = staging AND labels.starred = *
- class ListJobsRequest
- # A matcher that specifies categories of job states.
- module JobStateMatcher
- # Match all jobs, regardless of state.
- ALL = 0
-
- # Only match jobs in non-terminal states: PENDING, RUNNING, or
- # CANCEL_PENDING.
- ACTIVE = 1
-
- # Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
- NON_ACTIVE = 2
- end
- end
-
- # A request to update a job.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] job_id
- # @return [String]
- # Required. The job ID.
- # @!attribute [rw] job
- # @return [Google::Cloud::Dataproc::V1beta2::Job]
- # Required. The changes to the job.
- # @!attribute [rw] update_mask
- # @return [Google::Protobuf::FieldMask]
- # Required. Specifies the path, relative to <code>Job</code>, of
- # the field to update. For example, to update the labels of a Job the
- # <code>update_mask</code> parameter would be specified as
- # <code>labels</code>, and the `PATCH` request body would specify the new
- # value. <strong>Note:</strong> Currently, <code>labels</code> is the only
- # field that can be updated.
- class UpdateJobRequest; end
-
- # A list of jobs in a project.
- # @!attribute [rw] jobs
- # @return [Array<Google::Cloud::Dataproc::V1beta2::Job>]
- # Output only. Jobs list.
- # @!attribute [rw] next_page_token
- # @return [String]
- # Optional. This token is included in the response if there are more results
- # to fetch. To fetch additional results, provide this value as the
- # `page_token` in a subsequent <code>ListJobsRequest</code>.
- class ListJobsResponse; end
-
- # A request to cancel a job.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] job_id
- # @return [String]
- # Required. The job ID.
- class CancelJobRequest; end
-
- # A request to delete a job.
- # @!attribute [rw] project_id
- # @return [String]
- # Required. The ID of the Google Cloud Platform project that the job
- # belongs to.
- # @!attribute [rw] region
- # @return [String]
- # Required. The Dataproc region in which to handle the request.
- # @!attribute [rw] job_id
- # @return [String]
- # Required. The job ID.
- class DeleteJobRequest; end
- end
- end
- end
- end
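
For context on what this deletion removes, here is a minimal sketch of how the documented v1beta2 job types were driven from the 0.x gem. It assumes the 0.x `Google::Cloud::Dataproc::JobController` entry point with its positional (project_id, region, job) signature; the project, region, cluster, and jar path are placeholders, not values from this diff. In 1.0 the equivalent call moves to the generated V1/V1beta2 client gems.

    # 0.x-era usage (the surface removed by this release).
    require "google/cloud/dataproc"

    job_controller = Google::Cloud::Dataproc::JobController.new version: :v1beta2

    # The job hash mirrors Google::Cloud::Dataproc::V1beta2::Job above:
    # placement -> JobPlacement, hadoop_job -> HadoopJob.
    job = {
      placement:  { cluster_name: "example-cluster" },
      hadoop_job: {
        main_jar_file_uri: "gs://example-bucket/wordcount.jar",
        args: ["gs://example-bucket/input", "gs://example-bucket/output"]
      }
    }

    submitted = job_controller.submit_job "example-project", "us-central1", job
    puts submitted.reference.job_id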