google-cloud-dataplex-v1 0.5.1 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2023 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # Auto-generated by gapic-generator-ruby. DO NOT EDIT!
18
+
19
+
20
+ module Google
21
+ module Cloud
22
+ module Dataplex
23
+ module V1
24
+ # DataProfileScan related setting.
25
+ class DataProfileSpec
26
+ include ::Google::Protobuf::MessageExts
27
+ extend ::Google::Protobuf::MessageExts::ClassMethods
28
+ end
29
+
30
+ # DataProfileResult defines the output of DataProfileScan.
31
+ # Each field of the table will have field type specific profile result.
32
+ # @!attribute [rw] row_count
33
+ # @return [::Integer]
34
+ # The count of all rows in the sampled data.
35
+ # Return 0, if zero rows.
36
+ # @!attribute [rw] profile
37
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile]
38
+ # This represents the profile information per field.
39
+ # @!attribute [rw] scanned_data
40
+ # @return [::Google::Cloud::Dataplex::V1::ScannedData]
41
+ # The data scanned for this profile.
42
+ class DataProfileResult
43
+ include ::Google::Protobuf::MessageExts
44
+ extend ::Google::Protobuf::MessageExts::ClassMethods
45
+
46
+ # Contains the profile information, which describes the structure and
47
+ # layout of the data.
48
+ # @!attribute [rw] fields
49
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field>]
50
+ # The sequence of fields describing data in table entities.
51
+ class Profile
52
+ include ::Google::Protobuf::MessageExts
53
+ extend ::Google::Protobuf::MessageExts::ClassMethods
54
+
55
+ # Represents a column field within a table schema.
56
+ # @!attribute [rw] name
57
+ # @return [::String]
58
+ # The name of the field.
59
+ # @!attribute [rw] type
60
+ # @return [::String]
61
+ # The field data type. Possible values include:
62
+ #
63
+ # * STRING
64
+ # * BYTE
65
+ # * INT64
66
+ # * INT32
67
+ # * INT16
68
+ # * DOUBLE
69
+ # * FLOAT
70
+ # * DECIMAL
71
+ # * BOOLEAN
72
+ # * BINARY
73
+ # * TIMESTAMP
74
+ # * DATE
75
+ # * TIME
76
+ # * NULL
77
+ # * RECORD
78
+ # @!attribute [rw] mode
79
+ # @return [::String]
80
+ # The mode of the field. Its value will be:
81
+ # REQUIRED, if it is a required field.
82
+ # NULLABLE, if it is an optional field.
83
+ # REPEATED, if it is a repeated field.
84
+ # @!attribute [rw] profile
85
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo]
86
+ # The profile information for the corresponding field.
87
+ class Field
88
+ include ::Google::Protobuf::MessageExts
89
+ extend ::Google::Protobuf::MessageExts::ClassMethods
90
+
91
+ # ProfileInfo defines the profile information for each schema field type.
92
+ # @!attribute [rw] null_ratio
93
+ # @return [::Float]
94
+ # The ratio of null rows against the rows in the sampled data.
95
+ # @!attribute [rw] distinct_ratio
96
+ # @return [::Float]
97
+ # The ratio of rows that are distinct against the rows in the sampled
98
+ # data.
99
+ # @!attribute [rw] top_n_values
100
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::TopNValue>]
101
+ # The array of top N values of the field in the sampled data.
102
+ # Currently N is set as 10 or equal to distinct values in the field,
103
+ # whichever is smaller. This will be optional for complex non-groupable
104
+ # data-types such as JSON, ARRAY, STRUCT.
105
+ # @!attribute [rw] string_profile
106
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::StringFieldInfo]
107
+ # The corresponding string field profile.
108
+ # @!attribute [rw] integer_profile
109
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::IntegerFieldInfo]
110
+ # The corresponding integer field profile.
111
+ # @!attribute [rw] double_profile
112
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::DoubleFieldInfo]
113
+ # The corresponding double field profile.
114
+ class ProfileInfo
115
+ include ::Google::Protobuf::MessageExts
116
+ extend ::Google::Protobuf::MessageExts::ClassMethods
117
+
118
+ # StringFieldInfo defines output info for any string type field.
119
+ # @!attribute [rw] min_length
120
+ # @return [::Integer]
121
+ # The minimum length of the string field in the sampled data.
122
+ # Optional if zero non-null rows.
123
+ # @!attribute [rw] max_length
124
+ # @return [::Integer]
125
+ # The maximum length of a string field in the sampled data.
126
+ # Optional if zero non-null rows.
127
+ # @!attribute [rw] average_length
128
+ # @return [::Float]
129
+ # The average length of a string field in the sampled data.
130
+ # Optional if zero non-null rows.
131
+ class StringFieldInfo
132
+ include ::Google::Protobuf::MessageExts
133
+ extend ::Google::Protobuf::MessageExts::ClassMethods
134
+ end
135
+
136
+ # IntegerFieldInfo defines output for any integer type field.
137
+ # @!attribute [rw] average
138
+ # @return [::Float]
139
+ # The average of non-null values of integer field in the sampled
140
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
141
+ # rows.
142
+ # @!attribute [rw] standard_deviation
143
+ # @return [::Float]
144
+ # The standard deviation of non-null values of integer field in the sampled
145
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
146
+ # rows.
147
+ # @!attribute [rw] min
148
+ # @return [::Integer]
149
+ # The minimum value of an integer field in the sampled data.
150
+ # Return NaN, if the field has a NaN. Optional if zero non-null
151
+ # rows.
152
+ # @!attribute [rw] quartiles
153
+ # @return [::Array<::Integer>]
154
+ # Quartiles divide the number of data points into four parts, or
155
+ # quarters, of more-or-less equal size. Three main quartiles used
156
+ # are: The first quartile (Q1) splits off the lowest 25% of data from
157
+ # the highest 75%. It is also known as the lower or 25th empirical
158
+ # quartile, as 25% of the data is below this point. The second
159
+ # quartile (Q2) is the median of a data set. So, 50% of the data lies
160
+ # below this point. The third quartile (Q3) splits off the highest
161
+ # 25% of data from the lowest 75%. It is known as the upper or 75th
162
+ # empirical quartile, as 75% of the data lies below this point. So,
163
+ # here the quartiles are provided as an ordered list of quartile
164
+ # values, occurring in order Q1, median, Q3.
165
+ # @!attribute [rw] max
166
+ # @return [::Integer]
167
+ # The maximum value of an integer field in the sampled data.
168
+ # Return NaN, if the field has a NaN. Optional if zero non-null
169
+ # rows.
170
+ class IntegerFieldInfo
171
+ include ::Google::Protobuf::MessageExts
172
+ extend ::Google::Protobuf::MessageExts::ClassMethods
173
+ end
174
+
175
+ # DoubleFieldInfo defines output for any double type field.
176
+ # @!attribute [rw] average
177
+ # @return [::Float]
178
+ # The average of non-null values of double field in the sampled data.
179
+ # Return NaN, if the field has a NaN. Optional if zero non-null rows.
180
+ # @!attribute [rw] standard_deviation
181
+ # @return [::Float]
182
+ # The standard deviation of non-null values of double field in the sampled
183
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
184
+ # rows.
185
+ # @!attribute [rw] min
186
+ # @return [::Float]
187
+ # The minimum value of a double field in the sampled data.
188
+ # Return NaN, if the field has a NaN. Optional if zero non-null
189
+ # rows.
190
+ # @!attribute [rw] quartiles
191
+ # @return [::Array<::Float>]
192
+ # Quartiles divide the number of data points into four parts, or
193
+ # quarters, of more-or-less equal size. Three main quartiles used
194
+ # are: The first quartile (Q1) splits off the lowest 25% of data from
195
+ # the highest 75%. It is also known as the lower or 25th empirical
196
+ # quartile, as 25% of the data is below this point. The second
197
+ # quartile (Q2) is the median of a data set. So, 50% of the data lies
198
+ # below this point. The third quartile (Q3) splits off the highest
199
+ # 25% of data from the lowest 75%. It is known as the upper or 75th
200
+ # empirical quartile, as 75% of the data lies below this point. So,
201
+ # here the quartiles are provided as an ordered list of quartile
202
+ # values, occurring in order Q1, median, Q3.
203
+ # @!attribute [rw] max
204
+ # @return [::Float]
205
+ # The maximum value of a double field in the sampled data.
206
+ # Return NaN, if the field has a NaN. Optional if zero non-null
207
+ # rows.
208
+ class DoubleFieldInfo
209
+ include ::Google::Protobuf::MessageExts
210
+ extend ::Google::Protobuf::MessageExts::ClassMethods
211
+ end
212
+
213
+ # The TopNValue defines the structure of output of top N values of a
214
+ # field.
215
+ # @!attribute [rw] value
216
+ # @return [::String]
217
+ # The value is the string value of the actual value from the field.
218
+ # @!attribute [rw] count
219
+ # @return [::Integer]
220
+ # The frequency count of the corresponding value in the field.
221
+ class TopNValue
222
+ include ::Google::Protobuf::MessageExts
223
+ extend ::Google::Protobuf::MessageExts::ClassMethods
224
+ end
225
+ end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
231
+ end
232
+ end
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2023 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # Auto-generated by gapic-generator-ruby. DO NOT EDIT!
18
+
19
+
20
+ module Google
21
+ module Cloud
22
+ module Dataplex
23
+ module V1
24
+ # DataQualityScan related setting.
25
+ # @!attribute [rw] rules
26
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityRule>]
27
+ # The list of rules to evaluate against a data source. At least one rule is
28
+ # required.
29
+ class DataQualitySpec
30
+ include ::Google::Protobuf::MessageExts
31
+ extend ::Google::Protobuf::MessageExts::ClassMethods
32
+ end
33
+
34
+ # The output of a DataQualityScan.
35
+ # @!attribute [rw] passed
36
+ # @return [::Boolean]
37
+ # Overall data quality result -- `true` if all rules passed.
38
+ # @!attribute [rw] dimensions
39
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityDimensionResult>]
40
+ # A list of results at the dimension-level.
41
+ # @!attribute [rw] rules
42
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityRuleResult>]
43
+ # A list of all the rules in a job, and their results.
44
+ # @!attribute [rw] row_count
45
+ # @return [::Integer]
46
+ # The count of rows processed.
47
+ # @!attribute [rw] scanned_data
48
+ # @return [::Google::Cloud::Dataplex::V1::ScannedData]
49
+ # The data scanned for this result.
50
+ class DataQualityResult
51
+ include ::Google::Protobuf::MessageExts
52
+ extend ::Google::Protobuf::MessageExts::ClassMethods
53
+ end
54
+
55
+ # DataQualityRuleResult provides a more detailed, per-rule level view of the
56
+ # results.
57
+ # @!attribute [rw] rule
58
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule]
59
+ # The rule specified in the DataQualitySpec, as is.
60
+ # @!attribute [rw] passed
61
+ # @return [::Boolean]
62
+ # Whether the rule passed or failed.
63
+ # @!attribute [rw] evaluated_count
64
+ # @return [::Integer]
65
+ # The number of rows a rule was evaluated against.
66
+ # This field is only valid for ColumnMap type rules.
67
+ # Evaluated count can be configured to either
68
+ # (1) include all rows (default) - with null rows automatically failing rule
69
+ # evaluation OR (2) exclude null rows from the evaluated_count, by setting
70
+ # ignore_null = true.
71
+ # @!attribute [rw] passed_count
72
+ # @return [::Integer]
73
+ # The number of rows which passed a rule evaluation.
74
+ # This field is only valid for ColumnMap type rules.
75
+ # @!attribute [rw] null_count
76
+ # @return [::Integer]
77
+ # The number of rows with null values in the specified column.
78
+ # @!attribute [rw] pass_ratio
79
+ # @return [::Float]
80
+ # The ratio of passed_count / evaluated_count.
81
+ # This field is only valid for ColumnMap type rules.
82
+ # @!attribute [rw] failing_rows_query
83
+ # @return [::String]
84
+ # The query to find rows that did not pass this rule.
85
+ # Only applies to ColumnMap and RowCondition rules.
86
+ class DataQualityRuleResult
87
+ include ::Google::Protobuf::MessageExts
88
+ extend ::Google::Protobuf::MessageExts::ClassMethods
89
+ end
90
+
91
+ # DataQualityDimensionResult provides a more detailed, per-dimension level view
92
+ # of the results.
93
+ # @!attribute [rw] passed
94
+ # @return [::Boolean]
95
+ # Whether the dimension passed or failed.
96
+ class DataQualityDimensionResult
97
+ include ::Google::Protobuf::MessageExts
98
+ extend ::Google::Protobuf::MessageExts::ClassMethods
99
+ end
100
+
101
+ # A rule captures data quality intent about a data source.
102
+ # @!attribute [rw] range_expectation
103
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RangeExpectation]
104
+ # ColumnMap rule which evaluates whether each column value lies between a
105
+ # specified range.
106
+ # @!attribute [rw] non_null_expectation
107
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::NonNullExpectation]
108
+ # ColumnMap rule which evaluates whether each column value is non-null.
109
+ # @!attribute [rw] set_expectation
110
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::SetExpectation]
111
+ # ColumnMap rule which evaluates whether each column value is contained by
112
+ # a specified set.
113
+ # @!attribute [rw] regex_expectation
114
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RegexExpectation]
115
+ # ColumnMap rule which evaluates whether each column value matches a
116
+ # specified regex.
117
+ # @!attribute [rw] uniqueness_expectation
118
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::UniquenessExpectation]
119
+ # ColumnAggregate rule which evaluates whether the column has duplicates.
120
+ # @!attribute [rw] statistic_range_expectation
121
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::StatisticRangeExpectation]
122
+ # ColumnAggregate rule which evaluates whether the column aggregate
123
+ # statistic lies between a specified range.
124
+ # @!attribute [rw] row_condition_expectation
125
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RowConditionExpectation]
126
+ # Table rule which evaluates whether each row passes the specified
127
+ # condition.
128
+ # @!attribute [rw] table_condition_expectation
129
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::TableConditionExpectation]
130
+ # Table rule which evaluates whether the provided expression is true.
131
+ # @!attribute [rw] column
132
+ # @return [::String]
133
+ # Optional. The unnested column which this rule is evaluated against.
134
+ # @!attribute [rw] ignore_null
135
+ # @return [::Boolean]
136
+ # Optional. Rows with null values will automatically fail a rule, unless
137
+ # ignore_null is true. In that case, such null rows are trivially considered
138
+ # passing. Only applicable to ColumnMap rules.
139
+ # @!attribute [rw] dimension
140
+ # @return [::String]
141
+ # Required. The dimension a rule belongs to. Results are also aggregated at
142
+ # the dimension-level. Supported dimensions are ["COMPLETENESS", "ACCURACY",
143
+ # "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"]
144
+ # @!attribute [rw] threshold
145
+ # @return [::Float]
146
+ # Optional. The minimum ratio of passing_rows / total_rows required to pass
147
+ # this rule, with a range of [0.0, 1.0]
148
+ #
149
+ # 0 indicates default value (i.e. 1.0)
150
+ class DataQualityRule
151
+ include ::Google::Protobuf::MessageExts
152
+ extend ::Google::Protobuf::MessageExts::ClassMethods
153
+
154
+ # Evaluates whether each column value lies between a specified range.
155
+ # @!attribute [rw] min_value
156
+ # @return [::String]
157
+ # Optional. The minimum column value allowed for a row to pass this
158
+ # validation. At least one of min_value and max_value need to be provided.
159
+ # @!attribute [rw] max_value
160
+ # @return [::String]
161
+ # Optional. The maximum column value allowed for a row to pass this
162
+ # validation. At least one of min_value and max_value need to be provided.
163
+ # @!attribute [rw] strict_min_enabled
164
+ # @return [::Boolean]
165
+ # Optional. Whether each value needs to be strictly greater than ('>') the
166
+ # minimum, or if equality is allowed. Only relevant if a min_value has been
167
+ # defined. Default = false.
168
+ # @!attribute [rw] strict_max_enabled
169
+ # @return [::Boolean]
170
+ # Optional. Whether each value needs to be strictly less than ('<') the
171
+ # maximum, or if equality is allowed. Only relevant if a max_value has been
172
+ # defined. Default = false.
173
+ class RangeExpectation
174
+ include ::Google::Protobuf::MessageExts
175
+ extend ::Google::Protobuf::MessageExts::ClassMethods
176
+ end
177
+
178
+ # Evaluates whether each column value is non-null.
179
+ class NonNullExpectation
180
+ include ::Google::Protobuf::MessageExts
181
+ extend ::Google::Protobuf::MessageExts::ClassMethods
182
+ end
183
+
184
+ # Evaluates whether each column value is contained by a specified set.
185
+ # @!attribute [rw] values
186
+ # @return [::Array<::String>]
187
+ class SetExpectation
188
+ include ::Google::Protobuf::MessageExts
189
+ extend ::Google::Protobuf::MessageExts::ClassMethods
190
+ end
191
+
192
+ # Evaluates whether each column value matches a specified regex.
193
+ # @!attribute [rw] regex
194
+ # @return [::String]
195
+ class RegexExpectation
196
+ include ::Google::Protobuf::MessageExts
197
+ extend ::Google::Protobuf::MessageExts::ClassMethods
198
+ end
199
+
200
+ # Evaluates whether the column has duplicates.
201
+ class UniquenessExpectation
202
+ include ::Google::Protobuf::MessageExts
203
+ extend ::Google::Protobuf::MessageExts::ClassMethods
204
+ end
205
+
206
+ # Evaluates whether the column aggregate statistic lies between a specified
207
+ # range.
208
+ # @!attribute [rw] statistic
209
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::StatisticRangeExpectation::ColumnStatistic]
210
+ # @!attribute [rw] min_value
211
+ # @return [::String]
212
+ # The minimum column statistic value allowed for a row to pass this
213
+ # validation.
214
+ # At least one of min_value and max_value need to be provided.
215
+ # @!attribute [rw] max_value
216
+ # @return [::String]
217
+ # The maximum column statistic value allowed for a row to pass this
218
+ # validation.
219
+ # At least one of min_value and max_value need to be provided.
220
+ # @!attribute [rw] strict_min_enabled
221
+ # @return [::Boolean]
222
+ # Whether column statistic needs to be strictly greater than ('>')
223
+ # the minimum, or if equality is allowed. Only relevant if a min_value has
224
+ # been defined. Default = false.
225
+ # @!attribute [rw] strict_max_enabled
226
+ # @return [::Boolean]
227
+ # Whether column statistic needs to be strictly less than ('<') the
228
+ # maximum, or if equality is allowed. Only relevant if a max_value has been
229
+ # defined. Default = false.
230
+ class StatisticRangeExpectation
231
+ include ::Google::Protobuf::MessageExts
232
+ extend ::Google::Protobuf::MessageExts::ClassMethods
233
+
234
+ module ColumnStatistic
235
+ # Unspecified statistic type
236
+ STATISTIC_UNDEFINED = 0
237
+
238
+ # Evaluate the column mean
239
+ MEAN = 1
240
+
241
+ # Evaluate the column min
242
+ MIN = 2
243
+
244
+ # Evaluate the column max
245
+ MAX = 3
246
+ end
247
+ end
248
+
249
+ # Evaluates whether each row passes the specified condition.
250
+ # The SQL expression needs to use BigQuery standard SQL syntax and should
251
+ # produce a boolean per row as the result.
252
+ # Example: col1 >= 0 AND col2 < 10
253
+ # @!attribute [rw] sql_expression
254
+ # @return [::String]
255
+ class RowConditionExpectation
256
+ include ::Google::Protobuf::MessageExts
257
+ extend ::Google::Protobuf::MessageExts::ClassMethods
258
+ end
259
+
260
+ # Evaluates whether the provided expression is true.
261
+ # The SQL expression needs to use BigQuery standard SQL syntax and should
262
+ # produce a scalar boolean result.
263
+ # Example: MIN(col1) >= 0
264
+ # @!attribute [rw] sql_expression
265
+ # @return [::String]
266
+ class TableConditionExpectation
267
+ include ::Google::Protobuf::MessageExts
268
+ extend ::Google::Protobuf::MessageExts::ClassMethods
269
+ end
270
+ end
271
+ end
272
+ end
273
+ end
274
+ end