google-cloud-dataplex-v1 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2023 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # Auto-generated by gapic-generator-ruby. DO NOT EDIT!
18
+
19
+
20
+ module Google
21
+ module Cloud
22
+ module Dataplex
23
+ module V1
24
+ # DataProfileScan related setting.
25
+ class DataProfileSpec
26
+ include ::Google::Protobuf::MessageExts
27
+ extend ::Google::Protobuf::MessageExts::ClassMethods
28
+ end
29
+
30
+ # DataProfileResult defines the output of DataProfileScan.
31
+ # Each field of the table will have field type specific profile result.
32
+ # @!attribute [rw] row_count
33
+ # @return [::Integer]
34
+ # The count of all rows in the sampled data.
35
+ # Return 0, if zero rows.
36
+ # @!attribute [rw] profile
37
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile]
38
+ # This represents the profile information per field.
39
+ # @!attribute [rw] scanned_data
40
+ # @return [::Google::Cloud::Dataplex::V1::ScannedData]
41
+ # The data scanned for this profile.
42
+ class DataProfileResult
43
+ include ::Google::Protobuf::MessageExts
44
+ extend ::Google::Protobuf::MessageExts::ClassMethods
45
+
46
+ # Profile information that describes the structure and layout of the data
47
+ # and contains the profile info.
48
+ # @!attribute [rw] fields
49
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field>]
50
+ # The sequence of fields describing data in table entities.
51
+ class Profile
52
+ include ::Google::Protobuf::MessageExts
53
+ extend ::Google::Protobuf::MessageExts::ClassMethods
54
+
55
+ # Represents a column field within a table schema.
56
+ # @!attribute [rw] name
57
+ # @return [::String]
58
+ # The name of the field.
59
+ # @!attribute [rw] type
60
+ # @return [::String]
61
+ # The field data type. Possible values include:
62
+ #
63
+ # * STRING
64
+ # * BYTE
65
+ # * INT64
66
+ # * INT32
67
+ # * INT16
68
+ # * DOUBLE
69
+ # * FLOAT
70
+ # * DECIMAL
71
+ # * BOOLEAN
72
+ # * BINARY
73
+ # * TIMESTAMP
74
+ # * DATE
75
+ # * TIME
76
+ # * NULL
77
+ # * RECORD
78
+ # @!attribute [rw] mode
79
+ # @return [::String]
80
+ # The mode of the field. Its value will be:
81
+ # REQUIRED, if it is a required field.
82
+ # NULLABLE, if it is an optional field.
83
+ # REPEATED, if it is a repeated field.
84
+ # @!attribute [rw] profile
85
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo]
86
+ # The profile information for the corresponding field.
87
+ class Field
88
+ include ::Google::Protobuf::MessageExts
89
+ extend ::Google::Protobuf::MessageExts::ClassMethods
90
+
91
+ # ProfileInfo defines the profile information for each schema field type.
92
+ # @!attribute [rw] null_ratio
93
+ # @return [::Float]
94
+ # The ratio of null rows against the rows in the sampled data.
95
+ # @!attribute [rw] distinct_ratio
96
+ # @return [::Float]
97
+ # The ratio of rows that are distinct against the rows in the sampled
98
+ # data.
99
+ # @!attribute [rw] top_n_values
100
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::TopNValue>]
101
+ # The array of top N values of the field in the sampled data.
102
+ # Currently N is set as 10 or equal to distinct values in the field,
103
+ # whichever is smaller. This will be optional for complex non-groupable
104
+ # data-types such as JSON, ARRAY, STRUCT.
105
+ # @!attribute [rw] string_profile
106
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::StringFieldInfo]
107
+ # The corresponding string field profile.
108
+ # @!attribute [rw] integer_profile
109
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::IntegerFieldInfo]
110
+ # The corresponding integer field profile.
111
+ # @!attribute [rw] double_profile
112
+ # @return [::Google::Cloud::Dataplex::V1::DataProfileResult::Profile::Field::ProfileInfo::DoubleFieldInfo]
113
+ # The corresponding double field profile.
114
+ class ProfileInfo
115
+ include ::Google::Protobuf::MessageExts
116
+ extend ::Google::Protobuf::MessageExts::ClassMethods
117
+
118
+ # StringFieldInfo defines output info for any string type field.
119
+ # @!attribute [rw] min_length
120
+ # @return [::Integer]
121
+ # The minimum length of the string field in the sampled data.
122
+ # Optional if zero non-null rows.
123
+ # @!attribute [rw] max_length
124
+ # @return [::Integer]
125
+ # The maximum length of a string field in the sampled data.
126
+ # Optional if zero non-null rows.
127
+ # @!attribute [rw] average_length
128
+ # @return [::Float]
129
+ # The average length of a string field in the sampled data.
130
+ # Optional if zero non-null rows.
131
+ class StringFieldInfo
132
+ include ::Google::Protobuf::MessageExts
133
+ extend ::Google::Protobuf::MessageExts::ClassMethods
134
+ end
135
+
136
+ # IntegerFieldInfo defines output for any integer type field.
137
+ # @!attribute [rw] average
138
+ # @return [::Float]
139
+ # The average of non-null values of integer field in the sampled
140
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
141
+ # rows.
142
+ # @!attribute [rw] standard_deviation
143
+ # @return [::Float]
144
+ # The standard deviation of non-null values of integer field in the sampled
145
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
146
+ # rows.
147
+ # @!attribute [rw] min
148
+ # @return [::Integer]
149
+ # The minimum value of an integer field in the sampled data.
150
+ # Return NaN, if the field has a NaN. Optional if zero non-null
151
+ # rows.
152
+ # @!attribute [rw] quartiles
153
+ # @return [::Array<::Integer>]
154
+ # Quartiles divide the number of data points into four parts, or
155
+ # quarters, of more-or-less equal size. Three main quartiles used
156
+ # are: The first quartile (Q1) splits off the lowest 25% of data from
157
+ # the highest 75%. It is also known as the lower or 25th empirical
158
+ # quartile, as 25% of the data is below this point. The second
159
+ # quartile (Q2) is the median of a data set. So, 50% of the data lies
160
+ # below this point. The third quartile (Q3) splits off the highest
161
+ # 25% of data from the lowest 75%. It is known as the upper or 75th
162
+ # empirical quartile, as 75% of the data lies below this point. So,
163
+ # here the quartiles are provided as an ordered list of quartile
164
+ # values, occurring in order Q1, median, Q3.
165
+ # @!attribute [rw] max
166
+ # @return [::Integer]
167
+ # The maximum value of an integer field in the sampled data.
168
+ # Return NaN, if the field has a NaN. Optional if zero non-null
169
+ # rows.
170
+ class IntegerFieldInfo
171
+ include ::Google::Protobuf::MessageExts
172
+ extend ::Google::Protobuf::MessageExts::ClassMethods
173
+ end
174
+
175
+ # DoubleFieldInfo defines output for any double type field.
176
+ # @!attribute [rw] average
177
+ # @return [::Float]
178
+ # The average of non-null values of double field in the sampled data.
179
+ # Return NaN, if the field has a NaN. Optional if zero non-null rows.
180
+ # @!attribute [rw] standard_deviation
181
+ # @return [::Float]
182
+ # The standard deviation of non-null values of double field in the sampled
183
+ # data. Return NaN, if the field has a NaN. Optional if zero non-null
184
+ # rows.
185
+ # @!attribute [rw] min
186
+ # @return [::Float]
187
+ # The minimum value of a double field in the sampled data.
188
+ # Return NaN, if the field has a NaN. Optional if zero non-null
189
+ # rows.
190
+ # @!attribute [rw] quartiles
191
+ # @return [::Array<::Float>]
192
+ # Quartiles divide the number of data points into four parts, or
193
+ # quarters, of more-or-less equal size. Three main quartiles used
194
+ # are: The first quartile (Q1) splits off the lowest 25% of data from
195
+ # the highest 75%. It is also known as the lower or 25th empirical
196
+ # quartile, as 25% of the data is below this point. The second
197
+ # quartile (Q2) is the median of a data set. So, 50% of the data lies
198
+ # below this point. The third quartile (Q3) splits off the highest
199
+ # 25% of data from the lowest 75%. It is known as the upper or 75th
200
+ # empirical quartile, as 75% of the data lies below this point. So,
201
+ # here the quartiles are provided as an ordered list of quartile
202
+ # values, occurring in order Q1, median, Q3.
203
+ # @!attribute [rw] max
204
+ # @return [::Float]
205
+ # The maximum value of a double field in the sampled data.
206
+ # Return NaN, if the field has a NaN. Optional if zero non-null
207
+ # rows.
208
+ class DoubleFieldInfo
209
+ include ::Google::Protobuf::MessageExts
210
+ extend ::Google::Protobuf::MessageExts::ClassMethods
211
+ end
212
+
213
+ # The TopNValue defines the structure of output of top N values of a
214
+ # field.
215
+ # @!attribute [rw] value
216
+ # @return [::String]
217
+ # The value is the string value of the actual value from the field.
218
+ # @!attribute [rw] count
219
+ # @return [::Integer]
220
+ # The frequency count of the corresponding value in the field.
221
+ class TopNValue
222
+ include ::Google::Protobuf::MessageExts
223
+ extend ::Google::Protobuf::MessageExts::ClassMethods
224
+ end
225
+ end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
231
+ end
232
+ end
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Copyright 2023 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # https://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # Auto-generated by gapic-generator-ruby. DO NOT EDIT!
18
+
19
+
20
+ module Google
21
+ module Cloud
22
+ module Dataplex
23
+ module V1
24
+ # DataQualityScan related setting.
25
+ # @!attribute [rw] rules
26
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityRule>]
27
+ # The list of rules to evaluate against a data source. At least one rule is
28
+ # required.
29
+ class DataQualitySpec
30
+ include ::Google::Protobuf::MessageExts
31
+ extend ::Google::Protobuf::MessageExts::ClassMethods
32
+ end
33
+
34
+ # The output of a DataQualityScan.
35
+ # @!attribute [rw] passed
36
+ # @return [::Boolean]
37
+ # Overall data quality result -- `true` if all rules passed.
38
+ # @!attribute [rw] dimensions
39
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityDimensionResult>]
40
+ # A list of results at the dimension-level.
41
+ # @!attribute [rw] rules
42
+ # @return [::Array<::Google::Cloud::Dataplex::V1::DataQualityRuleResult>]
43
+ # A list of all the rules in a job, and their results.
44
+ # @!attribute [rw] row_count
45
+ # @return [::Integer]
46
+ # The count of rows processed.
47
+ # @!attribute [rw] scanned_data
48
+ # @return [::Google::Cloud::Dataplex::V1::ScannedData]
49
+ # The data scanned for this result.
50
+ class DataQualityResult
51
+ include ::Google::Protobuf::MessageExts
52
+ extend ::Google::Protobuf::MessageExts::ClassMethods
53
+ end
54
+
55
+ # DataQualityRuleResult provides a more detailed, per-rule level view of the
56
+ # results.
57
+ # @!attribute [rw] rule
58
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule]
59
+ # The rule specified in the DataQualitySpec, as is.
60
+ # @!attribute [rw] passed
61
+ # @return [::Boolean]
62
+ # Whether the rule passed or failed.
63
+ # @!attribute [rw] evaluated_count
64
+ # @return [::Integer]
65
+ # The number of rows a rule was evaluated against.
66
+ # This field is only valid for ColumnMap type rules.
67
+ # Evaluated count can be configured to either
68
+ # (1) include all rows (default) - with null rows automatically failing rule
69
+ # evaluation OR (2) exclude null rows from the evaluated_count, by setting
70
+ # ignore_null = true
71
+ # @!attribute [rw] passed_count
72
+ # @return [::Integer]
73
+ # The number of rows which passed a rule evaluation.
74
+ # This field is only valid for ColumnMap type rules.
75
+ # @!attribute [rw] null_count
76
+ # @return [::Integer]
77
+ # The number of rows with null values in the specified column.
78
+ # @!attribute [rw] pass_ratio
79
+ # @return [::Float]
80
+ # The ratio of passed_count / evaluated_count.
81
+ # This field is only valid for ColumnMap type rules.
82
+ # @!attribute [rw] failing_rows_query
83
+ # @return [::String]
84
+ # The query to find rows that did not pass this rule.
85
+ # Only applies to ColumnMap and RowCondition rules.
86
+ class DataQualityRuleResult
87
+ include ::Google::Protobuf::MessageExts
88
+ extend ::Google::Protobuf::MessageExts::ClassMethods
89
+ end
90
+
91
+ # DataQualityDimensionResult provides a more detailed, per-dimension level view
92
+ # of the results.
93
+ # @!attribute [rw] passed
94
+ # @return [::Boolean]
95
+ # Whether the dimension passed or failed.
96
+ class DataQualityDimensionResult
97
+ include ::Google::Protobuf::MessageExts
98
+ extend ::Google::Protobuf::MessageExts::ClassMethods
99
+ end
100
+
101
+ # A rule captures data quality intent about a data source.
102
+ # @!attribute [rw] range_expectation
103
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RangeExpectation]
104
+ # ColumnMap rule which evaluates whether each column value lies between a
105
+ # specified range.
106
+ # @!attribute [rw] non_null_expectation
107
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::NonNullExpectation]
108
+ # ColumnMap rule which evaluates whether each column value is non-null.
109
+ # @!attribute [rw] set_expectation
110
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::SetExpectation]
111
+ # ColumnMap rule which evaluates whether each column value is contained by
112
+ # a specified set.
113
+ # @!attribute [rw] regex_expectation
114
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RegexExpectation]
115
+ # ColumnMap rule which evaluates whether each column value matches a
116
+ # specified regex.
117
+ # @!attribute [rw] uniqueness_expectation
118
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::UniquenessExpectation]
119
+ # ColumnAggregate rule which evaluates whether the column has duplicates.
120
+ # @!attribute [rw] statistic_range_expectation
121
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::StatisticRangeExpectation]
122
+ # ColumnAggregate rule which evaluates whether the column aggregate
123
+ # statistic lies between a specified range.
124
+ # @!attribute [rw] row_condition_expectation
125
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::RowConditionExpectation]
126
+ # Table rule which evaluates whether each row passes the specified
127
+ # condition.
128
+ # @!attribute [rw] table_condition_expectation
129
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::TableConditionExpectation]
130
+ # Table rule which evaluates whether the provided expression is true.
131
+ # @!attribute [rw] column
132
+ # @return [::String]
133
+ # Optional. The unnested column which this rule is evaluated against.
134
+ # @!attribute [rw] ignore_null
135
+ # @return [::Boolean]
136
+ # Optional. Rows with null values will automatically fail a rule, unless
137
+ # ignore_null is true. In that case, such null rows are trivially considered
138
+ # passing. Only applicable to ColumnMap rules.
139
+ # @!attribute [rw] dimension
140
+ # @return [::String]
141
+ # Required. The dimension a rule belongs to. Results are also aggregated at
142
+ # the dimension-level. Supported dimensions are ["COMPLETENESS", "ACCURACY",
143
+ # "CONSISTENCY", "VALIDITY", "UNIQUENESS", "INTEGRITY"]
144
+ # @!attribute [rw] threshold
145
+ # @return [::Float]
146
+ # Optional. The minimum ratio of passing_rows / total_rows required to pass
147
+ # this rule, with a range of [0.0, 1.0]
148
+ #
149
+ # 0 indicates default value (i.e. 1.0)
150
+ class DataQualityRule
151
+ include ::Google::Protobuf::MessageExts
152
+ extend ::Google::Protobuf::MessageExts::ClassMethods
153
+
154
+ # Evaluates whether each column value lies between a specified range.
155
+ # @!attribute [rw] min_value
156
+ # @return [::String]
157
+ # Optional. The minimum column value allowed for a row to pass this
158
+ # validation. At least one of min_value and max_value need to be provided.
159
+ # @!attribute [rw] max_value
160
+ # @return [::String]
161
+ # Optional. The maximum column value allowed for a row to pass this
162
+ # validation. At least one of min_value and max_value need to be provided.
163
+ # @!attribute [rw] strict_min_enabled
164
+ # @return [::Boolean]
165
+ # Optional. Whether each value needs to be strictly greater than ('>') the
166
+ # minimum, or if equality is allowed. Only relevant if a min_value has been
167
+ # defined. Default = false.
168
+ # @!attribute [rw] strict_max_enabled
169
+ # @return [::Boolean]
170
+ # Optional. Whether each value needs to be strictly less than ('<') the
171
+ # maximum, or if equality is allowed. Only relevant if a max_value has been
172
+ # defined. Default = false.
173
+ class RangeExpectation
174
+ include ::Google::Protobuf::MessageExts
175
+ extend ::Google::Protobuf::MessageExts::ClassMethods
176
+ end
177
+
178
+ # Evaluates whether each column value is non-null.
179
+ class NonNullExpectation
180
+ include ::Google::Protobuf::MessageExts
181
+ extend ::Google::Protobuf::MessageExts::ClassMethods
182
+ end
183
+
184
+ # Evaluates whether each column value is contained by a specified set.
185
+ # @!attribute [rw] values
186
+ # @return [::Array<::String>]
187
+ class SetExpectation
188
+ include ::Google::Protobuf::MessageExts
189
+ extend ::Google::Protobuf::MessageExts::ClassMethods
190
+ end
191
+
192
+ # Evaluates whether each column value matches a specified regex.
193
+ # @!attribute [rw] regex
194
+ # @return [::String]
195
+ class RegexExpectation
196
+ include ::Google::Protobuf::MessageExts
197
+ extend ::Google::Protobuf::MessageExts::ClassMethods
198
+ end
199
+
200
+ # Evaluates whether the column has duplicates.
201
+ class UniquenessExpectation
202
+ include ::Google::Protobuf::MessageExts
203
+ extend ::Google::Protobuf::MessageExts::ClassMethods
204
+ end
205
+
206
+ # Evaluates whether the column aggregate statistic lies between a specified
207
+ # range.
208
+ # @!attribute [rw] statistic
209
+ # @return [::Google::Cloud::Dataplex::V1::DataQualityRule::StatisticRangeExpectation::ColumnStatistic]
210
+ # @!attribute [rw] min_value
211
+ # @return [::String]
212
+ # The minimum column statistic value allowed for a row to pass this
213
+ # validation.
214
+ # At least one of min_value and max_value need to be provided.
215
+ # @!attribute [rw] max_value
216
+ # @return [::String]
217
+ # The maximum column statistic value allowed for a row to pass this
218
+ # validation.
219
+ # At least one of min_value and max_value need to be provided.
220
+ # @!attribute [rw] strict_min_enabled
221
+ # @return [::Boolean]
222
+ # Whether column statistic needs to be strictly greater than ('>')
223
+ # the minimum, or if equality is allowed. Only relevant if a min_value has
224
+ # been defined. Default = false.
225
+ # @!attribute [rw] strict_max_enabled
226
+ # @return [::Boolean]
227
+ # Whether column statistic needs to be strictly less than ('<') the
228
+ # maximum, or if equality is allowed. Only relevant if a max_value has been
229
+ # defined. Default = false.
230
+ class StatisticRangeExpectation
231
+ include ::Google::Protobuf::MessageExts
232
+ extend ::Google::Protobuf::MessageExts::ClassMethods
233
+
234
+ module ColumnStatistic
235
+ # Unspecified statistic type
236
+ STATISTIC_UNDEFINED = 0
237
+
238
+ # Evaluate the column mean
239
+ MEAN = 1
240
+
241
+ # Evaluate the column min
242
+ MIN = 2
243
+
244
+ # Evaluate the column max
245
+ MAX = 3
246
+ end
247
+ end
248
+
249
+ # Evaluates whether each row passes the specified condition.
250
+ # The SQL expression needs to use BigQuery standard SQL syntax and should
251
+ # produce a boolean per row as the result.
252
+ # Example: col1 >= 0 AND col2 < 10
253
+ # @!attribute [rw] sql_expression
254
+ # @return [::String]
255
+ class RowConditionExpectation
256
+ include ::Google::Protobuf::MessageExts
257
+ extend ::Google::Protobuf::MessageExts::ClassMethods
258
+ end
259
+
260
+ # Evaluates whether the provided expression is true.
261
+ # The SQL expression needs to use BigQuery standard SQL syntax and should
262
+ # produce a scalar boolean result.
263
+ # Example: MIN(col1) >= 0
264
+ # @!attribute [rw] sql_expression
265
+ # @return [::String]
266
+ class TableConditionExpectation
267
+ include ::Google::Protobuf::MessageExts
268
+ extend ::Google::Protobuf::MessageExts::ClassMethods
269
+ end
270
+ end
271
+ end
272
+ end
273
+ end
274
+ end