google-cloud-dlp 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,365 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Google
16
+ module Privacy
17
+ module Dlp
18
+ module V2
19
+ # Type of information detected by the API.
20
+ # @!attribute [rw] name
21
+ # @return [String]
22
+ # Name of the information type.
23
+ class InfoType; end
24
+
25
+ # Custom information type provided by the user. Used to find domain-specific
26
+ # sensitive information configurable to the data in question.
27
+ # @!attribute [rw] info_type
28
+ # @return [Google::Privacy::Dlp::V2::InfoType]
29
+ # Info type configuration. All custom info types must have configurations
30
+ # that do not conflict with built-in info types or other custom info types.
31
+ # @!attribute [rw] likelihood
32
+ # @return [Google::Privacy::Dlp::V2::Likelihood]
33
+ # Likelihood to return for this custom info type. This base value can be
34
+ # altered by a detection rule if the finding meets the criteria specified by
35
+ # the rule. Defaults to +VERY_LIKELY+ if not specified.
36
+ # @!attribute [rw] dictionary
37
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Dictionary]
38
+ # Dictionary-based custom info type.
39
+ # @!attribute [rw] regex
40
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Regex]
41
+ # Regex-based custom info type.
42
+ # @!attribute [rw] surrogate_type
43
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::SurrogateType]
44
+ # Surrogate info type.
45
+ # @!attribute [rw] detection_rules
46
+ # @return [Array<Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule>]
47
+ # Set of detection rules to apply to all findings of this custom info type.
48
+ # Rules are applied in the order in which they are specified. Not supported for the
49
+ # +surrogate_type+ custom info type.
50
+ class CustomInfoType
51
+ # Custom information type based on a dictionary of words or phrases. This can
52
+ # be used to match sensitive information specific to the data, such as a list
53
+ # of employee IDs or job titles.
54
+ #
55
+ # Dictionary words are case-insensitive and all characters other than letters
56
+ # and digits in the Unicode [Basic Multilingual
57
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
58
+ # will be replaced with whitespace when scanning for matches, so the
59
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
60
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
61
+ # surrounding any match must be of a different type than the adjacent
62
+ # characters within the word, so letters must be next to non-letters and
63
+ # digits next to non-digits. For example, the dictionary word "jen" will
64
+ # match the first three letters of the text "jen123" but will return no
65
+ # matches for "jennifer".
66
+ #
67
+ # Dictionary words containing a large number of characters that are not
68
+ # letters or digits may result in unexpected findings because such characters
69
+ # are treated as whitespace.
70
+ # @!attribute [rw] word_list
71
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Dictionary::WordList]
72
+ # List of words or phrases to search for.
73
+ class Dictionary
74
+ # Message defining a list of words or phrases to search for in the data.
75
+ # @!attribute [rw] words
76
+ # @return [Array<String>]
77
+ # Words or phrases defining the dictionary. The dictionary must contain
78
+ # at least one phrase and every phrase must contain at least 2 characters
79
+ # that are letters or digits. [required]
80
+ class WordList; end
81
+ end
82
+
83
+ # Message defining a custom regular expression.
84
+ # @!attribute [rw] pattern
85
+ # @return [String]
86
+ # Pattern defining the regular expression.
87
+ class Regex; end
88
+
89
+ # Message for detecting output from deidentification transformations
90
+ # such as
91
+ # [+CryptoReplaceFfxFpeConfig+](https://cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify#CryptoReplaceFfxFpeConfig).
92
+ # These types of transformations are
93
+ # those that perform pseudonymization, thereby producing a "surrogate" as
94
+ # output. This should be used in conjunction with a field on the
95
+ # transformation such as +surrogate_info_type+. This custom info type does
96
+ # not support the use of +detection_rules+.
97
+ class SurrogateType; end
98
+
99
+ # Rule for modifying a custom info type to alter behavior under certain
100
+ # circumstances, depending on the specific details of the rule. Not supported
101
+ # for the +surrogate_type+ custom info type.
102
+ # @!attribute [rw] hotword_rule
103
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::HotwordRule]
104
+ # Hotword-based detection rule.
105
+ class DetectionRule
106
+ # Message for specifying a window around a finding to apply a detection
107
+ # rule.
108
+ # @!attribute [rw] window_before
109
+ # @return [Integer]
110
+ # Number of characters before the finding to consider.
111
+ # @!attribute [rw] window_after
112
+ # @return [Integer]
113
+ # Number of characters after the finding to consider.
114
+ class Proximity; end
115
+
116
+ # Message for specifying an adjustment to the likelihood of a finding as
117
+ # part of a detection rule.
118
+ # @!attribute [rw] fixed_likelihood
119
+ # @return [Google::Privacy::Dlp::V2::Likelihood]
120
+ # Set the likelihood of a finding to a fixed value.
121
+ # @!attribute [rw] relative_likelihood
122
+ # @return [Integer]
123
+ # Increase or decrease the likelihood by the specified number of
124
+ # levels. For example, if a finding would be +POSSIBLE+ without the
125
+ # detection rule and +relative_likelihood+ is 1, then it is upgraded to
126
+ # +LIKELY+, while a value of -1 would downgrade it to +UNLIKELY+.
127
+ # Likelihood may never drop below +VERY_UNLIKELY+ or exceed
128
+ # +VERY_LIKELY+, so applying an adjustment of 1 followed by an
129
+ # adjustment of -1 when base likelihood is +VERY_LIKELY+ will result in
130
+ # a final likelihood of +LIKELY+.
131
+ class LikelihoodAdjustment; end
132
+
133
+ # Detection rule that adjusts the likelihood of findings within a certain
134
+ # proximity of hotwords.
135
+ # @!attribute [rw] hotword_regex
136
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Regex]
137
+ # Regex pattern defining what qualifies as a hotword.
138
+ # @!attribute [rw] proximity
139
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::Proximity]
140
+ # Proximity of the finding within which the entire hotword must reside.
141
+ # The total length of the window cannot exceed 1000 characters. Note that
142
+ # the finding itself will be included in the window, so that hotwords may
143
+ # be used to match substrings of the finding itself. For example, the
144
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
145
+ # adjusted upwards if the area code is known to be the local area code of
146
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
147
+ # is the area code in question.
148
+ # @!attribute [rw] likelihood_adjustment
149
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::LikelihoodAdjustment]
150
+ # Likelihood adjustment to apply to all matching findings.
151
+ class HotwordRule; end
152
+ end
153
+ end
154
+
155
+ # General identifier of a data field in a storage service.
156
+ # @!attribute [rw] name
157
+ # @return [String]
158
+ # Name describing the field.
159
+ class FieldId; end
160
+
161
+ # Datastore partition ID.
162
+ # A partition ID identifies a grouping of entities. The grouping is always
163
+ # by project and namespace, however the namespace ID may be empty.
164
+ #
165
+ # A partition ID contains several dimensions:
166
+ # project ID and namespace ID.
167
+ # @!attribute [rw] project_id
168
+ # @return [String]
169
+ # The ID of the project to which the entities belong.
170
+ # @!attribute [rw] namespace_id
171
+ # @return [String]
172
+ # If not empty, the ID of the namespace to which the entities belong.
173
+ class PartitionId; end
174
+
175
+ # A representation of a Datastore kind.
176
+ # @!attribute [rw] name
177
+ # @return [String]
178
+ # The name of the kind.
179
+ class KindExpression; end
180
+
181
+ # Options defining a data set within Google Cloud Datastore.
182
+ # @!attribute [rw] partition_id
183
+ # @return [Google::Privacy::Dlp::V2::PartitionId]
184
+ # A partition ID identifies a grouping of entities. The grouping is always
185
+ # by project and namespace, however the namespace ID may be empty.
186
+ # @!attribute [rw] kind
187
+ # @return [Google::Privacy::Dlp::V2::KindExpression]
188
+ # The kind to process.
189
+ class DatastoreOptions; end
190
+
191
+ # Options defining a file or a set of files (path ending with *) within
192
+ # a Google Cloud Storage bucket.
193
+ # @!attribute [rw] file_set
194
+ # @return [Google::Privacy::Dlp::V2::CloudStorageOptions::FileSet]
195
+ # @!attribute [rw] bytes_limit_per_file
196
+ # @return [Integer]
197
+ # Max number of bytes to scan from a file. If a scanned file's size is bigger
198
+ # than this value then the rest of the bytes are omitted.
199
+ class CloudStorageOptions
200
+ # Set of files to scan.
201
+ # @!attribute [rw] url
202
+ # @return [String]
203
+ # The url, in the format +gs://<bucket>/<path>+. Trailing wildcard in the
204
+ # path is allowed.
205
+ class FileSet; end
206
+ end
207
+
208
+ # Options defining BigQuery table and row identifiers.
209
+ # @!attribute [rw] table_reference
210
+ # @return [Google::Privacy::Dlp::V2::BigQueryTable]
211
+ # Complete BigQuery table reference.
212
+ # @!attribute [rw] identifying_fields
213
+ # @return [Array<Google::Privacy::Dlp::V2::FieldId>]
214
+ # References to fields uniquely identifying rows within the table.
215
+ # Nested fields in a format like +person.birthdate.year+ are allowed.
216
+ class BigQueryOptions; end
217
+
218
+ # Shared message indicating Cloud storage type.
219
+ # @!attribute [rw] datastore_options
220
+ # @return [Google::Privacy::Dlp::V2::DatastoreOptions]
221
+ # Google Cloud Datastore options specification.
222
+ # @!attribute [rw] cloud_storage_options
223
+ # @return [Google::Privacy::Dlp::V2::CloudStorageOptions]
224
+ # Google Cloud Storage options specification.
225
+ # @!attribute [rw] big_query_options
226
+ # @return [Google::Privacy::Dlp::V2::BigQueryOptions]
227
+ # BigQuery options specification.
228
+ # @!attribute [rw] timespan_config
229
+ # @return [Google::Privacy::Dlp::V2::StorageConfig::TimespanConfig]
230
+ class StorageConfig
231
+ # Configuration of the timespan of the items to include in scanning.
232
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
233
+ # @!attribute [rw] start_time
234
+ # @return [Google::Protobuf::Timestamp]
235
+ # Exclude files older than this value.
236
+ # @!attribute [rw] end_time
237
+ # @return [Google::Protobuf::Timestamp]
238
+ # Exclude files newer than this value.
239
+ # If set to zero, no upper time limit is applied.
240
+ # @!attribute [rw] timestamp_field
241
+ # @return [Google::Privacy::Dlp::V2::FieldId]
242
+ # Specification of the field containing the timestamp of scanned items.
243
+ # Required for data sources like Datastore or BigQuery.
244
+ # The valid data types of the timestamp field are:
245
+ # for BigQuery - timestamp, date, datetime;
246
+ # for Datastore - timestamp.
247
+ # A Datastore entity will be scanned if the timestamp property does not exist
248
+ # or its value is empty or invalid.
249
+ # @!attribute [rw] enable_auto_population_of_timespan_config
250
+ # @return [true, false]
251
+ # When the job is started by a JobTrigger we will automatically figure out
252
+ # a valid start_time to avoid scanning files that have not been modified
253
+ # since the last time the JobTrigger executed. This will be based on the
254
+ # time of the execution of the last run of the JobTrigger.
255
+ class TimespanConfig; end
256
+ end
257
+
258
+ # Row key for identifying a record in a BigQuery table.
259
+ # @!attribute [rw] table_reference
260
+ # @return [Google::Privacy::Dlp::V2::BigQueryTable]
261
+ # Complete BigQuery table reference.
262
+ # @!attribute [rw] row_number
263
+ # @return [Integer]
264
+ # Absolute number of the row from the beginning of the table at the time
265
+ # of scanning.
266
+ class BigQueryKey; end
267
+
268
+ # Record key for a finding in Cloud Datastore.
269
+ # @!attribute [rw] entity_key
270
+ # @return [Google::Privacy::Dlp::V2::Key]
271
+ # Datastore entity key.
272
+ class DatastoreKey; end
273
+
274
+ # A unique identifier for a Datastore entity.
275
+ # If a key's partition ID or any of its path kinds or names are
276
+ # reserved/read-only, the key is reserved/read-only.
277
+ # A reserved/read-only key is forbidden in certain documented contexts.
278
+ # @!attribute [rw] partition_id
279
+ # @return [Google::Privacy::Dlp::V2::PartitionId]
280
+ # Entities are partitioned into subsets, currently identified by a project
281
+ # ID and namespace ID.
282
+ # Queries are scoped to a single partition.
283
+ # @!attribute [rw] path
284
+ # @return [Array<Google::Privacy::Dlp::V2::Key::PathElement>]
285
+ # The entity path.
286
+ # An entity path consists of one or more elements composed of a kind and a
287
+ # string or numerical identifier, which identify entities. The first
288
+ # element identifies a _root entity_, the second element identifies
289
+ # a _child_ of the root entity, the third element identifies a child of the
290
+ # second entity, and so forth. The entities identified by all prefixes of
291
+ # the path are called the element's _ancestors_.
292
+ #
293
+ # A path can never be empty, and a path can have at most 100 elements.
294
+ class Key
295
+ # A (kind, ID/name) pair used to construct a key path.
296
+ #
297
+ # If either name or ID is set, the element is complete.
298
+ # If neither is set, the element is incomplete.
299
+ # @!attribute [rw] kind
300
+ # @return [String]
301
+ # The kind of the entity.
302
+ # A kind matching regex +__.*__+ is reserved/read-only.
303
+ # A kind must not contain more than 1500 bytes when UTF-8 encoded.
304
+ # Cannot be +""+.
305
+ # @!attribute [rw] id
306
+ # @return [Integer]
307
+ # The auto-allocated ID of the entity.
308
+ # Never equal to zero. Values less than zero are discouraged and may not
309
+ # be supported in the future.
310
+ # @!attribute [rw] name
311
+ # @return [String]
312
+ # The name of the entity.
313
+ # A name matching regex +__.*__+ is reserved/read-only.
314
+ # A name must not be more than 1500 bytes when UTF-8 encoded.
315
+ # Cannot be +""+.
316
+ class PathElement; end
317
+ end
318
+
319
+ # Message for a unique key indicating a record that contains a finding.
320
+ # @!attribute [rw] datastore_key
321
+ # @return [Google::Privacy::Dlp::V2::DatastoreKey]
322
+ # @!attribute [rw] big_query_key
323
+ # @return [Google::Privacy::Dlp::V2::BigQueryKey]
324
+ class RecordKey; end
325
+
326
+ # Message defining the location of a BigQuery table. A table is uniquely
327
+ # identified by its project_id, dataset_id, and table_id. Within a query
328
+ # a table is often referenced with a string in the format of:
329
+ # +<project_id>:<dataset_id>.<table_id>+ or
330
+ # +<project_id>.<dataset_id>.<table_id>+.
331
+ # @!attribute [rw] project_id
332
+ # @return [String]
333
+ # The Google Cloud Platform project ID of the project containing the table.
334
+ # If omitted, project ID is inferred from the API call.
335
+ # @!attribute [rw] dataset_id
336
+ # @return [String]
337
+ # Dataset ID of the table.
338
+ # @!attribute [rw] table_id
339
+ # @return [String]
340
+ # Name of the table.
341
+ class BigQueryTable; end
342
+
343
+ # Categorization of results based on how likely they are to represent a match,
344
+ # based on the number of elements they contain which imply a match.
345
+ module Likelihood
346
+ # Default value; information with all likelihoods is included.
347
+ LIKELIHOOD_UNSPECIFIED = 0
348
+
349
+ # Few matching elements.
350
+ VERY_UNLIKELY = 1
351
+
352
+ UNLIKELY = 2
353
+
354
+ # Some matching elements.
355
+ POSSIBLE = 3
356
+
357
+ LIKELY = 4
358
+
359
+ # Many matching elements.
360
+ VERY_LIKELY = 5
361
+ end
362
+ end
363
+ end
364
+ end
365
+ end
@@ -0,0 +1,124 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Google
16
+ module Protobuf
17
+ # +Any+ contains an arbitrary serialized protocol buffer message along with a
18
+ # URL that describes the type of the serialized message.
19
+ #
20
+ # The protobuf library provides support to pack/unpack Any values in the form
21
+ # of utility functions or additional generated methods of the Any type.
22
+ #
23
+ # Example 1: Pack and unpack a message in C++.
24
+ #
25
+ # Foo foo = ...;
26
+ # Any any;
27
+ # any.PackFrom(foo);
28
+ # ...
29
+ # if (any.UnpackTo(&foo)) {
30
+ # ...
31
+ # }
32
+ #
33
+ # Example 2: Pack and unpack a message in Java.
34
+ #
35
+ # Foo foo = ...;
36
+ # Any any = Any.pack(foo);
37
+ # ...
38
+ # if (any.is(Foo.class)) {
39
+ # foo = any.unpack(Foo.class);
40
+ # }
41
+ #
42
+ # Example 3: Pack and unpack a message in Python.
43
+ #
44
+ # foo = Foo(...)
45
+ # any = Any()
46
+ # any.Pack(foo)
47
+ # ...
48
+ # if any.Is(Foo.DESCRIPTOR):
49
+ # any.Unpack(foo)
50
+ # ...
51
+ #
52
+ # Example 4: Pack and unpack a message in Go.
53
+ #
54
+ # foo := &pb.Foo{...}
55
+ # any, err := ptypes.MarshalAny(foo)
56
+ # ...
57
+ # foo := &pb.Foo{}
58
+ # if err := ptypes.UnmarshalAny(any, foo); err != nil {
59
+ # ...
60
+ # }
61
+ #
62
+ # The pack methods provided by the protobuf library will by default use
63
+ # 'type.googleapis.com/full.type.name' as the type URL and the unpack
64
+ # methods only use the fully qualified type name after the last '/'
65
+ # in the type URL, for example "foo.bar.com/x/y.z" will yield type
66
+ # name "y.z".
67
+ #
68
+ #
69
+ # = JSON
70
+ #
71
+ # The JSON representation of an +Any+ value uses the regular
72
+ # representation of the deserialized, embedded message, with an
73
+ # additional field +@type+ which contains the type URL. Example:
74
+ #
75
+ # package google.profile;
76
+ # message Person {
77
+ # string first_name = 1;
78
+ # string last_name = 2;
79
+ # }
80
+ #
81
+ # {
82
+ # "@type": "type.googleapis.com/google.profile.Person",
83
+ # "firstName": <string>,
84
+ # "lastName": <string>
85
+ # }
86
+ #
87
+ # If the embedded message type is well-known and has a custom JSON
88
+ # representation, that representation will be embedded adding a field
89
+ # +value+ which holds the custom JSON in addition to the +@type+
90
+ # field. Example (for message {Google::Protobuf::Duration}):
91
+ #
92
+ # {
93
+ # "@type": "type.googleapis.com/google.protobuf.Duration",
94
+ # "value": "1.212s"
95
+ # }
96
+ # @!attribute [rw] type_url
97
+ # @return [String]
98
+ # A URL/resource name whose content describes the type of the
99
+ # serialized protocol buffer message.
100
+ #
101
+ # For URLs which use the scheme +http+, +https+, or no scheme, the
102
+ # following restrictions and interpretations apply:
103
+ #
104
+ # * If no scheme is provided, +https+ is assumed.
105
+ # * The last segment of the URL's path must represent the fully
106
+ # qualified name of the type (as in +path/google.protobuf.Duration+).
107
+ # The name should be in a canonical form (e.g., leading "." is
108
+ # not accepted).
109
+ # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
110
+ # value in binary format, or produce an error.
111
+ # * Applications are allowed to cache lookup results based on the
112
+ # URL, or have them precompiled into a binary to avoid any
113
+ # lookup. Therefore, binary compatibility needs to be preserved
114
+ # on changes to types. (Use versioned type names to manage
115
+ # breaking changes.)
116
+ #
117
+ # Schemes other than +http+, +https+ (or the empty scheme) might be
118
+ # used with implementation specific semantics.
119
+ # @!attribute [rw] value
120
+ # @return [String]
121
+ # Must be a valid serialized protocol buffer of the above specified type.
122
+ class Any; end
123
+ end
124
+ end