google-cloud-dlp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,365 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Google
16
+ module Privacy
17
+ module Dlp
18
+ module V2
19
+ # Type of information detected by the API.
20
+ # @!attribute [rw] name
21
+ # @return [String]
22
+ # Name of the information type.
23
+ class InfoType; end
24
+
25
+ # Custom information type provided by the user. Used to find domain-specific
26
+ # sensitive information configurable to the data in question.
27
+ # @!attribute [rw] info_type
28
+ # @return [Google::Privacy::Dlp::V2::InfoType]
29
+ # Info type configuration. All custom info types must have configurations
30
+ # that do not conflict with built-in info types or other custom info types.
31
+ # @!attribute [rw] likelihood
32
+ # @return [Google::Privacy::Dlp::V2::Likelihood]
33
+ # Likelihood to return for this custom info type. This base value can be
34
+ # altered by a detection rule if the finding meets the criteria specified by
35
+ # the rule. Defaults to +VERY_LIKELY+ if not specified.
36
+ # @!attribute [rw] dictionary
37
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Dictionary]
38
+ # Dictionary-based custom info type.
39
+ # @!attribute [rw] regex
40
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Regex]
41
+ # Regex-based custom info type.
42
+ # @!attribute [rw] surrogate_type
43
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::SurrogateType]
44
+ # Surrogate info type.
45
+ # @!attribute [rw] detection_rules
46
+ # @return [Array<Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule>]
47
+ # Set of detection rules to apply to all findings of this custom info type.
48
+ # Rules are applied in order that they are specified. Not supported for the
49
+ # +surrogate_type+ custom info type.
50
+ class CustomInfoType
51
+ # Custom information type based on a dictionary of words or phrases. This can
52
+ # be used to match sensitive information specific to the data, such as a list
53
+ # of employee IDs or job titles.
54
+ #
55
+ # Dictionary words are case-insensitive and all characters other than letters
56
+ # and digits in the Unicode [Basic Multilingual
57
+ # Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
58
+ # will be replaced with whitespace when scanning for matches, so the
59
+ # dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
60
+ # "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
61
+ # surrounding any match must be of a different type than the adjacent
62
+ # characters within the word, so letters must be next to non-letters and
63
+ # digits next to non-digits. For example, the dictionary word "jen" will
64
+ # match the first three letters of the text "jen123" but will return no
65
+ # matches for "jennifer".
66
+ #
67
+ # Dictionary words containing a large number of characters that are not
68
+ # letters or digits may result in unexpected findings because such characters
69
+ # are treated as whitespace.
70
+ # @!attribute [rw] word_list
71
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Dictionary::WordList]
72
+ # List of words or phrases to search for.
73
+ class Dictionary
74
+ # Message defining a list of words or phrases to search for in the data.
75
+ # @!attribute [rw] words
76
+ # @return [Array<String>]
77
+ # Words or phrases defining the dictionary. The dictionary must contain
78
+ # at least one phrase and every phrase must contain at least 2 characters
79
+ # that are letters or digits. [required]
80
+ class WordList; end
81
+ end
82
+
83
+ # Message defining a custom regular expression.
84
+ # @!attribute [rw] pattern
85
+ # @return [String]
86
+ # Pattern defining the regular expression.
87
+ class Regex; end
88
+
89
+ # Message for detecting output from deidentification transformations
90
+ # such as
91
+ # [+CryptoReplaceFfxFpeConfig+](https://cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify#CryptoReplaceFfxFpeConfig).
92
+ # These types of transformations are
93
+ # those that perform pseudonymization, thereby producing a "surrogate" as
94
+ # output. This should be used in conjunction with a field on the
95
+ # transformation such as +surrogate_info_type+. This custom info type does
96
+ # not support the use of +detection_rules+.
97
+ class SurrogateType; end
98
+
99
+ # Rule for modifying a custom info type to alter behavior under certain
100
+ # circumstances, depending on the specific details of the rule. Not supported
101
+ # for the +surrogate_type+ custom info type.
102
+ # @!attribute [rw] hotword_rule
103
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::HotwordRule]
104
+ # Hotword-based detection rule.
105
+ class DetectionRule
106
+ # Message for specifying a window around a finding to apply a detection
107
+ # rule.
108
+ # @!attribute [rw] window_before
109
+ # @return [Integer]
110
+ # Number of characters before the finding to consider.
111
+ # @!attribute [rw] window_after
112
+ # @return [Integer]
113
+ # Number of characters after the finding to consider.
114
+ class Proximity; end
115
+
116
+ # Message for specifying an adjustment to the likelihood of a finding as
117
+ # part of a detection rule.
118
+ # @!attribute [rw] fixed_likelihood
119
+ # @return [Google::Privacy::Dlp::V2::Likelihood]
120
+ # Set the likelihood of a finding to a fixed value.
121
+ # @!attribute [rw] relative_likelihood
122
+ # @return [Integer]
123
+ # Increase or decrease the likelihood by the specified number of
124
+ # levels. For example, if a finding would be +POSSIBLE+ without the
125
+ # detection rule and +relative_likelihood+ is 1, then it is upgraded to
126
+ # +LIKELY+, while a value of -1 would downgrade it to +UNLIKELY+.
127
+ # Likelihood may never drop below +VERY_UNLIKELY+ or exceed
128
+ # +VERY_LIKELY+, so applying an adjustment of 1 followed by an
129
+ # adjustment of -1 when base likelihood is +VERY_LIKELY+ will result in
130
+ # a final likelihood of +LIKELY+.
131
+ class LikelihoodAdjustment; end
132
+
133
+ # Detection rule that adjusts the likelihood of findings within a certain
134
+ # proximity of hotwords.
135
+ # @!attribute [rw] hotword_regex
136
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::Regex]
137
+ # Regex pattern defining what qualifies as a hotword.
138
+ # @!attribute [rw] proximity
139
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::Proximity]
140
+ # Proximity of the finding within which the entire hotword must reside.
141
+ # The total length of the window cannot exceed 1000 characters. Note that
142
+ # the finding itself will be included in the window, so that hotwords may
143
+ # be used to match substrings of the finding itself. For example, the
144
+ # certainty of a phone number regex "\(\d{3}\) \d{3}-\d{4}" could be
145
+ # adjusted upwards if the area code is known to be the local area code of
146
+ # a company office using the hotword regex "\(xxx\)", where "xxx"
147
+ # is the area code in question.
148
+ # @!attribute [rw] likelihood_adjustment
149
+ # @return [Google::Privacy::Dlp::V2::CustomInfoType::DetectionRule::LikelihoodAdjustment]
150
+ # Likelihood adjustment to apply to all matching findings.
151
+ class HotwordRule; end
152
+ end
153
+ end
154
+
155
+ # General identifier of a data field in a storage service.
156
+ # @!attribute [rw] name
157
+ # @return [String]
158
+ # Name describing the field.
159
+ class FieldId; end
160
+
161
+ # Datastore partition ID.
162
+ # A partition ID identifies a grouping of entities. The grouping is always
163
+ # by project and namespace; however, the namespace ID may be empty.
164
+ #
165
+ # A partition ID contains several dimensions:
166
+ # project ID and namespace ID.
167
+ # @!attribute [rw] project_id
168
+ # @return [String]
169
+ # The ID of the project to which the entities belong.
170
+ # @!attribute [rw] namespace_id
171
+ # @return [String]
172
+ # If not empty, the ID of the namespace to which the entities belong.
173
+ class PartitionId; end
174
+
175
+ # A representation of a Datastore kind.
176
+ # @!attribute [rw] name
177
+ # @return [String]
178
+ # The name of the kind.
179
+ class KindExpression; end
180
+
181
+ # Options defining a data set within Google Cloud Datastore.
182
+ # @!attribute [rw] partition_id
183
+ # @return [Google::Privacy::Dlp::V2::PartitionId]
184
+ # A partition ID identifies a grouping of entities. The grouping is always
185
+ # by project and namespace; however, the namespace ID may be empty.
186
+ # @!attribute [rw] kind
187
+ # @return [Google::Privacy::Dlp::V2::KindExpression]
188
+ # The kind to process.
189
+ class DatastoreOptions; end
190
+
191
+ # Options defining a file or a set of files (path ending with *) within
192
+ # a Google Cloud Storage bucket.
193
+ # @!attribute [rw] file_set
194
+ # @return [Google::Privacy::Dlp::V2::CloudStorageOptions::FileSet]
195
+ # @!attribute [rw] bytes_limit_per_file
196
+ # @return [Integer]
197
+ # Max number of bytes to scan from a file. If a scanned file's size is bigger
198
+ # than this value then the rest of the bytes are omitted.
199
+ class CloudStorageOptions
200
+ # Set of files to scan.
201
+ # @!attribute [rw] url
202
+ # @return [String]
203
+ # The URL, in the format +gs://<bucket>/<path>+. Trailing wildcard in the
204
+ # path is allowed.
205
+ class FileSet; end
206
+ end
207
+
208
+ # Options defining a BigQuery table and row identifiers.
209
+ # @!attribute [rw] table_reference
210
+ # @return [Google::Privacy::Dlp::V2::BigQueryTable]
211
+ # Complete BigQuery table reference.
212
+ # @!attribute [rw] identifying_fields
213
+ # @return [Array<Google::Privacy::Dlp::V2::FieldId>]
214
+ # References to fields uniquely identifying rows within the table.
215
+ # Nested fields, in a format like +person.birthdate.year+, are allowed.
216
+ class BigQueryOptions; end
217
+
218
+ # Shared message indicating Cloud storage type.
219
+ # @!attribute [rw] datastore_options
220
+ # @return [Google::Privacy::Dlp::V2::DatastoreOptions]
221
+ # Google Cloud Datastore options specification.
222
+ # @!attribute [rw] cloud_storage_options
223
+ # @return [Google::Privacy::Dlp::V2::CloudStorageOptions]
224
+ # Google Cloud Storage options specification.
225
+ # @!attribute [rw] big_query_options
226
+ # @return [Google::Privacy::Dlp::V2::BigQueryOptions]
227
+ # BigQuery options specification.
228
+ # @!attribute [rw] timespan_config
229
+ # @return [Google::Privacy::Dlp::V2::StorageConfig::TimespanConfig]
230
+ class StorageConfig
231
+ # Configuration of the timespan of the items to include in scanning.
232
+ # Currently only supported when inspecting Google Cloud Storage and BigQuery.
233
+ # @!attribute [rw] start_time
234
+ # @return [Google::Protobuf::Timestamp]
235
+ # Exclude files older than this value.
236
+ # @!attribute [rw] end_time
237
+ # @return [Google::Protobuf::Timestamp]
238
+ # Exclude files newer than this value.
239
+ # If set to zero, no upper time limit is applied.
240
+ # @!attribute [rw] timestamp_field
241
+ # @return [Google::Privacy::Dlp::V2::FieldId]
242
+ # Specification of the field containing the timestamp of scanned items.
243
+ # Required for data sources like Datastore or BigQuery.
244
+ # The valid data types of the timestamp field are:
245
+ # for BigQuery - timestamp, date, datetime;
246
+ # for Datastore - timestamp.
247
+ # Datastore entity will be scanned if the timestamp property does not exist
248
+ # or its value is empty or invalid.
249
+ # @!attribute [rw] enable_auto_population_of_timespan_config
250
+ # @return [true, false]
251
+ # When the job is started by a JobTrigger we will automatically figure out
252
+ # a valid start_time to avoid scanning files that have not been modified
253
+ # since the last time the JobTrigger executed. This will be based on the
254
+ # time of the execution of the last run of the JobTrigger.
255
+ class TimespanConfig; end
256
+ end
257
+
258
+ # Row key for identifying a record in a BigQuery table.
259
+ # @!attribute [rw] table_reference
260
+ # @return [Google::Privacy::Dlp::V2::BigQueryTable]
261
+ # Complete BigQuery table reference.
262
+ # @!attribute [rw] row_number
263
+ # @return [Integer]
264
+ # Absolute number of the row from the beginning of the table at the time
265
+ # of scanning.
266
+ class BigQueryKey; end
267
+
268
+ # Record key for a finding in Cloud Datastore.
269
+ # @!attribute [rw] entity_key
270
+ # @return [Google::Privacy::Dlp::V2::Key]
271
+ # Datastore entity key.
272
+ class DatastoreKey; end
273
+
274
+ # A unique identifier for a Datastore entity.
275
+ # If a key's partition ID or any of its path kinds or names are
276
+ # reserved/read-only, the key is reserved/read-only.
277
+ # A reserved/read-only key is forbidden in certain documented contexts.
278
+ # @!attribute [rw] partition_id
279
+ # @return [Google::Privacy::Dlp::V2::PartitionId]
280
+ # Entities are partitioned into subsets, currently identified by a project
281
+ # ID and namespace ID.
282
+ # Queries are scoped to a single partition.
283
+ # @!attribute [rw] path
284
+ # @return [Array<Google::Privacy::Dlp::V2::Key::PathElement>]
285
+ # The entity path.
286
+ # An entity path consists of one or more elements composed of a kind and a
287
+ # string or numerical identifier, which identify entities. The first
288
+ # element identifies a _root entity_, the second element identifies
289
+ # a _child_ of the root entity, the third element identifies a child of the
290
+ # second entity, and so forth. The entities identified by all prefixes of
291
+ # the path are called the element's _ancestors_.
292
+ #
293
+ # A path can never be empty, and a path can have at most 100 elements.
294
+ class Key
295
+ # A (kind, ID/name) pair used to construct a key path.
296
+ #
297
+ # If either name or ID is set, the element is complete.
298
+ # If neither is set, the element is incomplete.
299
+ # @!attribute [rw] kind
300
+ # @return [String]
301
+ # The kind of the entity.
302
+ # A kind matching regex +__.*__+ is reserved/read-only.
303
+ # A kind must not contain more than 1500 bytes when UTF-8 encoded.
304
+ # Cannot be +""+.
305
+ # @!attribute [rw] id
306
+ # @return [Integer]
307
+ # The auto-allocated ID of the entity.
308
+ # Never equal to zero. Values less than zero are discouraged and may not
309
+ # be supported in the future.
310
+ # @!attribute [rw] name
311
+ # @return [String]
312
+ # The name of the entity.
313
+ # A name matching regex +__.*__+ is reserved/read-only.
314
+ # A name must not be more than 1500 bytes when UTF-8 encoded.
315
+ # Cannot be +""+.
316
+ class PathElement; end
317
+ end
318
+
319
+ # Message for a unique key indicating a record that contains a finding.
320
+ # @!attribute [rw] datastore_key
321
+ # @return [Google::Privacy::Dlp::V2::DatastoreKey]
322
+ # @!attribute [rw] big_query_key
323
+ # @return [Google::Privacy::Dlp::V2::BigQueryKey]
324
+ class RecordKey; end
325
+
326
+ # Message defining the location of a BigQuery table. A table is uniquely
327
+ # identified by its project_id, dataset_id, and table_id. Within a query
328
+ # a table is often referenced with a string in the format of:
329
+ # +<project_id>:<dataset_id>.<table_id>+ or
330
+ # +<project_id>.<dataset_id>.<table_id>+.
331
+ # @!attribute [rw] project_id
332
+ # @return [String]
333
+ # The Google Cloud Platform project ID of the project containing the table.
334
+ # If omitted, project ID is inferred from the API call.
335
+ # @!attribute [rw] dataset_id
336
+ # @return [String]
337
+ # Dataset ID of the table.
338
+ # @!attribute [rw] table_id
339
+ # @return [String]
340
+ # Name of the table.
341
+ class BigQueryTable; end
342
+
343
+ # Categorization of results based on how likely they are to represent a match,
344
+ # based on the number of elements they contain which imply a match.
345
+ module Likelihood
346
+ # Default value; information with all likelihoods is included.
347
+ LIKELIHOOD_UNSPECIFIED = 0
348
+
349
+ # Few matching elements.
350
+ VERY_UNLIKELY = 1
351
+
352
+ UNLIKELY = 2
353
+
354
+ # Some matching elements.
355
+ POSSIBLE = 3
356
+
357
+ LIKELY = 4
358
+
359
+ # Many matching elements.
360
+ VERY_LIKELY = 5
361
+ end
362
+ end
363
+ end
364
+ end
365
+ end
@@ -0,0 +1,124 @@
1
+ # Copyright 2018 Google LLC
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ module Google
16
+ module Protobuf
17
+ # +Any+ contains an arbitrary serialized protocol buffer message along with a
18
+ # URL that describes the type of the serialized message.
19
+ #
20
+ # Protobuf library provides support to pack/unpack Any values in the form
21
+ # of utility functions or additional generated methods of the Any type.
22
+ #
23
+ # Example 1: Pack and unpack a message in C++.
24
+ #
25
+ # Foo foo = ...;
26
+ # Any any;
27
+ # any.PackFrom(foo);
28
+ # ...
29
+ # if (any.UnpackTo(&foo)) {
30
+ # ...
31
+ # }
32
+ #
33
+ # Example 2: Pack and unpack a message in Java.
34
+ #
35
+ # Foo foo = ...;
36
+ # Any any = Any.pack(foo);
37
+ # ...
38
+ # if (any.is(Foo.class)) {
39
+ # foo = any.unpack(Foo.class);
40
+ # }
41
+ #
42
+ # Example 3: Pack and unpack a message in Python.
43
+ #
44
+ # foo = Foo(...)
45
+ # any = Any()
46
+ # any.Pack(foo)
47
+ # ...
48
+ # if any.Is(Foo.DESCRIPTOR):
49
+ # any.Unpack(foo)
50
+ # ...
51
+ #
52
+ # Example 4: Pack and unpack a message in Go.
53
+ #
54
+ # foo := &pb.Foo{...}
55
+ # any, err := ptypes.MarshalAny(foo)
56
+ # ...
57
+ # foo := &pb.Foo{}
58
+ # if err := ptypes.UnmarshalAny(any, foo); err != nil {
59
+ # ...
60
+ # }
61
+ #
62
+ # The pack methods provided by protobuf library will by default use
63
+ # 'type.googleapis.com/full.type.name' as the type URL and the unpack
64
+ # methods only use the fully qualified type name after the last '/'
65
+ # in the type URL, for example "foo.bar.com/x/y.z" will yield type
66
+ # name "y.z".
67
+ #
68
+ #
69
+ # = JSON
70
+ #
71
+ # The JSON representation of an +Any+ value uses the regular
72
+ # representation of the deserialized, embedded message, with an
73
+ # additional field +@type+ which contains the type URL. Example:
74
+ #
75
+ # package google.profile;
76
+ # message Person {
77
+ # string first_name = 1;
78
+ # string last_name = 2;
79
+ # }
80
+ #
81
+ # {
82
+ # "@type": "type.googleapis.com/google.profile.Person",
83
+ # "firstName": <string>,
84
+ # "lastName": <string>
85
+ # }
86
+ #
87
+ # If the embedded message type is well-known and has a custom JSON
88
+ # representation, that representation will be embedded adding a field
89
+ # +value+ which holds the custom JSON in addition to the +@type+
90
+ # field. Example (for message {Google::Protobuf::Duration}):
91
+ #
92
+ # {
93
+ # "@type": "type.googleapis.com/google.protobuf.Duration",
94
+ # "value": "1.212s"
95
+ # }
96
+ # @!attribute [rw] type_url
97
+ # @return [String]
98
+ # A URL/resource name whose content describes the type of the
99
+ # serialized protocol buffer message.
100
+ #
101
+ # For URLs which use the scheme +http+, +https+, or no scheme, the
102
+ # following restrictions and interpretations apply:
103
+ #
104
+ # * If no scheme is provided, +https+ is assumed.
105
+ # * The last segment of the URL's path must represent the fully
106
+ # qualified name of the type (as in +path/google.protobuf.Duration+).
107
+ # The name should be in a canonical form (e.g., leading "." is
108
+ # not accepted).
109
+ # * An HTTP GET on the URL must yield a {Google::Protobuf::Type}
110
+ # value in binary format, or produce an error.
111
+ # * Applications are allowed to cache lookup results based on the
112
+ # URL, or have them precompiled into a binary to avoid any
113
+ # lookup. Therefore, binary compatibility needs to be preserved
114
+ # on changes to types. (Use versioned type names to manage
115
+ # breaking changes.)
116
+ #
117
+ # Schemes other than +http+, +https+ (or the empty scheme) might be
118
+ # used with implementation specific semantics.
119
+ # @!attribute [rw] value
120
+ # @return [String]
121
+ # Must be a valid serialized protocol buffer of the above specified type.
122
+ class Any; end
123
+ end
124
+ end