@google-cloud/dlp 5.0.1 → 5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- // Copyright 2022 Google LLC
1
+ // Copyright 2023 Google LLC
2
2
  //
3
3
  // Licensed under the Apache License, Version 2.0 (the "License");
4
4
  // you may not use this file except in compliance with the License.
@@ -38,51 +38,73 @@ message InfoType {
38
38
 
39
39
  // Optional version name for this InfoType.
40
40
  string version = 2;
41
+
42
+ // Optional custom sensitivity for this InfoType.
43
+ // This only applies to data profiling.
44
+ SensitivityScore sensitivity_score = 3;
41
45
  }
42
46
 
43
- // Score is a summary of all elements in the data profile.
44
- // A higher number means more sensitive.
47
+ // Score is calculated from all elements in the data profile.
48
+ // A higher level means the data is more sensitive.
45
49
  message SensitivityScore {
46
- // Various score levels for resources.
50
+ // Various sensitivity score levels for resources.
47
51
  enum SensitivityScoreLevel {
48
52
  // Unused.
49
53
  SENSITIVITY_SCORE_UNSPECIFIED = 0;
50
54
 
51
- // No sensitive information detected. Limited access.
55
+ // No sensitive information detected. The resource isn't publicly
56
+ // accessible.
52
57
  SENSITIVITY_LOW = 10;
53
58
 
54
- // Medium risk - PII, potentially sensitive data, or fields with free-text
55
- // data that are at higher risk of having intermittent sensitive data.
56
- // Consider limiting access.
59
+ // Medium risk. Contains personally identifiable information (PII),
60
+ // potentially sensitive data, or fields with free-text data that are at a
61
+ // higher risk of having intermittent sensitive data. Consider limiting
62
+ // access.
57
63
  SENSITIVITY_MODERATE = 20;
58
64
 
59
- // High risk – SPII may be present. Exfiltration of data may lead to user
60
- // data loss. Re-identification of users may be possible. Consider limiting
61
- // usage and or removing SPII.
65
+ // High risk. Sensitive personally identifiable information (SPII) can be
66
+ // present. Exfiltration of data can lead to user data loss.
67
+ // Re-identification of users might be possible. Consider limiting usage
68
+ // and/or removing SPII.
62
69
  SENSITIVITY_HIGH = 30;
63
70
  }
64
71
 
65
- // The score applied to the resource.
72
+ // The sensitivity score applied to the resource.
66
73
  SensitivityScoreLevel score = 1;
67
74
  }
68
75
 
69
- // Categorization of results based on how likely they are to represent a match,
70
- // based on the number of elements they contain which imply a match.
76
+ // Coarse-grained confidence level of how well a particular finding
77
+ // satisfies the criteria to match a particular infoType.
78
+ //
79
+ // Likelihood is calculated based on the number of signals a
80
+ // finding has that imply that the finding matches the infoType. For
81
+ // example, a string that has an '@' and a '.com' is more likely to be a
82
+ // match for an email address than a string that only has an '@'.
83
+ //
84
+ // In general, the highest likelihood level has the strongest signals that
85
+ // indicate a match. That is, a finding with a high likelihood has a low chance
86
+ // of being a false positive.
87
+ //
88
+ // For more information about each likelihood level
89
+ // and how likelihood works, see [Match
90
+ // likelihood](https://cloud.google.com/dlp/docs/likelihood).
71
91
  enum Likelihood {
72
92
  // Default value; same as POSSIBLE.
73
93
  LIKELIHOOD_UNSPECIFIED = 0;
74
94
 
75
- // Few matching elements.
95
+ // Highest chance of a false positive.
76
96
  VERY_UNLIKELY = 1;
77
97
 
98
+ // High chance of a false positive.
78
99
  UNLIKELY = 2;
79
100
 
80
- // Some matching elements.
101
+ // Some matching signals. The default value.
81
102
  POSSIBLE = 3;
82
103
 
104
+ // Low chance of a false positive.
83
105
  LIKELY = 4;
84
106
 
85
- // Many matching elements.
107
+ // Confidence level is high. Lowest chance of a false positive.
86
108
  VERY_LIKELY = 5;
87
109
  }
88
110
 
@@ -163,9 +185,7 @@ message CustomInfoType {
163
185
  // output. This should be used in conjunction with a field on the
164
186
  // transformation such as `surrogate_info_type`. This CustomInfoType does
165
187
  // not support the use of `detection_rules`.
166
- message SurrogateType {
167
-
168
- }
188
+ message SurrogateType {}
169
189
 
170
190
  // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
171
191
  // `CustomInfoType` to alter behavior under certain circumstances, depending
@@ -282,6 +302,13 @@ message CustomInfoType {
282
302
  // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
283
303
  // to be returned. It still can be used for rules matching.
284
304
  ExclusionType exclusion_type = 8;
305
+
306
+ // Sensitivity for this CustomInfoType. If this CustomInfoType extends an
307
+ // existing InfoType, the sensitivity here will take precedence over that of
308
+ // the original InfoType. If unset for a CustomInfoType, it will default to
309
+ // HIGH.
310
+ // This only applies to data profiling.
311
+ SensitivityScore sensitivity_score = 9;
285
312
  }
286
313
 
287
314
  // General identifier of a data field in a storage service.
@@ -330,7 +357,7 @@ enum FileType {
330
357
  // scanning attempts to convert the content of the file to utf_8 to scan
331
358
  // the file.
332
359
  // If you wish to avoid this fallback, specify one or more of the other
333
- // FileType's in your storage scan.
360
+ // file types in your storage scan.
334
361
  BINARY_FILE = 1;
335
362
 
336
363
  // Included file extensions:
@@ -343,19 +370,24 @@ enum FileType {
343
370
  TEXT_FILE = 2;
344
371
 
345
372
  // Included file extensions:
346
- // bmp, gif, jpg, jpeg, jpe, png.
347
- // bytes_limit_per_file has no effect on image files.
348
- // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
373
+ // bmp, gif, jpg, jpeg, jpe, png. Setting
374
+ // [bytes_limit_per_file][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file]
375
+ // or
376
+ // [bytes_limit_per_file_percent][google.privacy.dlp.v2.CloudStorageOptions.bytes_limit_per_file_percent]
377
+ // has no effect on image files. Image inspection is restricted to the
378
+ // `global`, `us`, `asia`, and `europe` regions.
349
379
  IMAGE = 3;
350
380
 
351
- // Word files >30 MB will be scanned as binary files.
381
+ // Microsoft Word files larger than 30 MB will be scanned as binary files.
352
382
  // Included file extensions:
353
- // docx, dotx, docm, dotm
383
+ // docx, dotx, docm, dotm. Setting `bytes_limit_per_file` or
384
+ // `bytes_limit_per_file_percent` has no effect on Word files.
354
385
  WORD = 5;
355
386
 
356
- // PDF files >30 MB will be scanned as binary files.
387
+ // PDF files larger than 30 MB will be scanned as binary files.
357
388
  // Included file extensions:
358
- // pdf
389
+ // pdf. Setting `bytes_limit_per_file` or `bytes_limit_per_file_percent`
390
+ // has no effect on PDF files.
359
391
  PDF = 6;
360
392
 
361
393
  // Included file extensions:
@@ -370,14 +402,16 @@ enum FileType {
370
402
  // tsv
371
403
  TSV = 9;
372
404
 
373
- // Powerpoint files >30 MB will be scanned as binary files.
374
- // Included file extensions:
375
- // pptx, pptm, potx, potm, pot
405
+ // Microsoft PowerPoint files larger than 30 MB will be scanned as binary
406
+ // files. Included file extensions:
407
+ // pptx, pptm, potx, potm, pot. Setting `bytes_limit_per_file` or
408
+ // `bytes_limit_per_file_percent` has no effect on PowerPoint files.
376
409
  POWERPOINT = 11;
377
410
 
378
- // Excel files >30 MB will be scanned as binary files.
411
+ // Microsoft Excel files larger than 30 MB will be scanned as binary files.
379
412
  // Included file extensions:
380
- // xlsx, xlsm, xltx, xltm
413
+ // xlsx, xlsm, xltx, xltm. Setting `bytes_limit_per_file` or
414
+ // `bytes_limit_per_file_percent` has no effect on Excel files.
381
415
  EXCEL = 12;
382
416
  }
383
417
 
@@ -478,16 +512,22 @@ message CloudStorageOptions {
478
512
  FileSet file_set = 1;
479
513
 
480
514
  // Max number of bytes to scan from a file. If a scanned file's size is bigger
481
- // than this value then the rest of the bytes are omitted. Only one
482
- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
483
- // Cannot be set if de-identification is requested.
515
+ // than this value then the rest of the bytes are omitted. Only one of
516
+ // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
517
+ // This field can't be set if de-identification is requested. For certain file
518
+ // types, setting this field has no effect. For more information, see [Limits
519
+ // on bytes scanned per
520
+ // file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
484
521
  int64 bytes_limit_per_file = 4;
485
522
 
486
523
  // Max percentage of bytes to scan from a file. The rest are omitted. The
487
524
  // number of bytes scanned is rounded down. Must be between 0 and 100,
488
- // inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
489
- // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
490
- // Cannot be set if de-identification is requested.
525
+ // inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one of
526
+ // `bytes_limit_per_file` and `bytes_limit_per_file_percent` can be specified.
527
+ // This field can't be set if de-identification is requested. For certain file
528
+ // types, setting this field has no effect. For more information, see [Limits
529
+ // on bytes scanned per
530
+ // file](https://cloud.google.com/dlp/docs/supported-file-types#max-byte-size-per-file).
491
531
  int32 bytes_limit_per_file_percent = 8;
492
532
 
493
533
  // List of file type groups to include in the scan.
@@ -565,9 +605,15 @@ message BigQueryOptions {
565
605
 
566
606
  // References to fields excluded from scanning. This allows you to skip
567
607
  // inspection of entire columns which you know have no findings.
608
+ // When inspecting a table, we recommend that you inspect all columns.
609
+ // Otherwise, findings might be affected because hints from excluded columns
610
+ // will not be used.
568
611
  repeated FieldId excluded_fields = 5;
569
612
 
570
613
  // Limit scanning only to these fields.
614
+ // When inspecting a table, we recommend that you inspect all columns.
615
+ // Otherwise, findings might be affected because hints from excluded columns
616
+ // will not be used.
571
617
  repeated FieldId included_fields = 7;
572
618
  }
573
619