idscrub 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
idscrub/scrub.py CHANGED
@@ -138,52 +138,51 @@ class IDScrub:
138
138
 
139
139
  return grouped
140
140
 
141
- def log_message(self, removed_label) -> None:
141
+ def log_message(self, label) -> None:
142
142
  """
143
143
  Log message with count of PII-type scrubbed.
144
144
 
145
145
  Args:
146
- removed_label (str): Label for the PII-type removed. Must be prefixed with `scrubbed_`.
146
+ label (str): Label for the personal data removed.
147
147
  Returns:
148
148
  int: The count of PII-type scrubbed.
149
149
  """
150
150
 
151
- if any(removed_label in key for key in self.scrubbed_data):
151
+ if any(label in key for key in self.scrubbed_data):
152
152
  scrubbed_data = self.get_scrubbed_data()
153
- count = scrubbed_data[removed_label].dropna().apply(len).sum()
153
+ count = scrubbed_data[label].dropna().apply(len).sum()
154
154
  else:
155
155
  count = 0
156
156
 
157
- label_name = removed_label.removeprefix("scrubbed_").replace("_", " ")
158
- self.logger.info(f"{count} {label_name} scrubbed.")
157
+ self.logger.info(f"{count} {label} scrubbed.")
159
158
 
160
159
  return count
161
160
 
162
- def scrub_and_collect(self, match, text, replacement_text, i, removed_label) -> str:
161
+ def scrub_and_collect(self, match, text, replacement_text, i, label) -> str:
163
162
  """
164
163
  Scrub pattern match and collect scrubbed name.
165
164
 
166
165
  Args:
167
166
  match (str): The regex match passed from `re.sub()`.
168
167
  i (int): the enumerate id of the string.
169
- removed_label (str): Label for the PII-type removed. Must be prefixed with `scrubbed_`.
168
+ label (str): Label for the personal data removed.
170
169
 
171
170
  Returns:
172
171
  str: The replacement text.
173
172
  """
174
173
 
175
- self.scrubbed_data.append({self.text_id_name: i, removed_label: match.group()})
174
+ self.scrubbed_data.append({self.text_id_name: i, label: match.group()})
176
175
 
177
176
  return replacement_text
178
177
 
179
- def scrub_regex(self, pattern, replacement_text, removed_label) -> list[str]:
178
+ def scrub_regex(self, pattern, replacement_text, label) -> list[str]:
180
179
  """
181
180
  General method to clean text using a regex pattern.
182
181
 
183
182
  Args:
184
183
  pattern (str): Regex pattern to apply.
185
184
  replacement_text (str): The replacement text for the removed text.
186
- removed_label (str): Label for the PII-type removed. Must be prefixed with `scrubbed_`.
185
+ label (str): Label for the personal data removed.
187
186
 
188
187
  Returns:
189
188
  list[str]: Cleaned texts.
@@ -203,7 +202,7 @@ class IDScrub:
203
202
  text=text,
204
203
  replacement_text=replacement_text,
205
204
  i=i,
206
- removed_label=removed_label,
205
+ label=label,
207
206
  ),
208
207
  text,
209
208
  )
@@ -212,7 +211,7 @@ class IDScrub:
212
211
 
213
212
  self.cleaned_texts = cleaned_texts
214
213
 
215
- self.log_message(removed_label)
214
+ self.log_message(label)
216
215
 
217
216
  return cleaned_texts
218
217
 
@@ -220,6 +219,7 @@ class IDScrub:
220
219
  self,
221
220
  custom_regex_patterns: list[str] = None,
222
221
  custom_replacement_texts: list[str] = None,
222
+ labels: list[str] = None,
223
223
  ) -> list[str]:
224
224
  """
225
225
  Remove text matching a custom regex pattern.
@@ -228,6 +228,7 @@ class IDScrub:
228
228
  custom_regex_patterns list[str]: Regex(s) pattern to apply.
229
229
  custom_replacement_texts list[str]: The replacement texts for the removed text.
230
230
  Defaults to '[REDACTED]' for all.
231
+ labels list[str]: Labels for patterns removed.
231
232
 
232
233
  Returns:
233
234
  list[str]: Cleaned texts.
@@ -243,17 +244,22 @@ class IDScrub:
243
244
  custom_replacement_texts = ["[REDACTED]"] * len(custom_regex_patterns)
244
245
 
245
246
  for i, (pattern, replacement_text) in enumerate(zip(custom_regex_patterns, custom_replacement_texts)):
246
- self.scrub_regex(pattern, replacement_text, removed_label=f"scrubbed_custom_regex_{i + 1}")
247
+ if labels:
248
+ assert len(custom_regex_patterns) == len(labels), "There must be a label for each pattern."
249
+ self.scrub_regex(pattern, replacement_text, label=f"{labels[i]}")
250
+ else:
251
+ self.scrub_regex(pattern, replacement_text, label=f"custom_regex_{i + 1}")
247
252
 
248
253
  return self.cleaned_texts
249
254
 
250
- def email_addresses(self, replacement_text="[EMAIL_ADDRESS]") -> list[str]:
255
+ def email_addresses(self, replacement_text: str = "[EMAIL_ADDRESS]", label: str = "email_address") -> list[str]:
251
256
  """
252
257
  Remove email addresses using regex.
253
258
  e.g. `johnsmith@gmail.com` scrubbed
254
259
 
255
260
  Args:
256
261
  replacement_text (str): The replacement text for the removed text.
262
+ label (str): Label for the personal data removed.
257
263
 
258
264
  Returns:
259
265
  list[str]: The input list of text with email addresses replaced.
@@ -262,15 +268,16 @@ class IDScrub:
262
268
  self.logger.info("Scrubbing email addresses using regex...")
263
269
  pattern = r"\b\S+@\S+\.\S+\b"
264
270
 
265
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_email_addresses")
271
+ return self.scrub_regex(pattern, replacement_text, label=label)
266
272
 
267
- def handles(self, replacement_text: str = "[HANDLE]") -> list[str]:
273
+ def handles(self, replacement_text: str = "[HANDLE]", label: str = "handle") -> list[str]:
268
274
  """
269
275
  Remove `@` user handles using regex
270
276
  e.g. `@username` scrubbed
271
277
 
272
278
  Args:
273
279
  replacement_text (str): The replacement text for the removed text.
280
+ label (str): Label for the personal data removed.
274
281
 
275
282
  Returns:
276
283
  list[str]: The input list of text with handles replaced.
@@ -279,9 +286,11 @@ class IDScrub:
279
286
  self.logger.info("Scrubbing @user handles using regex...")
280
287
  pattern = r"@[\w.-]+(?=\b)"
281
288
 
282
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_handles")
289
+ return self.scrub_regex(pattern, replacement_text, label=label)
283
290
 
284
- def google_phone_numbers(self, region: str = "GB", replacement_text: str = "[PHONENO]") -> list[str]:
291
+ def google_phone_numbers(
292
+ self, region: str = "GB", replacement_text: str = "[PHONENO]", label: str = "phone_number"
293
+ ) -> list[str]:
285
294
  """
286
295
  Remove phone numbers using Google's `phonenumbers`.
287
296
  e.g. `+441234567891` scrubbed
@@ -289,6 +298,7 @@ class IDScrub:
289
298
  Args:
290
299
  region (str): The region to find phone numbers for. See `phonenumbers` regions.
291
300
  replacement_text (str): The replacement text for the removed text.
301
+ label (str): Label for the personal data removed.
292
302
 
293
303
  Returns:
294
304
  list[str]: The input list of text with phone numbers replaced.
@@ -308,7 +318,7 @@ class IDScrub:
308
318
  phone_nos = [match.raw_string for match in matches]
309
319
 
310
320
  for phone_no in phone_nos:
311
- self.scrubbed_data.append({self.text_id_name: i, "scrubbed_phone_numbers": phone_no})
321
+ self.scrubbed_data.append({self.text_id_name: i, label: phone_no})
312
322
 
313
323
  cleaned = text
314
324
  for match in reversed(matches):
@@ -318,17 +328,18 @@ class IDScrub:
318
328
 
319
329
  self.cleaned_texts = cleaned_texts
320
330
 
321
- self.log_message(f"scrubbed_{region.lower()}_phone_numbers")
331
+ self.log_message(label)
322
332
 
323
333
  return cleaned_texts
324
334
 
325
- def uk_phone_numbers(self, replacement_text: str = "[PHONENO]") -> list[str]:
335
+ def uk_phone_numbers(self, replacement_text: str = "[PHONENO]", label: str = "uk_phone_number") -> list[str]:
326
336
  """
327
337
  Remove phone numbers using regex.
328
338
  e.g. `+441234567891` scrubbed
329
339
 
330
340
  Args:
331
341
  replacement_text (str): The replacement text for the removed text.
342
+ label (str): Label for the personal data removed.
332
343
 
333
344
  Returns:
334
345
  list[str]: The input list of text with phone numbers replaced.
@@ -337,9 +348,9 @@ class IDScrub:
337
348
  self.logger.info("Scrubbing phone numbers using regex...")
338
349
  pattern = r"(\+?\d[\d\s]{7,}\d)"
339
350
 
340
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_uk_phone_numbers")
351
+ return self.scrub_regex(pattern, replacement_text, label=label)
341
352
 
342
- def titles(self, strict: bool = False, replacement_text: str = "[TITLE]") -> list[str]:
353
+ def titles(self, strict: bool = False, replacement_text: str = "[TITLE]", label: str = "title") -> list[str]:
343
354
  """
344
355
  Remove titles using regex.
345
356
 
@@ -347,6 +358,7 @@ class IDScrub:
347
358
  strict (bool): Whether to use all of the titles or only essential titles.
348
359
  If strict, you may find scrubbing of common words, such as general.
349
360
  replacement_text (str): The replacement text for the removed text.
361
+ label (str): Label for the personal data removed.
350
362
 
351
363
  Returns:
352
364
  list[str]: The input list of text with names after titles replaced.
@@ -394,7 +406,7 @@ class IDScrub:
394
406
  ]
395
407
 
396
408
  if not strict:
397
- titles_to_remove = ["General", "Major", "Judge", "Master", "Father", "Sister"]
409
+ titles_to_remove = ["General", "Major", "Judge", "Master", "Father", "Sister", "Miss"]
398
410
  titles = [title for title in titles if title not in titles_to_remove]
399
411
 
400
412
  # Add dotted versions
@@ -404,9 +416,9 @@ class IDScrub:
404
416
  self.logger.info("Scrubbing titles using regex...")
405
417
  pattern = r"\b(?:{})\b".format("|".join(re.escape(t) for t in titles))
406
418
 
407
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_titles")
419
+ return self.scrub_regex(pattern, replacement_text, label=label)
408
420
 
409
- def ip_addresses(self, replacement_text: str = "[IPADDRESS]") -> list[str]:
421
+ def ip_addresses(self, replacement_text: str = "[IPADDRESS]", label: str = "ip_address") -> list[str]:
410
422
  """
411
423
  Removes IP addresses.
412
424
  e.g. `192.168.1.1` scrubbed
@@ -421,26 +433,27 @@ class IDScrub:
421
433
  self.logger.info("Scrubbing IP addresses using regex...")
422
434
  pattern = r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
423
435
 
424
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_ip_addresses")
436
+ return self.scrub_regex(pattern, replacement_text, label=label)
425
437
 
426
- def uk_postcodes(self, replacement_text: str = "[POSTCODE]") -> list[str]:
438
+ def uk_postcodes(self, replacement_text: str = "[POSTCODE]", label: str = "uk_postcode") -> list[str]:
427
439
  """
428
- Removes UK postcodes.
440
+ Removes postcodes.
429
441
  e.g. `A11 1AA` scrubbed
430
442
 
431
443
  Args:
432
444
  replacement_text (str): The replacement text for the removed text.
445
+ label (str): Label for the personal data removed.
433
446
 
434
447
  Returns:
435
448
  list[str]: The input list of text with postcodes replaced.
436
449
  """
437
450
 
438
- self.logger.info("Scrubbing UK postcodes using regex...")
451
+ self.logger.info("Scrubbing postcodes using regex...")
439
452
  pattern = r"\b(?:(?:[A-Z][A-HJ-Y]?[0-9][0-9A-Z]?)[ \t]*[0-9][A-Z]{2}|GIR[ \t]*0A{2}|SAN[ \t]*TA1|ASCN[ \t]*1ZZ|STHL[ \t]*1ZZ|TDCU[ \t]*1ZZ|BBND[ \t]*1ZZ|[BFS]IQ{2}[ \t]*1ZZ|GX11[ \t]*1AA|PCRN[ \t]*1ZZ|TKCA[ \t]*1ZZ|AI-?[0-9]{4}|BFPO[ \t-]?[0-9]{2,4}|MSR[ \t-]?1(?:1[12]|[23][135])0|VG[ \t-]?11[1-6]0|KY[1-3][ \t-]?[0-2][0-9]{3})\b"
440
453
 
441
- return self.scrub_regex(pattern, replacement_text, removed_label="scrubbed_uk_postcodes")
454
+ return self.scrub_regex(pattern, replacement_text, label=label)
442
455
 
443
- def claimants(self, replacement_text="[CLAIMANT]") -> list[str]:
456
+ def claimants(self, replacement_text="[CLAIMANT]", label: str = "claimant") -> list[str]:
444
457
  """
445
458
  Removes claimant names from employment tribunal texts.
446
459
  e.g. `Claimant: Jim Smith` scrubbed
@@ -472,9 +485,7 @@ class IDScrub:
472
485
 
473
486
  if claimant_name:
474
487
  cleaned = re.sub(re.escape(claimant_name), replacement_text, cleaned)
475
- self.scrubbed_data.append({self.text_id_name: i, "scrubbed_claimants": claimant_name})
476
- # self.scrubbed_data[self.text_id_name].append(i)
477
- # self.scrubbed_data['scrubbed_claimant'].append(claimant_name)
488
+ self.scrubbed_data.append({self.text_id_name: i, label: claimant_name})
478
489
 
479
490
  cleaned_texts.append(cleaned)
480
491
 
@@ -523,6 +534,7 @@ class IDScrub:
523
534
  n_process: int = 1,
524
535
  batch_size: int = 1000,
525
536
  replacement_text: str = "[PERSON]",
537
+ label: str = "person",
526
538
  ) -> list[str]:
527
539
  """
528
540
  Remove PERSON entities using a Spacy model.
@@ -533,6 +545,7 @@ class IDScrub:
533
545
  n_process (int): Number of parallel processes.
534
546
  batch_size (int): The number of texts in each batch.
535
547
  replacement_text (str): The replacement text for the removed text.
548
+ label (str): Label for the personal data removed.
536
549
 
537
550
  Returns:
538
551
  list[str]: The input list of text with PERSON entities scrubbed.
@@ -561,9 +574,7 @@ class IDScrub:
561
574
  person_entities = [
562
575
  ent for ent in doc.ents if ent.label_ == "PERSON" and ent.text not in {"PERSON", "HANDLE"}
563
576
  ]
564
- self.scrubbed_data.extend(
565
- {self.text_id_name: ids, "scrubbed_spacy_person": ent.text} for ent in person_entities
566
- )
577
+ self.scrubbed_data.extend({self.text_id_name: ids, label: ent.text} for ent in person_entities)
567
578
 
568
579
  # Remove person entities
569
580
  cleaned = stripped_text
@@ -574,7 +585,7 @@ class IDScrub:
574
585
 
575
586
  self.cleaned_texts = cleaned_texts
576
587
 
577
- self.log_message("scrubbed_spacy_person")
588
+ self.log_message(label)
578
589
 
579
590
  return cleaned_texts
580
591
 
@@ -618,6 +629,7 @@ class IDScrub:
618
629
  hf_model_path: str = "dbmdz/bert-large-cased-finetuned-conll03-english",
619
630
  download_directory: str = f"{DOWNLOAD_DIR}/huggingface/",
620
631
  replacement_text: str = "[PERSON]",
632
+ label: str = "person",
621
633
  batch_size: int = 8,
622
634
  ) -> list[str]:
623
635
  """
@@ -630,6 +642,7 @@ class IDScrub:
630
642
  download_directory (str): Directory in which to save the model.
631
643
  Default is current working directory.
632
644
  replacement_text (str): The replacement text for the removed text.
645
+ label (str): Label for the personal data removed.
633
646
  batch_size (int): Number of texts passed to the model in each batch.
634
647
  Memory (instance size) dependent.
635
648
 
@@ -668,9 +681,7 @@ class IDScrub:
668
681
  person_entities = [
669
682
  ent for ent in entities if ent["entity_group"] == "PER" and ent["word"] not in {"HANDLE", "PERSON"}
670
683
  ]
671
- self.scrubbed_data.extend(
672
- {self.text_id_name: ids, "scrubbed_hf_person": ent["word"]} for ent in person_entities
673
- )
684
+ self.scrubbed_data.extend({self.text_id_name: ids, label: ent["word"]} for ent in person_entities)
674
685
 
675
686
  cleaned = stripped_text
676
687
  for ent in sorted(person_entities, key=lambda x: x["start"], reverse=True):
@@ -680,14 +691,14 @@ class IDScrub:
680
691
 
681
692
  self.cleaned_texts = cleaned_texts
682
693
 
683
- self.log_message("scrubbed_hf_person")
694
+ self.log_message(label)
684
695
 
685
696
  return cleaned_texts
686
697
 
687
698
  def presidio(
688
699
  self,
689
- model_name="en_core_web_trf",
690
- entities_to_scrub=[
700
+ model_name: str = "en_core_web_trf",
701
+ entities_to_scrub: list[str] = [
691
702
  "PERSON",
692
703
  "UK_NINO",
693
704
  "UK_NHS",
@@ -697,7 +708,8 @@ class IDScrub:
697
708
  "URL",
698
709
  "IBAN_CODE",
699
710
  ],
700
- replacement_map=None,
711
+ replacement_map: str = None,
712
+ label_prefix: str = None,
701
713
  ) -> list[str]:
702
714
  """
703
715
  Scrub specified entities from texts using Presidio.
@@ -708,6 +720,7 @@ class IDScrub:
708
720
  model_name (str): spaCy model to use
709
721
  entities_to_scrub (list[str]): Entity types to scrub (e.g. ["PERSON", "IP_ADDRESS"])
710
722
  replacement_map (dict): Mapping of entity_type to replacement string (e.g. {'PERSON': '[PERSON]'})
723
+ label_prefix (str): Prefix for the Presidio personal data type removed, e.g. `{label}_person`.
711
724
 
712
725
  Returns:
713
726
  list[str]: The input list of text with entities replaced.
@@ -743,7 +756,11 @@ class IDScrub:
743
756
  results = analyzer.analyze(text=stripped_text, language="en")
744
757
  results = [r for r in results if r.entity_type in entities_to_scrub]
745
758
 
746
- labels = [f"scrubbed_presidio_{res.entity_type.lower()}" for res in results]
759
+ if label_prefix:
760
+ labels = [f"{label_prefix}_{res.entity_type.lower()}" for res in results]
761
+ else:
762
+ labels = [f"{res.entity_type.lower()}" for res in results]
763
+
747
764
  unique_labels.append(list(set(labels)))
748
765
 
749
766
  self.scrubbed_data.extend(
@@ -847,38 +864,6 @@ class IDScrub:
847
864
 
848
865
  return self.cleaned_texts
849
866
 
850
- def call_scrub_method(self, scrub_method: str) -> list[str]:
851
- """
852
- Calls a given scrub method based on its matching string name.
853
- Uses default values for the given scrub method.
854
-
855
- Example:
856
- "all" == scrub.all() and "email_addresses" == scrub.email_addresses().
857
-
858
- Args:
859
- scrub_method (str): string name of scrub method.
860
-
861
- Returns:
862
- list[str]: The input list of text with personal information replaced.
863
-
864
- """
865
-
866
- scrub_methods = {
867
- "all": self.all,
868
- "spacy_persons": self.spacy_persons,
869
- "huggingface_persons": self.huggingface_persons,
870
- "email_addresses": self.email_addresses,
871
- "handles": self.handles,
872
- "ip_addresses": self.ip_addresses,
873
- "uk_phone_numbers": self.uk_phone_numbers,
874
- "google_phone_numbers": self.google_phone_numbers,
875
- "uk_postcodes": self.uk_postcodes,
876
- "titles": self.titles,
877
- "presidio": self.presidio,
878
- }
879
-
880
- return scrub_methods.get(scrub_method, lambda: "Unknown method.")()
881
-
882
867
  def scrub(self, scrub_methods: list[str] = ["all"]) -> list[str]:
883
868
  """
884
869
  Scrubs text using given methods (in order).
@@ -907,7 +892,11 @@ class IDScrub:
907
892
  """
908
893
 
909
894
  for scrub_method in scrub_methods:
910
- self.call_scrub_method(scrub_method)
895
+ try:
896
+ method = getattr(self, scrub_method)
897
+ method()
898
+ except AttributeError:
899
+ self.logger.warning("Not a scrub method.")
911
900
 
912
901
  return self.cleaned_texts
913
902
 
@@ -915,7 +904,7 @@ class IDScrub:
915
904
  def dataframe(
916
905
  df: pd.DataFrame = None,
917
906
  id_col: str = None,
918
- exclude_cols: list[str] = None,
907
+ exclude_cols: list = None,
919
908
  scrub_methods: list[str] = ["all"],
920
909
  ) -> tuple[pd.DataFrame, pd.DataFrame]:
921
910
  """
@@ -960,13 +949,9 @@ class IDScrub:
960
949
  scrubbed_df[col] = scrubbed_df[col].astype(str)
961
950
 
962
951
  scrub = IDScrub(texts=scrubbed_df[col].to_list(), text_id_name=id_col, text_ids=ids)
952
+ scrub.logger.info(f"Scrubbing column `{col}`...")
963
953
 
964
- for i, scrub_method in enumerate(scrub_methods):
965
- if i == len(scrub_methods) - 1:
966
- scrubbed_texts = scrub.call_scrub_method(scrub_method)
967
- else:
968
- scrub.call_scrub_method(scrub_method)
969
-
954
+ scrubbed_texts = scrub.scrub(scrub_methods)
970
955
  scrubbed_df[col] = scrubbed_texts
971
956
 
972
957
  scrubbed_data = scrub.get_scrubbed_data()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: idscrub
3
- Version: 0.2.2
3
+ Version: 1.0.0
4
4
  Author: Department for Business and Trade
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -21,7 +21,7 @@ Dynamic: license-file
21
21
 
22
22
  # idscrub 🧽✨
23
23
 
24
- * Names and other personally identifying information are often present in text.
24
+ * Names and other personally identifying information are often present in text, even if they are not clearly visible or requested.
25
25
  * This information may need to be removed prior to further analysis in many cases.
26
26
  * `idscrub` identifies and removes (*✨scrubs✨*) personal data from text using [regular expressions](https://en.wikipedia.org/wiki/Regular_expression) and [named-entity recognition](https://en.wikipedia.org/wiki/Named-entity_recognition).
27
27
 
@@ -0,0 +1,22 @@
1
+ idscrub/__init__.py,sha256=cRugJv27q1q--bl-VNLpfiScJb_ROlUxyLFhaF55S1w,38
2
+ idscrub/locations.py,sha256=7fMNOcGMYe7sX8TrfhMW6oYGAlc1WVYVQKQbpxE3pqo,217
3
+ idscrub/scrub.py,sha256=VqVqcChbbxMEKJR6Aci971dqG-RmD48otrp9sG2dX0o,34443
4
+ idscrub-1.0.0.dist-info/licenses/LICENSE,sha256=JJnuf10NSx7YXglte1oH_N9ZP3AcWR_Y8irvQb_wnsg,1090
5
+ notebooks/basic_usage.ipynb,sha256=XTBxdtu2F0S99V2lntUEeFj6SN4GRVm4qKvqOhs7nec,38777
6
+ test/conftest.py,sha256=y-pwGXpdg7bbFc36HtE3wQtZkeI0JM77fcMYjej5veY,557
7
+ test/test_all.py,sha256=ifuXAI0Hq3ETNXzdITjNGCnuFyozhN5TpJC2hOtA2bM,1103
8
+ test/test_chain.py,sha256=tGxcG5zRMcX22RfcrimqX6Le2iFPH9NqfZy7Idhelps,1808
9
+ test/test_dataframe.py,sha256=1LhtkQQpXblQ18ppI1s1nNyse0YCwGHbhtrKGkdppBw,6413
10
+ test/test_huggingface.py,sha256=OGwWSz_tzcynuRFXOdV4H4ProKnekYMdtZJviXEejiA,836
11
+ test/test_id.py,sha256=TPsvz4Kw1z_Fiek2BV79Hc2q3N37xU3oQra6Y7Ke11Q,989
12
+ test/test_label.py,sha256=aTGmtAWSLHrgoVBbCFUCqj52LmlCEKN6owycOyfVNpQ,669
13
+ test/test_log.py,sha256=tGAGOv4aeHT4E_pB9rq_nNA1CDHNoINpkVrCKaP4d3U,645
14
+ test/test_persidio.py,sha256=rkqiUr-vYnfCf7Xt0gNo2VQK2gi5JKP7ThSlT803swc,1558
15
+ test/test_phonenumbers.py,sha256=hZsXgwhn5R-7426TTWwCH9gWQwhyHtjLUstN10jnX6c,607
16
+ test/test_regex.py,sha256=zuq8g_8F_P5oCA2ChU5wUIFEWjT9LSYB0S_U1rBpTn4,4388
17
+ test/test_scrub.py,sha256=MWpan5cWIGeNPJCvTwtYe-iZeoIjS_fZMIg46ZVrkJo,1377
18
+ test/test_spacy.py,sha256=KHalx16GYHmCaQUU1O5bLMP95SLTu1007fJK1oq__v4,932
19
+ idscrub-1.0.0.dist-info/METADATA,sha256=fo7FUBAHDei63EWPRUrfNS05p3bnZWSY2GPVrho0vjo,5403
20
+ idscrub-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ idscrub-1.0.0.dist-info/top_level.txt,sha256=D4EEodXGCjGiX35ObiBTmjjBAdouN-eCvH-LezGGtks,23
22
+ idscrub-1.0.0.dist-info/RECORD,,