raindrop-ai 0.0.25__py3-none-any.whl → 0.0.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
raindrop/redact.py CHANGED
@@ -8,6 +8,10 @@ class PIIRedactor:
8
8
  """PII redactor that uses regex patterns to identify and replace PII."""
9
9
 
10
10
  def __init__(self):
11
+ # Load well-known names
12
+ well_known_names_path = os.path.join(os.path.dirname(__file__), 'well-known-names.json')
13
+ with open(well_known_names_path, 'r') as f:
14
+ self.well_known_names = json.load(f)
11
15
 
12
16
  # Build regex patterns
13
17
  self._build_patterns()
@@ -64,6 +68,9 @@ class PIIRedactor:
64
68
  re.MULTILINE
65
69
  )
66
70
 
71
+ # Well-known names pattern
72
+ names_pattern_str = r'\b(' + '|'.join(re.escape(name) for name in self.well_known_names) + r')\b'
73
+ self.well_known_names_pattern = re.compile(names_pattern_str, re.IGNORECASE)
67
74
 
68
75
  # Credentials pattern (API keys, tokens, etc.)
69
76
  self.credentials_pattern = re.compile(
@@ -71,6 +78,61 @@ class PIIRedactor:
71
78
  re.IGNORECASE
72
79
  )
73
80
 
81
+
82
def redact_names(self, text: str) -> str:
    """Redact personal names from *text*.

    Four heuristics are applied in order, each replacing a detected name
    with the literal ``<REDACTED_NAME>``:

    1. Every match of ``self.well_known_names_pattern``.
    2. A run of capitalized words directly following a match of
       ``self.greeting_pattern``.
    3. A run of capitalized words directly preceding a match of
       ``self.closing_pattern`` on the same line.
    4. A short line (fewer than 50 characters once stripped) that consists
       only of capitalized words, optionally ending in ``,`` or ``.``,
       i.e. something that looks like a signature.

    Args:
        text: The text to scan. The three pattern attributes read from
            ``self`` are expected to be compiled regular expressions
            (presumably created by the pattern-building step run from
            ``__init__`` -- confirm against the rest of the class).

    Returns:
        The text with detected names replaced. A non-string argument is
        returned unchanged.
    """
    if not isinstance(text, str):
        return text

    placeholder = '<REDACTED_NAME>'

    # Words of a name after a greeting are separated by spaces/tabs only.
    # A generic ``\s+`` separator would let the match run across a line
    # break and swallow the first capitalized word of the next line.
    name_after_greeting = re.compile(r'\s*([A-Z][a-z]+(?:[ \t]+[A-Z][a-z]+)*)')
    # These two are applied to single lines, so ``\s`` cannot cross a '\n'.
    name_before_closing = re.compile(r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s*$')
    signature_line = re.compile(r'^[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*[,.]?$')

    # 1. Well-known names.
    text = self.well_known_names_pattern.sub(placeholder, text)

    # 2. Names after greetings. Matches are processed last-to-first so that
    #    replacing a name never shifts the offsets of greetings still to be
    #    handled. Matching at an offset avoids copying the tail of the text
    #    for every greeting.
    for greeting in reversed(list(self.greeting_pattern.finditer(text))):
        name = name_after_greeting.match(text, greeting.end())
        if name:
            text = text[:name.start(1)] + placeholder + text[name.end(1):]

    # 3 + 4. Line-based heuristics. Neither step introduces a newline, so
    # both can run in a single pass over the lines.
    lines = text.split('\n')
    for index, line in enumerate(lines):
        closing = self.closing_pattern.search(line)
        if closing:
            head = line[:closing.start()]
            name = name_before_closing.search(head)
            if name:
                line = (head[:name.start(1)] +
                        placeholder +
                        head[name.end(1):] +
                        line[closing.start():])

        # Signature-like line: redact the content but keep the surrounding
        # whitespace. Lines that already hold a placeholder are left alone.
        stripped = line.strip()
        if (len(stripped) < 50 and
                signature_line.match(stripped) and
                placeholder not in line):
            line = line.replace(stripped, placeholder)

        lines[index] = line

    return '\n'.join(lines)
134
+
135
+
74
136
  def redact(self, text: str) -> str:
75
137
  """Redact PII from the given text using regex patterns."""
76
138
  if not isinstance(text, str):
@@ -98,6 +160,10 @@ class PIIRedactor:
98
160
  # Street addresses
99
161
  text = self.address_pattern.sub('<REDACTED_ADDRESS>', text)
100
162
 
163
+ # Names
164
+ text = self.redact_names(text)
165
+
166
+
101
167
  # Note: IPs, URLs, usernames, and zipcodes are disabled by default
102
168
  # to match JS SDK behavior
103
169
 
raindrop/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "0.0.25"
1
+ VERSION = "0.0.26"