philiprehberger-string_kit 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +48 -0
- data/lib/philiprehberger/string_kit/version.rb +1 -1
- data/lib/philiprehberger/string_kit.rb +103 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f38ef0d79a14fb2d1ee763b52cd9e01ade47d478944f766676f6e32ac1d89eda
|
|
4
|
+
data.tar.gz: b7f0c00dee4c9fd5d7287a16b41b9e7bd5f701068d7c2d52a878a6991cc261ef
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ddb00275a54597ae4a4b0b9265acdf50360ce8b537787d7c7fef89705dbc9173e73dfb67ded292672151ddde4dac6798bb027d0806b6fdf270df68b52489f715
|
|
7
|
+
data.tar.gz: 9652f8595c23648bfe3a42b633e04f758c16d49db5c912237b669d80e8053a28db67ddf7b7758cf08d32be4e0a4ef7a858dcf9ea15b0439b9207223b091dca0a
|
data/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,23 @@ and this gem adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.4.0] - 2026-05-30
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `StringKit.mask(str, show_first:, show_last:, mask_char:)` for partial string obfuscation
|
|
14
|
+
- `StringKit.between(str, left, right)` to extract text between delimiters
|
|
15
|
+
- `StringKit.truncate_words(str, max_words, omission:)` for word-aware truncation
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
- README now includes the standard package card image after the badges
|
|
19
|
+
|
|
20
|
+
## [0.3.0] - 2026-04-25
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
- `StringKit.strip_zero_width` removes zero-width and invisible Unicode characters
|
|
24
|
+
- `StringKit.levenshtein` returns Levenshtein edit distance between two strings
|
|
25
|
+
- `StringKit.similarity` returns a 0.0–1.0 similarity score derived from Levenshtein distance
|
|
26
|
+
|
|
10
27
|
## [0.2.1] - 2026-04-15
|
|
11
28
|
|
|
12
29
|
### Fixed
|
data/README.md
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
[Gem Version](https://rubygems.org/gems/philiprehberger-string_kit)
|
|
5
5
|
[Last updated](https://github.com/philiprehberger/rb-string-kit/commits/main)
|
|
6
6
|
|
|
7
|
+

|
|
8
|
+
|
|
7
9
|
Comprehensive string utilities without ActiveSupport dependency
|
|
8
10
|
|
|
9
11
|
## Requirements
|
|
@@ -75,6 +77,46 @@ Philiprehberger::StringKit.indent("hello\nworld", 2) # => " hello\n
|
|
|
75
77
|
Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworld"
|
|
76
78
|
```
|
|
77
79
|
|
|
80
|
+
### Zero-Width Characters
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
require "philiprehberger/string_kit"
|
|
84
|
+
|
|
85
|
+
raw = "hello\u200Bworld" # contains a zero-width space (U+200B)
|
|
86
|
+
Philiprehberger::StringKit.strip_zero_width(raw) # => "helloworld"
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### String Similarity
|
|
90
|
+
|
|
91
|
+
```ruby
|
|
92
|
+
Philiprehberger::StringKit.levenshtein('kitten', 'sitting') # => 3
|
|
93
|
+
Philiprehberger::StringKit.similarity('kitten', 'sitting') # => ~0.571
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Masking
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
Philiprehberger::StringKit.mask('4242424242424242', show_last: 4) # => "************4242"
|
|
100
|
+
Philiprehberger::StringKit.mask('alice@example.com', show_first: 2, show_last: 4) # => "al***********.com"
|
|
101
|
+
Philiprehberger::StringKit.mask('password123', show_last: 3, mask_char: '#') # => "########123"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Between Delimiters
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
Philiprehberger::StringKit.between('hello [world] there', '[', ']') # => "world"
|
|
108
|
+
Philiprehberger::StringKit.between('a(b)c(d)', '(', ')') # => "b"
|
|
109
|
+
Philiprehberger::StringKit.between('no brackets here', '[', ']') # => nil
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Word-Aware Truncation
|
|
113
|
+
|
|
114
|
+
```ruby
|
|
115
|
+
Philiprehberger::StringKit.truncate_words('The quick brown fox jumps', 3) # => "The quick brown…"
|
|
116
|
+
Philiprehberger::StringKit.truncate_words('Two words', 5) # => "Two words"
|
|
117
|
+
Philiprehberger::StringKit.truncate_words('a b c d e', 2, omission: '...') # => "a b..."
|
|
118
|
+
```
|
|
119
|
+
|
|
78
120
|
## API
|
|
79
121
|
|
|
80
122
|
| Method | Description |
|
|
@@ -99,6 +141,12 @@ Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworl
|
|
|
99
141
|
| `StringKit.squeeze(str)` | Remove consecutive duplicate characters |
|
|
100
142
|
| `StringKit.indent(str, n)` | Indent each line by n spaces |
|
|
101
143
|
| `StringKit.dedent(str)` | Remove common leading whitespace |
|
|
144
|
+
| `StringKit.strip_zero_width(str)` | Remove zero-width and invisible Unicode characters |
|
|
145
|
+
| `StringKit.levenshtein(a, b)` | Edit distance between two strings |
|
|
146
|
+
| `StringKit.similarity(a, b)` | 0.0–1.0 similarity derived from Levenshtein distance |
|
|
147
|
+
| `StringKit.mask(str, show_first:, show_last:, mask_char:)` | Mask the middle of a string for partial obfuscation |
|
|
148
|
+
| `StringKit.between(str, left, right)` | Extract text between the first occurrence of two delimiters |
|
|
149
|
+
| `StringKit.truncate_words(str, max_words, omission:)` | Truncate to the first `max_words` words with an omission marker |
|
|
102
150
|
|
|
103
151
|
## Development
|
|
104
152
|
|
|
@@ -278,6 +278,109 @@ module Philiprehberger
|
|
|
278
278
|
str.swapcase
|
|
279
279
|
end
|
|
280
280
|
|
|
281
|
+
# Removes zero-width and invisible Unicode characters from `str`.
# Useful when ingesting content copied from web pages.
#
# The character class is written with `\u` escapes so the code points being
# removed are visible in the source and survive copy/paste and rendering.
#
# NOTE(review): the literal characters inside the original character class
# were not visible in the reviewed text. The set used here (U+200B zero-width
# space, U+200C zero-width non-joiner, U+200D zero-width joiner, U+2060 word
# joiner, U+FEFF zero-width no-break space / BOM) is an assumption — confirm
# against the released gem before merging.
#
# @param str [String]
# @return [String] a new string with the listed code points removed
def self.strip_zero_width(str)
  str.gsub(/[\u200B-\u200D\u2060\uFEFF]/, '')
end
|
|
289
|
+
|
|
290
|
+
# Levenshtein edit distance between `a` and `b`: the minimum number of
# single-character insertions, deletions and substitutions needed to turn
# `a` into `b`. Strings are compared character by character.
#
# Only two rows of the dynamic-programming table are kept. The row buffers
# are swapped after each pass rather than copied, and `b` is decoded into
# characters once instead of once per row.
#
# @param a [String]
# @param b [String]
# @return [Integer]
def self.levenshtein(a, b)
  return b.length if a.empty?
  return a.length if b.empty?

  b_chars = b.chars
  prev = (0..b_chars.length).to_a
  curr = Array.new(b_chars.length + 1)

  a.each_char.with_index(1) do |ac, row|
    # Column 0 is the cost of deleting the first `row` characters of `a`.
    curr[0] = row
    b_chars.each_with_index do |bc, j|
      cost = ac == bc ? 0 : 1
      curr[j + 1] = [curr[j] + 1, prev[j + 1] + 1, prev[j] + cost].min
    end
    # Every slot of the old `prev` row is overwritten on the next pass,
    # so reusing it as the next `curr` is safe.
    prev, curr = curr, prev
  end

  prev[b_chars.length]
end
|
|
313
|
+
|
|
314
|
+
# Normalised similarity score in the range 0.0..1.0, computed from the
# Levenshtein distance as `1 - distance / max_length`, where `max_length`
# is the length of the longer input. Two empty strings score 1.0, as do
# identical strings.
#
# @param a [String]
# @param b [String]
# @return [Float]
def self.similarity(a, b)
  longest = a.length > b.length ? a.length : b.length
  return 1.0 if longest.zero?

  distance = levenshtein(a, b)
  1.0 - (distance.to_f / longest)
end
|
|
327
|
+
|
|
328
|
+
# Mask a string by replacing its middle portion with `mask_char`,
# leaving `show_first` characters at the start and `show_last` at the end.
# Returns `str` unchanged when there is not enough room to mask at least
# two characters in the middle.
#
# Negative or non-Integer `show_first` / `show_last` values are rejected up
# front. Previously a negative `show_first` failed with a NoMethodError
# (`str[0, -1]` is nil) and a negative `show_last` produced a result longer
# than the input.
#
# @param str [String]
# @param show_first [Integer] number of characters to leave visible at the start
# @param show_last [Integer] number of characters to leave visible at the end
# @param mask_char [String] character used to mask the hidden portion (default: '*')
# @return [String]
# @raise [Error] when `show_first` or `show_last` is not a non-negative Integer
def self.mask(str, show_first: 0, show_last: 0, mask_char: '*')
  validate!(str)
  unless [show_first, show_last].all? { |count| count.is_a?(Integer) && !count.negative? }
    raise Error, 'show_first and show_last must be non-negative Integers'
  end

  hidden = str.length - show_first - show_last
  # Fewer than two hidden characters: masking would reveal almost everything.
  return str if hidden < 2

  "#{str[0, show_first]}#{mask_char * hidden}#{str[str.length - show_last, show_last]}"
end
|
|
345
|
+
|
|
346
|
+
# Extracts the text that lies strictly between two delimiters: the first
# occurrence of `left`, and the first occurrence of `right` found after
# that `left`. The delimiters themselves are not included. Returns `nil`
# when either delimiter cannot be found.
#
# @param str [String]
# @param left [String]
# @param right [String]
# @return [String, nil]
def self.between(str, left, right)
  validate!(str)

  opening = str.index(left)
  return nil unless opening

  from = opening + left.length
  closing = str.index(right, from)
  return nil unless closing

  str[from, closing - from]
end
|
|
365
|
+
|
|
366
|
+
# Keeps at most the first `max_words` whitespace-separated words of `str`.
# If words were dropped, the kept words are joined with single spaces and
# `omission` is appended. If the string has `max_words` words or fewer it
# is returned unchanged.
#
# @param str [String]
# @param max_words [Integer] maximum number of words to keep (must be positive)
# @param omission [String] string appended when truncation occurs (default: '…')
# @return [String]
# @raise [Error] when `max_words` is not a positive Integer
def self.truncate_words(str, max_words, omission: '…')
  validate!(str)
  unless max_words.is_a?(Integer) && max_words.positive?
    raise Error, 'max_words must be a positive Integer'
  end

  tokens = str.split(/\s+/).reject(&:empty?)
  return str if tokens.length <= max_words

  kept = tokens.take(max_words).join(' ')
  "#{kept}#{omission}"
end
|
|
383
|
+
|
|
281
384
|
class << self
|
|
282
385
|
private
|
|
283
386
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: philiprehberger-string_kit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Philip Rehberger
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-05-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: String case conversion, slug generation, transliteration, padding, HTML
|
|
14
14
|
stripping, whitespace normalization, word counting, reading time estimation, excerpt
|