philiprehberger-string_kit 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffce546205ecc50ec2bcdadc79aa6c62808e4501144ba4379a62c1baacd35e43
4
- data.tar.gz: 588a32c97d8bca9e364ba44e96cd7343641f43bc7dc31884f833914953c667b6
3
+ metadata.gz: f38ef0d79a14fb2d1ee763b52cd9e01ade47d478944f766676f6e32ac1d89eda
4
+ data.tar.gz: b7f0c00dee4c9fd5d7287a16b41b9e7bd5f701068d7c2d52a878a6991cc261ef
5
5
  SHA512:
6
- metadata.gz: f58dd4dd8318d552374e7280f29a8b2864d8749dac9160ef22af88dbf3a58b7e4833fb14334dbb4b9ea82d4403302189b3d9188914c0bb787baf8d2e54c62710
7
- data.tar.gz: 6af93996e58d274244114a6b82f804bb1e43ec9016b7a9e43c126ec3fff549a4e803242fc809f446eec61608286f1a8a838aadbc6475a6b45b413f42a53227eb
6
+ metadata.gz: ddb00275a54597ae4a4b0b9265acdf50360ce8b537787d7c7fef89705dbc9173e73dfb67ded292672151ddde4dac6798bb027d0806b6fdf270df68b52489f715
7
+ data.tar.gz: 9652f8595c23648bfe3a42b633e04f758c16d49db5c912237b669d80e8053a28db67ddf7b7758cf08d32be4e0a4ef7a858dcf9ea15b0439b9207223b091dca0a
data/CHANGELOG.md CHANGED
@@ -7,6 +7,23 @@ and this gem adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.4.0] - 2026-05-30
11
+
12
+ ### Added
13
+ - `StringKit.mask(str, show_first:, show_last:, mask_char:)` for partial string obfuscation
14
+ - `StringKit.between(str, left, right)` to extract text between delimiters
15
+ - `StringKit.truncate_words(str, max_words, omission:)` for word-aware truncation
16
+
17
+ ### Fixed
18
+ - README now includes the standard package card image after the badges
19
+
20
+ ## [0.3.0] - 2026-04-25
21
+
22
+ ### Added
23
+ - `StringKit.strip_zero_width` removes zero-width and invisible Unicode characters
24
+ - `StringKit.levenshtein` returns Levenshtein edit distance between two strings
25
+ - `StringKit.similarity` returns a 0.0–1.0 similarity score derived from Levenshtein distance
26
+
10
27
  ## [0.2.1] - 2026-04-15
11
28
 
12
29
  ### Fixed
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
  [![Gem Version](https://badge.fury.io/rb/philiprehberger-string_kit.svg)](https://rubygems.org/gems/philiprehberger-string_kit)
5
5
  [![Last updated](https://img.shields.io/github/last-commit/philiprehberger/rb-string-kit)](https://github.com/philiprehberger/rb-string-kit/commits/main)
6
6
 
7
+ ![philiprehberger-string_kit](https://raw.githubusercontent.com/philiprehberger/rb-string-kit/main/package-card.webp)
8
+
7
9
  Comprehensive string utilities without ActiveSupport dependency
8
10
 
9
11
  ## Requirements
@@ -75,6 +77,46 @@ Philiprehberger::StringKit.indent("hello\nworld", 2) # => " hello\n
75
77
  Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworld"
76
78
  ```
77
79
 
80
+ ### Zero-Width Characters
81
+
82
+ ```ruby
83
+ require "philiprehberger/string_kit"
84
+
85
+ raw = "hello\u200Bworld" # U+200B ZERO WIDTH SPACE hidden between the two words
86
+ Philiprehberger::StringKit.strip_zero_width(raw) # => "helloworld"
87
+ ```
88
+
89
+ ### String Similarity
90
+
91
+ ```ruby
92
+ Philiprehberger::StringKit.levenshtein('kitten', 'sitting') # => 3
93
+ Philiprehberger::StringKit.similarity('kitten', 'sitting') # => ~0.571
94
+ ```
95
+
96
+ ### Masking
97
+
98
+ ```ruby
99
+ Philiprehberger::StringKit.mask('4242424242424242', show_last: 4) # => "************4242"
100
+ Philiprehberger::StringKit.mask('alice@example.com', show_first: 2, show_last: 4) # => "al***********.com"
101
+ Philiprehberger::StringKit.mask('password123', show_last: 3, mask_char: '#') # => "########123"
102
+ ```
103
+
104
+ ### Between Delimiters
105
+
106
+ ```ruby
107
+ Philiprehberger::StringKit.between('hello [world] there', '[', ']') # => "world"
108
+ Philiprehberger::StringKit.between('a(b)c(d)', '(', ')') # => "b"
109
+ Philiprehberger::StringKit.between('no brackets here', '[', ']') # => nil
110
+ ```
111
+
112
+ ### Word-Aware Truncation
113
+
114
+ ```ruby
115
+ Philiprehberger::StringKit.truncate_words('The quick brown fox jumps', 3) # => "The quick brown…"
116
+ Philiprehberger::StringKit.truncate_words('Two words', 5) # => "Two words"
117
+ Philiprehberger::StringKit.truncate_words('a b c d e', 2, omission: '...') # => "a b..."
118
+ ```
119
+
78
120
  ## API
79
121
 
80
122
  | Method | Description |
@@ -99,6 +141,12 @@ Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworl
99
141
  | `StringKit.squeeze(str)` | Remove consecutive duplicate characters |
100
142
  | `StringKit.indent(str, n)` | Indent each line by n spaces |
101
143
  | `StringKit.dedent(str)` | Remove common leading whitespace |
144
+ | `StringKit.strip_zero_width(str)` | Remove zero-width and invisible Unicode characters |
145
+ | `StringKit.levenshtein(a, b)` | Edit distance between two strings |
146
+ | `StringKit.similarity(a, b)` | 0.0–1.0 similarity derived from Levenshtein distance |
147
+ | `StringKit.mask(str, show_first:, show_last:, mask_char:)` | Mask the middle of a string for partial obfuscation |
148
+ | `StringKit.between(str, left, right)` | Extract the text between the first `left` delimiter and the next `right` delimiter after it (`nil` if either is missing) |
149
+ | `StringKit.truncate_words(str, max_words, omission:)` | Truncate to the first `max_words` words with an omission marker |
102
150
 
103
151
  ## Development
104
152
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module StringKit
5
- VERSION = '0.2.1'
5
+ VERSION = '0.4.0'
6
6
  end
7
7
  end
@@ -278,6 +278,109 @@ module Philiprehberger
278
278
  str.swapcase
279
279
  end
280
280
 
281
+ # Removes zero-width and invisible Unicode characters from `str`.
282
+ # Useful when ingesting content copied from web pages.
283
+ #
284
+ # @param str [String]
285
+ # @return [String]
286
+ def self.strip_zero_width(str)
287
+ str.gsub(/[​‌‍⁠؜]/, '')
288
+ end
289
+
290
+ # Levenshtein edit distance between `a` and `b`.
291
+ #
292
+ # @param a [String]
293
+ # @param b [String]
294
+ # @return [Integer]
295
+ def self.levenshtein(a, b)
296
+ return b.length if a.empty?
297
+ return a.length if b.empty?
298
+
299
+ prev = (0..b.length).to_a
300
+ curr = Array.new(b.length + 1)
301
+
302
+ a.each_char.with_index do |ac, i|
303
+ curr[0] = i + 1
304
+ b.each_char.with_index do |bc, j|
305
+ cost = ac == bc ? 0 : 1
306
+ curr[j + 1] = [curr[j] + 1, prev[j + 1] + 1, prev[j] + cost].min
307
+ end
308
+ prev = curr.dup
309
+ end
310
+
311
+ prev[b.length]
312
+ end
313
+
314
+ # Similarity score between 0.0 and 1.0 derived from Levenshtein distance.
315
+ # Returns 1.0 for identical strings, 1.0 for two empty strings, and
316
+ # `1 - distance / max_length` otherwise.
317
+ #
318
+ # @param a [String]
319
+ # @param b [String]
320
+ # @return [Float]
321
+ def self.similarity(a, b)
322
+ max = [a.length, b.length].max
323
+ return 1.0 if max.zero?
324
+
325
+ 1.0 - (levenshtein(a, b).to_f / max)
326
+ end
327
+
328
+ # Mask a string by replacing its middle portion with `mask_char`,
329
+ # leaving `show_first` characters at the start and `show_last` at the end.
330
+ # Returns `str` unchanged when there is not enough room to mask at least
331
+ # two characters in the middle.
332
+ #
333
+ # @param str [String]
334
+ # @param show_first [Integer] number of characters to leave visible at the start
335
+ # @param show_last [Integer] number of characters to leave visible at the end
336
+ # @param mask_char [String] character used to mask the hidden portion (default: '*')
337
+ # @return [String]
338
+ def self.mask(str, show_first: 0, show_last: 0, mask_char: '*')
339
+ validate!(str)
340
+ return str if show_first + show_last >= str.length - 1
341
+
342
+ masked_length = str.length - show_first - show_last
343
+ str[0, show_first] + (mask_char * masked_length) + str[str.length - show_last, show_last].to_s
344
+ end
345
+
346
+ # Returns the substring strictly between the first occurrence of `left`
347
+ # and the first occurrence of `right` after `left`. Returns `nil` when
348
+ # either delimiter is missing.
349
+ #
350
+ # @param str [String]
351
+ # @param left [String]
352
+ # @param right [String]
353
+ # @return [String, nil]
354
+ def self.between(str, left, right)
355
+ validate!(str)
356
+ left_index = str.index(left)
357
+ return nil if left_index.nil?
358
+
359
+ start_pos = left_index + left.length
360
+ right_index = str.index(right, start_pos)
361
+ return nil if right_index.nil?
362
+
363
+ str[start_pos...right_index]
364
+ end
365
+
366
+ # Truncate a string to the first `max_words` words. When truncation
367
+ # happens, append `omission` to the result. The string is unchanged
368
+ # when the word count is less than or equal to `max_words`.
369
+ #
370
+ # @param str [String]
371
+ # @param max_words [Integer] maximum number of words to keep (must be positive)
372
+ # @param omission [String] string appended when truncation occurs (default: '…')
373
+ # @return [String]
374
+ def self.truncate_words(str, max_words, omission: '…')
375
+ validate!(str)
376
+ raise Error, 'max_words must be a positive Integer' unless max_words.is_a?(Integer) && max_words.positive?
377
+
378
+ words = str.split(/\s+/).reject(&:empty?)
379
+ return str if words.length <= max_words
380
+
381
+ "#{words.first(max_words).join(' ')}#{omission}"
382
+ end
383
+
281
384
  class << self
282
385
  private
283
386
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-string_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-15 00:00:00.000000000 Z
11
+ date: 2026-05-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: String case conversion, slug generation, transliteration, padding, HTML
14
14
  stripping, whitespace normalization, word counting, reading time estimation, excerpt