philiprehberger-string_kit 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffce546205ecc50ec2bcdadc79aa6c62808e4501144ba4379a62c1baacd35e43
4
- data.tar.gz: 588a32c97d8bca9e364ba44e96cd7343641f43bc7dc31884f833914953c667b6
3
+ metadata.gz: 4a290f7b9aa5a9534b5f0f55086b416671e860b89bd58df4dae1558e8451f489
4
+ data.tar.gz: e4162f06038b8da1ad5f3b67249e3c1776fe9213014c2287a305addc163a978e
5
5
  SHA512:
6
- metadata.gz: f58dd4dd8318d552374e7280f29a8b2864d8749dac9160ef22af88dbf3a58b7e4833fb14334dbb4b9ea82d4403302189b3d9188914c0bb787baf8d2e54c62710
7
- data.tar.gz: 6af93996e58d274244114a6b82f804bb1e43ec9016b7a9e43c126ec3fff549a4e803242fc809f446eec61608286f1a8a838aadbc6475a6b45b413f42a53227eb
6
+ metadata.gz: df68e8c923b8ab6693894e01068317abbb4254ab10063d5859c81dbc2e1c585160e7ec4fa94189329b25ad86ae8b6b6728fa153c990fd74537b22653796abd51
7
+ data.tar.gz: d41749d919b01bff185fe0bd555b65a0b7c2989e27fb9cb7d0b33cd301fdeabb1491467637eee0ac09b5eb9d29c0d4481d47c73f0a9fbadc7c2fa2e93cf2f84e
data/CHANGELOG.md CHANGED
@@ -7,6 +7,13 @@ and this gem adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [0.3.0] - 2026-04-25
11
+
12
+ ### Added
13
+ - `StringKit.strip_zero_width` removes zero-width and invisible Unicode characters
14
+ - `StringKit.levenshtein` returns Levenshtein edit distance between two strings
15
+ - `StringKit.similarity` returns a 0.0–1.0 similarity score derived from Levenshtein distance
16
+
10
17
  ## [0.2.1] - 2026-04-15
11
18
 
12
19
  ### Fixed
data/README.md CHANGED
@@ -75,6 +75,22 @@ Philiprehberger::StringKit.indent("hello\nworld", 2) # => " hello\n
75
75
  Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworld"
76
76
  ```
77
77
 
78
+ ### Zero-Width Characters
79
+
80
+ ```ruby
81
+ require "philiprehberger/string_kit"
82
+
83
+ raw = "hello\u200Bworld" # contains an invisible zero-width space (U+200B)
84
+ Philiprehberger::StringKit.strip_zero_width(raw) # => "helloworld"
85
+ ```
86
+
87
+ ### String Similarity
88
+
89
+ ```ruby
90
+ Philiprehberger::StringKit.levenshtein('kitten', 'sitting') # => 3
91
+ Philiprehberger::StringKit.similarity('kitten', 'sitting') # => ~0.571
92
+ ```
93
+
78
94
  ## API
79
95
 
80
96
  | Method | Description |
@@ -99,6 +115,9 @@ Philiprehberger::StringKit.dedent(" hello\n world") # => "hello\nworl
99
115
  | `StringKit.squeeze(str)` | Remove consecutive duplicate characters |
100
116
  | `StringKit.indent(str, n)` | Indent each line by n spaces |
101
117
  | `StringKit.dedent(str)` | Remove common leading whitespace |
118
+ | `StringKit.strip_zero_width(str)` | Remove zero-width and invisible Unicode characters |
119
+ | `StringKit.levenshtein(a, b)` | Edit distance between two strings |
120
+ | `StringKit.similarity(a, b)` | 0.0–1.0 similarity derived from Levenshtein distance |
102
121
 
103
122
  ## Development
104
123
 
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Philiprehberger
4
4
  module StringKit
5
- VERSION = '0.2.1'
5
+ VERSION = '0.3.0'
6
6
  end
7
7
  end
@@ -278,6 +278,53 @@ module Philiprehberger
278
278
  str.swapcase
279
279
  end
280
280
 
# Removes zero-width and invisible Unicode characters from `str`.
# Useful when ingesting content copied from web pages.
#
# The character class is written with `\u` escapes rather than literal
# characters so the (otherwise invisible) code points can be read and
# reviewed in source, and cannot be silently dropped by an editor:
#
#   U+200B ZERO WIDTH SPACE
#   U+200C ZERO WIDTH NON-JOINER
#   U+200D ZERO WIDTH JOINER
#   U+2060 WORD JOINER
#   U+FEFF ZERO WIDTH NO-BREAK SPACE (byte order mark)
#   U+061C ARABIC LETTER MARK
#
# @param str [String]
# @return [String] a new string with the characters above removed
def self.strip_zero_width(str)
  str.gsub(/[\u200B\u200C\u200D\u2060\uFEFF\u061C]/, '')
end
289
+
290
+ # Levenshtein edit distance between `a` and `b`.
291
+ #
292
+ # @param a [String]
293
+ # @param b [String]
294
+ # @return [Integer]
295
+ def self.levenshtein(a, b)
296
+ return b.length if a.empty?
297
+ return a.length if b.empty?
298
+
299
+ prev = (0..b.length).to_a
300
+ curr = Array.new(b.length + 1)
301
+
302
+ a.each_char.with_index do |ac, i|
303
+ curr[0] = i + 1
304
+ b.each_char.with_index do |bc, j|
305
+ cost = ac == bc ? 0 : 1
306
+ curr[j + 1] = [curr[j] + 1, prev[j + 1] + 1, prev[j] + cost].min
307
+ end
308
+ prev = curr.dup
309
+ end
310
+
311
+ prev[b.length]
312
+ end
313
+
# Normalised similarity of `a` and `b`, from 0.0 to 1.0.
#
# Two empty strings are treated as a perfect match (1.0). Otherwise the
# score is `1 - distance / longest`, where `distance` is the Levenshtein
# edit distance and `longest` is the length of the longer input, so
# identical strings score 1.0.
#
# @param a [String]
# @param b [String]
# @return [Float]
def self.similarity(a, b)
  longest = a.length > b.length ? a.length : b.length
  return 1.0 if longest.zero?

  distance = levenshtein(a, b)
  1.0 - (distance.to_f / longest)
end
327
+
281
328
  class << self
282
329
  private
283
330
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: philiprehberger-string_kit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Philip Rehberger
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-15 00:00:00.000000000 Z
11
+ date: 2026-04-26 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: String case conversion, slug generation, transliteration, padding, HTML
14
14
  stripping, whitespace normalization, word counting, reading time estimation, excerpt