RubyGems - philiprehberger-encoding_kit - Versions diffs - 0.5.0 → 0.6.0 - Mend

philiprehberger-encoding_kit 0.5.0 → 0.6.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +8 -2
data/lib/philiprehberger/encoding_kit/converter.rb +4 -2
data/lib/philiprehberger/encoding_kit/version.rb +1 -1
data/lib/philiprehberger/encoding_kit.rb +6 -4
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3e69668037179f14b92560b58c816a29a14134560bd32568775fd321325b4644
-  data.tar.gz: 3ad9997b697ca6cecca1d8e63955512eb860a86c48007a5138777167da9c4d13
+  metadata.gz: 47de974b9f87b11740744bced13177a6afe17de3ac74a589246b67604241085c
+  data.tar.gz: 3ee29c6b81b51166858bb9e051ae5652f2da84d242f80c8baa6ba45017ecbaa8
 SHA512:
-  metadata.gz: af626ca49ad283a08574162ed45b81fabdccf7aca6d978d78d9367df150234e28d7cd563a1c212145c3a399a7b431b65cd1508fbba67a784517344b124a85a38
-  data.tar.gz: ffd8620298177f0a689411d49bdadbcc61c0a93c0fb0c5afc6cd9e5f72ef77f627ec14aea5819ca898f4aeb774cbdef02957c1777d640712cab360c7eb6622e6
+  metadata.gz: ba8d01eda47ece58ac39d5c494fa82a5a4fa761087cf7c97da6fda3188b1e060afe50dc90e1228a0dfd3a8ab21384255ba862d027fee98407d33ebfd5f9466ca
+  data.tar.gz: 2b5e70cb2767541713c59733e858a6b5f751bd838df739811308e7773a3383aea0955bcd24d161921fec1c1526b92e60b9e0734bafeaed3713f7f379ade00ece

data/CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [0.6.0] - 2026-05-20
+### Added
+- `strip_bom:` option on `EncodingKit.to_utf8` and `EncodingKit.read_as_utf8` to drop a leading UTF BOM after transcoding
+- Card image reference in the README for registry-side rendering
 ## [0.5.0] - 2026-04-30
 ### Added

data/README.md CHANGED Viewed

@@ -4,6 +4,8 @@
 [![Gem Version](https://badge.fury.io/rb/philiprehberger-encoding_kit.svg)](https://rubygems.org/gems/philiprehberger-encoding_kit)
 [![Last updated](https://img.shields.io/github/last-commit/philiprehberger/rb-encoding-kit)](https://github.com/philiprehberger/rb-encoding-kit/commits/main)
+![philiprehberger-encoding_kit](https://raw.githubusercontent.com/philiprehberger/rb-encoding-kit/main/package-card.webp)
 Character encoding detection, conversion, and normalization
 ## Requirements
@@ -103,6 +105,10 @@ utf8 = Philiprehberger::EncodingKit.to_utf8(raw_bytes)
 # Specify source encoding
 utf8 = Philiprehberger::EncodingKit.to_utf8(latin1_string, from: Encoding::ISO_8859_1)
+# Strip a leading BOM after transcoding
+clean = Philiprehberger::EncodingKit.to_utf8("\xEF\xBB\xBFhello".b, strip_bom: true)
+# => "hello"
 ```
 ### Normalize
@@ -195,7 +201,7 @@ Philiprehberger::EncodingKit.valid?("hello", encoding: Encoding::US_ASCII)  # =>
 | `EncodingKit.detect_stream(io, sample_size: 4096)` | Detect encoding from an IO stream by sampling bytes |
 | `EncodingKit.analyze(string)` | Analyze byte distribution and return encoding candidates with stats |
 | `EncodingKit.transcode(string, to:, fallback:, replace:)` | Auto-detect source and convert to target encoding |
-| `EncodingKit.to_utf8(string, from: nil)` | Convert to UTF-8, auto-detect source if `from` is nil |
+| `EncodingKit.to_utf8(string, from: nil, strip_bom: false)` | Convert to UTF-8, auto-detect source if `from` is nil; pass `strip_bom: true` to drop a leading UTF BOM |
 | `EncodingKit.normalize(string)` | Force to valid UTF-8, replacing bad bytes with U+FFFD |
 | `EncodingKit.scrub(string)` | Force to valid UTF-8 by removing invalid bytes entirely |
 | `EncodingKit.normalize_line_endings(string, to: :lf)` | Convert mixed CRLF/CR/LF to a single canonical form (`:lf`, `:crlf`, `:cr`) |
@@ -204,7 +210,7 @@ Philiprehberger::EncodingKit.valid?("hello", encoding: Encoding::US_ASCII)  # =>
 | `EncodingKit.strip_bom(string)` | Remove byte order mark if present |
 | `EncodingKit.bom?(string)` | Check if string starts with a BOM |
 | `EncodingKit.detect_file(path, sample_size: 4096)` | Detect encoding of a file by reading a byte sample |
-| `EncodingKit.read_as_utf8(path, from: nil)` | Read a file and return its content as UTF-8 |
+| `EncodingKit.read_as_utf8(path, from: nil, strip_bom: false)` | Read a file and return its content as UTF-8; pass `strip_bom: true` to drop a leading UTF BOM |
 | `EncodingKit.file_valid?(path, encoding: nil)` | Check if a file's content is valid in the given encoding |
 | `EncodingKit.guess_from_filename(path)` | Guess `Encoding` from filename suffixes (e.g. `.utf8`, `.latin1`), `nil` if unknown |

data/lib/philiprehberger/encoding_kit/converter.rb CHANGED Viewed

@@ -33,12 +33,14 @@ module Philiprehberger
         #
         # @param string [String] the input string
         # @param from [String, Encoding, nil] source encoding (auto-detect if nil)
+        # @param strip_bom [Boolean] remove any leading UTF BOM from the result (default: false)
         # @return [String] UTF-8 encoded string
-        def to_utf8(string, from: nil)
+        def to_utf8(string, from: nil, strip_bom: false)
           detected = from ? Encoding.find(from.to_s) : Detector.call(string)
           source = detected.is_a?(DetectionResult) ? detected.encoding : detected
           str = string.dup.force_encoding(source)
-          str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "\uFFFD")
+          encoded = str.encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "\uFFFD")
+          strip_bom ? encoded.delete_prefix("\uFEFF") : encoded
         end
         # Force a string to valid UTF-8 by replacing invalid and undefined bytes.

data/lib/philiprehberger/encoding_kit/version.rb CHANGED Viewed

@@ -2,6 +2,6 @@
 module Philiprehberger
   module EncodingKit
-    VERSION = '0.5.0'
+    VERSION = '0.6.0'
   end
 end

data/lib/philiprehberger/encoding_kit.rb CHANGED Viewed

@@ -90,9 +90,10 @@ module Philiprehberger
     #
     # @param string [String] the input string
     # @param from [String, Encoding, nil] source encoding (auto-detect if nil)
+    # @param strip_bom [Boolean] remove any leading UTF BOM from the result (default: false)
     # @return [String] UTF-8 encoded string
-    def self.to_utf8(string, from: nil)
-      Converter.to_utf8(string, from: from)
+    def self.to_utf8(string, from: nil, strip_bom: false)
+      Converter.to_utf8(string, from: from, strip_bom: strip_bom)
     end
     # Normalize a string to valid UTF-8, replacing invalid/undefined bytes
@@ -210,10 +211,11 @@ module Philiprehberger
     #
     # @param path [String] path to the file
     # @param from [String, Encoding, nil] source encoding (auto-detect if nil)
+    # @param strip_bom [Boolean] remove any leading UTF BOM from the result (default: false)
     # @return [String] UTF-8 encoded file content
-    def self.read_as_utf8(path, from: nil)
+    def self.read_as_utf8(path, from: nil, strip_bom: false)
       raw = File.binread(path)
-      to_utf8(raw, from: from)
+      to_utf8(raw, from: from, strip_bom: strip_bom)
     end
     # Check if a file's content is valid in the detected or specified encoding.

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: philiprehberger-encoding_kit
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 0.6.0
 platform: ruby
 authors:
 - Philip Rehberger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-05-01 00:00:00.000000000 Z
+date: 2026-05-20 00:00:00.000000000 Z
 dependencies: []
 description: Detect encoding from BOM and heuristics with confidence scores, convert
   between encodings, normalize to UTF-8, analyze byte distributions, and handle Windows