RubyGems - unicoder - Versions diffs - 1.1.0 → 1.1.1 - Mend

unicoder 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/Gemfile.lock +1 -1
data/README.md +1 -0
data/lib/unicoder/builders/blocks.rb +4 -2
data/lib/unicoder/builders/name.rb +1 -1
data/lib/unicoder/builders/scripts.rb +19 -2
data/lib/unicoder/constants.rb +1 -1
data/lib/unicoder/replace_common_words.rb +3 -2
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b9cec551ee0c7308313eada0859d3b2cbe1a8c3aaeb45072d3fb08a886b25e8a
-  data.tar.gz: 907185cfd8e98d4d8f291a33b64b0af349716b75757f72abc00de1e98e7bdd3a
+  metadata.gz: f531e7ea0b5d27ea2bcb05bba6ddd93e0b7325d3d097abaf9ed815cc11d9a197
+  data.tar.gz: 8c3d133fb02f3b3d516c9a07390f768509eb3b22f2ae2850ad134819b0390462
 SHA512:
-  metadata.gz: ac60e2255690882f372023e66fff6bbfaf1b039f50896e7496ce6cdec2324d993a5fa644b7de539f3039a20bd8bdbaf18f6b81aa727ccf1a6d411608b3355a6e
-  data.tar.gz: 403549b0dfdc3fe4dc3f93f3a4c743fdef4e2e057364c49ee7d38d71c4722b90d50bf64ea9050d0ad2975c118ceb2c2bcd6c9464d4976e843655f69f0bffe2b1
+  metadata.gz: 05bd755d93de557ca8786c740675e88847da99c31c07a1517040f51fc0e9846537eeb9317087d955f4cc65d4c6d2434cd83a5216ad21dba8f583edd1166eb10a
+  data.tar.gz: 1644c4b6dee2db05f6d7ec91a5250798045e358e3580e141327f39d8c150cce5ad7d687cce8ffca796ca1de728151bb0a29b9d8084ae7fe6b87ec4e080fb95cb

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,11 @@
 ## CHANGELOG
+### 1.1.1
+- Fix bug related to unsafe characters
+- Fix squared CJK
+- Small adjustments for scripts and blocks index builders
 ### 1.1.0
 - Improve name index size: Support ranges

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    unicoder (1.1.0)
+    unicoder (1.1.1)
       oga (~> 2.9)
       rationalist (~> 2.0)
       rubyzip (~> 1.2)

data/README.md CHANGED Viewed

@@ -39,6 +39,7 @@ Index Name    | Module
 --------------|----
 name, sequence\_name, type | [unicode-name.js](https://github.com/janlelis/unicode-name.js)
 numeric\_value| [unicode-number.js](https://github.com/janlelis/unicode-number.js)
+scripts | [unicode-scripts.js](https://github.com/janlelis/unicode-scripts.js)
 ## MIT License

data/lib/unicoder/builders/blocks.rb CHANGED Viewed

@@ -4,12 +4,14 @@ module Unicoder
       include Builder
       def initialize_index
-        @index = []
+        @index = {
+          BLOCKS: []
+        }
       end
       def parse!
         parse_file :blocks, :line, regex: /^(?<from>\S+?)\.\.(?<to>\S+);\s(?<name>.+)$/ do |line|
-          @index << [line["from"].to_i(16), line["to"].to_i(16), line["name"]]
+          @index[:BLOCKS] << [line["from"].to_i(16), line["to"].to_i(16), line["name"]]
         end
       end
     end

data/lib/unicoder/builders/name.rb CHANGED Viewed

@@ -68,7 +68,7 @@ module Unicoder
             elsif line["name"] != "<control>"
               raise ArgumentError, "inconsistent range found in data, don't know what to do"
             end
-          elsif line["name"] =~ Regexp.union(@index[:CP_RANGES].keys)
+          elsif line["name"] =~ Regexp.union(@index[:CP_RANGES].keys.map{/^#{_1}/})
             # ignore
           else
             assign :NAMES, line["codepoint"].to_i(16), line["name"]

data/lib/unicoder/builders/scripts.rb CHANGED Viewed

@@ -10,6 +10,12 @@ module Unicoder
           SCRIPT_EXTENSIONS: {},
           SCRIPT_ALIASES: {},
           SCRIPT_NAMES: [],
+          OFFSETS: [
+            0x10000,
+            0x1000,
+            0x100,
+            0x10
+          ],
         }
         @reverse_script_names = {}
         @reverse_script_extension_names = {}
@@ -21,6 +27,17 @@ module Unicoder
         }
       end
+      # TODO refactor how multiple indexes are organized
+      def assign_classic(sub_index_name, codepoint, value)
+        idx = @index[sub_index_name]
+        if option =~ /charkeys/
+          idx[[codepoint].pack("U*")] = value
+        else
+          idx[codepoint] = value
+        end
+      end
       def parse!
         parse_file :property_value_aliases, :line, regex: /^sc ; (?<short>\S+?)\s*; (?<long>\S+?)(?:\s*; (?<short2>\S+))?$/ do |line|
           @index[:SCRIPT_NAMES] << line["long"]
@@ -47,10 +64,10 @@ module Unicoder
         parse_file :script_extensions, :line, regex: /^(?<from>\S+?)(\.\.(?<to>\S+))?\s+; (?<scripts>.+?) #.*$/ do |line|
           if line["to"]
             (line["from"].to_i(16)..line["to"].to_i(16)).each{ |codepoint|
-              @index[:SCRIPT_EXTENSIONS][codepoint] = lookup_extension_names(line["scripts"])
+              assign_classic :SCRIPT_EXTENSIONS, codepoint, lookup_extension_names(line["scripts"])
             }
           else
-            @index[:SCRIPT_EXTENSIONS][line["from"].to_i(16)] = lookup_extension_names(line["scripts"])
+            assign_classic :SCRIPT_EXTENSIONS, line["from"].to_i(16), lookup_extension_names(line["scripts"])
           end
         end
       end

data/lib/unicoder/constants.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module Unicoder
-  VERSION = "1.1.0"
+  VERSION = "1.1.1"
   UNICODE_VERSIONS = %w[
     16.0.0

data/lib/unicoder/replace_common_words.rb CHANGED Viewed

@@ -2,8 +2,9 @@ require "json"
 module Unicoder
   module ReplaceCommonWords
-  	def replace_common_words!(which_index, words, count = 500, base = ?[.ord, min_word_length = 4)
-  	  puts "Starting to replace the #{count} most common words"
+  	def replace_common_words!(which_index, words, count = 500, _ = ?[.ord, min_word_length = 4)
+      base = @words.join.chars.max.ord + 1
+  	  puts "Starting to replace the #{count} most common words (replace base: #{base})"
   	  @index[:REPLACE_BASE] = base
   	  @index[:COMMON_WORDS] = words.
   	    select{_1.size >= min_word_length}.

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: unicoder
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.1.1
 platform: ruby
 authors:
 - Jan Lelis
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-09 00:00:00.000000000 Z
+date: 2024-10-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rationalist