RubyGems - obfs - Versions diffs - 0.0.1 → 0.1.0 - Mend

obfs 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/lib/main/levenshtein.rb +162 -0
data/lib/main/store.rb +130 -0
data/lib/main/white_similarity.rb +65 -0
data/lib/obfs.rb +4 -105
metadata +6 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 60ad552589248d9ac27f0560e71ad3ddd4dfd32407c50fc4c10ee9732b17945a
-  data.tar.gz: 89bfcf071f49f51bc9ad25768c4fdc9e3e7b1c67a9305c7f6aaa358859798ed4
+  metadata.gz: c1e900423f8d6d0f9a467d611d6a4af2bb370b6e68d86aa416be37facabe3d32
+  data.tar.gz: 0e675fcbf20eef86fcc09615e66d745026fd3945a3437787e5a96cc183ca705c
 SHA512:
-  metadata.gz: 64ec76f87128169a5cad3d118dd8b13bf08a4fe2841e7b0b312fbd1f93becbf00e652a8da21466ed85c14e9f42523c723bca51e429b7cef28fb254f5073f79ef
-  data.tar.gz: 3389becdd8b5533ee7883b95907071bcaa77c5a2e9a54a7a7bbd89da7a758a4819ec525691dd244905c5f7fcb2443682a4a641a29ac8b1d891b370fc6b3c99cf
+  metadata.gz: 03650e01c5346d2206c9d3805f83ad76496dcefc31ea0354a5432fddaf550f4c9580768231ea0fa47e35038abeb90cef2e054d242c37d1688d1c5760a7c0f779
+  data.tar.gz: 4d7eec2c13212cf38c7a7974c122b2280a6867d7bc5672a10544d2a85d2e072c1a06fe22296408b4a1f51e2c98e790f11602d330078fc8b3098392930a289d22

data/lib/main/levenshtein.rb ADDED

@@ -0,0 +1,162 @@
+#
+# Credits to threedaymonk
+# https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb
+#
+# Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
+#
+# The Levenshtein distance is a measure of how similar two strings s and t are,
+# calculated as the number of deletions/insertions/substitutions needed to
+# transform s into t. The greater the distance, the more the strings differ.
+#
+# The Levenshtein distance is also sometimes referred to as the
+# easier-to-pronounce-and-spell 'edit distance'.
+#
+# Author: Paul Battley (pbattley@gmail.com)
+#
+module OBFS # :nodoc:
+    module Levenshtein
+      # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
+      #
+      # The optional argument max_distance can reduce the number of iterations by
+      # stopping if the Levenshtein distance exceeds this value. This increases
+      # performance where it is only necessary to compare the distance with a
+      # reference value instead of calculating the exact distance.
+      #
+      # The distance is calculated in terms of Unicode codepoints. Be aware that
+      # this algorithm does not perform normalisation: if there is a possibility
+      # of different normalised forms being used, normalisation should be performed
+      # beforehand.
+      #
+      def distance(str1, str2, max_distance = nil)
+        if max_distance
+          distance_with_maximum(str1, str2, max_distance)
+        else
+          distance_without_maximum(str1, str2)
+        end
+      end
+    private
+      def distance_with_maximum(str1, str2, max_distance) # :nodoc:
+        s = str1.encode(Encoding::UTF_8).unpack("U*")
+        t = str2.encode(Encoding::UTF_8).unpack("U*")
+        n = s.length
+        m = t.length
+        big_int = n * m
+        # Swap if necessary so that s is always the shorter of the two strings
+        s, t, n, m = t, s, m, n if m < n
+        # If the length difference is already greater than the max_distance, then
+        # there is nothing else to check
+        if (n - m).abs >= max_distance
+          return max_distance
+        end
+        return 0 if s == t
+        return m if n.zero?
+        return n if m.zero?
+        # The values necessary for our threshold are written; the ones after must
+        # be filled with large integers since the trailing member of the threshold
+        # window in the bottom array will run min across them
+        d = (m + 1).times.map { |i|
+          if i < m || i < max_distance + 1
+            i
+          else
+            big_int
+          end
+        }
+        x = nil
+        e = nil
+        n.times do |i|
+          # Since we're reusing arrays, we need to be sure to wipe the value left
+          # of the starting index; we don't have to worry about the value above the
+          # ending index as the arrays were initially filled with large integers
+          # and we progress to the right
+          if e.nil?
+            e = i + 1
+          else
+            e = big_int
+          end
+          diag_index = t.length - s.length + i
+          # If max_distance was specified, we can reduce second loop. So we set
+          # up our threshold window.
+          # See:
+          # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences:
+          # computer science and computational biology.
+          # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
+          # pp. 263–264.
+          min = i - max_distance - 1
+          min = 0 if min < 0
+          max = i + max_distance
+          max = m - 1 if max > m - 1
+          min.upto(max) do |j|
+            # If the diagonal value is already greater than the max_distance
+            # then we can safely return: the diagonal will never go lower again.
+            # See: http://www.levenshtein.net/
+            if j == diag_index && d[j] >= max_distance
+              return max_distance
+            end
+            cost = s[i] == t[j] ? 0 : 1
+            insertion = d[j + 1] + 1
+            deletion = e + 1
+            substitution = d[j] + cost
+            x = insertion < deletion ? insertion : deletion
+            x = substitution if substitution < x
+            d[j] = e
+            e = x
+          end
+          d[m] = x
+        end
+        if x > max_distance
+          return max_distance
+        else
+          return x
+        end
+      end
+      def distance_without_maximum(str1, str2) # :nodoc:
+        s = str1.encode(Encoding::UTF_8).unpack("U*")
+        t = str2.encode(Encoding::UTF_8).unpack("U*")
+        n = s.length
+        m = t.length
+        return m if n.zero?
+        return n if m.zero?
+        d = (0..m).to_a
+        x = nil
+        n.times do |i|
+          e = i + 1
+          m.times do |j|
+            cost = s[i] == t[j] ? 0 : 1
+            insertion = d[j + 1] + 1
+            deletion = e + 1
+            substitution = d[j] + cost
+            x = insertion < deletion ? insertion : deletion
+            x = substitution if substitution < x
+            d[j] = e
+            e = x
+          end
+          d[m] = x
+        end
+        return x
+      end
+      extend self
+    end
+    end

data/lib/main/store.rb ADDED

@@ -0,0 +1,130 @@
+module OBFS
+    class Store
+            def initialize(attributes = {}) # hash argument
+                @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
+            end
+            # regular methods
+            def method_missing(m, *args, &block)
+                # normalize
+                method_name = m.to_s
+                dataA = args[0]
+                dataB = args[1]
+                # reject the bare names . and .. (only exact matches are checked, not names containing path separators)
+                raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
+                # setter call
+                if  method_name.end_with?('=')
+                    # clean up name
+                    method_name = method_name.gsub('=','')
+                    # reassign if square bracket notation
+                    if method_name == "[]"
+                        method_name = dataA
+                        data = dataB
+                    else # make sure we load the proper method_name and data
+                        method_name = m.to_s.gsub('=','')
+                        data = args[0]
+                    end
+                    # reject the bare names . and .. (only exact matches are checked, not names containing path separators)
+                    raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
+                    # write data
+                    if data == nil
+                        FileUtils.rm_rf (File.join @path, method_name)
+                    else
+                        FileUtils.rm_rf (File.join @path, method_name) if File.exist? (File.join @path, method_name)
+                        FileUtils.mkpath @path if !File.directory? @path
+                        write(@path, method_name, data)
+                    end
+                # bracket notation
+                elsif method_name == "[]"
+                    method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
+                    # reject the bare names . and .. (only exact matches are checked, not names containing path separators)
+                    raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
+                    if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
+                        read(@path, method_name)
+                    else
+                        OBFS::Store.new({ path: File.join(@path, method_name.to_s) })
+                    end
+                # recurse or read
+                else
+                    # reject the bare names . and .. (only exact matches are checked, not names containing path separators)
+                    raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
+                    if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
+                        read(@path, method_name)
+                    else
+                        OBFS::Store.new({ path: File.join(@path, method_name.to_s) })
+                    end
+                end
+            end
+            # special methods
+            # returns current working path for obfs
+            def _path
+                @path
+            end
+            # returns directory contents in an array
+            def _index
+                Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
+            end
+            # searches directory contents (1 level) and returns up to `records` fuzzy-matching entry names, in the order Dir.entries yields them (results are not sorted by relevance)
+            def _find(term = '', records = 1000, tolerance = 50)
+                output = []
+                search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
+                search_space.each do |search_space_term|
+                    if OBFS::Levenshtein.distance(search_space_term, term) <= tolerance && OBFS::WhiteSimilarity.similarity(search_space_term, term) > 0.0
+                        output << search_space_term
+                    end
+                end
+                output.first(records)
+            end
+            # searches directory contents (1 level) and returns true if an entry named exactly `term` exists, false otherwise
+            def _exist(term = '')
+                exist_space = Dir.entries(@path).reject { |k| k != term.to_s || k == '.' || k == '..' }
+                if exist_space.length > 0
+                    true
+                else
+                    false
+                end
+            end
+            private
+            # filesystem R/W
+            def write(path, filename, data)
+                Thread.new {
+                    curr_path = File.join path, filename
+                    File.write(curr_path, JSON.unparse(data))
+                }
+            end
+            def read(path, filename)
+                curr_path = File.join path, filename
+                JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
+            end
+    end
+end

data/lib/main/white_similarity.rb ADDED

@@ -0,0 +1,65 @@
+#
+# Credits to threedaymonk
+# https://github.com/threedaymonk/text/blob/master/lib/text/white_similarity.rb
+#
+# encoding: utf-8
+# Original author: Wilker Lúcio <wilkerlucio@gmail.com>
+module OBFS
+  # Ruby implementation of the string similarity described by Simon White
+  # at: http://www.catalysoft.com/articles/StrikeAMatch.html
+  #
+  #                        2 * |pairs(s1) INTERSECT pairs(s2)|
+  #   similarity(s1, s2) = -----------------------------------
+  #                            |pairs(s1)| + |pairs(s2)|
+  #
+  # e.g.
+  #                                             2 * |{FR, NC}|
+  #   similarity(FRANCE, FRENCH) = ---------------------------------------
+  #                                |{FR,RA,AN,NC,CE}| + |{FR,RE,EN,NC,CH}|
+  #
+  #                              = (2 * 2) / (5 + 5)
+  #
+  #                              = 0.4
+  #
+  #   WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
+  #
+  class WhiteSimilarity
+    def self.similarity(str1, str2)
+      new.similarity(str1, str2)
+    end
+    def initialize
+      @word_letter_pairs = {}
+    end
+    def similarity(str1, str2)
+      pairs1 = word_letter_pairs(str1)
+      pairs2 = word_letter_pairs(str2).dup
+      union = pairs1.length + pairs2.length
+      intersection = 0
+      pairs1.each do |pair1|
+        if index = pairs2.index(pair1)
+          intersection += 1
+          pairs2.delete_at(index)
+        end
+      end
+      (2.0 * intersection) / union
+    end
+  private
+    def word_letter_pairs(str)
+      @word_letter_pairs[str] ||=
+        str.upcase.split(/\s+/).map{ |word|
+          (0 ... (word.length - 1)).map { |i| word[i, 2] }
+        }.flatten.freeze
+    end
+  end
+end

data/lib/obfs.rb CHANGED

@@ -1,107 +1,6 @@
-# dependencies
 require 'fileutils'
 require 'json'
-require 'text'
-# main
-class OBFS
-        def initialize(attributes = {}) # hash argument
-            @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
-        end
-        # regular methods
-        def method_missing(m, *args, &block)
-            # normalize
-            method_name = m.to_s
-            dataA = args[0]
-            dataB = args[1]
-            # setter call
-            if  method_name.end_with?('=')
-                # clean up name
-                method_name = method_name.gsub('=','')
-                # reassign if square bracket notation
-                if method_name == "[]"
-                    method_name = dataA
-                    data = dataB
-                else # make sure we load the proper method_name and data
-                    method_name = m.to_s.gsub('=','')
-                    data = args[0]
-                end
-                # write data
-                if data == nil
-                    FileUtils.rm_rf File.join @path, method_name
-                else
-                    FileUtils.rm_rf @path, method_name if File.exist? File.join @path, method_name
-                    FileUtils.mkpath @path if !File.directory? @path
-                    write(@path, method_name, data)
-                end
-            # bracket notation
-            elsif method_name == "[]"
-                method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
-                if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
-                    read(@path, method_name)
-                else
-                    OBFS.new({ path: File.join(@path, method_name.to_s) })
-                end
-            # recurse or read
-            else
-                if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
-                    read(@path, method_name)
-                else
-                    OBFS.new({ path: File.join(@path, method_name.to_s) })
-                end
-            end
-        end
-        # special methods
-        # returns current working path for obfs
-        def _path
-            @path
-        end
-        # returns directory contents in an array
-        def _index
-            Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
-        end
-        # searches directory contents (1 level) and returns array sorted by relevance
-        def _find(term = '', records = 10, tolerance = 10)
-            output = []
-            search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
-            search_space.each do |search_space_term|
-                if Text::Levenshtein.distance(search_space_term, term) <= tolerance && Text::WhiteSimilarity.similarity(search_space_term, term) > 0.0
-                    output << search_space_term
-                end
-            end
-            output.first(records)
-        end
-        private
-        # filesystem R/W
-        def write(path, filename, data)
-            curr_path = File.join path, filename
-            File.write(curr_path, JSON.unparse(data))
-        end
-        def read(path, filename)
-            curr_path = File.join path, filename
-            JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
-        end
-end
+require 'set'
+require 'main/store'
+require 'main/levenshtein'
+require 'main/white_similarity'

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: obfs
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.1.0
 platform: ruby
 authors:
 - Jensel Gatchalian
@@ -16,8 +16,11 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- lib/main/levenshtein.rb
+- lib/main/store.rb
+- lib/main/white_similarity.rb
 - lib/obfs.rb
-homepage: https://rubygems.org/gems/obfs
+homepage: https://github.com/jenselg/obfs-ruby
 licenses:
 - MIT
 metadata: {}
@@ -36,7 +39,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.3
+rubygems_version: 3.0.8
 signing_key:
 specification_version: 4
 summary: OBFS