obfs 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 60ad552589248d9ac27f0560e71ad3ddd4dfd32407c50fc4c10ee9732b17945a
4
- data.tar.gz: 89bfcf071f49f51bc9ad25768c4fdc9e3e7b1c67a9305c7f6aaa358859798ed4
3
+ metadata.gz: c1e900423f8d6d0f9a467d611d6a4af2bb370b6e68d86aa416be37facabe3d32
4
+ data.tar.gz: 0e675fcbf20eef86fcc09615e66d745026fd3945a3437787e5a96cc183ca705c
5
5
  SHA512:
6
- metadata.gz: 64ec76f87128169a5cad3d118dd8b13bf08a4fe2841e7b0b312fbd1f93becbf00e652a8da21466ed85c14e9f42523c723bca51e429b7cef28fb254f5073f79ef
7
- data.tar.gz: 3389becdd8b5533ee7883b95907071bcaa77c5a2e9a54a7a7bbd89da7a758a4819ec525691dd244905c5f7fcb2443682a4a641a29ac8b1d891b370fc6b3c99cf
6
+ metadata.gz: 03650e01c5346d2206c9d3805f83ad76496dcefc31ea0354a5432fddaf550f4c9580768231ea0fa47e35038abeb90cef2e054d242c37d1688d1c5760a7c0f779
7
+ data.tar.gz: 4d7eec2c13212cf38c7a7974c122b2280a6867d7bc5672a10544d2a85d2e072c1a06fe22296408b4a1f51e2c98e790f11602d330078fc8b3098392930a289d22
@@ -0,0 +1,162 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb
4
+ #
5
+ # Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
6
+ #
7
+ # The Levenshtein distance is a measure of how similar two strings s and t are,
8
+ # calculated as the number of deletions/insertions/substitutions needed to
9
+ # transform s into t. The greater the distance, the more the strings differ.
10
+ #
11
+ # The Levenshtein distance is also sometimes referred to as the
12
+ # easier-to-pronounce-and-spell 'edit distance'.
13
+ #
14
+ # Author: Paul Battley (pbattley@gmail.com)
15
+ #
16
+
17
module OBFS # :nodoc:
  # Levenshtein (edit) distance between two strings, measured in Unicode
  # codepoints. No normalisation is performed; normalise beforehand if the
  # inputs may use different normalised forms.
  module Levenshtein

    # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
    #
    # The optional argument +max_distance+ can reduce the number of iterations
    # by stopping once the distance is known to reach this value. In that mode
    # the result is capped: any true distance greater than or equal to
    # +max_distance+ is reported as +max_distance+.
    #
    # Both arguments must respond to +encode+ (they are transcoded to UTF-8
    # and compared codepoint by codepoint).
    #
    # Returns the distance as an Integer (or +max_distance+ itself when the
    # cap is hit).
    def distance(str1, str2, max_distance = nil)
      if max_distance
        distance_with_maximum(str1, str2, max_distance)
      else
        distance_without_maximum(str1, str2)
      end
    end

    private

    # Banded variant of the distance computation: only cells close enough to
    # the diagonal to possibly stay within +max_distance+ are evaluated.
    def distance_with_maximum(str1, str2, max_distance) # :nodoc:
      s = str1.encode(Encoding::UTF_8).unpack("U*")
      t = str2.encode(Encoding::UTF_8).unpack("U*")

      n = s.length
      m = t.length
      # Stand-in for "infinity": never smaller than any real distance here.
      big_int = n * m

      # Swap if necessary so that s is always the shorter of the two strings
      s, t, n, m = t, s, m, n if m < n

      # If the length difference is already greater than the max_distance, then
      # there is nothing else to check
      return max_distance if (n - m).abs >= max_distance

      return 0 if s == t
      return m if n.zero?
      return n if m.zero?

      # The values necessary for our threshold are written; the ones after must
      # be filled with large integers since the trailing member of the threshold
      # window in the bottom array will run min across them
      d = (m + 1).times.map { |i| i < m || i < max_distance + 1 ? i : big_int }
      x = nil

      n.times do |i|
        # If max_distance was specified, we can reduce second loop. So we set
        # up our threshold window.
        # See:
        # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences:
        # computer science and computational biology.
        # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
        # pp. 263-264.
        window_start = i - max_distance - 1
        window_start = 0 if window_start < 0
        window_end = i + max_distance
        window_end = m - 1 if window_end > m - 1

        # e is the cell immediately left of the first cell computed in this
        # row. When the window starts at column 0 that cell is the first
        # column of the matrix, whose real value is i + 1 (delete the first
        # i + 1 characters of s). Only when the window starts further right is
        # the left neighbour outside the band and safely "infinite".
        # (Seeding every row after the first with big_int over-counted
        # alignments that delete several leading characters of s.)
        e = window_start.zero? ? i + 1 : big_int

        diag_index = m - n + i

        window_start.upto(window_end) do |j|
          # If the diagonal value is already greater than the max_distance
          # then we can safely return: the diagonal will never go lower again.
          # See: http://www.levenshtein.net/
          return max_distance if j == diag_index && d[j] >= max_distance

          cost = s[i] == t[j] ? 0 : 1
          insertion = d[j + 1] + 1
          deletion = e + 1
          substitution = d[j] + cost
          x = insertion < deletion ? insertion : deletion
          x = substitution if substitution < x

          # Reuse the single row: store the new row's previous cell, then
          # carry the freshly computed cell forward.
          d[j] = e
          e = x
        end
        d[m] = x
      end

      x > max_distance ? max_distance : x
    end

    # Plain single-row dynamic-programming distance with no cut-off.
    def distance_without_maximum(str1, str2) # :nodoc:
      s = str1.encode(Encoding::UTF_8).unpack("U*")
      t = str2.encode(Encoding::UTF_8).unpack("U*")

      n = s.length
      m = t.length

      return m if n.zero?
      return n if m.zero?

      # d holds the previous row of the distance matrix; it is overwritten in
      # place while the current row is computed.
      d = (0..m).to_a
      x = nil

      n.times do |i|
        e = i + 1
        m.times do |j|
          cost = s[i] == t[j] ? 0 : 1
          insertion = d[j + 1] + 1
          deletion = e + 1
          substitution = d[j] + cost
          x = insertion < deletion ? insertion : deletion
          x = substitution if substitution < x

          d[j] = e
          e = x
        end
        d[m] = x
      end

      x
    end

    extend self
  end
end
@@ -0,0 +1,130 @@
1
module OBFS

  # File-backed object store. Every attribute-style or bracket-style access
  # on a Store maps to an entry named after the key inside the store's
  # directory:
  #
  #   store.key = value / store['key'] = value  -> writes value as JSON
  #   store.key = nil                           -> removes the entry
  #   store.key / store['key']                  -> parsed contents when the
  #                                                entry is a file, otherwise
  #                                                a nested Store for that path
  class Store

    TRAVERSAL_ERROR = "traversal through . and .. not allowed"

    # attributes - Hash; when it has a :path key that value is used as the
    #              store directory, otherwise ~/.obfs is used.
    def initialize(attributes = {}) # hash argument
      @path = attributes.key?(:path) ? attributes[:path] : File.join(Dir.home, '.obfs')
    end

    # regular methods

    # Dispatches setters (name ending in "="), bracket reads ("[]") and
    # plain reads to the filesystem.
    #
    # Raises RuntimeError when the key is "." / ".." or would resolve to a
    # location outside the store directory.
    def method_missing(m, *args, &block)
      method_name = m.to_s

      if method_name.end_with?('=')
        target = method_name.gsub('=', '')
        if target == '[]'
          # store[key] = data; the key may be a Symbol, so normalise it
          store_entry(args[0].to_s, args[1])
        else
          # store.key = data
          store_entry(target, args[0])
        end
      elsif method_name == '[]'
        fetch_entry(args[0].to_s.gsub(/\["/, '').gsub(/"\]/, ''))
      else
        fetch_entry(method_name)
      end
    end

    # special methods

    # returns current working path for obfs
    def _path
      @path
    end

    # returns directory contents in an array, or nil when the directory
    # cannot be listed (e.g. it does not exist yet)
    def _index
      begin
        Dir.entries(@path).reject { |k| k == '.' || k == '..' }
      rescue StandardError
        nil
      end
    end

    # searches directory contents (1 level) and returns up to +records+ entry
    # names whose Levenshtein distance to +term+ is at most +tolerance+ and
    # whose White similarity to +term+ is positive. Matches are returned in
    # directory listing order; no relevance sorting is applied.
    def _find(term = '', records = 1000, tolerance = 50)
      matches = (_index || []).select do |candidate|
        OBFS::Levenshtein.distance(candidate, term) <= tolerance &&
          OBFS::WhiteSimilarity.similarity(candidate, term) > 0.0
      end
      matches.first(records)
    end

    # searches directory contents (1 level) and returns boolean if term exist;
    # a store whose directory cannot be listed contains nothing, so the
    # answer is false rather than an exception
    def _exist(term = '')
      (_index || []).include?(term.to_s)
    end

    private

    # Returns the on-disk path for +name+ inside this store, raising when
    # the name is "." / ".." or resolves outside the store directory (for
    # example "../other"). Nested names such as "a/b" remain allowed.
    def guarded_path(name)
      raise TRAVERSAL_ERROR if ['.', '..'].include?(name)

      root = File.expand_path(@path)
      candidate = File.expand_path(File.join(@path, name))
      prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR
      raise TRAVERSAL_ERROR unless candidate == root || candidate.start_with?(prefix)

      File.join(@path, name)
    end

    # Replaces the entry +name+ with +data+; nil data just removes the entry.
    def store_entry(name, data)
      target = guarded_path(name)
      # Clear whatever is there first (a file, or a nested store directory).
      FileUtils.rm_rf(target)
      return if data.nil?

      FileUtils.mkpath(@path) unless File.directory?(@path)
      write(@path, name, data)
    end

    # Returns the parsed contents of entry +name+ when it exists and is not a
    # directory; otherwise returns a nested Store rooted at that entry.
    def fetch_entry(name)
      target = guarded_path(name)
      if !File.directory?(target) && File.exist?(target)
        read(@path, name)
      else
        OBFS::Store.new({ path: target })
      end
    end

    # filesystem R/W

    # Serialises +data+ as JSON into path/filename. The write completes
    # before returning so that an immediate read sees the new value and
    # nothing is lost if the process exits right after the assignment.
    def write(path, filename, data)
      File.write(File.join(path, filename), JSON.generate(data))
    end

    # Returns the JSON-decoded contents of path/filename, falling back to the
    # raw file contents when they cannot be parsed as JSON.
    def read(path, filename)
      raw = File.read(File.join(path, filename))
      begin
        JSON.parse(raw)
      rescue StandardError
        # best effort: hand back the raw text for non-JSON files
        raw
      end
    end

  end

end
@@ -0,0 +1,65 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/white_similarity.rb
4
+ #
5
+ # encoding: utf-8
6
+ # Original author: Wilker Lúcio <wilkerlucio@gmail.com>
7
+
8
module OBFS

  # Ruby implementation of the string similarity described by Simon White
  # at: http://www.catalysoft.com/articles/StrikeAMatch.html
  #
  #                        2 * |pairs(s1) INTERSECT pairs(s2)|
  #   similarity(s1, s2) = -----------------------------------
  #                            |pairs(s1)| + |pairs(s2)|
  #
  # e.g.
  #                                             2 * |{FR, NC}|
  #   similarity(FRANCE, FRENCH) = ---------------------------------------
  #                                |{FR,RA,AN,NC,CE}| + |{FR,RE,EN,NC,CH}|
  #
  #                              = (2 * 2) / (5 + 5)
  #
  #                              = 0.4
  #
  #   WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
  #
  # Comparison is case-insensitive (inputs are upcased) and pairs never span
  # whitespace.
  class WhiteSimilarity

    # Convenience wrapper: builds a throwaway instance and compares once.
    def self.similarity(str1, str2)
      new.similarity(str1, str2)
    end

    def initialize
      # Per-instance cache: input string => frozen array of its letter pairs.
      @word_letter_pairs = {}
    end

    # Returns a Float between 0.0 (no shared letter pairs) and 1.0.
    #
    # When neither string contains a letter pair (empty or single-character
    # words only) the formula would divide zero by zero; in that case the
    # result is 1.0 if both strings consist of the same upcased words and 0.0
    # otherwise, rather than NaN.
    def similarity(str1, str2)
      pairs1 = word_letter_pairs(str1)
      pairs2 = word_letter_pairs(str2)

      total = pairs1.length + pairs2.length
      if total.zero?
        return str1.upcase.split(/\s+/) == str2.upcase.split(/\s+/) ? 1.0 : 0.0
      end

      # Multiset intersection: each pair in the second string can be matched
      # at most as many times as it occurs there.
      remaining = Hash.new(0)
      pairs2.each { |pair| remaining[pair] += 1 }

      shared = pairs1.count do |pair|
        next false unless remaining[pair] > 0

        remaining[pair] -= 1
        true
      end

      (2.0 * shared) / total
    end

    private

    # Adjacent two-character pairs of every whitespace-separated word in
    # +str+, upcased; memoised per input string.
    def word_letter_pairs(str)
      @word_letter_pairs[str] ||=
        str.upcase.split(/\s+/).flat_map { |word|
          (0...(word.length - 1)).map { |i| word[i, 2] }
        }.freeze
    end

  end
end
@@ -1,107 +1,6 @@
1
- # dependencies
2
1
  require 'fileutils'
3
2
  require 'json'
4
- require 'text'
5
-
6
- # main
7
- class OBFS
8
-
9
- def initialize(attributes = {}) # hash argument
10
- @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
11
- end
12
-
13
- # regular methods
14
-
15
- def method_missing(m, *args, &block)
16
-
17
- # normalize
18
- method_name = m.to_s
19
- dataA = args[0]
20
- dataB = args[1]
21
-
22
- # setter call
23
- if method_name.end_with?('=')
24
-
25
- # clean up name
26
- method_name = method_name.gsub('=','')
27
-
28
- # reassign if square bracket notation
29
- if method_name == "[]"
30
- method_name = dataA
31
- data = dataB
32
- else # make sure we load the proper method_name and data
33
- method_name = m.to_s.gsub('=','')
34
- data = args[0]
35
- end
36
-
37
- # write data
38
- if data == nil
39
- FileUtils.rm_rf File.join @path, method_name
40
- else
41
- FileUtils.rm_rf @path, method_name if File.exist? File.join @path, method_name
42
- FileUtils.mkpath @path if !File.directory? @path
43
- write(@path, method_name, data)
44
- end
45
-
46
- # bracket notation
47
- elsif method_name == "[]"
48
-
49
- method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
50
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
51
- read(@path, method_name)
52
- else
53
- OBFS.new({ path: File.join(@path, method_name.to_s) })
54
- end
55
-
56
- # recurse or read
57
- else
58
-
59
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
60
- read(@path, method_name)
61
- else
62
- OBFS.new({ path: File.join(@path, method_name.to_s) })
63
- end
64
-
65
- end
66
-
67
- end
68
-
69
- # special methods
70
-
71
- # returns current working path for obfs
72
- def _path
73
- @path
74
- end
75
-
76
- # returns directory contents in an array
77
- def _index
78
- Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
79
- end
80
-
81
- # searches directory contents (1 level) and returns array sorted by relevance
82
- def _find(term = '', records = 10, tolerance = 10)
83
- output = []
84
- search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
85
- search_space.each do |search_space_term|
86
- if Text::Levenshtein.distance(search_space_term, term) <= tolerance && Text::WhiteSimilarity.similarity(search_space_term, term) > 0.0
87
- output << search_space_term
88
- end
89
- end
90
- output.first(records)
91
- end
92
-
93
- private
94
-
95
- # filesystem R/W
96
-
97
- def write(path, filename, data)
98
- curr_path = File.join path, filename
99
- File.write(curr_path, JSON.unparse(data))
100
- end
101
-
102
- def read(path, filename)
103
- curr_path = File.join path, filename
104
- JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
105
- end
106
-
107
- end
3
+ require 'set'
4
+ require 'main/store'
5
+ require 'main/levenshtein'
6
+ require 'main/white_similarity'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jensel Gatchalian
@@ -16,8 +16,11 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
+ - lib/main/levenshtein.rb
20
+ - lib/main/store.rb
21
+ - lib/main/white_similarity.rb
19
22
  - lib/obfs.rb
20
- homepage: https://rubygems.org/gems/obfs
23
+ homepage: https://github.com/jenselg/obfs-ruby
21
24
  licenses:
22
25
  - MIT
23
26
  metadata: {}
@@ -36,7 +39,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
36
39
  - !ruby/object:Gem::Version
37
40
  version: '0'
38
41
  requirements: []
39
- rubygems_version: 3.2.3
42
+ rubygems_version: 3.0.8
40
43
  signing_key:
41
44
  specification_version: 4
42
45
  summary: OBFS