obfs 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b42095eefc2563cbea6e67ccbb0a2273c3113e633789c888cc40206dcdc4641
4
- data.tar.gz: 5f9826b223472531a9ab6667bb391499cf12914fce800a055198a156d5695f83
3
+ metadata.gz: c1e900423f8d6d0f9a467d611d6a4af2bb370b6e68d86aa416be37facabe3d32
4
+ data.tar.gz: 0e675fcbf20eef86fcc09615e66d745026fd3945a3437787e5a96cc183ca705c
5
5
  SHA512:
6
- metadata.gz: af4f5afd4443b96dca83c15f9c5b863899b1a9b1ee55f12d86c45b00af1ad285e3b0b3d9af540aacdb3a2e4aeb22a11ea71102806b5128bedef19f8a602e7280
7
- data.tar.gz: 279e2596cb0a2276fdc7f8dfb4b69af49355bd309ec4404ef14010c19d18ee911f8539e09535a8f66773175198ef354f468c959600ccac1802c0c85e652470ae
6
+ metadata.gz: 03650e01c5346d2206c9d3805f83ad76496dcefc31ea0354a5432fddaf550f4c9580768231ea0fa47e35038abeb90cef2e054d242c37d1688d1c5760a7c0f779
7
+ data.tar.gz: 4d7eec2c13212cf38c7a7974c122b2280a6867d7bc5672a10544d2a85d2e072c1a06fe22296408b4a1f51e2c98e790f11602d330078fc8b3098392930a289d22
@@ -0,0 +1,162 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb
4
+ #
5
+ # Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
6
+ #
7
+ # The Levenshtein distance is a measure of how similar two strings s and t are,
8
+ # calculated as the number of deletions/insertions/substitutions needed to
9
+ # transform s into t. The greater the distance, the more the strings differ.
10
+ #
11
+ # The Levenshtein distance is also sometimes referred to as the
12
+ # easier-to-pronounce-and-spell 'edit distance'.
13
+ #
14
+ # Author: Paul Battley (pbattley@gmail.com)
15
+ #
16
+
17
+ module OBFS # :nodoc:
18
+ module Levenshtein
19
+
20
+ # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
21
+ #
22
+ # The optional argument max_distance can reduce the number of iterations by
23
+ # stopping if the Levenshtein distance exceeds this value. This increases
24
+ # performance where it is only necessary to compare the distance with a
25
+ # reference value instead of calculating the exact distance.
26
+ #
27
+ # The distance is calculated in terms of Unicode codepoints. Be aware that
28
+ # this algorithm does not perform normalisation: if there is a possibility
29
+ # of different normalised forms being used, normalisation should be performed
30
+ # beforehand.
31
+ #
32
+ def distance(str1, str2, max_distance = nil)
33
+ if max_distance
34
+ distance_with_maximum(str1, str2, max_distance)
35
+ else
36
+ distance_without_maximum(str1, str2)
37
+ end
38
+ end
39
+
40
+ private
41
+ def distance_with_maximum(str1, str2, max_distance) # :nodoc:
42
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
43
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
44
+
45
+ n = s.length
46
+ m = t.length
47
+ big_int = n * m
48
+
49
+ # Swap if necessary so that s is always the shorter of the two strings
50
+ s, t, n, m = t, s, m, n if m < n
51
+
52
+ # If the length difference is already greater than the max_distance, then
53
+ # there is nothing else to check
54
+ if (n - m).abs >= max_distance
55
+ return max_distance
56
+ end
57
+
58
+ return 0 if s == t
59
+ return m if n.zero?
60
+ return n if m.zero?
61
+
62
+ # The values necessary for our threshold are written; the ones after must
63
+ # be filled with large integers since the tailing member of the threshold
64
+ # window in the bottom array will run min across them
65
+ d = (m + 1).times.map { |i|
66
+ if i < m || i < max_distance + 1
67
+ i
68
+ else
69
+ big_int
70
+ end
71
+ }
72
+ x = nil
73
+ e = nil
74
+
75
+ n.times do |i|
76
+ # Since we're reusing arrays, we need to be sure to wipe the value left
77
+ # of the starting index; we don't have to worry about the value above the
78
+ # ending index as the arrays were initially filled with large integers
79
+ # and we progress to the right
80
+ if e.nil?
81
+ e = i + 1
82
+ else
83
+ e = big_int
84
+ end
85
+
86
+ diag_index = t.length - s.length + i
87
+
88
+ # If max_distance was specified, we can reduce the second loop. So we set
89
+ # up our threshold window.
90
+ # See:
91
+ # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences:
92
+ # computer science and computational biology.
93
+ # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
94
+ # pp. 263–264.
95
+ min = i - max_distance - 1
96
+ min = 0 if min < 0
97
+ max = i + max_distance
98
+ max = m - 1 if max > m - 1
99
+
100
+ min.upto(max) do |j|
101
+ # If the diagonal value is already greater than the max_distance
102
+ # then we can safely return: the diagonal will never go lower again.
103
+ # See: http://www.levenshtein.net/
104
+ if j == diag_index && d[j] >= max_distance
105
+ return max_distance
106
+ end
107
+
108
+ cost = s[i] == t[j] ? 0 : 1
109
+ insertion = d[j + 1] + 1
110
+ deletion = e + 1
111
+ substitution = d[j] + cost
112
+ x = insertion < deletion ? insertion : deletion
113
+ x = substitution if substitution < x
114
+
115
+ d[j] = e
116
+ e = x
117
+ end
118
+ d[m] = x
119
+ end
120
+
121
+ if x > max_distance
122
+ return max_distance
123
+ else
124
+ return x
125
+ end
126
+ end
127
+
128
+ def distance_without_maximum(str1, str2) # :nodoc:
129
+ s = str1.encode(Encoding::UTF_8).unpack("U*")
130
+ t = str2.encode(Encoding::UTF_8).unpack("U*")
131
+
132
+ n = s.length
133
+ m = t.length
134
+
135
+ return m if n.zero?
136
+ return n if m.zero?
137
+
138
+ d = (0..m).to_a
139
+ x = nil
140
+
141
+ n.times do |i|
142
+ e = i + 1
143
+ m.times do |j|
144
+ cost = s[i] == t[j] ? 0 : 1
145
+ insertion = d[j + 1] + 1
146
+ deletion = e + 1
147
+ substitution = d[j] + cost
148
+ x = insertion < deletion ? insertion : deletion
149
+ x = substitution if substitution < x
150
+
151
+ d[j] = e
152
+ e = x
153
+ end
154
+ d[m] = x
155
+ end
156
+
157
+ return x
158
+ end
159
+
160
+ extend self
161
+ end
162
+ end
@@ -0,0 +1,130 @@
1
+ module OBFS
2
+
3
+ class Store
4
+
5
+ def initialize(attributes = {}) # hash argument
6
+ @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
7
+ end
8
+
9
+ # regular methods
10
+
11
+ def method_missing(m, *args, &block)
12
+
13
+ # normalize
14
+ method_name = m.to_s
15
+ dataA = args[0]
16
+ dataB = args[1]
17
+
18
+ # prevent traversing out of dir
19
+ raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
20
+
21
+ # setter call
22
+ if method_name.end_with?('=')
23
+
24
+ # clean up name
25
+ method_name = method_name.gsub('=','')
26
+
27
+ # reassign if square bracket notation
28
+ if method_name == "[]"
29
+ method_name = dataA
30
+ data = dataB
31
+ else # make sure we load the proper method_name and data
32
+ method_name = m.to_s.gsub('=','')
33
+ data = args[0]
34
+ end
35
+
36
+ # prevent traversing out of dir
37
+ raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
38
+
39
+ # write data
40
+ if data == nil
41
+ FileUtils.rm_rf (File.join @path, method_name)
42
+ else
43
+ FileUtils.rm_rf (File.join @path, method_name) if File.exist? (File.join @path, method_name)
44
+ FileUtils.mkpath @path if !File.directory? @path
45
+ write(@path, method_name, data)
46
+ end
47
+
48
+ # bracket notation
49
+ elsif method_name == "[]"
50
+
51
+ method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
52
+
53
+ # prevent traversing out of dir
54
+ raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
55
+
56
+ if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
57
+ read(@path, method_name)
58
+ else
59
+ OBFS::Store.new({ path: File.join(@path, method_name.to_s) })
60
+ end
61
+
62
+ # recurse or read
63
+ else
64
+
65
+ # prevent traversing out of dir
66
+ raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
67
+
68
+ if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
69
+ read(@path, method_name)
70
+ else
71
+ OBFS::Store.new({ path: File.join(@path, method_name.to_s) })
72
+ end
73
+
74
+ end
75
+
76
+ end
77
+
78
+ # special methods
79
+
80
+ # returns current working path for obfs
81
+ def _path
82
+ @path
83
+ end
84
+
85
+ # returns directory contents in an array
86
+ def _index
87
+ Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
88
+ end
89
+
90
+ # searches directory contents (1 level) and returns the matching entry names (unsorted, in Dir.entries order), limited to the first `records` matches
91
+ def _find(term = '', records = 1000, tolerance = 50)
92
+ output = []
93
+ search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
94
+ search_space.each do |search_space_term|
95
+ if OBFS::Levenshtein.distance(search_space_term, term) <= tolerance && OBFS::WhiteSimilarity.similarity(search_space_term, term) > 0.0
96
+ output << search_space_term
97
+ end
98
+ end
99
+ output.first(records)
100
+ end
101
+
102
+ # searches directory contents (1 level) and returns true if an entry named term exists, false otherwise
103
+ def _exist(term = '')
104
+ exist_space = Dir.entries(@path).reject { |k| k != term.to_s || k == '.' || k == '..' }
105
+ if exist_space.length > 0
106
+ true
107
+ else
108
+ false
109
+ end
110
+ end
111
+
112
+ private
113
+
114
+ # filesystem R/W
115
+
116
+ def write(path, filename, data)
117
+ Thread.new {
118
+ curr_path = File.join path, filename
119
+ File.write(curr_path, JSON.unparse(data))
120
+ }
121
+ end
122
+
123
+ def read(path, filename)
124
+ curr_path = File.join path, filename
125
+ JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
126
+ end
127
+
128
+ end
129
+
130
+ end
@@ -0,0 +1,65 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/white_similarity.rb
4
+ #
5
+ # encoding: utf-8
6
+ # Original author: Wilker Lúcio <wilkerlucio@gmail.com>
7
+
8
+ module OBFS
9
+
10
+ # Ruby implementation of the string similarity described by Simon White
11
+ # at: http://www.catalysoft.com/articles/StrikeAMatch.html
12
+ #
13
+ # 2 * |pairs(s1) INTERSECT pairs(s2)|
14
+ # similarity(s1, s2) = -----------------------------------
15
+ # |pairs(s1)| + |pairs(s2)|
16
+ #
17
+ # e.g.
18
+ # 2 * |{FR, NC}|
19
+ # similarity(FRANCE, FRENCH) = ---------------------------------------
20
+ # |{FR,RA,AN,NC,CE}| + |{FR,RE,EN,NC,CH}|
21
+ #
22
+ # = (2 * 2) / (5 + 5)
23
+ #
24
+ # = 0.4
25
+ #
26
+ # WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
27
+ #
28
+ class WhiteSimilarity
29
+
30
+ def self.similarity(str1, str2)
31
+ new.similarity(str1, str2)
32
+ end
33
+
34
+ def initialize
35
+ @word_letter_pairs = {}
36
+ end
37
+
38
+ def similarity(str1, str2)
39
+ pairs1 = word_letter_pairs(str1)
40
+ pairs2 = word_letter_pairs(str2).dup
41
+
42
+ union = pairs1.length + pairs2.length
43
+
44
+ intersection = 0
45
+ pairs1.each do |pair1|
46
+ if index = pairs2.index(pair1)
47
+ intersection += 1
48
+ pairs2.delete_at(index)
49
+ end
50
+ end
51
+
52
+ (2.0 * intersection) / union
53
+ end
54
+
55
+ private
56
+
57
+ def word_letter_pairs(str)
58
+ @word_letter_pairs[str] ||=
59
+ str.upcase.split(/\s+/).map{ |word|
60
+ (0 ... (word.length - 1)).map { |i| word[i, 2] }
61
+ }.flatten.freeze
62
+ end
63
+
64
+ end
65
+ end
@@ -1,132 +1,6 @@
1
- # dependencies
2
1
  require 'fileutils'
3
2
  require 'json'
4
- require 'text'
5
-
6
- # main
7
- class OBFS
8
-
9
- def initialize(attributes = {}) # hash argument
10
- @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
11
- end
12
-
13
- # regular methods
14
-
15
- def method_missing(m, *args, &block)
16
-
17
- # normalize
18
- method_name = m.to_s
19
- dataA = args[0]
20
- dataB = args[1]
21
-
22
- # prevent traversing out of dir
23
- raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
24
-
25
- # setter call
26
- if method_name.end_with?('=')
27
-
28
- # clean up name
29
- method_name = method_name.gsub('=','')
30
-
31
- # reassign if square bracket notation
32
- if method_name == "[]"
33
- method_name = dataA
34
- data = dataB
35
- else # make sure we load the proper method_name and data
36
- method_name = m.to_s.gsub('=','')
37
- data = args[0]
38
- end
39
-
40
- # prevent traversing out of dir
41
- raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
42
-
43
- # write data
44
- if data == nil
45
- FileUtils.rm_rf (File.join @path, method_name)
46
- else
47
- FileUtils.rm_rf (File.join @path, method_name) if File.exist? (File.join @path, method_name)
48
- FileUtils.mkpath @path if !File.directory? @path
49
- write(@path, method_name, data)
50
- end
51
-
52
- # bracket notation
53
- elsif method_name == "[]"
54
-
55
- method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
56
-
57
- # prevent traversing out of dir
58
- raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
59
-
60
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
61
- read(@path, method_name)
62
- else
63
- OBFS.new({ path: File.join(@path, method_name.to_s) })
64
- end
65
-
66
- # recurse or read
67
- else
68
-
69
- # prevent traversing out of dir
70
- raise "traversal through . and .. not allowed" if ['.', '..'].include? method_name
71
-
72
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
73
- read(@path, method_name)
74
- else
75
- OBFS.new({ path: File.join(@path, method_name.to_s) })
76
- end
77
-
78
- end
79
-
80
- end
81
-
82
- # special methods
83
-
84
- # returns current working path for obfs
85
- def _path
86
- @path
87
- end
88
-
89
- # returns directory contents in an array
90
- def _index
91
- Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
92
- end
93
-
94
- # searches directory contents (1 level) and returns array sorted by relevance
95
- def _find(term = '', records = 1000, tolerance = 50)
96
- output = []
97
- search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
98
- search_space.each do |search_space_term|
99
- if Text::Levenshtein.distance(search_space_term, term) <= tolerance && Text::WhiteSimilarity.similarity(search_space_term, term) > 0.0
100
- output << search_space_term
101
- end
102
- end
103
- output.first(records)
104
- end
105
-
106
- # searches directory contents (1 level) and returns boolean if term exist
107
- def _exist(term = '')
108
- exist_space = Dir.entries(@path).reject { |k| k != term.to_s || k == '.' || k == '..' }
109
- if exist_space.length > 0
110
- true
111
- else
112
- false
113
- end
114
- end
115
-
116
- private
117
-
118
- # filesystem R/W
119
-
120
- def write(path, filename, data)
121
- Thread.new {
122
- curr_path = File.join path, filename
123
- File.write(curr_path, JSON.unparse(data))
124
- }
125
- end
126
-
127
- def read(path, filename)
128
- curr_path = File.join path, filename
129
- JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
130
- end
131
-
132
- end
3
+ require 'set'
4
+ require 'main/store'
5
+ require 'main/levenshtein'
6
+ require 'main/white_similarity'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jensel Gatchalian
@@ -9,27 +9,16 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2021-01-16 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: text
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
12
+ dependencies: []
27
13
  description: File-based, object-oriented data store for Ruby
28
14
  email: jensel.gatchalian@gmail.com
29
15
  executables: []
30
16
  extensions: []
31
17
  extra_rdoc_files: []
32
18
  files:
19
+ - lib/main/levenshtein.rb
20
+ - lib/main/store.rb
21
+ - lib/main/white_similarity.rb
33
22
  - lib/obfs.rb
34
23
  homepage: https://github.com/jenselg/obfs-ruby
35
24
  licenses: