obfs 0.0.2 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/obfs.rb +7 -115
- data/lib/obfs/store.rb +132 -0
- data/lib/text/levenshtein.rb +166 -0
- data/lib/text/white_similarity.rb +66 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 704cf6c36b527d888d2a755f3b41f56c94a9f121543e8337e7d9b8afb28f32b0
|
4
|
+
data.tar.gz: 4a9ed29dd0f313ca5ced039ef1d6900e02d9cede067e5d537596be532a23c57f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2eddd3da38dc73e10715018286734ce0170b17ae73b6e459aa9faa9ca1ac94e27e4a3420423ff3b3a47fc84307d0f1f3caac2ad673bda405968cfe379f97f81
|
7
|
+
data.tar.gz: 3f2a4e93c277de2b09e449d3a2b05a5676e854ef511d5157cbbd908cec27308714e2ca5a685f9234aa21d648ccf31474396d15a063a780aed8039dded6df7644
|
data/lib/obfs.rb
CHANGED
@@ -1,119 +1,11 @@
|
|
1
|
-
#
|
1
|
+
# core
|
2
2
|
require 'fileutils'
|
3
3
|
require 'json'
|
4
|
-
require '
|
4
|
+
require 'set'
|
5
5
|
|
6
|
-
#
|
7
|
-
|
6
|
+
# obfs
|
7
|
+
require 'obfs/store'
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# regular methods
|
14
|
-
|
15
|
-
def method_missing(m, *args, &block)
|
16
|
-
|
17
|
-
# normalize
|
18
|
-
method_name = m.to_s
|
19
|
-
dataA = args[0]
|
20
|
-
dataB = args[1]
|
21
|
-
|
22
|
-
# setter call
|
23
|
-
if method_name.end_with?('=')
|
24
|
-
|
25
|
-
# clean up name
|
26
|
-
method_name = method_name.gsub('=','')
|
27
|
-
|
28
|
-
# reassign if square bracket notation
|
29
|
-
if method_name == "[]"
|
30
|
-
method_name = dataA
|
31
|
-
data = dataB
|
32
|
-
else # make sure we load the proper method_name and data
|
33
|
-
method_name = m.to_s.gsub('=','')
|
34
|
-
data = args[0]
|
35
|
-
end
|
36
|
-
|
37
|
-
# write data
|
38
|
-
if data == nil
|
39
|
-
FileUtils.rm_rf (File.join @path, method_name)
|
40
|
-
else
|
41
|
-
FileUtils.rm_rf (File.join @path, method_name) if File.exist? (File.join @path, method_name)
|
42
|
-
FileUtils.mkpath @path if !File.directory? @path
|
43
|
-
write(@path, method_name, data)
|
44
|
-
end
|
45
|
-
|
46
|
-
# bracket notation
|
47
|
-
elsif method_name == "[]"
|
48
|
-
|
49
|
-
method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
|
50
|
-
if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
|
51
|
-
read(@path, method_name)
|
52
|
-
else
|
53
|
-
OBFS.new({ path: File.join(@path, method_name.to_s) })
|
54
|
-
end
|
55
|
-
|
56
|
-
# recurse or read
|
57
|
-
else
|
58
|
-
|
59
|
-
if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
|
60
|
-
read(@path, method_name)
|
61
|
-
else
|
62
|
-
OBFS.new({ path: File.join(@path, method_name.to_s) })
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
66
|
-
|
67
|
-
end
|
68
|
-
|
69
|
-
# special methods
|
70
|
-
|
71
|
-
# returns current working path for obfs
|
72
|
-
def _path
|
73
|
-
@path
|
74
|
-
end
|
75
|
-
|
76
|
-
# returns directory contents in an array
|
77
|
-
def _index
|
78
|
-
Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
|
79
|
-
end
|
80
|
-
|
81
|
-
# searches directory contents (1 level) and returns array sorted by relevance
|
82
|
-
def _find(term = '', records = 1000, tolerance = 10)
|
83
|
-
output = []
|
84
|
-
search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
|
85
|
-
search_space.each do |search_space_term|
|
86
|
-
if Text::Levenshtein.distance(search_space_term, term) <= tolerance && Text::WhiteSimilarity.similarity(search_space_term, term) > 0.0
|
87
|
-
output << search_space_term
|
88
|
-
end
|
89
|
-
end
|
90
|
-
output.first(records)
|
91
|
-
end
|
92
|
-
|
93
|
-
# searches directory contents (1 level) and returns boolean if term exist
|
94
|
-
def _exist(term = '')
|
95
|
-
exist_space = Dir.entries(@path).reject { |k| k != term.to_s }
|
96
|
-
if exist_space.length > 0
|
97
|
-
true
|
98
|
-
else
|
99
|
-
false
|
100
|
-
end
|
101
|
-
end
|
102
|
-
|
103
|
-
private
|
104
|
-
|
105
|
-
# filesystem R/W
|
106
|
-
|
107
|
-
def write(path, filename, data)
|
108
|
-
Thread.new {
|
109
|
-
curr_path = File.join path, filename
|
110
|
-
File.write(curr_path, JSON.unparse(data))
|
111
|
-
}
|
112
|
-
end
|
113
|
-
|
114
|
-
def read(path, filename)
|
115
|
-
curr_path = File.join path, filename
|
116
|
-
JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
|
117
|
-
end
|
118
|
-
|
119
|
-
end
|
9
|
+
# third party
|
10
|
+
require 'text/levenshtein'
|
11
|
+
require 'text/white_similarity'
|
data/lib/obfs/store.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
module OBFS

  # Filesystem-backed object store.
  #
  # Every key maps to an entry below the store's root path: a regular file
  # holds a JSON-encoded value, anything else (including a missing entry) is
  # exposed as a nested Store rooted at that entry.
  #
  #   store = OBFS::Store.new(path: '/tmp/db')
  #   store.users['alice'] = { 'age' => 30 }  # writes /tmp/db/users/alice
  #   store.users['alice']                    # => {"age"=>30}
  #   store.users['alice'] = nil              # removes the entry
  #
  # Keys are reached through method_missing, so `store.key`, `store.key = v`,
  # `store[key]` and `store[key] = v` all work. Methods prefixed with an
  # underscore (_path, _index, _find, _exist) are the store's own API.
  class Store

    # Separators that can split a key into several path segments
    # (File::ALT_SEPARATOR is nil on platforms without one).
    KEY_SEPARATORS = Regexp.union([File::SEPARATOR, File::ALT_SEPARATOR].compact).freeze

    # attributes - Hash; :path selects the root directory of the store.
    #              Without :path the store lives in ~/.obfs.
    def initialize(attributes = {}) # hash argument
      @path = attributes.key?(:path) ? attributes[:path] : File.join(Dir.home, '.obfs')
    end

    # regular methods

    # Dispatches key access.
    #
    #   store.key = data / store[key] = data  -> writes data (nil deletes the entry)
    #   store[key] / store.key                -> parsed file content, or a nested Store
    #
    # Raises RuntimeError when any segment of the key is "." or "..".
    # Raises ArgumentError when assigning to an empty key.
    def method_missing(m, *args, &block)
      method_name = m.to_s

      if method_name.end_with?('=')
        stripped = method_name.delete('=')
        if stripped == '[]'
          # store[key] = data; to_s lets Symbol/Integer keys through and makes
          # sure the traversal check below sees a String.
          key = args[0].to_s
          data = args[1]
        else
          # store.key = data
          key = stripped
          data = args[0]
        end
        assign(checked_key(key), data)
      elsif method_name == '[]'
        # legacy: literal `["` and `"]` fragments are stripped from bracket keys
        lookup(checked_key(args[0].to_s.gsub(/\["/, '').gsub(/"\]/, '')))
      else
        lookup(checked_key(method_name))
      end
    end

    # special methods

    # returns current working path for obfs
    def _path
      @path
    end

    # returns directory contents in an array, or nil when the directory
    # cannot be listed (e.g. it does not exist yet)
    def _index
      Dir.entries(@path) - %w[. ..]
    rescue StandardError # deliberate best effort, same as the previous inline rescue
      nil
    end

    # Searches directory contents (1 level) and returns the matching entry
    # names, most relevant first.
    #
    # term      - text to look for
    # records   - maximum number of names returned
    # tolerance - maximum Levenshtein distance between an entry name and term
    #
    # An entry matches when its distance is within tolerance and it shares at
    # least one letter pair with term. Matches are ordered by descending
    # similarity, then ascending distance, then name.
    def _find(term = '', records = 1000, tolerance = 50)
      needle = term.to_s
      matches = []

      entry_names.each do |name|
        distance = OBFS::Levenshtein.distance(name, needle)
        next if distance > tolerance

        similarity = OBFS::WhiteSimilarity.similarity(name, needle)
        # `> 0.0` is also false for NaN, so undefined similarities never match
        next unless similarity > 0.0

        matches << [name, similarity, distance]
      end

      matches.sort_by { |name, similarity, distance| [-similarity, distance, name] }
             .first(records)
             .map(&:first)
    end

    # searches directory contents (1 level) and returns boolean if term exist
    def _exist(term = '')
      entry_names.include?(term.to_s)
    end

    private

    # Returns key unchanged, or raises when one of its path segments would
    # step out of (or stay on) the current directory. Checking segments rather
    # than the whole key also catches keys such as "../secret" or "a/../../b".
    def checked_key(key)
      if key.split(KEY_SEPARATORS).any? { |segment| segment == '.' || segment == '..' }
        raise "traversal through . and .. not allowed"
      end

      key
    end

    # Writes data under key, or removes the entry when data is nil.
    def assign(key, data)
      # An empty (or separator-only) key resolves to the store root itself;
      # replacing or deleting it would wipe the whole store.
      raise ArgumentError, 'key must not be empty' if key.split(KEY_SEPARATORS).all?(&:empty?)

      target = File.join(@path, key)
      if data.nil?
        FileUtils.rm_rf(target)
      else
        # a directory (nested store) at this key is replaced by the new value
        FileUtils.rm_rf(target) if File.exist?(target)
        FileUtils.mkpath(@path) unless File.directory?(@path)
        write(@path, key, data)
      end
    end

    # Returns the stored value for key when it is a file, otherwise a nested
    # Store rooted at the key (whether or not the directory exists yet).
    def lookup(key)
      target = File.join(@path, key)
      if File.exist?(target) && !File.directory?(target)
        read(@path, key)
      else
        OBFS::Store.new({ path: target })
      end
    end

    # Names of the entries directly below the store root; empty when the
    # directory cannot be listed.
    def entry_names
      _index || []
    end

    # filesystem R/W

    # Serialises data as JSON into path/filename.
    def write(path, filename, data)
      File.write(File.join(path, filename), JSON.generate(data))
    end

    # Returns the parsed JSON content of path/filename, or the raw text when
    # the file does not hold valid JSON.
    def read(path, filename)
      raw = File.read(File.join(path, filename)) # File.read closes the handle
      begin
        JSON.parse(raw)
      rescue JSON::JSONError, EncodingError
        raw
      end
    end

  end

end
|
data/lib/text/levenshtein.rb
ADDED
@@ -0,0 +1,166 @@
|
|
1
|
+
#
|
2
|
+
# Credits to threedaymonk
|
3
|
+
# https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb
|
4
|
+
#
|
5
|
+
# Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
|
6
|
+
#
|
7
|
+
# The Levenshtein distance is a measure of how similar two strings s and t are,
|
8
|
+
# calculated as the number of deletions/insertions/substitutions needed to
|
9
|
+
# transform s into t. The greater the distance, the more the strings differ.
|
10
|
+
#
|
11
|
+
# The Levenshtein distance is also sometimes referred to as the
|
12
|
+
# easier-to-pronounce-and-spell 'edit distance'.
|
13
|
+
#
|
14
|
+
# Author: Paul Battley (pbattley@gmail.com)
|
15
|
+
#
|
16
|
+
|
17
|
+
module OBFS # :nodoc:

  module Levenshtein

    # Returns the Levenshtein (edit) distance between +str1+ and +str2+,
    # counted in Unicode codepoints.
    #
    # When +max_distance+ is given the search is cut short once the distance
    # is known to reach that limit, and the result is capped at the limit.
    # Use it when the distance only has to be compared against a threshold.
    #
    # No Unicode normalisation is performed; normalise the inputs first if
    # they may use different normalised forms.
    def distance(str1, str2, max_distance = nil)
      return distance_without_maximum(str1, str2) unless max_distance

      distance_with_maximum(str1, str2, max_distance)
    end

    private

    def distance_with_maximum(str1, str2, max_distance) # :nodoc:
      shorter = str1.encode(Encoding::UTF_8).unpack("U*")
      longer = str2.encode(Encoding::UTF_8).unpack("U*")

      # Sentinel used for cells outside the threshold window.
      sentinel = shorter.length * longer.length

      # Make sure +shorter+ really is the shorter sequence.
      shorter, longer = longer, shorter if longer.length < shorter.length
      short_len = shorter.length
      long_len = longer.length

      # A length gap of max_distance or more already settles the answer.
      return max_distance if (short_len - long_len).abs >= max_distance

      return 0 if shorter == longer
      return long_len if short_len.zero?
      return short_len if long_len.zero?

      # Only the cells the threshold window can reach carry real values; the
      # rest hold the sentinel because the trailing edge of the window takes
      # minimums across them.
      row = Array.new(long_len + 1) do |col|
        (col < long_len || col < max_distance + 1) ? col : sentinel
      end
      cell = nil
      length_gap = long_len - short_len

      short_len.times do |i|
        # The row array is reused, so the value left of the window has to be
        # reset on every pass: 1 on the first pass, the sentinel afterwards.
        # NOTE(review): later rows start from the sentinel instead of i + 1 -
        # presumably intended by the windowing scheme; confirm for large
        # max_distance values.
        left = i.zero? ? 1 : sentinel

        diagonal = length_gap + i

        # Threshold window, see Gusfield, Dan (1997). Algorithms on strings,
        # trees, and sequences. Cambridge University Press. pp. 263-264.
        window_start = i - max_distance - 1
        window_start = 0 if window_start < 0
        window_end = i + max_distance
        window_end = long_len - 1 if window_end > long_len - 1

        window_start.upto(window_end) do |j|
          # Once the diagonal reaches the limit it never drops below it again.
          # See: http://www.levenshtein.net/
          return max_distance if j == diagonal && row[j] >= max_distance

          mismatch = shorter[i] == longer[j] ? 0 : 1
          cell = [row[j + 1] + 1, left + 1, row[j] + mismatch].min

          row[j] = left
          left = cell
        end
        row[long_len] = cell
      end

      cell > max_distance ? max_distance : cell
    end

    def distance_without_maximum(str1, str2) # :nodoc:
      source = str1.encode(Encoding::UTF_8).unpack("U*")
      target = str2.encode(Encoding::UTF_8).unpack("U*")

      return target.length if source.empty?
      return source.length if target.empty?

      width = target.length
      # Single rolling row of the edit-distance table.
      row = (0..width).to_a

      source.each_with_index do |source_char, i|
        left = i + 1
        target.each_with_index do |target_char, j|
          mismatch = source_char == target_char ? 0 : 1
          best = [row[j + 1] + 1, left + 1, row[j] + mismatch].min

          row[j] = left
          left = best
        end
        row[width] = left
      end

      row[width]
    end

    extend self

  end

end
|
data/lib/text/white_similarity.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#
|
2
|
+
# Credits to threedaymonk
|
3
|
+
# https://github.com/threedaymonk/text/blob/master/lib/text/white_similarity.rb
|
4
|
+
#
|
5
|
+
# encoding: utf-8
|
6
|
+
# Original author: Wilker Lúcio <wilkerlucio@gmail.com>
|
7
|
+
|
8
|
+
module OBFS

  # Ruby implementation of the string similarity described by Simon White
  # at: http://www.catalysoft.com/articles/StrikeAMatch.html
  #
  #                        2 * |pairs(s1) INTERSECT pairs(s2)|
  #   similarity(s1, s2) = -----------------------------------
  #                            |pairs(s1)| + |pairs(s2)|
  #
  # e.g.
  #                                              2 * |{FR, NC}|
  #   similarity(FRANCE, FRENCH) = ---------------------------------------
  #                                |{FR,RA,AN,NC,CE}| + |{FR,RE,EN,NC,CH}|
  #
  #                              = (2 * 2) / (5 + 5)
  #
  #                              = 0.4
  #
  #   WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
  #
  # Comparison is case-insensitive and pairs never span whitespace.
  class WhiteSimilarity

    # Convenience wrapper: compares two strings with a throw-away instance.
    def self.similarity(str1, str2)
      new.similarity(str1, str2)
    end

    def initialize
      # memo of letter pairs per input string, reused across comparisons
      @word_letter_pairs = {}
    end

    # Returns a Float between 0.0 (no shared letter pair) and 1.0 (same
    # pairs). When neither string contains a letter pair (empty strings or
    # single-character words only) the formula is 0/0; 0.0 is returned in
    # that case instead of NaN.
    def similarity(str1, str2)
      pairs1 = word_letter_pairs(str1)
      # dup: matched pairs are removed below and the memoised array is frozen
      pairs2 = word_letter_pairs(str2).dup

      union = pairs1.length + pairs2.length
      return 0.0 if union.zero?

      intersection = 0
      pairs1.each do |pair1|
        # each pair of str2 may be matched only once, so duplicates count
        # as often as they occur in both strings
        if (index = pairs2.index(pair1))
          intersection += 1
          pairs2.delete_at(index)
        end
      end

      (2.0 * intersection) / union
    end

    private

    # Upper-cased adjacent letter pairs of every whitespace-separated word in
    # str, e.g. "ab cde" -> ["AB", "CD", "DE"]. Memoised per string.
    def word_letter_pairs(str)
      @word_letter_pairs[str] ||=
        str.upcase.split(/\s+/).flat_map { |word|
          (0...(word.length - 1)).map { |i| word[i, 2] }
        }.freeze
    end

  end

end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: obfs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jensel Gatchalian
|
@@ -17,6 +17,9 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- lib/obfs.rb
|
20
|
+
- lib/obfs/store.rb
|
21
|
+
- lib/text/levenshtein.rb
|
22
|
+
- lib/text/white_similarity.rb
|
20
23
|
homepage: https://github.com/jenselg/obfs-ruby
|
21
24
|
licenses:
|
22
25
|
- MIT
|