obfs 0.0.2 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62ca2bff9fda087f834b3823adac6a193fd419c78e3dd72bdbf37b4b104c0e00
4
- data.tar.gz: 07a819a4eb162a7bcbc43d3310f1a68ed4c6dd35b7cc1a333b69b57d536fc8bd
3
+ metadata.gz: 704cf6c36b527d888d2a755f3b41f56c94a9f121543e8337e7d9b8afb28f32b0
4
+ data.tar.gz: 4a9ed29dd0f313ca5ced039ef1d6900e02d9cede067e5d537596be532a23c57f
5
5
  SHA512:
6
- metadata.gz: df53e0a7fed4b2f6e2b78fabe11ac0e40acc055128eff51119b9f615a25fe5bfe61becbac770043422c958d6ecc43fbb11b091fcfca68d91a5adf836d3320675
7
- data.tar.gz: 45765a1f45e9b0969c8597003ca1f4b8e56fcc8bdba79bc5c6a326653b2cd2d682837d3e1d7e928226a40f2d32bef3426c2ccb7569bf2bfc3378907843c346af
6
+ metadata.gz: c2eddd3da38dc73e10715018286734ce0170b17ae73b6e459aa9faa9ca1ac94e27e4a3420423ff3b3a47fc84307d0f1f3caac2ad673bda405968cfe379f97f81
7
+ data.tar.gz: 3f2a4e93c277de2b09e449d3a2b05a5676e854ef511d5157cbbd908cec27308714e2ca5a685f9234aa21d648ccf31474396d15a063a780aed8039dded6df7644
@@ -1,119 +1,11 @@
1
- # dependencies
1
+ # core
2
2
  require 'fileutils'
3
3
  require 'json'
4
- require 'text'
4
+ require 'set'
5
5
 
6
- # main
7
- class OBFS
6
+ # obfs
7
+ require 'obfs/store'
8
8
 
9
- def initialize(attributes = {}) # hash argument
10
- @path = (attributes.keys.include? :path) ? attributes[:path] : (File.join(Dir.home, '.obfs'))
11
- end
12
-
13
- # regular methods
14
-
15
- def method_missing(m, *args, &block)
16
-
17
- # normalize
18
- method_name = m.to_s
19
- dataA = args[0]
20
- dataB = args[1]
21
-
22
- # setter call
23
- if method_name.end_with?('=')
24
-
25
- # clean up name
26
- method_name = method_name.gsub('=','')
27
-
28
- # reassign if square bracket notation
29
- if method_name == "[]"
30
- method_name = dataA
31
- data = dataB
32
- else # make sure we load the proper method_name and data
33
- method_name = m.to_s.gsub('=','')
34
- data = args[0]
35
- end
36
-
37
- # write data
38
- if data == nil
39
- FileUtils.rm_rf (File.join @path, method_name)
40
- else
41
- FileUtils.rm_rf (File.join @path, method_name) if File.exist? (File.join @path, method_name)
42
- FileUtils.mkpath @path if !File.directory? @path
43
- write(@path, method_name, data)
44
- end
45
-
46
- # bracket notation
47
- elsif method_name == "[]"
48
-
49
- method_name = dataA.to_s.gsub(/\["/,'').gsub(/"\]/,'')
50
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
51
- read(@path, method_name)
52
- else
53
- OBFS.new({ path: File.join(@path, method_name.to_s) })
54
- end
55
-
56
- # recurse or read
57
- else
58
-
59
- if (!File.directory? File.join(@path, method_name)) && (File.exist? File.join(@path, method_name))
60
- read(@path, method_name)
61
- else
62
- OBFS.new({ path: File.join(@path, method_name.to_s) })
63
- end
64
-
65
- end
66
-
67
- end
68
-
69
- # special methods
70
-
71
- # returns current working path for obfs
72
- def _path
73
- @path
74
- end
75
-
76
- # returns directory contents in an array
77
- def _index
78
- Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue nil
79
- end
80
-
81
- # searches directory contents (1 level) and returns array sorted by relevance
82
- def _find(term = '', records = 1000, tolerance = 10)
83
- output = []
84
- search_space = Dir.entries(@path).reject { |k| k == '.' || k == '..' } rescue []
85
- search_space.each do |search_space_term|
86
- if Text::Levenshtein.distance(search_space_term, term) <= tolerance && Text::WhiteSimilarity.similarity(search_space_term, term) > 0.0
87
- output << search_space_term
88
- end
89
- end
90
- output.first(records)
91
- end
92
-
93
- # searches directory contents (1 level) and returns boolean if term exist
94
- def _exist(term = '')
95
- exist_space = Dir.entries(@path).reject { |k| k != term.to_s }
96
- if exist_space.length > 0
97
- true
98
- else
99
- false
100
- end
101
- end
102
-
103
- private
104
-
105
- # filesystem R/W
106
-
107
- def write(path, filename, data)
108
- Thread.new {
109
- curr_path = File.join path, filename
110
- File.write(curr_path, JSON.unparse(data))
111
- }
112
- end
113
-
114
- def read(path, filename)
115
- curr_path = File.join path, filename
116
- JSON.parse(File.open(curr_path).read) rescue File.open(curr_path).read
117
- end
118
-
119
- end
9
+ # third party
10
+ require 'text/levenshtein'
11
+ require 'text/white_similarity'
@@ -0,0 +1,132 @@
1
module OBFS

  # Object-style key/value store persisted on the filesystem.
  #
  # Each key is an entry below the store's root path: a regular file holds a
  # JSON-encoded value, a directory is a nested store. Keys are addressed
  # either as methods (store.users.bob, store.title = "x") or with brackets
  # (store["title"], store["title"] = "x"). Assigning nil deletes the entry.
  class Store

    # attributes - Hash of options. :path sets the root directory of the
    #              store; when the key is absent, "~/.obfs" is used.
    def initialize(attributes = {}) # hash argument
      @path = attributes.key?(:path) ? attributes[:path] : File.join(Dir.home, '.obfs')
    end

    # regular methods

    # Dispatches every otherwise-undefined message to the store.
    #
    #   store.name = value / store[key] = value -> write (nil value deletes)
    #   store[key] / store.name                  -> read a value, or return a
    #                                               nested OBFS::Store when the
    #                                               entry is not a plain file
    #
    # Raises RuntimeError when the key would resolve to the store root itself
    # or to anything outside of it.
    def method_missing(m, *args, &block)
      method_name = m.to_s

      if method_name.end_with?('=')
        stripped = method_name.delete('=')
        if stripped == '[]'
          # bracket setter: the key is coerced to a String exactly like the
          # bracket getter does, so Symbol (or other) keys work for both
          store_entry(args[0].to_s, args[1])
        else
          store_entry(stripped, args[0])
        end
      elsif method_name == '[]'
        fetch_entry(args[0].to_s.gsub(/\["/, '').gsub(/"\]/, ''))
      else
        fetch_entry(method_name)
      end
    end

    # special methods

    # returns current working path for obfs
    def _path
      @path
    end

    # returns directory contents in an array, or nil when the directory
    # cannot be listed (for example because it does not exist yet)
    def _index
      Dir.entries(@path).reject { |k| k == '.' || k == '..' }
    rescue StandardError
      nil
    end

    # searches directory contents (1 level) for names similar to +term+.
    #
    # An entry matches when its Levenshtein distance to the term is at most
    # +tolerance+ and its White similarity to the term is greater than zero.
    # At most +records+ names are returned, in directory-listing order (the
    # result is filtered, not ranked).
    def _find(term = '', records = 1000, tolerance = 50)
      needle = term.to_s # same coercion as _exist, so Symbols are accepted
      matches = (_index || []).select do |entry|
        OBFS::Levenshtein.distance(entry, needle) <= tolerance &&
          OBFS::WhiteSimilarity.similarity(entry, needle) > 0.0
      end
      matches.first(records)
    end

    # searches directory contents (1 level) and returns boolean if term exist
    def _exist(term = '')
      name = term.to_s
      return false if name == '.' || name == '..'

      (_index || []).include?(name)
    end

    private

    # Builds the filesystem path for +key+ and refuses keys that do not stay
    # strictly inside the store root. The path is normalised first, so keys
    # such as "../x" or "a/../.." are caught as well as "." and "..".
    def entry_path(key)
      candidate = File.join(@path, key)
      root = File.absolute_path(@path)
      prefix = root.end_with?(File::SEPARATOR) ? root : root + File::SEPARATOR

      unless File.absolute_path(candidate).start_with?(prefix)
        raise "traversal through . and .. not allowed"
      end

      candidate
    end

    # Returns the decoded value when +key+ names an existing non-directory
    # entry, otherwise a nested store rooted at that entry.
    def fetch_entry(key)
      target = entry_path(key)
      if File.exist?(target) && !File.directory?(target)
        read(@path, key)
      else
        OBFS::Store.new({ path: target })
      end
    end

    # Replaces whatever is stored under +key+ with +data+; nil only deletes.
    def store_entry(key, data)
      target = entry_path(key)
      removed = FileUtils.rm_rf(target) # drops a previous file or whole subtree
      return removed if data.nil?

      # for a plain key this is the store root; for a nested key ("a/b") it
      # also creates the intermediate directory
      FileUtils.mkpath(File.dirname(target))
      write(@path, key, data)
    end

    # filesystem R/W

    # Serialises +data+ as JSON into the file +filename+ below +path+.
    def write(path, filename, data)
      File.write(File.join(path, filename), JSON.generate(data))
    end

    # Returns the parsed JSON content of +filename+ below +path+, or the raw
    # file content when it is not valid JSON. File.read closes the handle;
    # only the parse step is rescued, so I/O errors still propagate.
    def read(path, filename)
      raw = File.read(File.join(path, filename))
      begin
        JSON.parse(raw)
      rescue StandardError
        raw
      end
    end

  end

end
@@ -0,0 +1,166 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/levenshtein.rb
4
+ #
5
+ # Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
6
+ #
7
+ # The Levenshtein distance is a measure of how similar two strings s and t are,
8
+ # calculated as the number of deletions/insertions/substitutions needed to
9
+ # transform s into t. The greater the distance, the more the strings differ.
10
+ #
11
+ # The Levenshtein distance is also sometimes referred to as the
12
+ # easier-to-pronounce-and-spell 'edit distance'.
13
+ #
14
+ # Author: Paul Battley (pbattley@gmail.com)
15
+ #
16
+
17
module OBFS # :nodoc:

  # Levenshtein (edit) distance between two strings, counted in Unicode
  # codepoints. All methods are available on the module itself
  # (OBFS::Levenshtein.distance) through `extend self`.
  module Levenshtein

    # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
    #
    # The optional argument max_distance can reduce the number of iterations by
    # stopping if the Levenshtein distance exceeds this value. This increases
    # performance where it is only necessary to compare the distance with a
    # reference value instead of calculating the exact distance. When it is
    # given, the result is capped: max_distance is returned as soon as the
    # distance is known to reach it.
    #
    # The distance is calculated in terms of Unicode codepoints. Be aware that
    # this algorithm does not perform normalisation: if there is a possibility
    # of different normalised forms being used, normalisation should be performed
    # beforehand.
    #
    def distance(str1, str2, max_distance = nil)
      if max_distance
        distance_with_maximum(str1, str2, max_distance)
      else
        distance_without_maximum(str1, str2)
      end
    end

    private

    # Banded variant: only cells within max_distance of the diagonal are
    # evaluated, and the result never exceeds max_distance.
    def distance_with_maximum(str1, str2, max_distance) # :nodoc:
      s = str1.encode(Encoding::UTF_8).unpack("U*")
      t = str2.encode(Encoding::UTF_8).unpack("U*")

      n = s.length
      m = t.length
      big_int = n * m

      # Swap if necessary so that s is always the shorter of the two strings
      s, t, n, m = t, s, m, n if m < n

      # If the length difference is already greater than the max_distance, then
      # there is nothing else to check
      if (n - m).abs >= max_distance
        return max_distance
      end

      return 0 if s == t
      return m if n.zero?
      return n if m.zero?

      # The values necessary for our threshold are written; the ones after must
      # be filled with large integers since the trailing member of the threshold
      # window in the bottom array will run min across them
      d = (m + 1).times.map { |i|
        if i < m || i < max_distance + 1
          i
        else
          big_int
        end
      }
      x = nil

      n.times do |i|
        diag_index = t.length - s.length + i

        # If max_distance was specified, we can reduce second loop. So we set
        # up our threshold window.
        # See:
        # Gusfield, Dan (1997). Algorithms on strings, trees, and sequences:
        # computer science and computational biology.
        # Cambridge, UK: Cambridge University Press. ISBN 0-521-58519-8.
        # pp. 263–264.
        min = i - max_distance - 1
        min = 0 if min < 0
        max = i + max_distance
        max = m - 1 if max > m - 1

        # e is the cell to the left of the first cell computed in this row.
        # While the window still starts at column 0 that cell is the real
        # boundary value i + 1 (delete the first i + 1 characters of s); it is
        # also what gets stored back into d[0] for the next row. Only once the
        # window has moved right is the cell outside the band and therefore
        # wiped with a large integer.
        e = min.zero? ? i + 1 : big_int

        min.upto(max) do |j|
          # If the diagonal value is already greater than the max_distance
          # then we can safely return: the diagonal will never go lower again.
          # See: http://www.levenshtein.net/
          if j == diag_index && d[j] >= max_distance
            return max_distance
          end

          cost = s[i] == t[j] ? 0 : 1
          insertion = d[j + 1] + 1
          deletion = e + 1
          substitution = d[j] + cost
          x = insertion < deletion ? insertion : deletion
          x = substitution if substitution < x

          d[j] = e
          e = x
        end
        d[m] = x
      end

      if x > max_distance
        return max_distance
      else
        return x
      end
    end

    # Full dynamic-programming variant using a single reusable row.
    def distance_without_maximum(str1, str2) # :nodoc:
      s = str1.encode(Encoding::UTF_8).unpack("U*")
      t = str2.encode(Encoding::UTF_8).unpack("U*")

      n = s.length
      m = t.length

      return m if n.zero?
      return n if m.zero?

      # d holds the previous row; e is the value to the left in the current row
      d = (0..m).to_a
      x = nil

      n.times do |i|
        e = i + 1
        m.times do |j|
          cost = s[i] == t[j] ? 0 : 1
          insertion = d[j + 1] + 1
          deletion = e + 1
          substitution = d[j] + cost
          x = insertion < deletion ? insertion : deletion
          x = substitution if substitution < x

          d[j] = e
          e = x
        end
        d[m] = x
      end

      return x
    end

    extend self

  end

end
@@ -0,0 +1,66 @@
1
+ #
2
+ # Credits to threedaymonk
3
+ # https://github.com/threedaymonk/text/blob/master/lib/text/white_similarity.rb
4
+ #
5
+ # encoding: utf-8
6
+ # Original author: Wilker Lúcio <wilkerlucio@gmail.com>
7
+
8
module OBFS

  # Ruby implementation of the string similarity described by Simon White
  # at: http://www.catalysoft.com/articles/StrikeAMatch.html
  #
  #                        2 * |pairs(s1) INTERSECT pairs(s2)|
  #   similarity(s1, s2) = -----------------------------------
  #                            |pairs(s1)| + |pairs(s2)|
  #
  # e.g.
  #                                      2 * |{FR, NC}|
  #   similarity(FRANCE, FRENCH) = ---------------------------------------
  #                                |{FR,RA,AN,NC,CE}| + |{FR,RE,EN,NC,CH}|
  #
  #                              = (2 * 2) / (5 + 5)
  #
  #                              = 0.4
  #
  #   WhiteSimilarity.new.similarity("FRANCE", "FRENCH")
  #
  # Pairs are adjacent characters within whitespace-separated words, compared
  # case-insensitively (the input is upcased). Repeated pairs are counted as
  # often as they occur on both sides.
  class WhiteSimilarity

    # Convenience wrapper that compares two strings with a throw-away instance.
    def self.similarity(str1, str2)
      new.similarity(str1, str2)
    end

    def initialize
      # per-instance cache: input string => frozen array of its letter pairs
      @word_letter_pairs = {}
    end

    # Returns a Float between 0.0 (no pair in common) and 1.0 (same pairs).
    #
    # When neither string contains a letter pair (empty strings, or only
    # single-character words) the quotient is undefined; 0.0 is returned in
    # that case instead of NaN.
    def similarity(str1, str2)
      pairs1 = word_letter_pairs(str1)
      pairs2 = word_letter_pairs(str2)

      total = pairs1.length + pairs2.length
      return 0.0 if total.zero?

      # count how often each pair of str2 is still available for matching
      available = Hash.new(0)
      pairs2.each { |pair| available[pair] += 1 }

      # every pair of str1 consumes at most one matching pair of str2
      shared = 0
      pairs1.each do |pair|
        next unless available[pair] > 0

        available[pair] -= 1
        shared += 1
      end

      (2.0 * shared) / total
    end

    private

    # Returns (and caches) the adjacent character pairs of every word in +str+.
    def word_letter_pairs(str)
      @word_letter_pairs[str] ||=
        str.upcase.split(/\s+/).flat_map { |word|
          (0...(word.length - 1)).map { |i| word[i, 2] }
        }.freeze
    end

  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: obfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jensel Gatchalian
@@ -17,6 +17,9 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - lib/obfs.rb
20
+ - lib/obfs/store.rb
21
+ - lib/text/levenshtein.rb
22
+ - lib/text/white_similarity.rb
20
23
  homepage: https://github.com/jenselg/obfs-ruby
21
24
  licenses:
22
25
  - MIT