enf 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile.lock +56 -0
- data/README.md +112 -0
- data/enf.gemspec +26 -0
- data/lib/enf/elephant.rb +55 -0
- data/lib/enf/version.rb +14 -0
- data/lib/enf.rb +4 -0
- metadata +50 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6e55d0099cadb7696261d7142c429fab7af91d4a
|
4
|
+
data.tar.gz: 85fd5160859456781784d49ceed188f4b26ed9f0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 679913e249b668fa54d3da5303a9acb87aaa2e7444c9beaf50c0b35ea4f599b02e428caab10b548d56c0edd393b9901b195f85832f84de12333c975f24255ee0
|
7
|
+
data.tar.gz: fe80390060f3cb8d6af9b843a116780bf8f996bb2f7bc01283d736db29eb37acfe6fc74b0eb684ea32e2c473b0ffe37ce5ce097acd596150a0b428d5397d0f63
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.0.0)
|
5
|
+
astrolabe (1.3.0)
|
6
|
+
parser (>= 2.2.0.pre.3, < 3.0)
|
7
|
+
benchmark-ips (2.1.1)
|
8
|
+
byebug (4.0.5)
|
9
|
+
columnize (= 0.9.0)
|
10
|
+
coderay (1.1.0)
|
11
|
+
columnize (0.9.0)
|
12
|
+
diff-lcs (1.2.5)
|
13
|
+
memory_profiler (0.9.4)
|
14
|
+
method_source (0.8.2)
|
15
|
+
parser (2.2.2.5)
|
16
|
+
ast (>= 1.1, < 3.0)
|
17
|
+
powerpack (0.1.1)
|
18
|
+
pry (0.10.1)
|
19
|
+
coderay (~> 1.1.0)
|
20
|
+
method_source (~> 0.8.1)
|
21
|
+
slop (~> 3.4)
|
22
|
+
pry-byebug (3.1.0)
|
23
|
+
byebug (~> 4.0)
|
24
|
+
pry (~> 0.10)
|
25
|
+
rainbow (2.0.0)
|
26
|
+
rspec (3.3.0)
|
27
|
+
rspec-core (~> 3.3.0)
|
28
|
+
rspec-expectations (~> 3.3.0)
|
29
|
+
rspec-mocks (~> 3.3.0)
|
30
|
+
rspec-core (3.3.1)
|
31
|
+
rspec-support (~> 3.3.0)
|
32
|
+
rspec-expectations (3.3.0)
|
33
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
34
|
+
rspec-support (~> 3.3.0)
|
35
|
+
rspec-mocks (3.3.1)
|
36
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
+
rspec-support (~> 3.3.0)
|
38
|
+
rspec-support (3.3.0)
|
39
|
+
rubocop (0.32.1)
|
40
|
+
astrolabe (~> 1.3)
|
41
|
+
parser (>= 2.2.2.5, < 3.0)
|
42
|
+
powerpack (~> 0.1)
|
43
|
+
rainbow (>= 1.99.1, < 3.0)
|
44
|
+
ruby-progressbar (~> 1.4)
|
45
|
+
ruby-progressbar (1.7.5)
|
46
|
+
slop (3.6.0)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
|
51
|
+
DEPENDENCIES
|
52
|
+
benchmark-ips
|
53
|
+
memory_profiler
|
54
|
+
pry-byebug
|
55
|
+
rspec (~> 3.3)
|
56
|
+
rubocop
|
data/README.md
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# Elephants never forget !
|
2
|
+
|
3
|
+
A memory-efficient implementation of a white/black list
|
4
|
+
|
5
|
+
## Examples
|
6
|
+
|
7
|
+
*Building a dictionary of all terms used in 'Les Misérables'*
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'open-uri'
|
11
|
+
URI = 'https://www.gutenberg.org/ebooks/135.txt.utf-8'
|
12
|
+
|
13
|
+
elephant = Enf::Elephant.new
|
14
|
+
|
15
|
+
open(URI) do |file|
|
16
|
+
file.read.scan(/[[:alpha:]]*/).each do |token|
|
17
|
+
elephant.register! token.downcase
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
elephant.include? 'bonjour'
|
22
|
+
# => true
|
23
|
+
|
24
|
+
elephant.include? 'megadrive'
|
25
|
+
# => false
|
26
|
+
|
27
|
+
```
|
28
|
+
|
29
|
+
*Building a shared blacklist with a rack app*
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
require 'rack'
|
33
|
+
require 'enf'
|
34
|
+
require 'JSON'
|
35
|
+
|
36
|
+
elephant = Enf::Elephant.new
|
37
|
+
|
38
|
+
app = Proc.new do |env|
|
39
|
+
puts env.inspect
|
40
|
+
path = env.fetch('PATH_INFO')
|
41
|
+
case path
|
42
|
+
when '/'
|
43
|
+
['200', {'Content-Type' => 'text/html'}, ['A sample elephant black list rack app']]
|
44
|
+
when /^\/add\//
|
45
|
+
elephant.register!(token = path[5..-1])
|
46
|
+
['200', {'Content-Type' => 'text/html'}, ["Registered '#{token}'"]]
|
47
|
+
when /^\/know\//
|
48
|
+
result = elephant.include?(token = path[6..-1])
|
49
|
+
['200', {'Content-Type' => 'text/json'}, [{ token => result }.to_json]]
|
50
|
+
else
|
51
|
+
['404', {'Content-Type' => 'text/html'}, ['Learn to talk elephantish']]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
Rack::Handler::WEBrick.run app
|
56
|
+
```
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
## Note on implementation
|
61
|
+
|
62
|
+
Instead of storing all listed values as follows:
|
63
|
+
```ruby
|
64
|
+
{
|
65
|
+
"foo" => true,
|
66
|
+
"bar" => true,
|
67
|
+
"foobar" => true,
|
68
|
+
# ...
|
69
|
+
"foobob" => true
|
70
|
+
}
|
71
|
+
```
|
72
|
+
|
73
|
+
the values are stored in several nested hashes, as follows:
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
{
|
77
|
+
"f" => {
|
78
|
+
"o" => {
|
79
|
+
"o" => { # leave
|
80
|
+
"b" => {
|
81
|
+
"a" => "r", # leave
|
82
|
+
"o" => "b" # leave
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}
|
86
|
+
},
|
87
|
+
"b" => {
|
88
|
+
"a" => "r" # leave
|
89
|
+
}
|
90
|
+
}
|
91
|
+
```
|
92
|
+
|
93
|
+
This avoids duplicating content that can be factored out (shared prefixes).
|
94
|
+
|
95
|
+
## Note on performance
|
96
|
+
|
97
|
+
Since Enf::Elephant is noticeably slower than using sets or hashes on small
|
98
|
+
volumes of data, it is recommended to use it when memory matters most and
|
99
|
+
when words are easily factorizable (for example, as a blacklist for Twitter
|
100
|
+
spam bots).
|
101
|
+
|
102
|
+
## Next ?
|
103
|
+
|
104
|
+
I will probably be working on:
|
105
|
+
|
106
|
+
1. Single path merging: Currently, when only a single word uses a specific path
|
107
|
+
(example: bar, in the previous example), a hash is created for every
|
108
|
+
letter of that specific path. We have to merge them into a single element
|
109
|
+
to avoid useless hash creation.
|
110
|
+
2. Completion: With that data structure, it will be quite easy and
|
111
|
+
efficient to propose completion candidates from a start of word.
|
112
|
+
3. tbd
|
data/enf.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
4
|
+
require 'enf/version'
|
5
|
+
# Gem specification for enf.
#
# The packaged file list is taken from `git ls-files`, minus development-only
# files (specs, editor swap files, Gemfile, Rakefile and tool dotfiles).
Gem::Specification.new do |s|
  s.name = 'enf'
  s.version = Enf::Version::STRING
  s.summary = 'Elephants never forget'
  s.description = 'Graph based white/black list implementation. Your elephant won\'t forget.'
  s.authors = ['Alexandre Ignjatovic']
  s.email = 'alexandre.ignjatovic@gmail.com'

  # Paths matching this pattern are excluded from the gem. The pattern is
  # anchored on the whole path, so e.g. `Gemfile.lock` is still packaged.
  excluded = %r{\A(?:
    spec/.*
    |.*\.swp
    |Gemfile
    |Rakefile
    |\.rspec
    |\.gitignore
    |\.rubocop\.yml
  )\z}x

  # Split on an explicit newline: `$RS` is only defined once the `English`
  # library has been required, which this file does not do. Without it the
  # separator is nil and paths containing spaces would be split apart.
  s.files = `git ls-files`.split("\n").reject { |file| file =~ excluded }

  s.require_paths = ['lib']
  s.homepage = 'https://github.com/bankair/enf'
  s.license = 'MIT'
end
|
data/lib/enf/elephant.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Enf
  # A node of a prefix tree used as a memory-light white/black list.
  #
  # Every node holds one value (@leave) for the element that ends exactly on
  # that node, plus a hash of child nodes keyed by the next item of the
  # element (a character for a String, an item for an Array).
  class Elephant
    require 'set'
    require 'singleton'

    # Raised when an element cannot be registered: the receiver is frozen or
    # the element is not of an authorized type.
    class CannotRegister < RuntimeError; end

    # Only instances of exactly these classes are accepted as elements.
    AUTHORIZED_TYPES = Set.new([String, Array]).freeze

    # Null node, used to answer lookups that leave the known part of the tree.
    class Nope
      include Singleton

      # Nothing is ever included below an unknown branch.
      def include_impl(_element)
        false
      end

      # The null node is shared and must never store anything.
      #
      # Accepts the same arguments as Elephant#register!.
      def register!(_element, _payload = true)
        raise CannotRegister
      end
    end

    # default_leave_value - value stored on this node until something is
    #                       registered on it; also returned by #include? for
    #                       invalid elements.
    def initialize(default_leave_value = false)
      @default_leave_value = default_leave_value
      @leave = default_leave_value
      # Child nodes are created lazily, on first write access to a missing key.
      @children = Hash.new { |hash, key| hash[key] = Elephant.new }
    end

    # Registers element in the tree and attaches payload to it.
    #
    # element - a String or an Array.
    # payload - value later returned by #include? for that element.
    #
    # Returns the payload.
    # Raises CannotRegister if the receiver is frozen or element is invalid.
    def register!(element, payload = true)
      raise CannotRegister if frozen? || invalid?(element)
      return (@leave = payload) if element.empty?

      # Forward the payload down the tree; without it every registration
      # would silently store the default `true` on the final node.
      @children[element[0]].register!(element[1..-1], payload)
    end

    # Returns the payload registered for element, the node's initial value
    # when the path exists but nothing was registered on it, or false when the
    # path is unknown. Invalid elements yield the default leave value.
    def include?(element)
      return @default_leave_value if invalid?(element)

      include_impl(element)
    end

    # Recursive lookup step. Stays public because it is also invoked on child
    # nodes and on the Nope singleton.
    def include_impl(element)
      return @leave if element.empty?

      # Hash#fetch bypasses the default proc, so lookups never create nodes.
      @children.fetch(element[0]) { Nope.instance }.include_impl(element[1..-1])
    end

    protected

    # An element is invalid when it is nil or when its class is not exactly
    # one of AUTHORIZED_TYPES.
    def invalid?(element)
      return true if element.nil?

      !AUTHORIZED_TYPES.include?(element.class)
    end
  end
end
|
data/lib/enf/version.rb
ADDED
data/lib/enf.rb
ADDED
metadata
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: enf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Alexandre Ignjatovic
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-07-15 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Graph based white/black list implementation. Your elephant won't forget.
|
14
|
+
email: alexandre.ignjatovic@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- Gemfile.lock
|
20
|
+
- README.md
|
21
|
+
- enf.gemspec
|
22
|
+
- lib/enf.rb
|
23
|
+
- lib/enf/elephant.rb
|
24
|
+
- lib/enf/version.rb
|
25
|
+
homepage: https://github.com/bankair/enf
|
26
|
+
licenses:
|
27
|
+
- MIT
|
28
|
+
metadata: {}
|
29
|
+
post_install_message:
|
30
|
+
rdoc_options: []
|
31
|
+
require_paths:
|
32
|
+
- lib
|
33
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 2.0.14
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: Elephants never forget
|
49
|
+
test_files: []
|
50
|
+
has_rdoc:
|