middle_squid 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +674 -0
- data/README.md +227 -0
- data/Rakefile +7 -0
- data/bin/middle_squid +7 -0
- data/lib/middle_squid/actions.rb +77 -0
- data/lib/middle_squid/adapter.rb +54 -0
- data/lib/middle_squid/adapters/squid.rb +57 -0
- data/lib/middle_squid/backends/keyboard.rb +31 -0
- data/lib/middle_squid/backends/thin.rb +14 -0
- data/lib/middle_squid/blacklist.rb +67 -0
- data/lib/middle_squid/builder.rb +159 -0
- data/lib/middle_squid/cli.rb +119 -0
- data/lib/middle_squid/core_ext/hash.rb +29 -0
- data/lib/middle_squid/database.rb +47 -0
- data/lib/middle_squid/exceptions.rb +4 -0
- data/lib/middle_squid/helpers.rb +74 -0
- data/lib/middle_squid/indexer.rb +194 -0
- data/lib/middle_squid/runner.rb +37 -0
- data/lib/middle_squid/server.rb +84 -0
- data/lib/middle_squid/uri.rb +31 -0
- data/lib/middle_squid/version.rb +3 -0
- data/lib/middle_squid.rb +46 -0
- data/middle_squid.gemspec +37 -0
- data/middle_squid_wrapper.sh +4 -0
- data/test/helper.rb +26 -0
- data/test/resources/backslash/cat/list +1 -0
- data/test/resources/black/ads/domains +2 -0
- data/test/resources/black/ads/urls +1 -0
- data/test/resources/black/tracker/domains +2 -0
- data/test/resources/black/tracker/urls +2 -0
- data/test/resources/copy_of_duplicates/cat/copy_of_list +2 -0
- data/test/resources/copy_of_duplicates/cat/list +2 -0
- data/test/resources/copy_of_duplicates/copy_of_cat/copy_of_list +2 -0
- data/test/resources/copy_of_duplicates/copy_of_cat/list +2 -0
- data/test/resources/duplicates/cat/copy_of_list +2 -0
- data/test/resources/duplicates/cat/list +2 -0
- data/test/resources/duplicates/copy_of_cat/copy_of_list +2 -0
- data/test/resources/duplicates/copy_of_cat/list +2 -0
- data/test/resources/empty/cat/emptylist +0 -0
- data/test/resources/empty_path/cat/list +1 -0
- data/test/resources/expressions/cat/list +3 -0
- data/test/resources/gray/isp/domains +2 -0
- data/test/resources/gray/isp/urls +1 -0
- data/test/resources/gray/news/domains +2 -0
- data/test/resources/hello.rb +2 -0
- data/test/resources/invalid_byte/cat/list +1 -0
- data/test/resources/mixed/cat/list +2 -0
- data/test/resources/subdirectory/cat/ignore/.gitkeep +0 -0
- data/test/resources/trailing_space/cat/list +2 -0
- data/test/test_actions.rb +76 -0
- data/test/test_adapter.rb +61 -0
- data/test/test_blacklist.rb +189 -0
- data/test/test_builder.rb +89 -0
- data/test/test_cli.rb +105 -0
- data/test/test_database.rb +20 -0
- data/test/test_hash.rb +28 -0
- data/test/test_helper.rb +76 -0
- data/test/test_indexer.rb +457 -0
- data/test/test_keyboard.rb +79 -0
- data/test/test_runner.rb +56 -0
- data/test/test_server.rb +86 -0
- data/test/test_squid.rb +110 -0
- data/test/test_thin.rb +7 -0
- data/test/test_uri.rb +69 -0
- metadata +363 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
module MiddleSquid
|
2
|
+
# Small DSL to configure MiddleSquid.
|
3
|
+
#
|
4
|
+
# @example
|
5
|
+
# database '/home/proxy/blacklist.db'
|
6
|
+
#
|
7
|
+
# adv = blacklist 'adv', aliases: ['ads']
|
8
|
+
# tracker = blacklist 'tracker'
|
9
|
+
#
|
10
|
+
# run lambda {|uri, extras|
|
11
|
+
# if adv.include? uri
|
12
|
+
# redirect_to 'http://your.webserver/block_pages/advertising.html'
|
13
|
+
# end
|
14
|
+
#
|
15
|
+
# if tracker.include? uri
|
16
|
+
# redirect_to 'http://your.webserver/block_pages/tracker.html'
|
17
|
+
# end
|
18
|
+
# }
|
19
|
+
class Builder
|
20
|
+
# Returns the blacklists registered by {#blacklist}.
|
21
|
+
#
|
22
|
+
# @return [Array<BlackList>]
|
23
|
+
attr_reader :blacklists
|
24
|
+
|
25
|
+
# Returns the custom actions created by {#define_action}.
|
26
|
+
#
|
27
|
+
# @return [Hash<Symbol, Proc>]
|
28
|
+
attr_reader :custom_actions
|
29
|
+
|
30
|
+
# Returns the object passed to {#run}.
|
31
|
+
#
|
32
|
+
# @return [#call]
|
33
|
+
attr_reader :handler
|
34
|
+
|
35
|
+
# Returns the adapter selected by {#use}.
|
36
|
+
#
|
37
|
+
# @!attribute [r] adapter
|
38
|
+
# @return [Adapter]
|
39
|
+
def adapter
  # An adapter chosen through #use always wins.
  return @adapter if @adapter

  # Otherwise build the default adapter once and remember it.
  @adapter = Adapters::Squid.new
end
|
42
|
+
|
43
|
+
# Starts with no registered blacklist and no custom action.
def initialize
  @blacklists, @custom_actions = [], {}
end
|
47
|
+
|
48
|
+
# @return [Builder]
|
49
|
+
# Builds a new instance and evaluates the content of +file+ in its context.
# The file name is passed to instance_eval so that backtraces point to it.
def self.from_file(file)
  new.tap do |builder|
    builder.instance_eval File.read(file), file
  end
end
|
56
|
+
|
57
|
+
# Select the active adapter.
|
58
|
+
# By default {Adapters::Squid} with no options will be used.
|
59
|
+
#
|
60
|
+
# @example Squid in concurrency mode
|
61
|
+
# use Adapters::Squid, concurrency: true
|
62
|
+
# @param adapter [Class]
|
63
|
+
# @param options [Hash] adapter configuration
|
64
|
+
# @return [Adapter]
|
65
|
+
# @raise [ArgumentError] if the adapter is not a subclass of {Adapter}
|
66
|
+
# @see Adapters List of predefined adapters
|
67
|
+
def use(adapter, **options)
  # Check the type before comparing: `nil < Adapter` (or any object without
  # a `<` method) would raise NoMethodError instead of the documented
  # ArgumentError.
  unless adapter.is_a?(Class) && adapter < Adapter
    raise ArgumentError, 'Not an adapter.'
  end

  # The options are handed over as a single positional Hash.
  @adapter = adapter.new(options)
end
|
72
|
+
|
73
|
+
# Setup the blacklist database.
|
74
|
+
# It will be created if the file does not exist.
|
75
|
+
# Read/write access is required.
|
76
|
+
#
|
77
|
+
# Run {CLI#index middle_squid index} to add your blacklists to the database.
|
78
|
+
#
|
79
|
+
# @example
|
80
|
+
# database '/home/proxy/blacklist.db'
|
81
|
+
#
|
82
|
+
# run lambda {|uri, extras| }
|
83
|
+
# @param path [String] path to the SQLite database
|
84
|
+
def database(path)
  # Plain delegation: the Database module performs the actual setup.
  Database.setup(path)
end
|
87
|
+
|
88
|
+
# Returns a new registered blacklist instance.
|
89
|
+
#
|
90
|
+
# @note You need to call {#database} in order to use the blacklists.
|
91
|
+
# @example Block advertising
|
92
|
+
# adv = blacklist 'adv'
|
93
|
+
#
|
94
|
+
# run lambda {|uri, extras|
|
95
|
+
# do_something if adv.include? uri
|
96
|
+
# }
|
97
|
+
# @example Group blacklists
|
98
|
+
# adv = blacklist 'adv'
|
99
|
+
# tracker = blacklist 'tracker'
|
100
|
+
#
|
101
|
+
# group = [adv, tracker]
|
102
|
+
#
|
103
|
+
# run lambda {|uri, extras|
|
104
|
+
# do_something if group.any? {|bl| bl.include? uri }
|
105
|
+
# }
|
106
|
+
# @example Create an alias
|
107
|
+
# adv = blacklist 'adv', aliases: ['ads']
|
108
|
+
#
|
109
|
+
# run lambda {|uri, extras|
|
110
|
+
# do_something if adv.include? uri
|
111
|
+
# }
|
112
|
+
# @return [BlackList]
|
113
|
+
# @see BlackList#initialize BlackList#initialize
|
114
|
+
def blacklist(*args)
  # Every argument is forwarded untouched to BlackList#initialize; the new
  # instance is registered before being handed back to the caller.
  BlackList.new(*args).tap {|list| @blacklists << list }
end
|
119
|
+
|
120
|
+
# Register a custom action or helper.
|
121
|
+
#
|
122
|
+
# @example Don't Repeat Yourself
|
123
|
+
# define_action :block do
|
124
|
+
# redirect_to 'http://goodsite.com/'
|
125
|
+
# end
|
126
|
+
#
|
127
|
+
# run lambda {|uri, extras|
|
128
|
+
# block if uri.host == 'badsite.com'
|
129
|
+
# # ...
|
130
|
+
# block if uri.host == 'terriblesite.com'
|
131
|
+
# }
|
132
|
+
# @param name [Symbol] method name
|
133
|
+
# @param block [Proc] method body
|
134
|
+
# @see Actions List of predefined actions
|
135
|
+
# @see Helpers List of predefined helpers
|
136
|
+
def define_action(name, &block)
  raise ArgumentError, 'no block given' if block.nil?

  # The block is only stored here, keyed by +name+.
  @custom_actions.store name, block
end
|
141
|
+
alias :define_helper :define_action
|
142
|
+
|
143
|
+
# Takes any object that responds to the +call+ method with two arguments:
|
144
|
+
# the URI to process and an array of extra data.
|
145
|
+
#
|
146
|
+
# @example
|
147
|
+
# run lambda {|uri, extras|
|
148
|
+
# # executed when the adapter has received a query from an underlying software (eg. Squid)
|
149
|
+
# }
|
150
|
+
# @param handler [#call<URI, Array>]
|
151
|
+
# @raise [ArgumentError] if the handler does not respond to +#call+
|
152
|
+
# @see Runner Execution context (Runner)
|
153
|
+
def run(handler)
  if handler.respond_to?(:call)
    @handler = handler
  else
    raise ArgumentError, 'the handler must respond to #call'
  end
end
|
158
|
+
end
|
159
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module MiddleSquid
|
2
|
+
class CLI < Thor
|
3
|
+
package_name 'MiddleSquid'
|
4
|
+
|
5
|
+
class_option :'config-file',
|
6
|
+
aliases: '-C',
|
7
|
+
default: '~/middle_squid.rb',
|
8
|
+
desc: 'configuration file'
|
9
|
+
|
10
|
+
# <START>
|
11
|
+
desc 'start', 'Run the given configuration file'
|
12
|
+
|
13
|
+
# Runs the given configuration file.
|
14
|
+
#
|
15
|
+
# @example
|
16
|
+
# middle_squid start -C middle_squid_config.rb
|
17
|
+
def start
  # Resolve "~" and relative segments before loading the configuration.
  path = File.expand_path(options[:'config-file'])

  MiddleSquid::Runner.new(Builder.from_file(path))
end
|
24
|
+
# </START>
|
25
|
+
|
26
|
+
# <INDEX>
|
27
|
+
option :append, type: :boolean, default: false, aliases: '-a',
|
28
|
+
desc: 'keep the entries already in the database'
|
29
|
+
|
30
|
+
option :domains, type: :boolean, default: true,
|
31
|
+
desc: 'index domain lists'
|
32
|
+
|
33
|
+
option :full, type: :boolean, default: false,
|
34
|
+
desc: 'index all blacklist categories'
|
35
|
+
|
36
|
+
option :quiet, type: :boolean, default: false, aliases: '-q',
|
37
|
+
desc: 'disable status output'
|
38
|
+
|
39
|
+
option :urls, type: :boolean, default: true,
|
40
|
+
desc: 'index urls lists'
|
41
|
+
|
42
|
+
desc 'index SOURCES...', 'Populate the database from one or more blacklists'
|
43
|
+
|
44
|
+
# Populates the database from one or more blacklists.
|
45
|
+
#
|
46
|
+
# *Flags:*
|
47
|
+
#
|
48
|
+
# [\-a, \--append, \--no-append]
|
49
|
+
# Whether to keep the entries already in the database.
|
50
|
+
#
|
51
|
+
# [\--domains, \--no-domains]
|
52
|
+
# Whether to index domain lists.
|
53
|
+
#
|
54
|
+
# <b>Enabled by default.</b>
|
55
|
+
#
|
56
|
+
# [\--full, \--no-full]
|
57
|
+
# Whether to index all blacklist categories.
|
58
|
+
# By default MiddleSquid will only read the categories registered
|
59
|
+
# using {Builder#blacklist} in the configuration script.
|
60
|
+
#
|
61
|
+
# Enable if you want to reuse the same database in multiple configurations
|
62
|
+
# set to use different blacklist categories and you need to index everything.
|
63
|
+
#
|
64
|
+
# [\-q, \--quiet, \--no-quiet]
|
65
|
+
# Whether to disable status output.
|
66
|
+
#
|
67
|
+
# [\--urls, \--no-urls]
|
68
|
+
# Whether to index url lists.
|
69
|
+
#
|
70
|
+
# <b>Enabled by default.</b>
|
71
|
+
#
|
72
|
+
# @note {Builder#database} must be called in your configuration script in order to initialize the database.
|
73
|
+
# @example
|
74
|
+
# middle_squid index ShallaBlackList -C middle_squid_config.rb
|
75
|
+
def index(*directories)
  config_path = File.expand_path(options[:'config-file'])
  sources = directories.map {|dir| File.expand_path(dir) }

  builder = Builder.from_file(config_path)

  # Entry types to index, each one enabled by its command line flag.
  wanted = {:url => :urls, :domain => :domains}.
    select {|_type, flag| options[flag] }.
    keys

  MiddleSquid::Indexer.new.tap {|indexer|
    indexer.blacklists = builder.blacklists

    indexer.append = options[:append]
    indexer.entries = wanted
    indexer.full_index = options[:full]
    indexer.quiet = options[:quiet]
  }.index(sources)
end
|
95
|
+
# </INDEX>
|
96
|
+
|
97
|
+
# <VERSION>
|
98
|
+
desc 'version', 'Show current version and copyright notice'
|
99
|
+
|
100
|
+
# Shows MiddleSquid's version and copyright notice.
|
101
|
+
def version
  # Copyright notice printed right below the version line.
  notice = <<GPL
Copyright (C) 2014 by Christian Fillion

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
GPL

  puts "MiddleSquid #{MiddleSquid::VERSION}", notice
end
|
117
|
+
# </VERSION>
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class Hash
  # Headers dropped by {#sanitize_headers!} (compared after normalization).
  IGNORED_HEADERS = [
    'Connection',
    'Content-Encoding',
    'Content-Length',
    'Host',
    'Transfer-Encoding',
    'Version',
  ].freeze

  DASH = '-'.freeze
  UNDERSCORE = '_'.freeze

  # Normalizes the header names in place: underscores become dashes and each
  # dash-separated word is capitalized (+USER_AGENT+ becomes +User-Agent+).
  # Headers listed in {IGNORED_HEADERS} are removed.
  #
  # Keys are converted with +to_s+ first, so Symbol keys are normalized
  # like Strings instead of raising NoMethodError.
  #
  # @return [Hash] self
  def sanitize_headers!
    clean = {}

    each {|key, value|
      name = key.to_s.tr(UNDERSCORE, DASH).
        split(DASH).map(&:capitalize).join(DASH)

      next if IGNORED_HEADERS.include? name

      clean[name] = value
    }

    # clear + merge! rather than replace: replace would also overwrite the
    # receiver's default value/proc with the one of the temporary hash.
    clear
    merge! clean
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module MiddleSquid::Database
  # Connection shared by this module and by every class including it.
  @@db = nil

  # Opens the SQLite database at +path+ (closing the previously opened one,
  # if any) and creates the tables and unique indexes when they are missing.
  # Use {Builder#database} instead.
  #
  # @param path [String]
  def self.setup(path)
    @@db.close if @@db

    @@db = SQLite3::Database.new path

    schema = []

    schema << <<-SQL
      CREATE TABLE IF NOT EXISTS domains (
        category TEXT, host TEXT
      )
    SQL

    schema << <<-SQL
      CREATE UNIQUE INDEX IF NOT EXISTS unique_domains ON domains (
        category, host
      )
    SQL

    schema << <<-SQL
      CREATE TABLE IF NOT EXISTS urls (
        category TEXT, host TEXT, path TEXT
      )
    SQL

    schema << <<-SQL
      CREATE UNIQUE INDEX IF NOT EXISTS unique_urls ON urls (
        category, host, path
      )
    SQL

    schema.each {|statement| @@db.execute statement }

    # minimize downtime due to locks when the database is rebuilding
    # see http://www.sqlite.org/wal.html
    @@db.execute 'PRAGMA journal_mode=WAL'
  end

  # @return [SQLite3::Database]
  # @raise [RuntimeError] if {setup} has not been called yet
  def db
    return @@db if @@db

    raise "The database is not initialized. Did you call Builder#database in your configuration file?"
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module MiddleSquid::Helpers
|
2
|
+
#
|
3
|
+
# @!group Predefined Helpers
|
4
|
+
#
|
5
|
+
|
6
|
+
# Download a resource with the same headers and body as a rack request.
|
7
|
+
#
|
8
|
+
# @note
|
9
|
+
# This method must be called inside an active fiber. {Actions#intercept} does it automatically.
|
10
|
+
# @example Transparent Proxying
|
11
|
+
# run lambda {|uri, extras|
|
12
|
+
# # you should use 'accept' instead of doing this
|
13
|
+
# intercept {|req, res|
|
14
|
+
# download_like req, uri
|
15
|
+
# }
|
16
|
+
# }
|
17
|
+
# @example Body Modification
|
18
|
+
# run lambda {|uri, extras|
|
19
|
+
# intercept {|req, res|
|
20
|
+
# status, headers, body = download_like req, uri
|
21
|
+
#
|
22
|
+
# content_type = headers['Content-Type'].to_s
|
23
|
+
#
|
24
|
+
# if content_type.include? 'text/html'
|
25
|
+
# body.gsub! 'green', 'blue'
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# [status, headers, body]
|
29
|
+
# }
|
30
|
+
# }
|
31
|
+
# @param request [Rack::Request] the request to imitate
|
32
|
+
# @param uri [URI] the resource to fetch
|
33
|
+
# @return [Array] a rack triplet (status code, response headers and body)
|
34
|
+
# @see Actions#intercept
|
35
|
+
def download_like(request, uri)
  # The calling fiber is suspended at the end of this method and resumed by
  # one of the HTTP callbacks below with the rack triplet to return.
  caller_fiber = Fiber.current

  verb = request.request_method.downcase.to_sym

  # Rebuild the request headers from the rack environment: the content type
  # has its own key, the other headers are the HTTP_* entries.
  request_headers = {'Content-Type' => request.env['CONTENT_TYPE']}
  request.env.each {|key, val|
    request_headers[key[5..-1]] = val if key.start_with? 'HTTP_'
  }
  request_headers.sanitize_headers!

  options = {
    :head => request_headers,
    :body => request.body.read,
  }

  http = EM::HttpRequest.new(uri.to_s).send verb, options

  http.callback {
    status = http.response_header.status
    response_headers = http.response_header

    response_headers.sanitize_headers!

    caller_fiber.resume [status, response_headers, http.response]
  }

  http.errback {
    caller_fiber.resume [520, {}, "[MiddleSquid] #{http.error}"]
  }

  # Evaluates to the value given to #resume by one of the callbacks above.
  Fiber.yield
end
|
70
|
+
|
71
|
+
#
|
72
|
+
# @!endgroup
|
73
|
+
#
|
74
|
+
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
module MiddleSquid
|
2
|
+
# Used internally to build the blacklist database.
|
3
|
+
#
|
4
|
+
# @see CLI#index <code>middle_squid index</code> command
|
5
|
+
class Indexer
|
6
|
+
include Database
|
7
|
+
|
8
|
+
# @return [Boolean]
|
9
|
+
attr_accessor :append
|
10
|
+
|
11
|
+
# @return [Array<Symbol>]
|
12
|
+
attr_accessor :entries
|
13
|
+
|
14
|
+
# @return [Boolean]
|
15
|
+
attr_accessor :full_index
|
16
|
+
|
17
|
+
# @return [Boolean]
|
18
|
+
attr_accessor :quiet
|
19
|
+
|
20
|
+
# Sets the default options (truncate the database, index urls and domains,
# every category, verbose) and resets the internal bookkeeping.
def initialize
  @append, @full_index, @quiet = false, true, false
  @entries = [:url, :domain]

  @aliases = {}
  @cats_in_use = []
  @indexed_cats = []

  # one counter per result type returned by #append_to
  @total = {}
  [:url, :domain, :ignored, :duplicate].each {|type| @total[type] = 0 }
end
|
37
|
+
|
38
|
+
# @param list [Array<BlackList>]
|
39
|
+
# Replaces the categories in use and the alias table with the ones of the
# given blacklists. Both collections are updated in place.
def blacklists=(list)
  @cats_in_use.clear
  @aliases.clear

  list.each do |blacklist|
    category = blacklist.category
    @cats_in_use.push category

    # every alias points back to the category of its blacklist
    blacklist.aliases.each {|name| @aliases.store name, category }
  end

  @cats_in_use.uniq!
end
|
53
|
+
|
54
|
+
# @param directories [Array<String>]
|
55
|
+
# Rebuilds (or extends, in append mode) the database from the given
# blacklist directories inside a single transaction.
#
# @param directories [Array<String>]
def index(directories)
  if !@full_index && @cats_in_use.empty?
    warn 'ERROR: the loaded configuration does not use any blacklist'
    puts 'nothing to do in minimal indexing mode'
    return
  end

  start_time = Time.now

  db.transaction

  # The rollback guard only covers the work done after the transaction was
  # opened. As a method-level `ensure` it also ran on the early return
  # above (raising when no database is configured) and when
  # `db.transaction` itself failed.
  begin
    truncate unless @append
    directories.each {|dir|
      walk_in dir
    }
    cats_summary
    stats
    commit_or_rollback
  ensure
    db.rollback if db.transaction_active?
  end

  end_time = Time.now
  puts "finished after #{end_time - start_time} seconds"
end
|
79
|
+
|
80
|
+
private
|
81
|
+
# Status output, silenced when the indexer is in quiet mode.
def puts(*args)
  return if @quiet

  super(*args)
end
|
84
|
+
|
85
|
+
# Removes every row from both tables.
def truncate
  puts 'truncating database'

  [
    'DELETE FROM domains',
    'DELETE FROM urls',
  ].map {|statement| db.execute statement }.last
end
|
91
|
+
|
92
|
+
# Indexes every entry found exactly two levels below +directory+
# (<directory>/<category>/<list>), in sorted order.
#
# @param directory [String]
def walk_in(directory)
  puts "reading #{directory}"

  unless File.directory? directory
    warn "WARNING: #{directory}: no such directory"
    return
  end

  # The entries are listed by hand instead of globbing "#{directory}/*/*":
  # the directory name was part of the pattern, so a path containing glob
  # metacharacters ([, ], {, }, *, ?) silently matched nothing.
  # Like the glob did, hidden entries are skipped and unreadable
  # directories are ignored.
  visible_entries = lambda {|dir|
    begin
      Dir.entries(dir).reject {|name| name.start_with? '.' }
    rescue SystemCallError
      []
    end
  }

  files = visible_entries.call(directory).flat_map {|category|
    subdir = File.join(directory, category)
    next [] unless File.directory? subdir

    visible_entries.call(subdir).map {|name| File.join(subdir, name) }
  }
  files.sort! # fixes travis build

  files.each {|file|
    index_file file
  }
end
|
107
|
+
|
108
|
+
# Feeds every line of the list at +path+ to #append_to and counts the
# results. The category is the name of the parent directory, unless that
# name is a registered alias.
def index_file(path)
  return unless File.file? path

  dirname = File.basename(File.dirname(path))
  category = @aliases.fetch(dirname, dirname)

  # minimal indexing mode: skip the categories the configuration does not use
  return unless @full_index || @cats_in_use.include?(category)

  @indexed_cats << category

  puts "indexing #{dirname}/#{File.basename(path)}"

  File.foreach(path) {|line|
    @total[append_to(category, line)] += 1
  }
end
|
130
|
+
|
131
|
+
# Inserts one blacklist line into the database.
#
# @param category [String]
# @param line [String] raw line of a blacklist file (left unmodified)
# @return [Symbol] +:domain+ or +:url+ when a row was inserted,
#   +:duplicate+ when the row already exists, +:ignored+ otherwise
def append_to(category, line)
  # Fix invalid bytes first: matching a regex against a string with a
  # broken encoding raises ArgumentError. Work on a copy so that frozen
  # strings are accepted and the caller's line is left alone.
  line = line.scrub('').strip

  # ignore regex lists
  return :ignored unless line =~ /\A\w/

  # fix for dirty lists
  line = line.tr '\\', '/'

  uri = MiddleSquid::URI.parse "http://#{line}"
  host, path = uri.cleanhost, uri.cleanpath

  if path.empty?
    return :ignored unless @entries.include? :domain

    db.execute 'INSERT INTO domains (category, host) VALUES (?, ?)',
      [category, host]

    :domain
  else
    return :ignored unless @entries.include? :url

    db.execute 'INSERT INTO urls (category, host, path) VALUES (?, ?, ?)',
      [category, host, path]

    :url
  end
rescue SQLite3::ConstraintException
  # the unique indexes reject rows which are already in the database
  :duplicate
end
|
165
|
+
|
166
|
+
# Prints the indexed categories and warns about the categories in use
# for which no list was found.
def cats_summary
  @indexed_cats.uniq!

  puts
  puts "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"

  missing_cats = @cats_in_use - @indexed_cats
  return if missing_cats.empty?

  warn "WARNING: could not find #{missing_cats}"
end
|
174
|
+
|
175
|
+
# Prints the counters, one line per result type, followed by a blank line.
def stats
  [
    [:domain,    'domain(s)'],
    [:url,       'url(s)'],
    [:duplicate, 'duplicate(s)'],
    [:ignored,   'ignored expression(s)'],
  ].each {|counter, label| puts "found #{@total[counter]} #{label}" }

  puts
end
|
182
|
+
|
183
|
+
# Commits the transaction when at least one domain or url was inserted,
# reverts it otherwise.
def commit_or_rollback
  inserted = [:domain, :url].any? {|type| @total[type] > 0 }

  unless inserted
    warn 'ERROR: nothing to commit'
    puts 'reverting changes'
    return db.rollback
  end

  puts 'committing changes'
  db.commit
end
|
193
|
+
end
|
194
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module MiddleSquid
|
2
|
+
# Used internally to start the configured adapter and the internal HTTP server.
|
3
|
+
# The handler passed to {Builder#run} will be called in this context.
|
4
|
+
#
|
5
|
+
# @see Builder Configuration DSL
|
6
|
+
# @see CLI#start <code>middle_squid start</code> command
|
7
|
+
class Runner
|
8
|
+
include Actions
|
9
|
+
include Helpers
|
10
|
+
|
11
|
+
# Returns the internal HTTP server.
|
12
|
+
#
|
13
|
+
# @return [Server]
|
14
|
+
attr_reader :server
|
15
|
+
|
16
|
+
# @raise [Error] if the handler is undefined
|
17
|
+
# Installs the handler and the custom actions of +builder+ as singleton
# methods of this runner, then starts the adapter and the internal HTTP
# server inside the EventMachine reactor.
#
# @param builder [Builder]
# @raise [Error] if the handler is undefined
def initialize(builder)
  handler = builder.handler
  raise Error, 'Invalid handler. Did you call Builder#run in your configuration file?' unless handler

  begin
    # Procs are evaluated in the context of the runner, which gives them
    # access to the actions and helpers.
    define_singleton_method :_handler_wrapper, handler
  rescue TypeError
    # Builder#run accepts any object responding to #call, but only a Proc
    # or a compatible Method can become a method body. Other callables are
    # invoked as is, in their own context.
    define_singleton_method(:_handler_wrapper) {|*args| handler.call(*args) }
  end

  builder.custom_actions.each {|name, body|
    define_singleton_method name, body
  }

  adapter = builder.adapter
  adapter.handler = method :_handler_wrapper

  @server = Server.new

  EM.run {
    adapter.start
    @server.start
  }
end
|
36
|
+
end
|
37
|
+
end
|