middle_squid 1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +674 -0
- data/README.md +227 -0
- data/Rakefile +7 -0
- data/bin/middle_squid +7 -0
- data/lib/middle_squid/actions.rb +77 -0
- data/lib/middle_squid/adapter.rb +54 -0
- data/lib/middle_squid/adapters/squid.rb +57 -0
- data/lib/middle_squid/backends/keyboard.rb +31 -0
- data/lib/middle_squid/backends/thin.rb +14 -0
- data/lib/middle_squid/blacklist.rb +67 -0
- data/lib/middle_squid/builder.rb +159 -0
- data/lib/middle_squid/cli.rb +119 -0
- data/lib/middle_squid/core_ext/hash.rb +29 -0
- data/lib/middle_squid/database.rb +47 -0
- data/lib/middle_squid/exceptions.rb +4 -0
- data/lib/middle_squid/helpers.rb +74 -0
- data/lib/middle_squid/indexer.rb +194 -0
- data/lib/middle_squid/runner.rb +37 -0
- data/lib/middle_squid/server.rb +84 -0
- data/lib/middle_squid/uri.rb +31 -0
- data/lib/middle_squid/version.rb +3 -0
- data/lib/middle_squid.rb +46 -0
- data/middle_squid.gemspec +37 -0
- data/middle_squid_wrapper.sh +4 -0
- data/test/helper.rb +26 -0
- data/test/resources/backslash/cat/list +1 -0
- data/test/resources/black/ads/domains +2 -0
- data/test/resources/black/ads/urls +1 -0
- data/test/resources/black/tracker/domains +2 -0
- data/test/resources/black/tracker/urls +2 -0
- data/test/resources/copy_of_duplicates/cat/copy_of_list +2 -0
- data/test/resources/copy_of_duplicates/cat/list +2 -0
- data/test/resources/copy_of_duplicates/copy_of_cat/copy_of_list +2 -0
- data/test/resources/copy_of_duplicates/copy_of_cat/list +2 -0
- data/test/resources/duplicates/cat/copy_of_list +2 -0
- data/test/resources/duplicates/cat/list +2 -0
- data/test/resources/duplicates/copy_of_cat/copy_of_list +2 -0
- data/test/resources/duplicates/copy_of_cat/list +2 -0
- data/test/resources/empty/cat/emptylist +0 -0
- data/test/resources/empty_path/cat/list +1 -0
- data/test/resources/expressions/cat/list +3 -0
- data/test/resources/gray/isp/domains +2 -0
- data/test/resources/gray/isp/urls +1 -0
- data/test/resources/gray/news/domains +2 -0
- data/test/resources/hello.rb +2 -0
- data/test/resources/invalid_byte/cat/list +1 -0
- data/test/resources/mixed/cat/list +2 -0
- data/test/resources/subdirectory/cat/ignore/.gitkeep +0 -0
- data/test/resources/trailing_space/cat/list +2 -0
- data/test/test_actions.rb +76 -0
- data/test/test_adapter.rb +61 -0
- data/test/test_blacklist.rb +189 -0
- data/test/test_builder.rb +89 -0
- data/test/test_cli.rb +105 -0
- data/test/test_database.rb +20 -0
- data/test/test_hash.rb +28 -0
- data/test/test_helper.rb +76 -0
- data/test/test_indexer.rb +457 -0
- data/test/test_keyboard.rb +79 -0
- data/test/test_runner.rb +56 -0
- data/test/test_server.rb +86 -0
- data/test/test_squid.rb +110 -0
- data/test/test_thin.rb +7 -0
- data/test/test_uri.rb +69 -0
- metadata +363 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
module MiddleSquid
  # Small DSL to configure MiddleSquid.
  #
  # @example
  #   database '/home/proxy/blacklist.db'
  #
  #   adv     = blacklist 'adv', aliases: ['ads']
  #   tracker = blacklist 'tracker'
  #
  #   run lambda {|uri, extras|
  #     if adv.include? uri
  #       redirect_to 'http://your.webserver/block_pages/advertising.html'
  #     end
  #
  #     if tracker.include? uri
  #       redirect_to 'http://your.webserver/block_pages/tracker.html'
  #     end
  #   }
  class Builder
    # Returns the blacklists registered by {#blacklist}.
    #
    # @return [Array<BlackList>]
    attr_reader :blacklists

    # Returns the custom actions created by {#define_action}.
    #
    # @return [Hash<Symbol, Proc>]
    attr_reader :custom_actions

    # Returns the object passed to {#run}.
    #
    # @return [#call]
    attr_reader :handler

    # Returns the adapter selected by {#use}.
    # A default {Adapters::Squid} instance is created lazily on first access
    # when {#use} was never called.
    #
    # @!attribute [r] adapter
    # @return [Adapter]
    def adapter
      @adapter ||= Adapters::Squid.new
    end

    def initialize
      @blacklists = []
      @custom_actions = {}
    end

    # Creates a builder and evaluates the content of +file+ in its context,
    # so that the DSL methods below are available to the configuration script.
    #
    # @param file [String] path to the configuration script
    # @return [Builder]
    def self.from_file(file)
      obj = self.new
      content = File.read file

      # the file name is passed along so that backtraces point to the script
      obj.instance_eval content, file
      obj
    end

    # Select the active adapter.
    # By default {Adapters::Squid} with no options will be used.
    #
    # @example Squid in concurrency mode
    #   use Adapters::Squid, concurrency: true
    # @param adapter [Class]
    # @param options [Hash] adapter configuration
    # @return [Adapter]
    # @raise [ArgumentError] if the adapter is not a subclass of {Adapter}
    # @see Adapters List of predefined adapters
    def use(adapter, **options)
      # Check that we were given a class first: calling `<` on nil or on an
      # arbitrary object would raise NoMethodError instead of ArgumentError.
      unless adapter.is_a?(Class) && adapter < Adapter
        raise ArgumentError, 'Not an adapter.'
      end

      @adapter = adapter.new(options)
    end

    # Setup the blacklist database.
    # It will be created if the file does not exist.
    # Read/write access is required.
    #
    # Run {CLI#index middle_squid index} to add your blacklists to the database.
    #
    # @example
    #   database '/home/proxy/blacklist.db'
    #
    #   run lambda {|uri, extras| }
    # @param path [String] path to the SQLite database
    def database(path)
      Database.setup path
    end

    # Returns a new registered blacklist instance.
    # All arguments are forwarded to BlackList#initialize.
    #
    # @note You need to call {#database} in order to use the blacklists.
    # @example Block advertising
    #   adv = blacklist 'adv'
    #
    #   run lambda {|uri, extras|
    #     do_something if adv.include? uri
    #   }
    # @example Group blacklists
    #   adv     = blacklist 'adv'
    #   tracker = blacklist 'tracker'
    #
    #   group = [adv, tracker]
    #
    #   run lambda {|uri, extras|
    #     do_something if group.any? {|bl| bl.include? uri }
    #   }
    # @example Create an alias
    #   adv = blacklist 'adv', aliases: ['ads']
    #
    #   run lambda {|uri, extras|
    #     do_something if adv.include? uri
    #   }
    # @param args [Array] positional arguments for BlackList#initialize
    # @param options [Hash] keyword arguments for BlackList#initialize
    # @return [BlackList]
    # @see BlackList#initialize BlackList#initialize
    def blacklist(*args, **options)
      # Keywords are captured and forwarded explicitly: since Ruby 3.0 a Hash
      # collected by *args is no longer converted back into keyword arguments.
      bl = BlackList.new(*args, **options)
      @blacklists << bl
      bl
    end

    # Register a custom action or helper.
    #
    # @example Don't Repeat Yourself
    #   define_action :block do
    #     redirect_to 'http://goodsite.com/'
    #   end
    #
    #   run lambda {|uri, extras|
    #     block if uri.host == 'badsite.com'
    #     # ...
    #     block if uri.host == 'terriblesite.com'
    #   }
    # @param name [Symbol] method name
    # @param block [Proc] method body
    # @raise [ArgumentError] if no block is given
    # @see Actions List of predefined actions
    # @see Helpers List of predefined helpers
    def define_action(name, &block)
      raise ArgumentError, 'no block given' unless block_given?

      @custom_actions[name] = block
    end
    alias :define_helper :define_action

    # Takes any object that responds to the +call+ method with two arguments:
    # the URI to process and an array of extra data.
    #
    # @example
    #   run lambda {|uri, extras|
    #     # executed when the adapter has received a query from an underlying software (eg. Squid)
    #   }
    # @param handler [#call<URI, Array>]
    # @raise [ArgumentError] if the handler does not respond to +#call+
    # @see Runner Execution context (Runner)
    def run(handler)
      raise ArgumentError, 'the handler must respond to #call' unless handler.respond_to? :call

      @handler = handler
    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
module MiddleSquid
  # Command line interface of MiddleSquid, built as a Thor application.
  # Every command reads the configuration script given by --config-file.
  class CLI < Thor
    package_name 'MiddleSquid'

    class_option(:'config-file',
                 aliases: '-C',
                 default: '~/middle_squid.rb',
                 desc: 'configuration file')

    # <START>
    desc 'start', 'Run the given configuration file'

    # Evaluates the configuration file and hands the resulting builder
    # over to a new {Runner}.
    #
    # @example
    #   middle_squid start -C middle_squid_config.rb
    def start
      script = File.expand_path(options[:'config-file'])

      MiddleSquid::Runner.new(Builder.from_file(script))
    end
    # </START>

    # <INDEX>
    option(:append, type: :boolean, default: false, aliases: '-a',
                    desc: 'keep the entries already in the database')

    option(:domains, type: :boolean, default: true,
                     desc: 'index domain lists')

    option(:full, type: :boolean, default: false,
                  desc: 'index all blacklist categories')

    option(:quiet, type: :boolean, default: false, aliases: '-q',
                   desc: 'disable status output')

    option(:urls, type: :boolean, default: true,
                  desc: 'index urls lists')

    desc 'index SOURCES...', 'Populate the database from one or more blacklists'

    # Fills the database with the content of one or more blacklist directories.
    #
    # *Flags:*
    #
    # [\-a, \--append, \--no-append]
    #   Whether to keep the entries already in the database.
    #
    # [\--domains, \--no-domains]
    #   Whether to index domain lists.
    #
    #   <b>Enabled by default.</b>
    #
    # [\--full, \--no-full]
    #   Whether to index all blacklist categories.
    #   By default MiddleSquid will only read the categories registered
    #   using {Builder#blacklist} in the configuration script.
    #
    #   Enable if you want to reuse the same database in multiple configurations
    #   set to use different blacklist categories and you need to index everything.
    #
    # [\-q, \--quiet, \--no-quiet]
    #   Whether to disable status output.
    #
    # [\--urls, \--no-urls]
    #   Whether to index url lists.
    #
    #   <b>Enabled by default.</b>
    #
    # @note {Builder#database} must be called in your configuration script in order to initialize the database.
    # @example
    #   middle_squid index ShallaBlackList -C middle_squid_config.rb
    def index(*directories)
      script = File.expand_path(options[:'config-file'])
      sources = directories.map { |relative| File.expand_path(relative) }

      builder = Builder.from_file(script)

      # entry type => name of the command line flag enabling it
      flags = { url: :urls, domain: :domains }
      selected = flags.keys.select { |type| options[flags[type]] }

      indexer = MiddleSquid::Indexer.new
      indexer.blacklists = builder.blacklists
      indexer.append     = options[:append]
      indexer.entries    = selected
      indexer.full_index = options[:full]
      indexer.quiet      = options[:quiet]

      indexer.index(sources)
    end
    # </INDEX>

    # <VERSION>
    desc 'version', 'Show current version and copyright notice'

    # Prints the version of MiddleSquid followed by its copyright notice.
    def version
      puts("MiddleSquid #{MiddleSquid::VERSION}")
      puts(<<GPL)
Copyright (C) 2014 by Christian Fillion

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
GPL
    end
    # </VERSION>
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class Hash
  # Canonical names of the headers dropped by {#sanitize_headers!}.
  IGNORED_HEADERS = %w[
    Connection
    Content-Encoding
    Content-Length
    Host
    Transfer-Encoding
    Version
  ].freeze

  DASH       = '-'.freeze
  UNDERSCORE = '_'.freeze

  # Rewrites the keys of this hash in place as canonical header names:
  # underscores become dashes and each dash-separated word is capitalized
  # (+USER_AGENT+ becomes +User-Agent+). Entries whose canonical name is
  # listed in {IGNORED_HEADERS} are removed.
  #
  # @return [Hash] self
  def sanitize_headers!
    kept = each_with_object({}) do |(name, value), acc|
      words = name.tr(UNDERSCORE, DASH).split(DASH)
      canonical = words.map(&:capitalize).join(DASH)

      acc[canonical] = value unless IGNORED_HEADERS.include?(canonical)
    end

    clear
    merge!(kept)
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Holds the SQLite connection shared by every object including this module.
module MiddleSquid::Database
  @@db = nil

  # Setup the database. Use {Builder#database} instead.
  #
  # Any connection opened by a previous call is closed first. The +domains+
  # and +urls+ tables and their unique indexes are created when missing.
  #
  # @param path [String]
  def self.setup(path)
    if @@db
      @@db.close
      # Forget the closed handle right away: if opening the new database
      # fails below, #db must report an uninitialized database instead of
      # handing out a closed connection.
      @@db = nil
    end

    @@db = SQLite3::Database.new path

    @@db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS domains (
        category TEXT, host TEXT
      )
    SQL

    @@db.execute <<-SQL
      CREATE UNIQUE INDEX IF NOT EXISTS unique_domains ON domains (
        category, host
      )
    SQL

    @@db.execute <<-SQL
      CREATE TABLE IF NOT EXISTS urls (
        category TEXT, host TEXT, path TEXT
      )
    SQL

    @@db.execute <<-SQL
      CREATE UNIQUE INDEX IF NOT EXISTS unique_urls ON urls (
        category, host, path
      )
    SQL

    # minimize downtime due to locks when the database is rebuilding
    # see http://www.sqlite.org/wal.html
    @@db.execute 'PRAGMA journal_mode=WAL'
  end

  # Returns the connection opened by {setup}.
  #
  # @return [SQLite3::Database]
  # @raise [RuntimeError] if {setup} has not been called yet
  def db
    raise "The database is not initialized. Did you call Builder#database in your configuration file?" unless @@db

    @@db
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module MiddleSquid::Helpers
  #
  # @!group Predefined Helpers
  #

  # Download a resource with the same headers and body as a rack request.
  #
  # The request headers are taken from the +HTTP_*+ entries of the rack
  # environment, plus +CONTENT_TYPE+ when the request has one, and are
  # normalized with Hash#sanitize_headers! before being sent.
  # The current fiber is suspended until the download succeeds or fails.
  # On failure a +520+ response describing the error is returned.
  #
  # @note
  #   This method must be called inside an active fiber. {Actions#intercept} does it automatically.
  # @example Transparent Proxying
  #   run lambda {|uri, extras|
  #     # you should use 'accept' instead of doing this
  #     intercept {|req, res|
  #       download_like req, uri
  #     }
  #   }
  # @example Body Modification
  #   run lambda {|uri, extras|
  #     intercept {|req, res|
  #       status, headers, body = download_like req, uri
  #
  #       content_type = headers['Content-Type'].to_s
  #
  #       if content_type.include? 'text/html'
  #         body.gsub! 'green', 'blue'
  #       end
  #
  #       [status, headers, body]
  #     }
  #   }
  # @param request [Rack::Request] the request to imitate
  # @param uri [URI] the resource to fetch
  # @return [Array] a rack triplet (status code, response headers and body)
  # @see Actions#intercept
  def download_like(request, uri)
    fiber = Fiber.current

    method = request.request_method.downcase.to_sym

    headers = {}

    # Only forward a Content-Type when the imitated request carries one,
    # otherwise a header with an empty value would be added to the request.
    content_type = request.env['CONTENT_TYPE']
    headers['Content-Type'] = content_type unless content_type.nil? || content_type.empty?

    request.env.
      select {|k| k.start_with? 'HTTP_' }.
      each {|key, val| headers[key[5..-1]] = val } # strip the 'HTTP_' prefix

    headers.sanitize_headers!

    options = {
      :head => headers,
      :body => request.body.read,
    }

    http = EM::HttpRequest.new(uri.to_s).send method, options

    http.callback {
      status = http.response_header.status
      response_headers = http.response_header
      body = http.response

      response_headers.sanitize_headers!

      # wake up the fiber suspended below with the rack triplet
      fiber.resume [status, response_headers, body]
    }

    http.errback {
      fiber.resume [520, {}, "[MiddleSquid] #{http.error}"]
    }

    # suspend until one of the two callbacks above resumes this fiber;
    # the value passed to resume becomes the return value
    Fiber.yield
  end

  #
  # @!endgroup
  #
end
|
@@ -0,0 +1,194 @@
|
|
1
|
+
module MiddleSquid
  # Used internally to build the blacklist database.
  #
  # @see CLI#index <code>middle_squid index</code> command
  class Indexer
    include Database

    # Whether to keep the entries already in the database.
    #
    # @return [Boolean]
    attr_accessor :append

    # Kinds of entries to index (+:url+ and/or +:domain+).
    #
    # @return [Array<Symbol>]
    attr_accessor :entries

    # Whether to index every category found, not only the ones given
    # to {#blacklists=}.
    #
    # @return [Boolean]
    attr_accessor :full_index

    # Whether to disable status output.
    #
    # @return [Boolean]
    attr_accessor :quiet

    def initialize
      @append = false
      @entries = [:url, :domain]
      @full_index = true
      @quiet = false

      @aliases = {}
      @cats_in_use = []
      @indexed_cats = []

      @total = {
        :url => 0,
        :domain => 0,
        :ignored => 0,
        :duplicate => 0,
      }
    end

    # Replaces the list of categories in use and of their aliases.
    #
    # @param list [Array<BlackList>]
    def blacklists=(list)
      @cats_in_use.clear
      @aliases.clear

      list.each {|bl|
        @cats_in_use << bl.category

        bl.aliases.each {|name|
          @aliases[name] = bl.category
        }
      }

      @cats_in_use.uniq!
    end

    # Indexes the lists found in the given directories inside a single
    # transaction. The transaction is committed only if at least one domain
    # or url was added, and is rolled back otherwise or on error.
    #
    # @param directories [Array<String>]
    def index(directories)
      if !@full_index && @cats_in_use.empty?
        warn 'ERROR: the loaded configuration does not use any blacklist'
        puts 'nothing to do in minimal indexing mode'
        return
      end

      start_time = Time.now

      db.transaction

      truncate unless @append
      directories.each {|dir|
        walk_in dir
      }
      cats_summary
      stats
      commit_or_rollback

      end_time = Time.now
      puts "finished after #{end_time - start_time} seconds"
    ensure
      # do not leave a transaction open if anything above raised
      db.rollback if db.transaction_active?
    end

    private
    # Same as Kernel#puts, but silent in quiet mode.
    def puts(*args)
      super(*args) unless @quiet
    end

    # Removes every entry from both tables.
    def truncate
      puts 'truncating database'

      db.execute 'DELETE FROM domains'
      db.execute 'DELETE FROM urls'
    end

    # Indexes every file located exactly one level below +directory+
    # (<code>directory/category/list</code>).
    def walk_in(directory)
      puts "reading #{directory}"

      unless File.directory? directory
        warn "WARNING: #{directory}: no such directory"
        return
      end

      files = Dir.glob File.join(directory, '*/*')
      files.sort! # fixes travis build

      files.each {|file|
        index_file file
      }
    end

    # Indexes the lines of one list. The category is the name of the parent
    # directory, translated through the registered aliases.
    def index_file(path)
      pn = Pathname.new path
      return unless pn.file?

      dirname = pn.dirname.basename.to_s
      category = @aliases.has_key?(dirname) \
        ? @aliases[dirname]
        : dirname

      if !@full_index
        return unless @cats_in_use.include? category
      end

      @indexed_cats << category

      puts "indexing #{dirname}/#{pn.basename}"

      File.foreach(path) {|line|
        type = append_to category, line
        @total[type] += 1
      }
    end

    # Inserts one list line into the database.
    #
    # @return [Symbol] the counter to increment in +@total+:
    #   +:domain+, +:url+, +:ignored+ or +:duplicate+
    def append_to(category, line)
      # fix invalid bytes first: matching a regular expression against a
      # string containing an invalid byte sequence raises ArgumentError
      line.scrub! ''

      # remove trailing whitespace
      line.strip!

      # ignore regex lists
      return :ignored unless line[0] =~ /\w/

      # fix for dirty lists
      line.tr! '\\', '/'

      uri = MiddleSquid::URI.parse "http://#{line}"
      host, path = uri.cleanhost, uri.cleanpath

      if path.empty?
        return :ignored unless @entries.include? :domain

        db.execute 'INSERT INTO domains (category, host) VALUES (?, ?)',
          [category, host]

        :domain
      else
        return :ignored unless @entries.include? :url

        db.execute 'INSERT INTO urls (category, host, path) VALUES (?, ?, ?)',
          [category, host, path]

        :url
      end
    rescue SQLite3::ConstraintException
      # the unique indexes reject entries already present in the database
      :duplicate
    end

    # Prints the indexed categories and warns about the categories in use
    # for which no list was found.
    def cats_summary
      @indexed_cats.uniq!
      missing_cats = @cats_in_use - @indexed_cats

      puts
      puts "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"
      warn "WARNING: could not find #{missing_cats}" unless missing_cats.empty?
    end

    # Prints the counters accumulated while indexing.
    def stats
      puts "found #{@total[:domain]} domain(s)"
      puts "found #{@total[:url]} url(s)"
      puts "found #{@total[:duplicate]} duplicate(s)"
      puts "found #{@total[:ignored]} ignored expression(s)"
      puts
    end

    # Commits the transaction if anything was indexed, otherwise rolls it
    # back so that the previous content of the database is kept.
    def commit_or_rollback
      if @total[:domain] > 0 || @total[:url] > 0
        puts 'committing changes'
        db.commit
      else
        warn 'ERROR: nothing to commit'
        puts 'reverting changes'
        db.rollback
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module MiddleSquid
  # Used internally to start the configured adapter and the internal HTTP server.
  # The handler passed to {Builder#run} will be called in this context.
  #
  # @see Builder Configuration DSL
  # @see CLI#start <code>middle_squid start</code> command
  class Runner
    include Actions
    include Helpers

    # Returns the internal HTTP server.
    #
    # @return [Server]
    attr_reader :server

    # Installs the handler and the custom actions of the builder as singleton
    # methods of this object, then starts the adapter and the HTTP server
    # inside the EventMachine reactor.
    #
    # @param builder [Builder]
    # @raise [Error] if the handler is undefined
    def initialize(builder)
      handler = builder.handler
      raise Error, 'Invalid handler. Did you call Builder#run in your configuration file?' unless handler

      # Procs and lambdas become the method body itself, so they are executed
      # with this runner as self. {Builder#run} accepts any object responding
      # to #call, but define_singleton_method raises TypeError for such
      # objects: they are wrapped in a lambda and run in their own context.
      body = handler.is_a?(Proc) ? handler : ->(*args) { handler.call(*args) }
      define_singleton_method :_handler_wrapper, body

      builder.custom_actions.each {|name, action|
        define_singleton_method name, action
      }

      adapter = builder.adapter
      adapter.handler = method :_handler_wrapper

      @server = Server.new

      EM.run {
        adapter.start
        @server.start
      }
    end
  end
end
|