middle_squid 1.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8afc13446c5543d835fc77291d1a08a124fc222f
4
- data.tar.gz: 93d0646721e4cf660b75670f71cc18eef08a371a
3
+ metadata.gz: 516bbcd321414bdfd91755211912e7edb9a7533a
4
+ data.tar.gz: ff8669daab5db30a2f25810ae40a13feed93fbd7
5
5
  SHA512:
6
- metadata.gz: 830df42d71fab5086d17f6393ef1a4fdf7d22c8a67a95463439836bc07ed26fafca22772d07e6123d4d7735f5218ae1fc7382930868c28ce4a2d3c4ec497520f
7
- data.tar.gz: 95401e707dbe31ef3430c51fe8e65bbed16ec6555b1a4b142e806dfab53dde70f0c7b2fe9feecadd3de58949db0b4a3d88760d88652b3f27f1cc379d3e284bf5
6
+ metadata.gz: a62589c10bccafeb51c082f53552dd97bb566489c6be203f1eed1b3aff846f4111e4a4a5cc931b16cb502b6228596ef2650f33073fbc2094dac3d99effb16ef7
7
+ data.tar.gz: e473a55bc36ad604a4f1e7efd9c492d42a2ee08971f493484e1028f7ad8f83c1fcd40c1f61647261f4145f0c09f27384b1bfd9ba1b06d940df6323ca09e48466
data/README.md CHANGED
@@ -147,7 +147,7 @@ tar xzf shallalist.tar.gz
147
147
  mv BL ShallaBlackList
148
148
 
149
149
  # Construct the blacklist database
150
- /usr/local/bin/middle_squid_wrapper.sh index ShallaBlackList -C /etc/squid/middle_squid.rb
150
+ /usr/local/bin/middle_squid_wrapper.sh index ShallaBlackList -C /home/proxy/middle_squid_config.rb
151
151
 
152
152
  exit
153
153
  ```
@@ -208,6 +208,13 @@ MiddleSquid's documentation is hosted at
208
208
 
209
209
  ## Changelog
210
210
 
211
+ ### v1.0.1 (2014-11-06)
212
+
213
+ - send download errors as text/plain
214
+ - fix a crash when reading invalid UTF-8 byte sequences
215
+ - clean up `index`'s output (everything is now sent to stderr)
216
+ - show the indexing progress
217
+
211
218
  ### v1.0 (2014-10-05)
212
219
 
213
220
  First public release.
@@ -29,7 +29,7 @@ module MiddleSquid::Helpers
29
29
  # }
30
30
  # }
31
31
  # @param request [Rack::Request] the request to imitate
32
- # @param uri [URI] the resource to fetch
32
+ # @param uri [#to_s] the resource to fetch
33
33
  # @return [Array] a rack triplet (status code, response headers and body)
34
34
  # @see Actions#intercept
35
35
  def download_like(request, uri)
@@ -62,7 +62,11 @@ module MiddleSquid::Helpers
62
62
  }
63
63
 
64
64
  http.errback {
65
- fiber.resume [520, {}, "[MiddleSquid] #{http.error}"]
65
+ fiber.resume [
66
+ 520,
67
+ {'Content-Type' => 'text/plain'},
68
+ "[MiddleSquid] #{http.error}"
69
+ ]
66
70
  }
67
71
 
68
72
  Fiber.yield
@@ -54,8 +54,8 @@ module MiddleSquid
54
54
  # @param directories [Array<String>]
55
55
  def index(directories)
56
56
  if !@full_index && @cats_in_use.empty?
57
- warn 'ERROR: the loaded configuration does not use any blacklist'
58
- puts 'nothing to do in minimal indexing mode'
57
+ oops 'the loaded configuration does not use any blacklist'
58
+ info 'nothing to do in minimal indexing mode'
59
59
  return
60
60
  end
61
61
 
@@ -72,28 +72,41 @@ module MiddleSquid
72
72
  commit_or_rollback
73
73
 
74
74
  end_time = Time.now
75
- puts "finished after #{end_time - start_time} seconds"
75
+ info "finished after #{end_time - start_time} seconds"
76
76
  ensure
77
77
  db.rollback if db.transaction_active?
78
78
  end
79
79
 
80
80
  private
81
- def puts(*args)
82
- super *args unless @quiet
81
+ def output(string, always: false)
82
+ $stderr.print string if always || !@quiet
83
+ end
84
+
85
+ def oops(msg)
86
+ output "ERROR: #{msg}\n", always: true
87
+ end
88
+
89
+ def warn(msg)
90
+ output "WARNING: #{msg}\n", always: true
91
+ end
92
+
93
+ def info(line = "")
94
+ line << "\n"
95
+ output line
83
96
  end
84
97
 
85
98
  def truncate
86
- puts 'truncating database'
99
+ info 'truncating database'
87
100
 
88
101
  db.execute 'DELETE FROM domains'
89
102
  db.execute 'DELETE FROM urls'
90
103
  end
91
104
 
92
105
  def walk_in(directory)
93
- puts "reading #{directory}"
106
+ info "reading #{directory}"
94
107
 
95
108
  unless File.directory? directory
96
- warn "WARNING: #{directory}: no such directory"
109
+ warn "#{directory}: no such directory"
97
110
  return
98
111
  end
99
112
 
@@ -120,24 +133,38 @@ module MiddleSquid
120
133
 
121
134
  @indexed_cats << category
122
135
 
123
- puts "indexing #{dirname}/#{pn.basename}"
136
+ total_size = File.size path
137
+ current_pos = percent = reported = 0
138
+
139
+ status = "\rindexing #{dirname}/#{pn.basename} [%d%%]"
140
+ output status % percent
124
141
 
125
142
  File.foreach(path) {|line|
143
+ current_pos += line.bytesize
144
+ percent = (current_pos.to_f / total_size * 100).to_i
145
+
146
+ if percent != reported
147
+ output status % percent
148
+ reported = percent
149
+ end
150
+
126
151
  type = append_to category, line
127
152
  @total[type] += 1
128
153
  }
154
+
155
+ output "\n"
129
156
  end
130
157
 
131
158
  def append_to(category, line)
159
+ # fix invalid UTF-8 byte sequences
160
+ line.scrub! ''
161
+
132
162
  # remove trailing whitespace
133
163
  line.strip!
134
164
 
135
165
  # ignore regex lists
136
166
  return :ignored unless line[0] =~ /\w/
137
167
 
138
- # fix invalid bytes
139
- line.scrub! ''
140
-
141
168
  # fix for dirty lists
142
169
  line.tr! '\\', '/'
143
170
 
@@ -167,26 +194,26 @@ module MiddleSquid
167
194
  @indexed_cats.uniq!
168
195
  missing_cats = @cats_in_use - @indexed_cats
169
196
 
170
- puts
171
- puts "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"
172
- warn "WARNING: could not find #{missing_cats}" unless missing_cats.empty?
197
+ info
198
+ info "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"
199
+ warn "could not find #{missing_cats}" unless missing_cats.empty?
173
200
  end
174
201
 
175
202
  def stats
176
- puts "found #{@total[:domain]} domain(s)"
177
- puts "found #{@total[:url]} url(s)"
178
- puts "found #{@total[:duplicate]} duplicate(s)"
179
- puts "found #{@total[:ignored]} ignored expression(s)"
180
- puts
203
+ info "found #{@total[:domain]} domain(s)"
204
+ info "found #{@total[:url]} url(s)"
205
+ info "found #{@total[:duplicate]} duplicate(s)"
206
+ info "found #{@total[:ignored]} ignored expression(s)"
207
+ info
181
208
  end
182
209
 
183
210
  def commit_or_rollback
184
211
  if @total[:domain] > 0 || @total[:url] > 0
185
- puts 'committing changes'
212
+ info 'committing changes'
186
213
  db.commit
187
214
  else
188
- warn 'ERROR: nothing to commit'
189
- puts 'reverting changes'
215
+ oops 'nothing to commit'
216
+ info 'reverting changes'
190
217
  db.rollback
191
218
  end
192
219
  end
@@ -1,3 +1,3 @@
1
1
  module MiddleSquid
2
- VERSION = '1.0'
2
+ VERSION = '1.0.1'
3
3
  end
@@ -0,0 +1 @@
1
+ run proc {}
@@ -1 +1,2 @@
1
1
  host.com/path_with_�_invalid_byte
2
+ host.com/invalid�byte
@@ -48,28 +48,30 @@ class TestCLI < MiniTest::Test
48
48
  MiddleSquid::CLI.start(%W[index #{list} -C #{conf} --full])
49
49
  end
50
50
 
51
- assert_match /\Ahello #<MiddleSquid:.+>$/, stdout
52
- assert_match "reading #{list}", stdout
51
+ assert_match /\Ahello #<MiddleSquid:.+>\Z/, stdout
52
+
53
+ assert_match "reading #{list}", stderr
53
54
  end
54
55
 
55
56
  def test_index_relative_path
56
57
  absolute = File.expand_path '../resources', __FILE__
57
58
  path = Pathname.new(absolute).relative_path_from(Pathname.new(Dir.home))
58
59
 
59
- conf = File.join '~', path, 'hello.rb'
60
+ conf = File.join '~', path, 'empty_config.rb'
60
61
  list = File.join '~', path, 'black'
61
62
 
62
63
  stdout, stderr = capture_io do
63
64
  MiddleSquid::CLI.start(%W[index #{list} -C #{conf} --full])
64
65
  end
65
66
 
66
- assert_match /\Ahello #<MiddleSquid:.+>$/, stdout
67
- assert_match "reading #{absolute}/black", stdout
67
+ assert_empty stdout
68
+
69
+ assert_match "reading #{absolute}/black", stderr
68
70
  end
69
71
 
70
72
  def test_index_multiple
71
73
  path = File.expand_path '../resources', __FILE__
72
- config = File.join path, 'hello.rb'
74
+ config = File.join path, 'empty_config.rb'
73
75
  list_1 = File.join path, 'black'
74
76
  list_2 = File.join path, 'gray'
75
77
 
@@ -77,8 +79,10 @@ class TestCLI < MiniTest::Test
77
79
  MiddleSquid::CLI.start(%W[index #{list_1} #{list_2} -C #{config} --full])
78
80
  end
79
81
 
80
- assert_match "reading #{list_1}", stdout
81
- assert_match "reading #{list_2}", stdout
82
+ assert_empty stdout
83
+
84
+ assert_match "reading #{list_1}", stderr
85
+ assert_match "reading #{list_2}", stderr
82
86
  end
83
87
 
84
88
  def test_version
@@ -70,7 +70,20 @@ class TestHelpers < MiniTest::Test
70
70
  assert_requested stub
71
71
 
72
72
  assert_equal 520, status
73
- assert_empty headers
73
+ assert_equal({'Content-Type' => 'text/plain'}, headers)
74
74
  assert_equal '[MiddleSquid] WebMock timeout error', body
75
75
  end
76
+
77
+ def test_download_from_string
78
+ uri = 'http://test.com/' # not a URI object
79
+
80
+ stub = stub_request(:get, uri).
81
+ to_return(:status => 200, :body => '')
82
+
83
+ download_wrapper uri,
84
+ 'REQUEST_METHOD' => 'GET',
85
+ 'rack.input' => StringIO.new
86
+
87
+ assert_requested stub
88
+ end
76
89
  end
@@ -42,22 +42,22 @@ class TestIndexer < MiniTest::Test
42
42
  @obj.index [File.join(@path, 'black')]
43
43
  end
44
44
 
45
- assert_equal "nothing to do in minimal indexing mode\n", stdout
46
-
47
- assert_match 'ERROR', stderr
45
+ assert_equal <<-OUT, stderr
46
+ ERROR: the loaded configuration does not use any blacklist
47
+ nothing to do in minimal indexing mode
48
+ OUT
48
49
 
49
50
  assert has_test_data?
50
51
  end
51
52
 
52
- def test_empty_rollback
53
+ def test_empty_file
53
54
  stdout, stderr = capture_io do
54
55
  @obj.index [File.join(@path, 'empty')]
55
56
  end
56
57
 
57
- assert_match 'indexing cat/emptylist', stdout
58
- assert_match 'reverting changes', stdout
59
-
58
+ assert_match 'indexing cat/emptylist', stderr
60
59
  assert_match 'ERROR: nothing to commit', stderr
60
+ assert_match 'reverting changes', stderr
61
61
 
62
62
  assert has_test_data?
63
63
  end
@@ -84,18 +84,18 @@ class TestIndexer < MiniTest::Test
84
84
  ['tracker', '.cloudfront-labs.amazonaws.com', 'x.png/'],
85
85
  ], urls
86
86
 
87
- assert_match 'indexing ads/urls', stdout
88
- assert_match 'indexing ads/domains', stdout
89
- assert_match 'indexing tracker/urls', stdout
90
- assert_match 'indexing tracker/domains', stdout
91
- assert_match 'indexed 2 categorie(s): ["ads", "tracker"]', stdout
92
- assert_match 'found 4 domain(s)', stdout
93
- assert_match 'found 3 url(s)', stdout
94
- assert_match 'found 0 duplicate(s)', stdout
95
- assert_match 'found 0 ignored expression(s)', stdout
96
- assert_match 'committing changes', stdout
97
-
98
- assert_empty stderr
87
+ assert_match 'indexing ads/urls', stderr
88
+ assert_match 'indexing ads/urls', stderr
89
+ assert_match 'indexing ads/domains', stderr
90
+ assert_match 'indexing ads/domains', stderr
91
+ assert_match 'indexing tracker/urls', stderr
92
+ assert_match 'indexing tracker/domains', stderr
93
+ assert_match 'indexed 2 categorie(s): ["ads", "tracker"]', stderr
94
+ assert_match 'found 4 domain(s)', stderr
95
+ assert_match 'found 3 url(s)', stderr
96
+ assert_match 'found 0 duplicate(s)', stderr
97
+ assert_match 'found 0 ignored expression(s)', stderr
98
+ assert_match 'committing changes', stderr
99
99
  end
100
100
 
101
101
  def test_index_multiple
@@ -128,12 +128,10 @@ class TestIndexer < MiniTest::Test
128
128
  ['isp', '.telus.com', 'content/internet/'],
129
129
  ], urls
130
130
 
131
- assert_match 'indexed 4 categorie(s): ["ads", "tracker", "isp", "news"]', stdout
132
- assert_match 'found 8 domain(s)', stdout
133
- assert_match 'found 4 url(s)', stdout
134
- assert_match 'found 0 duplicate(s)', stdout
135
-
136
- assert_empty stderr
131
+ assert_match 'indexed 4 categorie(s): ["ads", "tracker", "isp", "news"]', stderr
132
+ assert_match 'found 8 domain(s)', stderr
133
+ assert_match 'found 4 url(s)', stderr
134
+ assert_match 'found 0 duplicate(s)', stderr
137
135
  end
138
136
 
139
137
  def test_ignore_subdirectories
@@ -141,7 +139,8 @@ class TestIndexer < MiniTest::Test
141
139
  @obj.index [File.join(@path, 'subdirectory')]
142
140
  end
143
141
 
144
- refute_match 'cat/ignore', stdout
142
+ refute_match 'cat/ignore', stderr
143
+
145
144
  assert has_test_data?
146
145
  end
147
146
 
@@ -166,10 +165,8 @@ class TestIndexer < MiniTest::Test
166
165
  ['ads', '.google.com', 'adsense/'],
167
166
  ], urls
168
167
 
169
- refute_match 'tracker', stdout
170
- assert_match 'indexed 1 categorie(s): ["ads"]', stdout
171
-
172
- assert_empty stderr
168
+ refute_match 'tracker', stderr
169
+ assert_match 'indexed 1 categorie(s): ["ads"]', stderr
173
170
  end
174
171
 
175
172
  def test_not_found
@@ -179,7 +176,7 @@ class TestIndexer < MiniTest::Test
179
176
 
180
177
  assert has_test_data?
181
178
 
182
- assert_match "reading #{File.join @path, '404'}", stdout
179
+ assert_match "reading #{File.join @path, '404'}", stderr
183
180
 
184
181
  assert_match "WARNING: #{File.join @path, '404'}: no such directory\n", stderr
185
182
  assert_match "ERROR: nothing to commit", stderr
@@ -195,8 +192,8 @@ class TestIndexer < MiniTest::Test
195
192
 
196
193
  refute has_test_data?
197
194
 
198
- assert_match "reading #{File.join @path, '404'}", stdout
199
- assert_match "reading #{File.join @path, 'gray'}", stdout
195
+ assert_match "reading #{File.join @path, '404'}", stderr
196
+ assert_match "reading #{File.join @path, 'gray'}", stderr
200
197
 
201
198
  assert_match "WARNING: #{File.join @path, '404'}: no such directory\n", stderr
202
199
  end
@@ -248,6 +245,7 @@ class TestIndexer < MiniTest::Test
248
245
  urls = db.execute 'SELECT category, host, path FROM urls'
249
246
  assert_equal [
250
247
  ['cat', '.host.com', 'path_with__invalid_byte/'],
248
+ ['cat', '.host.com', 'invalidbyte/'],
251
249
  ], urls
252
250
  end
253
251
 
@@ -289,10 +287,8 @@ class TestIndexer < MiniTest::Test
289
287
  ['copy_of_cat', '.host.com', 'path/'],
290
288
  ], urls
291
289
 
292
- assert_match 'found 12 duplicate(s)', stdout
293
- assert_match 'found 0 ignored expression(s)', stdout
294
-
295
- assert_empty stderr
290
+ assert_match 'found 12 duplicate(s)', stderr
291
+ assert_match 'found 0 ignored expression(s)', stderr
296
292
  end
297
293
 
298
294
  def test_missing_category
@@ -315,7 +311,7 @@ class TestIndexer < MiniTest::Test
315
311
 
316
312
  assert has_test_data?
317
313
 
318
- assert_match 'found 3 ignored expression(s)', stdout
314
+ assert_match 'found 3 ignored expression(s)', stderr
319
315
  assert_match 'ERROR: nothing to commit', stderr
320
316
  end
321
317
 
@@ -342,11 +338,9 @@ class TestIndexer < MiniTest::Test
342
338
  ['cat_name', '.google.com', 'adsense/'],
343
339
  ], urls
344
340
 
345
- refute_match 'tracker', stdout
346
- assert_match 'indexing ads/', stdout
347
- assert_match 'indexed 1 categorie(s): ["cat_name"]', stdout
348
-
349
- assert_empty stderr
341
+ refute_match 'tracker', stderr
342
+ assert_match 'indexing ads/', stderr
343
+ assert_match 'indexed 1 categorie(s): ["cat_name"]', stderr
350
344
  end
351
345
 
352
346
  def test_domains_only
@@ -369,11 +363,9 @@ class TestIndexer < MiniTest::Test
369
363
  urls = db.execute 'SELECT category, host, path FROM urls'
370
364
  assert_empty urls
371
365
 
372
- assert_match 'found 4 domain(s)', stdout
373
- assert_match 'found 0 url(s)', stdout
374
- assert_match 'found 3 ignored expression(s)', stdout
375
-
376
- assert_empty stderr
366
+ assert_match 'found 4 domain(s)', stderr
367
+ assert_match 'found 0 url(s)', stderr
368
+ assert_match 'found 3 ignored expression(s)', stderr
377
369
  end
378
370
 
379
371
  def test_urls_only
@@ -395,11 +387,9 @@ class TestIndexer < MiniTest::Test
395
387
  ['tracker', '.cloudfront-labs.amazonaws.com', 'x.png/'],
396
388
  ], urls
397
389
 
398
- assert_match 'found 0 domain(s)', stdout
399
- assert_match 'found 3 url(s)', stdout
400
- assert_match 'found 4 ignored expression(s)', stdout
401
-
402
- assert_empty stderr
390
+ assert_match 'found 0 domain(s)', stderr
391
+ assert_match 'found 3 url(s)', stderr
392
+ assert_match 'found 4 ignored expression(s)', stderr
403
393
  end
404
394
 
405
395
  def test_append
@@ -410,10 +400,10 @@ class TestIndexer < MiniTest::Test
410
400
  end
411
401
 
412
402
  assert has_test_data?, 'should not be truncated'
413
- refute_match 'truncating', stdout
414
403
 
415
- assert_match 'found 4 domain(s)', stdout
416
- assert_match 'found 3 url(s)', stdout
404
+ refute_match 'truncating', stderr
405
+ assert_match 'found 4 domain(s)', stderr
406
+ assert_match 'found 3 url(s)', stderr
417
407
  end
418
408
 
419
409
  def test_quiet
@@ -454,4 +444,19 @@ class TestIndexer < MiniTest::Test
454
444
  ['cat', '.after.com', 'path/'],
455
445
  ], urls
456
446
  end
447
+
448
+ def test_progress
449
+ stdout, stderr = capture_io do
450
+ @obj.index [File.join(@path, 'black')]
451
+ end
452
+
453
+ lines = stderr.lines.select {|l| l =~ /indexing/ }
454
+
455
+ assert_equal [
456
+ "\rindexing ads/domains [0%]\rindexing ads/domains [48%]\rindexing ads/domains [100%]\n",
457
+ "\rindexing ads/urls [0%]\rindexing ads/urls [100%]\n",
458
+ "\rindexing tracker/domains [0%]\rindexing tracker/domains [30%]\rindexing tracker/domains [100%]\n",
459
+ "\rindexing tracker/urls [0%]\rindexing tracker/urls [40%]\rindexing tracker/urls [100%]\n",
460
+ ], lines
461
+ end
457
462
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: middle_squid
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Fillion
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-06 00:00:00.000000000 Z
11
+ date: 2014-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -270,6 +270,7 @@ files:
270
270
  - test/resources/duplicates/copy_of_cat/copy_of_list
271
271
  - test/resources/duplicates/copy_of_cat/list
272
272
  - test/resources/empty/cat/emptylist
273
+ - test/resources/empty_config.rb
273
274
  - test/resources/empty_path/cat/list
274
275
  - test/resources/expressions/cat/list
275
276
  - test/resources/gray/isp/domains
@@ -335,6 +336,7 @@ test_files:
335
336
  - test/resources/duplicates/copy_of_cat/copy_of_list
336
337
  - test/resources/duplicates/copy_of_cat/list
337
338
  - test/resources/empty/cat/emptylist
339
+ - test/resources/empty_config.rb
338
340
  - test/resources/empty_path/cat/list
339
341
  - test/resources/expressions/cat/list
340
342
  - test/resources/gray/isp/domains