middle_squid 1.0 → 1.0.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8afc13446c5543d835fc77291d1a08a124fc222f
4
- data.tar.gz: 93d0646721e4cf660b75670f71cc18eef08a371a
3
+ metadata.gz: 516bbcd321414bdfd91755211912e7edb9a7533a
4
+ data.tar.gz: ff8669daab5db30a2f25810ae40a13feed93fbd7
5
5
  SHA512:
6
- metadata.gz: 830df42d71fab5086d17f6393ef1a4fdf7d22c8a67a95463439836bc07ed26fafca22772d07e6123d4d7735f5218ae1fc7382930868c28ce4a2d3c4ec497520f
7
- data.tar.gz: 95401e707dbe31ef3430c51fe8e65bbed16ec6555b1a4b142e806dfab53dde70f0c7b2fe9feecadd3de58949db0b4a3d88760d88652b3f27f1cc379d3e284bf5
6
+ metadata.gz: a62589c10bccafeb51c082f53552dd97bb566489c6be203f1eed1b3aff846f4111e4a4a5cc931b16cb502b6228596ef2650f33073fbc2094dac3d99effb16ef7
7
+ data.tar.gz: e473a55bc36ad604a4f1e7efd9c492d42a2ee08971f493484e1028f7ad8f83c1fcd40c1f61647261f4145f0c09f27384b1bfd9ba1b06d940df6323ca09e48466
data/README.md CHANGED
@@ -147,7 +147,7 @@ tar xzf shallalist.tar.gz
147
147
  mv BL ShallaBlackList
148
148
 
149
149
  # Construct the blacklist database
150
- /usr/local/bin/middle_squid_wrapper.sh index ShallaBlackList -C /etc/squid/middle_squid.rb
150
+ /usr/local/bin/middle_squid_wrapper.sh index ShallaBlackList -C /home/proxy/middle_squid_config.rb
151
151
 
152
152
  exit
153
153
  ```
@@ -208,6 +208,13 @@ MiddleSquid's documentation is hosted at
208
208
 
209
209
  ## Changelog
210
210
 
211
+ ### v1.0.1 (2014-11-06)
212
+
213
+ - send download errors as text/plain
214
+ - fix a crash when reading invalid UTF-8 byte sequences
215
+ - cleanup `index`'s output (everything is now sent to stderr)
216
+ - show the indexing progress
217
+
211
218
  ### v1.0 (2014-10-05)
212
219
 
213
220
  First public release.
@@ -29,7 +29,7 @@ module MiddleSquid::Helpers
29
29
  # }
30
30
  # }
31
31
  # @param request [Rack::Request] the request to imitate
32
- # @param uri [URI] the resource to fetch
32
+ # @param uri [#to_s] the resource to fetch
33
33
  # @return [Array] a rack triplet (status code, response headers and body)
34
34
  # @see Actions#intercept
35
35
  def download_like(request, uri)
@@ -62,7 +62,11 @@ module MiddleSquid::Helpers
62
62
  }
63
63
 
64
64
  http.errback {
65
- fiber.resume [520, {}, "[MiddleSquid] #{http.error}"]
65
+ fiber.resume [
66
+ 520,
67
+ {'Content-Type' => 'text/plain'},
68
+ "[MiddleSquid] #{http.error}"
69
+ ]
66
70
  }
67
71
 
68
72
  Fiber.yield
@@ -54,8 +54,8 @@ module MiddleSquid
54
54
  # @param directories [Array<String>]
55
55
  def index(directories)
56
56
  if !@full_index && @cats_in_use.empty?
57
- warn 'ERROR: the loaded configuration does not use any blacklist'
58
- puts 'nothing to do in minimal indexing mode'
57
+ oops 'the loaded configuration does not use any blacklist'
58
+ info 'nothing to do in minimal indexing mode'
59
59
  return
60
60
  end
61
61
 
@@ -72,28 +72,41 @@ module MiddleSquid
72
72
  commit_or_rollback
73
73
 
74
74
  end_time = Time.now
75
- puts "finished after #{end_time - start_time} seconds"
75
+ info "finished after #{end_time - start_time} seconds"
76
76
  ensure
77
77
  db.rollback if db.transaction_active?
78
78
  end
79
79
 
80
80
  private
81
- def puts(*args)
82
- super *args unless @quiet
81
+ def output(string, always: false)
82
+ $stderr.print string if always || !@quiet
83
+ end
84
+
85
+ def oops(msg)
86
+ output "ERROR: #{msg}\n", always: true
87
+ end
88
+
89
+ def warn(msg)
90
+ output "WARNING: #{msg}\n", always: true
91
+ end
92
+
93
+ def info(line = "")
94
+ line << "\n"
95
+ output line
83
96
  end
84
97
 
85
98
  def truncate
86
- puts 'truncating database'
99
+ info 'truncating database'
87
100
 
88
101
  db.execute 'DELETE FROM domains'
89
102
  db.execute 'DELETE FROM urls'
90
103
  end
91
104
 
92
105
  def walk_in(directory)
93
- puts "reading #{directory}"
106
+ info "reading #{directory}"
94
107
 
95
108
  unless File.directory? directory
96
- warn "WARNING: #{directory}: no such directory"
109
+ warn "#{directory}: no such directory"
97
110
  return
98
111
  end
99
112
 
@@ -120,24 +133,38 @@ module MiddleSquid
120
133
 
121
134
  @indexed_cats << category
122
135
 
123
- puts "indexing #{dirname}/#{pn.basename}"
136
+ total_size = File.size path
137
+ current_pos = percent = reported = 0
138
+
139
+ status = "\rindexing #{dirname}/#{pn.basename} [%d%%]"
140
+ output status % percent
124
141
 
125
142
  File.foreach(path) {|line|
143
+ current_pos += line.bytesize
144
+ percent = (current_pos.to_f / total_size * 100).to_i
145
+
146
+ if percent != reported
147
+ output status % percent
148
+ reported = percent
149
+ end
150
+
126
151
  type = append_to category, line
127
152
  @total[type] += 1
128
153
  }
154
+
155
+ output "\n"
129
156
  end
130
157
 
131
158
  def append_to(category, line)
159
+ # fix invalid UTF-8 byte sequences
160
+ line.scrub! ''
161
+
132
162
  # remove trailing whitespace
133
163
  line.strip!
134
164
 
135
165
  # ignore regex lists
136
166
  return :ignored unless line[0] =~ /\w/
137
167
 
138
- # fix invalid bytes
139
- line.scrub! ''
140
-
141
168
  # fix for dirty lists
142
169
  line.tr! '\\', '/'
143
170
 
@@ -167,26 +194,26 @@ module MiddleSquid
167
194
  @indexed_cats.uniq!
168
195
  missing_cats = @cats_in_use - @indexed_cats
169
196
 
170
- puts
171
- puts "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"
172
- warn "WARNING: could not find #{missing_cats}" unless missing_cats.empty?
197
+ info
198
+ info "indexed #{@indexed_cats.size} categorie(s): #{@indexed_cats}"
199
+ warn "could not find #{missing_cats}" unless missing_cats.empty?
173
200
  end
174
201
 
175
202
  def stats
176
- puts "found #{@total[:domain]} domain(s)"
177
- puts "found #{@total[:url]} url(s)"
178
- puts "found #{@total[:duplicate]} duplicate(s)"
179
- puts "found #{@total[:ignored]} ignored expression(s)"
180
- puts
203
+ info "found #{@total[:domain]} domain(s)"
204
+ info "found #{@total[:url]} url(s)"
205
+ info "found #{@total[:duplicate]} duplicate(s)"
206
+ info "found #{@total[:ignored]} ignored expression(s)"
207
+ info
181
208
  end
182
209
 
183
210
  def commit_or_rollback
184
211
  if @total[:domain] > 0 || @total[:url] > 0
185
- puts 'committing changes'
212
+ info 'committing changes'
186
213
  db.commit
187
214
  else
188
- warn 'ERROR: nothing to commit'
189
- puts 'reverting changes'
215
+ oops 'nothing to commit'
216
+ info 'reverting changes'
190
217
  db.rollback
191
218
  end
192
219
  end
@@ -1,3 +1,3 @@
1
1
  module MiddleSquid
2
- VERSION = '1.0'
2
+ VERSION = '1.0.1'
3
3
  end
@@ -0,0 +1 @@
1
+ run proc {}
@@ -1 +1,2 @@
1
1
  host.com/path_with_�_invalid_byte
2
+ host.com/invalid�byte
@@ -48,28 +48,30 @@ class TestCLI < MiniTest::Test
48
48
  MiddleSquid::CLI.start(%W[index #{list} -C #{conf} --full])
49
49
  end
50
50
 
51
- assert_match /\Ahello #<MiddleSquid:.+>$/, stdout
52
- assert_match "reading #{list}", stdout
51
+ assert_match /\Ahello #<MiddleSquid:.+>\Z/, stdout
52
+
53
+ assert_match "reading #{list}", stderr
53
54
  end
54
55
 
55
56
  def test_index_relative_path
56
57
  absolute = File.expand_path '../resources', __FILE__
57
58
  path = Pathname.new(absolute).relative_path_from(Pathname.new(Dir.home))
58
59
 
59
- conf = File.join '~', path, 'hello.rb'
60
+ conf = File.join '~', path, 'empty_config.rb'
60
61
  list = File.join '~', path, 'black'
61
62
 
62
63
  stdout, stderr = capture_io do
63
64
  MiddleSquid::CLI.start(%W[index #{list} -C #{conf} --full])
64
65
  end
65
66
 
66
- assert_match /\Ahello #<MiddleSquid:.+>$/, stdout
67
- assert_match "reading #{absolute}/black", stdout
67
+ assert_empty stdout
68
+
69
+ assert_match "reading #{absolute}/black", stderr
68
70
  end
69
71
 
70
72
  def test_index_multiple
71
73
  path = File.expand_path '../resources', __FILE__
72
- config = File.join path, 'hello.rb'
74
+ config = File.join path, 'empty_config.rb'
73
75
  list_1 = File.join path, 'black'
74
76
  list_2 = File.join path, 'gray'
75
77
 
@@ -77,8 +79,10 @@ class TestCLI < MiniTest::Test
77
79
  MiddleSquid::CLI.start(%W[index #{list_1} #{list_2} -C #{config} --full])
78
80
  end
79
81
 
80
- assert_match "reading #{list_1}", stdout
81
- assert_match "reading #{list_2}", stdout
82
+ assert_empty stdout
83
+
84
+ assert_match "reading #{list_1}", stderr
85
+ assert_match "reading #{list_2}", stderr
82
86
  end
83
87
 
84
88
  def test_version
@@ -70,7 +70,20 @@ class TestHelpers < MiniTest::Test
70
70
  assert_requested stub
71
71
 
72
72
  assert_equal 520, status
73
- assert_empty headers
73
+ assert_equal({'Content-Type' => 'text/plain'}, headers)
74
74
  assert_equal '[MiddleSquid] WebMock timeout error', body
75
75
  end
76
+
77
+ def test_download_from_string
78
+ uri = 'http://test.com/' # not a URI object
79
+
80
+ stub = stub_request(:get, uri).
81
+ to_return(:status => 200, :body => '')
82
+
83
+ download_wrapper uri,
84
+ 'REQUEST_METHOD' => 'GET',
85
+ 'rack.input' => StringIO.new
86
+
87
+ assert_requested stub
88
+ end
76
89
  end
@@ -42,22 +42,22 @@ class TestIndexer < MiniTest::Test
42
42
  @obj.index [File.join(@path, 'black')]
43
43
  end
44
44
 
45
- assert_equal "nothing to do in minimal indexing mode\n", stdout
46
-
47
- assert_match 'ERROR', stderr
45
+ assert_equal <<-OUT, stderr
46
+ ERROR: the loaded configuration does not use any blacklist
47
+ nothing to do in minimal indexing mode
48
+ OUT
48
49
 
49
50
  assert has_test_data?
50
51
  end
51
52
 
52
- def test_empty_rollback
53
+ def test_empty_file
53
54
  stdout, stderr = capture_io do
54
55
  @obj.index [File.join(@path, 'empty')]
55
56
  end
56
57
 
57
- assert_match 'indexing cat/emptylist', stdout
58
- assert_match 'reverting changes', stdout
59
-
58
+ assert_match 'indexing cat/emptylist', stderr
60
59
  assert_match 'ERROR: nothing to commit', stderr
60
+ assert_match 'reverting changes', stderr
61
61
 
62
62
  assert has_test_data?
63
63
  end
@@ -84,18 +84,18 @@ class TestIndexer < MiniTest::Test
84
84
  ['tracker', '.cloudfront-labs.amazonaws.com', 'x.png/'],
85
85
  ], urls
86
86
 
87
- assert_match 'indexing ads/urls', stdout
88
- assert_match 'indexing ads/domains', stdout
89
- assert_match 'indexing tracker/urls', stdout
90
- assert_match 'indexing tracker/domains', stdout
91
- assert_match 'indexed 2 categorie(s): ["ads", "tracker"]', stdout
92
- assert_match 'found 4 domain(s)', stdout
93
- assert_match 'found 3 url(s)', stdout
94
- assert_match 'found 0 duplicate(s)', stdout
95
- assert_match 'found 0 ignored expression(s)', stdout
96
- assert_match 'committing changes', stdout
97
-
98
- assert_empty stderr
87
+ assert_match 'indexing ads/urls', stderr
88
+ assert_match 'indexing ads/urls', stderr
89
+ assert_match 'indexing ads/domains', stderr
90
+ assert_match 'indexing ads/domains', stderr
91
+ assert_match 'indexing tracker/urls', stderr
92
+ assert_match 'indexing tracker/domains', stderr
93
+ assert_match 'indexed 2 categorie(s): ["ads", "tracker"]', stderr
94
+ assert_match 'found 4 domain(s)', stderr
95
+ assert_match 'found 3 url(s)', stderr
96
+ assert_match 'found 0 duplicate(s)', stderr
97
+ assert_match 'found 0 ignored expression(s)', stderr
98
+ assert_match 'committing changes', stderr
99
99
  end
100
100
 
101
101
  def test_index_multiple
@@ -128,12 +128,10 @@ class TestIndexer < MiniTest::Test
128
128
  ['isp', '.telus.com', 'content/internet/'],
129
129
  ], urls
130
130
 
131
- assert_match 'indexed 4 categorie(s): ["ads", "tracker", "isp", "news"]', stdout
132
- assert_match 'found 8 domain(s)', stdout
133
- assert_match 'found 4 url(s)', stdout
134
- assert_match 'found 0 duplicate(s)', stdout
135
-
136
- assert_empty stderr
131
+ assert_match 'indexed 4 categorie(s): ["ads", "tracker", "isp", "news"]', stderr
132
+ assert_match 'found 8 domain(s)', stderr
133
+ assert_match 'found 4 url(s)', stderr
134
+ assert_match 'found 0 duplicate(s)', stderr
137
135
  end
138
136
 
139
137
  def test_ignore_subdirectories
@@ -141,7 +139,8 @@ class TestIndexer < MiniTest::Test
141
139
  @obj.index [File.join(@path, 'subdirectory')]
142
140
  end
143
141
 
144
- refute_match 'cat/ignore', stdout
142
+ refute_match 'cat/ignore', stderr
143
+
145
144
  assert has_test_data?
146
145
  end
147
146
 
@@ -166,10 +165,8 @@ class TestIndexer < MiniTest::Test
166
165
  ['ads', '.google.com', 'adsense/'],
167
166
  ], urls
168
167
 
169
- refute_match 'tracker', stdout
170
- assert_match 'indexed 1 categorie(s): ["ads"]', stdout
171
-
172
- assert_empty stderr
168
+ refute_match 'tracker', stderr
169
+ assert_match 'indexed 1 categorie(s): ["ads"]', stderr
173
170
  end
174
171
 
175
172
  def test_not_found
@@ -179,7 +176,7 @@ class TestIndexer < MiniTest::Test
179
176
 
180
177
  assert has_test_data?
181
178
 
182
- assert_match "reading #{File.join @path, '404'}", stdout
179
+ assert_match "reading #{File.join @path, '404'}", stderr
183
180
 
184
181
  assert_match "WARNING: #{File.join @path, '404'}: no such directory\n", stderr
185
182
  assert_match "ERROR: nothing to commit", stderr
@@ -195,8 +192,8 @@ class TestIndexer < MiniTest::Test
195
192
 
196
193
  refute has_test_data?
197
194
 
198
- assert_match "reading #{File.join @path, '404'}", stdout
199
- assert_match "reading #{File.join @path, 'gray'}", stdout
195
+ assert_match "reading #{File.join @path, '404'}", stderr
196
+ assert_match "reading #{File.join @path, 'gray'}", stderr
200
197
 
201
198
  assert_match "WARNING: #{File.join @path, '404'}: no such directory\n", stderr
202
199
  end
@@ -248,6 +245,7 @@ class TestIndexer < MiniTest::Test
248
245
  urls = db.execute 'SELECT category, host, path FROM urls'
249
246
  assert_equal [
250
247
  ['cat', '.host.com', 'path_with__invalid_byte/'],
248
+ ['cat', '.host.com', 'invalidbyte/'],
251
249
  ], urls
252
250
  end
253
251
 
@@ -289,10 +287,8 @@ class TestIndexer < MiniTest::Test
289
287
  ['copy_of_cat', '.host.com', 'path/'],
290
288
  ], urls
291
289
 
292
- assert_match 'found 12 duplicate(s)', stdout
293
- assert_match 'found 0 ignored expression(s)', stdout
294
-
295
- assert_empty stderr
290
+ assert_match 'found 12 duplicate(s)', stderr
291
+ assert_match 'found 0 ignored expression(s)', stderr
296
292
  end
297
293
 
298
294
  def test_missing_category
@@ -315,7 +311,7 @@ class TestIndexer < MiniTest::Test
315
311
 
316
312
  assert has_test_data?
317
313
 
318
- assert_match 'found 3 ignored expression(s)', stdout
314
+ assert_match 'found 3 ignored expression(s)', stderr
319
315
  assert_match 'ERROR: nothing to commit', stderr
320
316
  end
321
317
 
@@ -342,11 +338,9 @@ class TestIndexer < MiniTest::Test
342
338
  ['cat_name', '.google.com', 'adsense/'],
343
339
  ], urls
344
340
 
345
- refute_match 'tracker', stdout
346
- assert_match 'indexing ads/', stdout
347
- assert_match 'indexed 1 categorie(s): ["cat_name"]', stdout
348
-
349
- assert_empty stderr
341
+ refute_match 'tracker', stderr
342
+ assert_match 'indexing ads/', stderr
343
+ assert_match 'indexed 1 categorie(s): ["cat_name"]', stderr
350
344
  end
351
345
 
352
346
  def test_domains_only
@@ -369,11 +363,9 @@ class TestIndexer < MiniTest::Test
369
363
  urls = db.execute 'SELECT category, host, path FROM urls'
370
364
  assert_empty urls
371
365
 
372
- assert_match 'found 4 domain(s)', stdout
373
- assert_match 'found 0 url(s)', stdout
374
- assert_match 'found 3 ignored expression(s)', stdout
375
-
376
- assert_empty stderr
366
+ assert_match 'found 4 domain(s)', stderr
367
+ assert_match 'found 0 url(s)', stderr
368
+ assert_match 'found 3 ignored expression(s)', stderr
377
369
  end
378
370
 
379
371
  def test_urls_only
@@ -395,11 +387,9 @@ class TestIndexer < MiniTest::Test
395
387
  ['tracker', '.cloudfront-labs.amazonaws.com', 'x.png/'],
396
388
  ], urls
397
389
 
398
- assert_match 'found 0 domain(s)', stdout
399
- assert_match 'found 3 url(s)', stdout
400
- assert_match 'found 4 ignored expression(s)', stdout
401
-
402
- assert_empty stderr
390
+ assert_match 'found 0 domain(s)', stderr
391
+ assert_match 'found 3 url(s)', stderr
392
+ assert_match 'found 4 ignored expression(s)', stderr
403
393
  end
404
394
 
405
395
  def test_append
@@ -410,10 +400,10 @@ class TestIndexer < MiniTest::Test
410
400
  end
411
401
 
412
402
  assert has_test_data?, 'should not be truncated'
413
- refute_match 'truncating', stdout
414
403
 
415
- assert_match 'found 4 domain(s)', stdout
416
- assert_match 'found 3 url(s)', stdout
404
+ refute_match 'truncating', stderr
405
+ assert_match 'found 4 domain(s)', stderr
406
+ assert_match 'found 3 url(s)', stderr
417
407
  end
418
408
 
419
409
  def test_quiet
@@ -454,4 +444,19 @@ class TestIndexer < MiniTest::Test
454
444
  ['cat', '.after.com', 'path/'],
455
445
  ], urls
456
446
  end
447
+
448
+ def test_progress
449
+ stdout, stderr = capture_io do
450
+ @obj.index [File.join(@path, 'black')]
451
+ end
452
+
453
+ lines = stderr.lines.select {|l| l =~ /indexing/ }
454
+
455
+ assert_equal [
456
+ "\rindexing ads/domains [0%]\rindexing ads/domains [48%]\rindexing ads/domains [100%]\n",
457
+ "\rindexing ads/urls [0%]\rindexing ads/urls [100%]\n",
458
+ "\rindexing tracker/domains [0%]\rindexing tracker/domains [30%]\rindexing tracker/domains [100%]\n",
459
+ "\rindexing tracker/urls [0%]\rindexing tracker/urls [40%]\rindexing tracker/urls [100%]\n",
460
+ ], lines
461
+ end
457
462
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: middle_squid
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.0'
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Fillion
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-06 00:00:00.000000000 Z
11
+ date: 2014-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -270,6 +270,7 @@ files:
270
270
  - test/resources/duplicates/copy_of_cat/copy_of_list
271
271
  - test/resources/duplicates/copy_of_cat/list
272
272
  - test/resources/empty/cat/emptylist
273
+ - test/resources/empty_config.rb
273
274
  - test/resources/empty_path/cat/list
274
275
  - test/resources/expressions/cat/list
275
276
  - test/resources/gray/isp/domains
@@ -335,6 +336,7 @@ test_files:
335
336
  - test/resources/duplicates/copy_of_cat/copy_of_list
336
337
  - test/resources/duplicates/copy_of_cat/list
337
338
  - test/resources/empty/cat/emptylist
339
+ - test/resources/empty_config.rb
338
340
  - test/resources/empty_path/cat/list
339
341
  - test/resources/expressions/cat/list
340
342
  - test/resources/gray/isp/domains