spidr 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.editorconfig +11 -0
- data/.github/workflows/ruby.yml +26 -0
- data/.gitignore +4 -5
- data/ChangeLog.md +17 -0
- data/Gemfile +8 -5
- data/LICENSE.txt +1 -1
- data/README.md +137 -78
- data/Rakefile +1 -0
- data/gemspec.yml +8 -1
- data/lib/spidr/agent/actions.rb +1 -1
- data/lib/spidr/agent/events.rb +1 -1
- data/lib/spidr/agent/filters.rb +55 -56
- data/lib/spidr/agent/sanitizers.rb +6 -9
- data/lib/spidr/agent.rb +230 -120
- data/lib/spidr/auth_store.rb +10 -6
- data/lib/spidr/page/content_types.rb +51 -0
- data/lib/spidr/page/html.rb +17 -19
- data/lib/spidr/page/status_codes.rb +12 -10
- data/lib/spidr/proxy.rb +6 -14
- data/lib/spidr/rules.rb +5 -8
- data/lib/spidr/session_cache.rb +23 -21
- data/lib/spidr/settings/proxy.rb +19 -5
- data/lib/spidr/spidr.rb +16 -6
- data/lib/spidr/version.rb +1 -1
- data/spec/agent_spec.rb +357 -10
- data/spec/example_page.rb +2 -0
- data/spec/page/content_types_spec.rb +22 -0
- data/spec/page/html_spec.rb +255 -51
- data/spec/page/status_codes_spec.rb +4 -4
- data/spec/proxy_spec.rb +2 -2
- data/spec/settings/proxy_examples.rb +31 -11
- data/spec/spec_helper.rb +3 -0
- metadata +19 -19
- data/.travis.yml +0 -14
data/spec/page/html_spec.rb
CHANGED
@@ -297,7 +297,7 @@ describe Page do
|
|
297
297
|
context "when the page contains iframes" do
|
298
298
|
let(:iframe1) { '/iframe1' }
|
299
299
|
let(:iframe2) { '/iframe2' }
|
300
|
-
let(:body) { %{<html><body><iframe src="#{iframe1}"
|
300
|
+
let(:body) { %{<html><body><iframe src="#{iframe1}"></iframe><iframe src="#{iframe2}"></iframe></body></html>} }
|
301
301
|
|
302
302
|
it "should yield each iframe/@src value" do
|
303
303
|
expect { |b|
|
@@ -332,32 +332,100 @@ describe Page do
|
|
332
332
|
end
|
333
333
|
|
334
334
|
describe "#links" do
|
335
|
-
context "when the page contains
|
335
|
+
context "when the page contains an 'a' link" do
|
336
336
|
let(:link) { '/link' }
|
337
|
+
let(:body) do
|
338
|
+
<<-HTML
|
339
|
+
<html>
|
340
|
+
<body>
|
341
|
+
<a href="#{link}">link</a>
|
342
|
+
</body>
|
343
|
+
</html>
|
344
|
+
HTML
|
345
|
+
end
|
346
|
+
|
347
|
+
it "should return an Array of links" do
|
348
|
+
expect(subject.links).to be == [
|
349
|
+
link
|
350
|
+
]
|
351
|
+
end
|
352
|
+
end
|
353
|
+
|
354
|
+
context "when the page contains a 'frame'" do
|
337
355
|
let(:frame) { '/frame' }
|
356
|
+
let(:body) do
|
357
|
+
<<-HTML
|
358
|
+
<html>
|
359
|
+
<frameset>
|
360
|
+
<frame src="#{frame}"></frame>
|
361
|
+
</frameset>
|
362
|
+
</html>
|
363
|
+
HTML
|
364
|
+
end
|
365
|
+
|
366
|
+
it "should return an Array of links" do
|
367
|
+
expect(subject.links).to be == [
|
368
|
+
frame
|
369
|
+
]
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
context "when the page contains a 'iframe'" do
|
338
374
|
let(:iframe) { '/iframe' }
|
375
|
+
let(:body) do
|
376
|
+
<<-HTML
|
377
|
+
<html>
|
378
|
+
<body>
|
379
|
+
<iframe src="#{iframe}"></iframe>
|
380
|
+
</body>
|
381
|
+
</html>
|
382
|
+
HTML
|
383
|
+
end
|
384
|
+
|
385
|
+
it "should return an Array of links" do
|
386
|
+
expect(subject.links).to be == [
|
387
|
+
iframe
|
388
|
+
]
|
389
|
+
end
|
390
|
+
end
|
391
|
+
|
392
|
+
context "when the page contains a 'link' element" do
|
339
393
|
let(:stylesheet) { '/stylesheet.css' }
|
394
|
+
let(:body) do
|
395
|
+
<<-HTML
|
396
|
+
<html>
|
397
|
+
<head>
|
398
|
+
<link type="stylesheet" href="#{stylesheet}" />
|
399
|
+
</head>
|
400
|
+
<body>
|
401
|
+
</body>
|
402
|
+
</html>
|
403
|
+
HTML
|
404
|
+
end
|
405
|
+
|
406
|
+
it "should return an Array of links" do
|
407
|
+
expect(subject.links).to be == [
|
408
|
+
stylesheet
|
409
|
+
]
|
410
|
+
end
|
411
|
+
end
|
412
|
+
|
413
|
+
context "when the page contains a 'script' element" do
|
340
414
|
let(:javascript) { '/script.js' }
|
341
415
|
let(:body) do
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
%{</body>} +
|
352
|
-
%{</html>}
|
416
|
+
<<-HTML
|
417
|
+
<html>
|
418
|
+
<head>
|
419
|
+
<script src="#{javascript}" />
|
420
|
+
</head>
|
421
|
+
<body>
|
422
|
+
</body>
|
423
|
+
</html>
|
424
|
+
HTML
|
353
425
|
end
|
354
426
|
|
355
427
|
it "should return an Array of links" do
|
356
428
|
expect(subject.links).to be == [
|
357
|
-
link,
|
358
|
-
frame,
|
359
|
-
iframe,
|
360
|
-
stylesheet,
|
361
429
|
javascript
|
362
430
|
]
|
363
431
|
end
|
@@ -369,32 +437,100 @@ describe Page do
|
|
369
437
|
end
|
370
438
|
|
371
439
|
describe "#each_url" do
|
372
|
-
context "when the page contains
|
440
|
+
context "when the page contains an 'a' link" do
|
373
441
|
let(:link) { '/link' }
|
442
|
+
let(:body) do
|
443
|
+
<<-HTML
|
444
|
+
<html>
|
445
|
+
<body>
|
446
|
+
<a href="#{link}">link</a>
|
447
|
+
</body>
|
448
|
+
</html>
|
449
|
+
HTML
|
450
|
+
end
|
451
|
+
|
452
|
+
it "should yield successive absolute URIs" do
|
453
|
+
expect { |b| subject.each_url(&b) }.to yield_successive_args(
|
454
|
+
URI("http://#{host}#{link}")
|
455
|
+
)
|
456
|
+
end
|
457
|
+
end
|
458
|
+
|
459
|
+
context "when the page contains a 'frame'" do
|
374
460
|
let(:frame) { '/frame' }
|
461
|
+
let(:body) do
|
462
|
+
<<-HTML
|
463
|
+
<html>
|
464
|
+
<frameset>
|
465
|
+
<frame src="#{frame}"></frame>
|
466
|
+
</frameset>
|
467
|
+
</html>
|
468
|
+
HTML
|
469
|
+
end
|
470
|
+
|
471
|
+
it "should yield successive absolute URIs" do
|
472
|
+
expect { |b| subject.each_url(&b) }.to yield_successive_args(
|
473
|
+
URI("http://#{host}#{frame}")
|
474
|
+
)
|
475
|
+
end
|
476
|
+
end
|
477
|
+
|
478
|
+
context "when the page contains a 'iframe'" do
|
375
479
|
let(:iframe) { '/iframe' }
|
480
|
+
let(:body) do
|
481
|
+
<<-HTML
|
482
|
+
<html>
|
483
|
+
<body>
|
484
|
+
<iframe src="#{iframe}"></iframe>
|
485
|
+
</body>
|
486
|
+
</html>
|
487
|
+
HTML
|
488
|
+
end
|
489
|
+
|
490
|
+
it "should yield successive absolute URIs" do
|
491
|
+
expect { |b| subject.each_url(&b) }.to yield_successive_args(
|
492
|
+
URI("http://#{host}#{iframe}")
|
493
|
+
)
|
494
|
+
end
|
495
|
+
end
|
496
|
+
|
497
|
+
context "when the page contains a 'link' element" do
|
376
498
|
let(:stylesheet) { '/stylesheet.css' }
|
377
|
-
let(:javascript) { '/script.js' }
|
378
499
|
let(:body) do
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
500
|
+
<<-HTML
|
501
|
+
<html>
|
502
|
+
<head>
|
503
|
+
<link type="stylesheet" href="#{stylesheet}" />
|
504
|
+
</head>
|
505
|
+
<body>
|
506
|
+
</body>
|
507
|
+
</html>
|
508
|
+
HTML
|
509
|
+
end
|
510
|
+
|
511
|
+
it "should yield successive absolute URIs" do
|
512
|
+
expect { |b| subject.each_url(&b) }.to yield_successive_args(
|
513
|
+
URI("http://#{host}#{stylesheet}")
|
514
|
+
)
|
390
515
|
end
|
516
|
+
end
|
391
517
|
|
392
|
-
|
518
|
+
context "when the page contains a 'script' element" do
|
519
|
+
let(:javascript) { '/script.js' }
|
520
|
+
let(:body) do
|
521
|
+
<<-HTML
|
522
|
+
<html>
|
523
|
+
<head>
|
524
|
+
<script src="#{javascript}" />
|
525
|
+
</head>
|
526
|
+
<body>
|
527
|
+
</body>
|
528
|
+
</html>
|
529
|
+
HTML
|
530
|
+
end
|
531
|
+
|
532
|
+
it "should yield successive absolute URIs" do
|
393
533
|
expect { |b| subject.each_url(&b) }.to yield_successive_args(
|
394
|
-
URI("http://#{host}#{link}"),
|
395
|
-
URI("http://#{host}#{frame}"),
|
396
|
-
URI("http://#{host}#{iframe}"),
|
397
|
-
URI("http://#{host}#{stylesheet}"),
|
398
534
|
URI("http://#{host}#{javascript}")
|
399
535
|
)
|
400
536
|
end
|
@@ -410,32 +546,100 @@ describe Page do
|
|
410
546
|
end
|
411
547
|
|
412
548
|
describe "#urls" do
|
413
|
-
context "when the page contains
|
549
|
+
context "when the page contains an 'a' link" do
|
414
550
|
let(:link) { '/link' }
|
551
|
+
let(:body) do
|
552
|
+
<<-HTML
|
553
|
+
<html>
|
554
|
+
<body>
|
555
|
+
<a href="#{link}">link</a>
|
556
|
+
</body>
|
557
|
+
</html>
|
558
|
+
HTML
|
559
|
+
end
|
560
|
+
|
561
|
+
it "should return an Array of absolute URIs" do
|
562
|
+
expect(subject.urls).to be == [
|
563
|
+
URI("http://#{host}#{link}")
|
564
|
+
]
|
565
|
+
end
|
566
|
+
end
|
567
|
+
|
568
|
+
context "when the page contains a 'frame'" do
|
415
569
|
let(:frame) { '/frame' }
|
570
|
+
let(:body) do
|
571
|
+
<<-HTML
|
572
|
+
<html>
|
573
|
+
<frameset>
|
574
|
+
<frame src="#{frame}"></frame>
|
575
|
+
</frameset>
|
576
|
+
</html>
|
577
|
+
HTML
|
578
|
+
end
|
579
|
+
|
580
|
+
it "should return an Array of absolute URIs" do
|
581
|
+
expect(subject.urls).to be == [
|
582
|
+
URI("http://#{host}#{frame}")
|
583
|
+
]
|
584
|
+
end
|
585
|
+
end
|
586
|
+
|
587
|
+
context "when the page contains a 'iframe'" do
|
416
588
|
let(:iframe) { '/iframe' }
|
589
|
+
let(:body) do
|
590
|
+
<<-HTML
|
591
|
+
<html>
|
592
|
+
<body>
|
593
|
+
<iframe src="#{iframe}"></iframe>
|
594
|
+
</body>
|
595
|
+
</html>
|
596
|
+
HTML
|
597
|
+
end
|
598
|
+
|
599
|
+
it "should return an Array of absolute URIs" do
|
600
|
+
expect(subject.urls).to be == [
|
601
|
+
URI("http://#{host}#{iframe}")
|
602
|
+
]
|
603
|
+
end
|
604
|
+
end
|
605
|
+
|
606
|
+
context "when the page contains a 'link' element" do
|
417
607
|
let(:stylesheet) { '/stylesheet.css' }
|
608
|
+
let(:body) do
|
609
|
+
<<-HTML
|
610
|
+
<html>
|
611
|
+
<head>
|
612
|
+
<link type="stylesheet" href="#{stylesheet}" />
|
613
|
+
</head>
|
614
|
+
<body>
|
615
|
+
</body>
|
616
|
+
</html>
|
617
|
+
HTML
|
618
|
+
end
|
619
|
+
|
620
|
+
it "should return an Array of absolute URIs" do
|
621
|
+
expect(subject.urls).to be == [
|
622
|
+
URI("http://#{host}#{stylesheet}")
|
623
|
+
]
|
624
|
+
end
|
625
|
+
end
|
626
|
+
|
627
|
+
context "when the page contains a 'script' element" do
|
418
628
|
let(:javascript) { '/script.js' }
|
419
629
|
let(:body) do
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
%{</body>} +
|
430
|
-
%{</html>}
|
630
|
+
<<-HTML
|
631
|
+
<html>
|
632
|
+
<head>
|
633
|
+
<script src="#{javascript}" />
|
634
|
+
</head>
|
635
|
+
<body>
|
636
|
+
</body>
|
637
|
+
</html>
|
638
|
+
HTML
|
431
639
|
end
|
432
640
|
|
433
641
|
it "should return an Array of absolute URIs" do
|
434
642
|
expect(subject.urls).to be == [
|
435
|
-
URI("http://#{host}#{link}"),
|
436
|
-
URI("http://#{host}#{frame}"),
|
437
|
-
URI("http://#{host}#{iframe}"),
|
438
|
-
URI("http://#{host}#{stylesheet}"),
|
439
643
|
URI("http://#{host}#{javascript}")
|
440
644
|
]
|
441
645
|
end
|
@@ -26,10 +26,6 @@ describe Page do
|
|
26
26
|
include_examples "status code method", :is_ok?, {200 => true, 500 => false}
|
27
27
|
end
|
28
28
|
|
29
|
-
describe "#timedout?" do
|
30
|
-
include_examples "status code method", :timedout?, {308 => true, 200 => false}
|
31
|
-
end
|
32
|
-
|
33
29
|
describe "#bad_request?" do
|
34
30
|
include_examples "status code method", :bad_request?, {400 => true, 200 => false}
|
35
31
|
end
|
@@ -46,6 +42,10 @@ describe Page do
|
|
46
42
|
include_examples "status code method", :is_missing?, {404 => true, 200 => false}
|
47
43
|
end
|
48
44
|
|
45
|
+
describe "#is_timedout?" do
|
46
|
+
include_examples "status code method", :is_timedout?, {408 => true, 200 => false}
|
47
|
+
end
|
48
|
+
|
49
49
|
describe "#had_internal_server_error?" do
|
50
50
|
include_examples "status code method", :had_internal_server_error?, {500 => true, 200 => false}
|
51
51
|
end
|
data/spec/proxy_spec.rb
CHANGED
@@ -26,13 +26,13 @@ describe Spidr::Proxy do
|
|
26
26
|
it { expect(subject.enabled?).to be true }
|
27
27
|
end
|
28
28
|
|
29
|
-
context "when
|
29
|
+
context "when host is not set" do
|
30
30
|
it { expect(subject.enabled?).to be false }
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
34
|
describe "#disabled?" do
|
35
|
-
context "when
|
35
|
+
context "when host is not set" do
|
36
36
|
it { expect(subject.disabled?).to be true }
|
37
37
|
end
|
38
38
|
|
@@ -16,7 +16,7 @@ shared_examples "includes Spidr::Settings::Proxy" do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
it "should retain the default value" do
|
19
|
-
expect(subject.proxy.object_id).to be
|
19
|
+
expect(subject.proxy.object_id).to be(subject.proxy.object_id)
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
@@ -26,7 +26,7 @@ shared_examples "includes Spidr::Settings::Proxy" do
|
|
26
26
|
end
|
27
27
|
|
28
28
|
it "should return the set @proxy" do
|
29
|
-
expect(subject.proxy).to be
|
29
|
+
expect(subject.proxy).to be(proxy)
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
@@ -35,12 +35,10 @@ shared_examples "includes Spidr::Settings::Proxy" do
|
|
35
35
|
context "when given a Proxy object" do
|
36
36
|
let(:proxy) { Proxy.new(host: proxy_host, port: proxy_port) }
|
37
37
|
|
38
|
-
before
|
39
|
-
subject.proxy = proxy
|
40
|
-
end
|
38
|
+
before { subject.proxy = proxy }
|
41
39
|
|
42
40
|
it "should save it" do
|
43
|
-
expect(subject.proxy).to be
|
41
|
+
expect(subject.proxy).to be(proxy)
|
44
42
|
end
|
45
43
|
end
|
46
44
|
|
@@ -51,15 +49,37 @@ shared_examples "includes Spidr::Settings::Proxy" do
|
|
51
49
|
|
52
50
|
it "should create a new Proxy object" do
|
53
51
|
expect(subject.proxy).to be_kind_of(Proxy)
|
54
|
-
expect(subject.proxy[:host]).to be
|
55
|
-
expect(subject.proxy[:port]).to be
|
52
|
+
expect(subject.proxy[:host]).to be(proxy_host)
|
53
|
+
expect(subject.proxy[:port]).to be(proxy_port)
|
56
54
|
end
|
57
55
|
end
|
58
56
|
|
59
|
-
context "when given
|
60
|
-
|
61
|
-
|
57
|
+
context "when given a URI::HTTP" do
|
58
|
+
let(:uri) { URI::HTTP.build(host: proxy_host, port: proxy_port) }
|
59
|
+
|
60
|
+
before { subject.proxy = uri }
|
61
|
+
|
62
|
+
it "should create a new Proxy object based on the URI" do
|
63
|
+
expect(subject.proxy).to be_kind_of(Proxy)
|
64
|
+
expect(subject.proxy[:host]).to eq(proxy_host)
|
65
|
+
expect(subject.proxy[:port]).to eq(proxy_port)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
context "when given a String" do
|
70
|
+
let(:url) { "http://#{proxy_host}:#{proxy_port}" }
|
71
|
+
|
72
|
+
before { subject.proxy = url }
|
73
|
+
|
74
|
+
it "should parse the String as a URI and create a new Proxy object" do
|
75
|
+
expect(subject.proxy).to be_kind_of(Proxy)
|
76
|
+
expect(subject.proxy[:host]).to eq(proxy_host)
|
77
|
+
expect(subject.proxy[:port]).to eq(proxy_port)
|
62
78
|
end
|
79
|
+
end
|
80
|
+
|
81
|
+
context "when given nil" do
|
82
|
+
before { subject.proxy = nil }
|
63
83
|
|
64
84
|
it "should leave an empty proxy" do
|
65
85
|
expect(subject.proxy).to be_kind_of(Proxy)
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spidr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - ~>
|
31
|
+
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '2.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - ~>
|
38
|
+
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '2.0'
|
41
41
|
description: Spidr is a versatile Ruby web spidering library that can spider a site,
|
42
42
|
multiple domains, certain links or infinitely. Spidr is designed to be fast and
|
43
43
|
easy to use.
|
@@ -49,10 +49,11 @@ extra_rdoc_files:
|
|
49
49
|
- LICENSE.txt
|
50
50
|
- README.md
|
51
51
|
files:
|
52
|
-
- .
|
53
|
-
- .
|
54
|
-
- .
|
55
|
-
- .
|
52
|
+
- ".editorconfig"
|
53
|
+
- ".github/workflows/ruby.yml"
|
54
|
+
- ".gitignore"
|
55
|
+
- ".rspec"
|
56
|
+
- ".yardopts"
|
56
57
|
- ChangeLog.md
|
57
58
|
- Gemfile
|
58
59
|
- LICENSE.txt
|
@@ -112,24 +113,23 @@ homepage: https://github.com/postmodern/spidr#readme
|
|
112
113
|
licenses:
|
113
114
|
- MIT
|
114
115
|
metadata: {}
|
115
|
-
post_install_message:
|
116
|
+
post_install_message:
|
116
117
|
rdoc_options: []
|
117
118
|
require_paths:
|
118
119
|
- lib
|
119
120
|
required_ruby_version: !ruby/object:Gem::Requirement
|
120
121
|
requirements:
|
121
|
-
- -
|
122
|
+
- - ">="
|
122
123
|
- !ruby/object:Gem::Version
|
123
124
|
version: 2.0.0
|
124
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
125
126
|
requirements:
|
126
|
-
- -
|
127
|
+
- - ">="
|
127
128
|
- !ruby/object:Gem::Version
|
128
129
|
version: '0'
|
129
130
|
requirements: []
|
130
|
-
|
131
|
-
|
132
|
-
signing_key:
|
131
|
+
rubygems_version: 3.3.26
|
132
|
+
signing_key:
|
133
133
|
specification_version: 4
|
134
134
|
summary: A versatile Ruby web spidering library
|
135
135
|
test_files: []
|