ronin-web-spider 0.2.0.rc1 → 0.2.0.rc2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ronin/web/spider/agent.rb +24 -9
- data/lib/ronin/web/spider/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aedd94cd4b3f96a07824419722763a91374590e6944ab3ae58c6ff1432837ca
|
4
|
+
data.tar.gz: 67d2d63b5468838f60f8b3aa3975dcb8f0842f66ca1d776384fb29c6a7f6a8ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9e7588f16084226b812db561261122af81fea2bd2d4c1b529c7cc1763e48060c90f716475e930029159f9cd1f886866f47ea03be8c4131c850af0b6d73da8c0
|
7
|
+
data.tar.gz: 253283e0e5f8046d4d41fb9bf445933017e81b92e72c817aa0d4c6850c4952e0ec687e727196707f71e0343a97ba0f492a409e070a3c00e8d0fd8640feda75d5
|
@@ -325,7 +325,7 @@ module Ronin
|
|
325
325
|
# @api private
|
326
326
|
#
|
327
327
|
# @since 0.1.1
|
328
|
-
|
328
|
+
JAVASCRIPT_INLINE_REGEX_REGEX = %r{
|
329
329
|
(?# match before the regex to avoid matching division operators )
|
330
330
|
(?:[\{\[\(;:,]\s*|=\s*)
|
331
331
|
/
|
@@ -350,7 +350,7 @@ module Ronin
|
|
350
350
|
# @api private
|
351
351
|
#
|
352
352
|
# @since 0.1.1
|
353
|
-
|
353
|
+
JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
|
354
354
|
|
355
355
|
#
|
356
356
|
# Passes every JavaScript string value to the given block.
|
@@ -395,8 +395,8 @@ module Ronin
|
|
395
395
|
yield string
|
396
396
|
end
|
397
397
|
else
|
398
|
-
scanner.skip(
|
399
|
-
scanner.skip(
|
398
|
+
scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
|
399
|
+
scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
|
400
400
|
scanner.getch
|
401
401
|
end
|
402
402
|
end
|
@@ -410,7 +410,11 @@ module Ronin
|
|
410
410
|
# @note
|
411
411
|
# This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
|
412
412
|
# but *not* `/foo`, `foo`, or `foo.`.
|
413
|
-
|
413
|
+
#
|
414
|
+
# @api private
|
415
|
+
#
|
416
|
+
# @since 0.2.0
|
417
|
+
JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
|
414
418
|
\A
|
415
419
|
(?:
|
416
420
|
[^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
|
@@ -450,7 +454,7 @@ module Ronin
|
|
450
454
|
#
|
451
455
|
def every_javascript_relative_path_string(&block)
|
452
456
|
every_javascript_string do |string,page|
|
453
|
-
if string =~
|
457
|
+
if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
|
454
458
|
if block.arity == 2
|
455
459
|
yield string, page
|
456
460
|
else
|
@@ -463,7 +467,11 @@ module Ronin
|
|
463
467
|
alias every_js_relative_path_string every_javascript_relative_path_string
|
464
468
|
|
465
469
|
# Regular expression that matches absolute paths within JavaScript.
|
466
|
-
|
470
|
+
#
|
471
|
+
# @api private
|
472
|
+
#
|
473
|
+
# @since 0.2.0
|
474
|
+
JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
|
467
475
|
|
468
476
|
#
|
469
477
|
# Passes every JavaScript absolute path string to the given block.
|
@@ -495,7 +503,7 @@ module Ronin
|
|
495
503
|
#
|
496
504
|
def every_javascript_absolute_path_string(&block)
|
497
505
|
every_javascript_string do |string,page|
|
498
|
-
if string =~
|
506
|
+
if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
|
499
507
|
if block.arity == 2
|
500
508
|
yield string, page
|
501
509
|
else
|
@@ -541,6 +549,13 @@ module Ronin
|
|
541
549
|
|
542
550
|
alias every_js_path_string every_javascript_path_string
|
543
551
|
|
552
|
+
# Regular expression for identifying URLs.
|
553
|
+
#
|
554
|
+
# @api private
|
555
|
+
#
|
556
|
+
# @since 0.2.0
|
557
|
+
URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
|
558
|
+
|
544
559
|
#
|
545
560
|
# Passes every JavaScript URL string to the given block.
|
546
561
|
#
|
@@ -570,7 +585,7 @@ module Ronin
|
|
570
585
|
#
|
571
586
|
def every_javascript_url_string(&block)
|
572
587
|
every_javascript_string do |string,page|
|
573
|
-
if string =~
|
588
|
+
if string =~ URL_REGEX
|
574
589
|
if block.arity == 2
|
575
590
|
yield string, page
|
576
591
|
else
|