ronin-web-spider 0.2.0.rc1 → 0.2.0.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ronin/web/spider/agent.rb +24 -9
- data/lib/ronin/web/spider/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7aedd94cd4b3f96a07824419722763a91374590e6944ab3ae58c6ff1432837ca
|
4
|
+
data.tar.gz: 67d2d63b5468838f60f8b3aa3975dcb8f0842f66ca1d776384fb29c6a7f6a8ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9e7588f16084226b812db561261122af81fea2bd2d4c1b529c7cc1763e48060c90f716475e930029159f9cd1f886866f47ea03be8c4131c850af0b6d73da8c0
|
7
|
+
data.tar.gz: 253283e0e5f8046d4d41fb9bf445933017e81b92e72c817aa0d4c6850c4952e0ec687e727196707f71e0343a97ba0f492a409e070a3c00e8d0fd8640feda75d5
|
@@ -325,7 +325,7 @@ module Ronin
|
|
325
325
|
# @api private
|
326
326
|
#
|
327
327
|
# @since 0.1.1
|
328
|
-
|
328
|
+
JAVASCRIPT_INLINE_REGEX_REGEX = %r{
|
329
329
|
(?# match before the regex to avoid matching division operators )
|
330
330
|
(?:[\{\[\(;:,]\s*|=\s*)
|
331
331
|
/
|
@@ -350,7 +350,7 @@ module Ronin
|
|
350
350
|
# @api private
|
351
351
|
#
|
352
352
|
# @since 0.1.1
|
353
|
-
|
353
|
+
JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
|
354
354
|
|
355
355
|
#
|
356
356
|
# Passes every JavaScript string value to the given block.
|
@@ -395,8 +395,8 @@ module Ronin
|
|
395
395
|
yield string
|
396
396
|
end
|
397
397
|
else
|
398
|
-
scanner.skip(
|
399
|
-
scanner.skip(
|
398
|
+
scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
|
399
|
+
scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
|
400
400
|
scanner.getch
|
401
401
|
end
|
402
402
|
end
|
@@ -410,7 +410,11 @@ module Ronin
|
|
410
410
|
# @note
|
411
411
|
# This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
|
412
412
|
# but *not* `/foo`, `foo`, or `foo.`.
|
413
|
-
|
413
|
+
#
|
414
|
+
# @api private
|
415
|
+
#
|
416
|
+
# @since 0.2.0
|
417
|
+
JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
|
414
418
|
\A
|
415
419
|
(?:
|
416
420
|
[^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
|
@@ -450,7 +454,7 @@ module Ronin
|
|
450
454
|
#
|
451
455
|
def every_javascript_relative_path_string(&block)
|
452
456
|
every_javascript_string do |string,page|
|
453
|
-
if string =~
|
457
|
+
if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
|
454
458
|
if block.arity == 2
|
455
459
|
yield string, page
|
456
460
|
else
|
@@ -463,7 +467,11 @@ module Ronin
|
|
463
467
|
alias every_js_relative_path_string every_javascript_relative_path_string
|
464
468
|
|
465
469
|
# Regular expression that matches absolute paths within JavaScript.
|
466
|
-
|
470
|
+
#
|
471
|
+
# @api private
|
472
|
+
#
|
473
|
+
# @since 0.2.0
|
474
|
+
JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
|
467
475
|
|
468
476
|
#
|
469
477
|
# Passes every JavaScript absolute path string to the given block.
|
@@ -495,7 +503,7 @@ module Ronin
|
|
495
503
|
#
|
496
504
|
def every_javascript_absolute_path_string(&block)
|
497
505
|
every_javascript_string do |string,page|
|
498
|
-
if string =~
|
506
|
+
if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
|
499
507
|
if block.arity == 2
|
500
508
|
yield string, page
|
501
509
|
else
|
@@ -541,6 +549,13 @@ module Ronin
|
|
541
549
|
|
542
550
|
alias every_js_path_string every_javascript_path_string
|
543
551
|
|
552
|
+
# Regular expression for identifying URLs.
|
553
|
+
#
|
554
|
+
# @api private
|
555
|
+
#
|
556
|
+
# @since 0.2.0
|
557
|
+
URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
|
558
|
+
|
544
559
|
#
|
545
560
|
# Passes every JavaScript URL string to the given block.
|
546
561
|
#
|
@@ -570,7 +585,7 @@ module Ronin
|
|
570
585
|
#
|
571
586
|
def every_javascript_url_string(&block)
|
572
587
|
every_javascript_string do |string,page|
|
573
|
-
if string =~
|
588
|
+
if string =~ URL_REGEX
|
574
589
|
if block.arity == 2
|
575
590
|
yield string, page
|
576
591
|
else
|