ronin-web-spider 0.2.0.rc1 → 0.2.0.rc3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/lib/ronin/web/spider/agent.rb +25 -10
- data/lib/ronin/web/spider/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ab74fe34ec9f37cba9a8269fa6df15cd4f5404d8b01904ead735fe7d98159ace
|
4
|
+
data.tar.gz: 0d8547dfdd92cef99193fe79c5a944500adb04ab80c6c0a89b000f727e5fafbe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e10c97975ce2dd40b80ec204ac39f2fcecb74ee62af2dd0758e662214c5ce806352107309ece72fd38b0724e8de5e022692661a987975e6bb6266162d958749
|
7
|
+
data.tar.gz: 619387f4795f1efcea2d88e3e06162a05f33aabe9b1cae4da1267e5106ad94e97475e3078ae39497a2616f0636dfa362cc902185f9fc30b9683ca8c92c63d7ae
|
data/README.md
CHANGED
@@ -32,6 +32,14 @@ ronin-web-spider is a collection of common web spidering routines using the
|
|
32
32
|
* [every_javascript_string][docs-every_javascript_string] - yields every
|
33
33
|
single-quoted or double-quoted String literal from all JavaScript source
|
34
34
|
code.
|
35
|
+
* [every_javascript_relative_path_string][docs-every_javascript_relative_path_string] -
|
36
|
+
yields every relative path JavaScript string (ex: `foo/bar`).
|
37
|
+
* [every_javascript_absolute_path_string][docs-every_javascript_absolute_path_string] -
|
38
|
+
yields every absolute path JavaScript string (ex: `/foo/bar`).
|
39
|
+
* [every_javascript_path_string][docs-every_javascript_path_string] -
|
40
|
+
yields every relative or absolute path JavaScript string (ex: `foo/bar` or `/foo/bar`).
|
41
|
+
* [every_javascript_url_string][docs-every_javascript_url_string] -
|
42
|
+
yields every URL JavaScript string (ex: `https://example.com/foo/bar`).
|
35
43
|
* [every_javascript_comment][docs-every_javascript_comment] - yields every
|
36
44
|
JavaScript comment.
|
37
45
|
* [every_comment][docs-every_comment] - yields every HTML or JavaScript
|
@@ -46,6 +54,10 @@ ronin-web-spider is a collection of common web spidering routines using the
|
|
46
54
|
[docs-every_html_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_html_comment-instance_method
|
47
55
|
[docs-every_javascript]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript-instance_method
|
48
56
|
[docs-every_javascript_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_string-instance_method
|
57
|
+
[docs-every_javascript_relative_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_relative_path_string-instance_method
|
58
|
+
[docs-every_javascript_absolute_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_absolute_path_string-instance_method
|
59
|
+
[docs-every_javascript_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_path_string-instance_method
|
60
|
+
[docs-every_javascript_url_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_url_string-instance_method
|
49
61
|
[docs-every_javascript_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_comment-instance_method
|
50
62
|
[docs-every_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_comment-instance_method
|
51
63
|
|
@@ -325,9 +325,9 @@ module Ronin
|
|
325
325
|
# @api private
|
326
326
|
#
|
327
327
|
# @since 0.1.1
|
328
|
-
|
328
|
+
JAVASCRIPT_INLINE_REGEX_REGEX = %r{
|
329
329
|
(?# match before the regex to avoid matching division operators )
|
330
|
-
(?:[\{\[\(;:,]\s*|=\s*)
|
330
|
+
(?:[\{\[\(;:,]\s*|=\s*|return\s*)
|
331
331
|
/
|
332
332
|
(?# inline regex contents )
|
333
333
|
(?:
|
@@ -350,7 +350,7 @@ module Ronin
|
|
350
350
|
# @api private
|
351
351
|
#
|
352
352
|
# @since 0.1.1
|
353
|
-
|
353
|
+
JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
|
354
354
|
|
355
355
|
#
|
356
356
|
# Passes every JavaScript string value to the given block.
|
@@ -395,8 +395,8 @@ module Ronin
|
|
395
395
|
yield string
|
396
396
|
end
|
397
397
|
else
|
398
|
-
scanner.skip(
|
399
|
-
scanner.skip(
|
398
|
+
scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
|
399
|
+
scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
|
400
400
|
scanner.getch
|
401
401
|
end
|
402
402
|
end
|
@@ -410,7 +410,11 @@ module Ronin
|
|
410
410
|
# @note
|
411
411
|
# This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
|
412
412
|
# but *not* `/foo`, `foo`, or `foo.`.
|
413
|
-
|
413
|
+
#
|
414
|
+
# @api private
|
415
|
+
#
|
416
|
+
# @since 0.2.0
|
417
|
+
JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
|
414
418
|
\A
|
415
419
|
(?:
|
416
420
|
[^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
|
@@ -450,7 +454,7 @@ module Ronin
|
|
450
454
|
#
|
451
455
|
def every_javascript_relative_path_string(&block)
|
452
456
|
every_javascript_string do |string,page|
|
453
|
-
if string =~
|
457
|
+
if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
|
454
458
|
if block.arity == 2
|
455
459
|
yield string, page
|
456
460
|
else
|
@@ -463,7 +467,11 @@ module Ronin
|
|
463
467
|
alias every_js_relative_path_string every_javascript_relative_path_string
|
464
468
|
|
465
469
|
# Regular expression that matches absolute paths within JavaScript.
|
466
|
-
|
470
|
+
#
|
471
|
+
# @api private
|
472
|
+
#
|
473
|
+
# @since 0.2.0
|
474
|
+
JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
|
467
475
|
|
468
476
|
#
|
469
477
|
# Passes every JavaScript absolute path string to the given block.
|
@@ -495,7 +503,7 @@ module Ronin
|
|
495
503
|
#
|
496
504
|
def every_javascript_absolute_path_string(&block)
|
497
505
|
every_javascript_string do |string,page|
|
498
|
-
if string =~
|
506
|
+
if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
|
499
507
|
if block.arity == 2
|
500
508
|
yield string, page
|
501
509
|
else
|
@@ -541,6 +549,13 @@ module Ronin
|
|
541
549
|
|
542
550
|
alias every_js_path_string every_javascript_path_string
|
543
551
|
|
552
|
+
# Regular expression for identifying URLs.
|
553
|
+
#
|
554
|
+
# @api private
|
555
|
+
#
|
556
|
+
# @since 0.2.0
|
557
|
+
URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
|
558
|
+
|
544
559
|
#
|
545
560
|
# Passes every JavaScript URL string to the given block.
|
546
561
|
#
|
@@ -570,7 +585,7 @@ module Ronin
|
|
570
585
|
#
|
571
586
|
def every_javascript_url_string(&block)
|
572
587
|
every_javascript_string do |string,page|
|
573
|
-
if string =~
|
588
|
+
if string =~ URL_REGEX
|
574
589
|
if block.arity == 2
|
575
590
|
yield string, page
|
576
591
|
else
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ronin-web-spider
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.
|
4
|
+
version: 0.2.0.rc3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: spidr
|