ronin-web-spider 0.2.0.rc1 → 0.2.0.rc3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1637185a37e17f587cab3bb0ec451dfa763ab58b167404ec5b1161a4a80316f
4
- data.tar.gz: ea11da89c3c232feaca90c103c45cdb653eac9b94c6c97be34998d6b5089e896
3
+ metadata.gz: ab74fe34ec9f37cba9a8269fa6df15cd4f5404d8b01904ead735fe7d98159ace
4
+ data.tar.gz: 0d8547dfdd92cef99193fe79c5a944500adb04ab80c6c0a89b000f727e5fafbe
5
5
  SHA512:
6
- metadata.gz: 4d2f5ac7b650096856b87f5e37cf42044a12db416eab2375715a39073606a1f4c30103fda27cb1faaf9a53533bdf323ae9912787078f24f120366fb519a3b4a1
7
- data.tar.gz: b2ae98ce51187a5a65cd2355816b98d1916edb0d731f158e83a9366ec70261ef2e3eae9d5f3abe95b311a766c489064f855b4ee6afa4606592a5f9b560fbcb9d
6
+ metadata.gz: 0e10c97975ce2dd40b80ec204ac39f2fcecb74ee62af2dd0758e662214c5ce806352107309ece72fd38b0724e8de5e022692661a987975e6bb6266162d958749
7
+ data.tar.gz: 619387f4795f1efcea2d88e3e06162a05f33aabe9b1cae4da1267e5106ad94e97475e3078ae39497a2616f0636dfa362cc902185f9fc30b9683ca8c92c63d7ae
data/README.md CHANGED
@@ -32,6 +32,14 @@ ronin-web-spider is a collection of common web spidering routines using the
32
32
  * [every_javascript_string][docs-every_javascript_string] - yields every
33
33
  single-quoted or double-quoted String literal from all JavaScript source
34
34
  code.
35
+ * [every_javascript_relative_path_string][docs-every_javascript_relative_path_string] -
36
+ yields every relative path JavaScript string (ex: `foo/bar`).
37
+ * [every_javascript_absolute_path_string][docs-every_javascript_absolute_path_string] -
38
+ yields every absolute path JavaScript string (ex: `/foo/bar`).
39
+ * [every_javascript_path_string][docs-every_javascript_path_string] -
40
+ yields every relative or absolute path JavaScript string (ex: `foo/bar` or `/foo/bar`).
41
+ * [every_javascript_url_string][docs-every_javascript_url_string] -
42
+ yields every URL JavaScript string (ex: `https://example.com/foo/bar`).
35
43
  * [every_javascript_comment][docs-every_javascript_comment] - yields every
36
44
  JavaScript comment.
37
45
  * [every_comment][docs-every_comment] - yields every HTML or JavaScript
@@ -46,6 +54,10 @@ ronin-web-spider is a collection of common web spidering routines using the
46
54
  [docs-every_html_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_html_comment-instance_method
47
55
  [docs-every_javascript]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript-instance_method
48
56
  [docs-every_javascript_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_string-instance_method
57
+ [docs-every_javascript_relative_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_relative_path_string-instance_method
58
+ [docs-every_javascript_absolute_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_absolute_path_string-instance_method
59
+ [docs-every_javascript_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_path_string-instance_method
60
+ [docs-every_javascript_url_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_url_string-instance_method
49
61
  [docs-every_javascript_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_comment-instance_method
50
62
  [docs-every_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_comment-instance_method
51
63
 
@@ -325,9 +325,9 @@ module Ronin
325
325
  # @api private
326
326
  #
327
327
  # @since 0.1.1
328
- JAVASCRIPT_INLINE_REGEX = %r{
328
+ JAVASCRIPT_INLINE_REGEX_REGEX = %r{
329
329
  (?# match before the regex to avoid matching division operators )
330
- (?:[\{\[\(;:,]\s*|=\s*)
330
+ (?:[\{\[\(;:,]\s*|=\s*|return\s*)
331
331
  /
332
332
  (?# inline regex contents )
333
333
  (?:
@@ -350,7 +350,7 @@ module Ronin
350
350
  # @api private
351
351
  #
352
352
  # @since 0.1.1
353
- JAVASCRIPT_TEMPLATE_LITERAL = /`(?:\\`|[^`])+`/m
353
+ JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
354
354
 
355
355
  #
356
356
  # Passes every JavaScript string value to the given block.
@@ -395,8 +395,8 @@ module Ronin
395
395
  yield string
396
396
  end
397
397
  else
398
- scanner.skip(JAVASCRIPT_INLINE_REGEX) ||
399
- scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL) ||
398
+ scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
399
+ scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
400
400
  scanner.getch
401
401
  end
402
402
  end
@@ -410,7 +410,11 @@ module Ronin
410
410
  # @note
411
411
  # This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
412
412
  # but *not* `/foo`, `foo`, or `foo.`.
413
- JAVASCRIPT_RELATIVE_PATH = %r{
413
+ #
414
+ # @api private
415
+ #
416
+ # @since 0.2.0
417
+ JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
414
418
  \A
415
419
  (?:
416
420
  [^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
@@ -450,7 +454,7 @@ module Ronin
450
454
  #
451
455
  def every_javascript_relative_path_string(&block)
452
456
  every_javascript_string do |string,page|
453
- if string =~ JAVASCRIPT_RELATIVE_PATH
457
+ if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
454
458
  if block.arity == 2
455
459
  yield string, page
456
460
  else
@@ -463,7 +467,11 @@ module Ronin
463
467
  alias every_js_relative_path_string every_javascript_relative_path_string
464
468
 
465
469
  # Regular expression that matches absolute paths within JavaScript.
466
- JAVASCRIPT_ABSOLUTE_PATH = %r{\A(?:/[^/\\ ]+)+\z}
470
+ #
471
+ # @api private
472
+ #
473
+ # @since 0.2.0
474
+ JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
467
475
 
468
476
  #
469
477
  # Passes every JavaScript absolute path string to the given block.
@@ -495,7 +503,7 @@ module Ronin
495
503
  #
496
504
  def every_javascript_absolute_path_string(&block)
497
505
  every_javascript_string do |string,page|
498
- if string =~ JAVASCRIPT_ABSOLUTE_PATH
506
+ if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
499
507
  if block.arity == 2
500
508
  yield string, page
501
509
  else
@@ -541,6 +549,13 @@ module Ronin
541
549
 
542
550
  alias every_js_path_string every_javascript_path_string
543
551
 
552
+ # Regular expression for identifying URLs.
553
+ #
554
+ # @api private
555
+ #
556
+ # @since 0.2.0
557
+ URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
558
+
544
559
  #
545
560
  # Passes every JavaScript URL string to the given block.
546
561
  #
@@ -570,7 +585,7 @@ module Ronin
570
585
  #
571
586
  def every_javascript_url_string(&block)
572
587
  every_javascript_string do |string,page|
573
- if string =~ Support::Text::Patterns::URL
588
+ if string =~ URL_REGEX
574
589
  if block.arity == 2
575
590
  yield string, page
576
591
  else
@@ -22,7 +22,7 @@ module Ronin
22
22
  module Web
23
23
  module Spider
24
24
  # ronin-web-spider version
25
- VERSION = '0.2.0.rc1'
25
+ VERSION = '0.2.0.rc3'
26
26
  end
27
27
  end
28
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ronin-web-spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.rc1
4
+ version: 0.2.0.rc3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-23 00:00:00.000000000 Z
11
+ date: 2024-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spidr