ronin-web-spider 0.2.0.rc1 → 0.2.0.rc3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1637185a37e17f587cab3bb0ec451dfa763ab58b167404ec5b1161a4a80316f
4
- data.tar.gz: ea11da89c3c232feaca90c103c45cdb653eac9b94c6c97be34998d6b5089e896
3
+ metadata.gz: ab74fe34ec9f37cba9a8269fa6df15cd4f5404d8b01904ead735fe7d98159ace
4
+ data.tar.gz: 0d8547dfdd92cef99193fe79c5a944500adb04ab80c6c0a89b000f727e5fafbe
5
5
  SHA512:
6
- metadata.gz: 4d2f5ac7b650096856b87f5e37cf42044a12db416eab2375715a39073606a1f4c30103fda27cb1faaf9a53533bdf323ae9912787078f24f120366fb519a3b4a1
7
- data.tar.gz: b2ae98ce51187a5a65cd2355816b98d1916edb0d731f158e83a9366ec70261ef2e3eae9d5f3abe95b311a766c489064f855b4ee6afa4606592a5f9b560fbcb9d
6
+ metadata.gz: 0e10c97975ce2dd40b80ec204ac39f2fcecb74ee62af2dd0758e662214c5ce806352107309ece72fd38b0724e8de5e022692661a987975e6bb6266162d958749
7
+ data.tar.gz: 619387f4795f1efcea2d88e3e06162a05f33aabe9b1cae4da1267e5106ad94e97475e3078ae39497a2616f0636dfa362cc902185f9fc30b9683ca8c92c63d7ae
data/README.md CHANGED
@@ -32,6 +32,14 @@ ronin-web-spider is a collection of common web spidering routines using the
32
32
  * [every_javascript_string][docs-every_javascript_string] - yields every
33
33
  single-quoted or double-quoted String literal from all JavaScript source
34
34
  code.
35
+ * [every_javascript_relative_path_string][docs-every_javascript_relative_path_string] -
36
+ yields every relative path JavaScript string (ex: `foo/bar`).
37
+ * [every_javascript_absolute_path_string][docs-every_javascript_absolute_path_string] -
38
+ yields every absolute path JavaScript string (ex: `/foo/bar`).
39
+ * [every_javascript_path_string][docs-every_javascript_path_string] -
40
+ yields every relative or absolute path JavaScript string (ex: `foo/bar` or `/foo/bar`).
41
+ * [every_javascript_url_string][docs-every_javascript_url_string] -
42
+ yields every URL JavaScript string (ex: `https://example.com/foo/bar`).
35
43
  * [every_javascript_comment][docs-every_javascript_comment] - yields every
36
44
  JavaScript comment.
37
45
  * [every_comment][docs-every_comment] - yields every HTML or JavaScript
@@ -46,6 +54,10 @@ ronin-web-spider is a collection of common web spidering routines using the
46
54
  [docs-every_html_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_html_comment-instance_method
47
55
  [docs-every_javascript]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript-instance_method
48
56
  [docs-every_javascript_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_string-instance_method
57
+ [docs-every_javascript_relative_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_relative_path_string-instance_method
58
+ [docs-every_javascript_absolute_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_absolute_path_string-instance_method
59
+ [docs-every_javascript_path_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_path_string-instance_method
60
+ [docs-every_javascript_url_string]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_url_string-instance_method
49
61
  [docs-every_javascript_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_javascript_comment-instance_method
50
62
  [docs-every_comment]: https://ronin-rb.dev/docs/ronin-web-spider/Ronin/Web/Spider/Agent.html#every_comment-instance_method
51
63
 
@@ -325,9 +325,9 @@ module Ronin
325
325
  # @api private
326
326
  #
327
327
  # @since 0.1.1
328
- JAVASCRIPT_INLINE_REGEX = %r{
328
+ JAVASCRIPT_INLINE_REGEX_REGEX = %r{
329
329
  (?# match before the regex to avoid matching division operators )
330
- (?:[\{\[\(;:,]\s*|=\s*)
330
+ (?:[\{\[\(;:,]\s*|=\s*|return\s*)
331
331
  /
332
332
  (?# inline regex contents )
333
333
  (?:
@@ -350,7 +350,7 @@ module Ronin
350
350
  # @api private
351
351
  #
352
352
  # @since 0.1.1
353
- JAVASCRIPT_TEMPLATE_LITERAL = /`(?:\\`|[^`])+`/m
353
+ JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
354
354
 
355
355
  #
356
356
  # Passes every JavaScript string value to the given block.
@@ -395,8 +395,8 @@ module Ronin
395
395
  yield string
396
396
  end
397
397
  else
398
- scanner.skip(JAVASCRIPT_INLINE_REGEX) ||
399
- scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL) ||
398
+ scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
399
+ scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
400
400
  scanner.getch
401
401
  end
402
402
  end
@@ -410,7 +410,11 @@ module Ronin
410
410
  # @note
411
411
  # This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
412
412
  # but *not* `/foo`, `foo`, or `foo.`.
413
- JAVASCRIPT_RELATIVE_PATH = %r{
413
+ #
414
+ # @api private
415
+ #
416
+ # @since 0.2.0
417
+ JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
414
418
  \A
415
419
  (?:
416
420
  [^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
@@ -450,7 +454,7 @@ module Ronin
450
454
  #
451
455
  def every_javascript_relative_path_string(&block)
452
456
  every_javascript_string do |string,page|
453
- if string =~ JAVASCRIPT_RELATIVE_PATH
457
+ if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
454
458
  if block.arity == 2
455
459
  yield string, page
456
460
  else
@@ -463,7 +467,11 @@ module Ronin
463
467
  alias every_js_relative_path_string every_javascript_relative_path_string
464
468
 
465
469
  # Regular expression that matches absolute paths within JavaScript.
466
- JAVASCRIPT_ABSOLUTE_PATH = %r{\A(?:/[^/\\ ]+)+\z}
470
+ #
471
+ # @api private
472
+ #
473
+ # @since 0.2.0
474
+ JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
467
475
 
468
476
  #
469
477
  # Passes every JavaScript absolute path string to the given block.
@@ -495,7 +503,7 @@ module Ronin
495
503
  #
496
504
  def every_javascript_absolute_path_string(&block)
497
505
  every_javascript_string do |string,page|
498
- if string =~ JAVASCRIPT_ABSOLUTE_PATH
506
+ if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
499
507
  if block.arity == 2
500
508
  yield string, page
501
509
  else
@@ -541,6 +549,13 @@ module Ronin
541
549
 
542
550
  alias every_js_path_string every_javascript_path_string
543
551
 
552
+ # Regular expression for identifying URLs.
553
+ #
554
+ # @api private
555
+ #
556
+ # @since 0.2.0
557
+ URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
558
+
544
559
  #
545
560
  # Passes every JavaScript URL string to the given block.
546
561
  #
@@ -570,7 +585,7 @@ module Ronin
570
585
  #
571
586
  def every_javascript_url_string(&block)
572
587
  every_javascript_string do |string,page|
573
- if string =~ Support::Text::Patterns::URL
588
+ if string =~ URL_REGEX
574
589
  if block.arity == 2
575
590
  yield string, page
576
591
  else
@@ -22,7 +22,7 @@ module Ronin
22
22
  module Web
23
23
  module Spider
24
24
  # ronin-web-spider version
25
- VERSION = '0.2.0.rc1'
25
+ VERSION = '0.2.0.rc3'
26
26
  end
27
27
  end
28
28
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ronin-web-spider
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.rc1
4
+ version: 0.2.0.rc3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-23 00:00:00.000000000 Z
11
+ date: 2024-07-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: spidr