ronin-web-spider 0.2.0.rc1 → 0.2.0.rc2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/ronin/web/spider/agent.rb +24 -9
 - data/lib/ronin/web/spider/version.rb +1 -1
 - metadata +1 -1
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 7aedd94cd4b3f96a07824419722763a91374590e6944ab3ae58c6ff1432837ca
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 67d2d63b5468838f60f8b3aa3975dcb8f0842f66ca1d776384fb29c6a7f6a8ca
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: b9e7588f16084226b812db561261122af81fea2bd2d4c1b529c7cc1763e48060c90f716475e930029159f9cd1f886866f47ea03be8c4131c850af0b6d73da8c0
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 253283e0e5f8046d4d41fb9bf445933017e81b92e72c817aa0d4c6850c4952e0ec687e727196707f71e0343a97ba0f492a409e070a3c00e8d0fd8640feda75d5
         
     | 
| 
         @@ -325,7 +325,7 @@ module Ronin 
     | 
|
| 
       325 
325 
     | 
    
         
             
                    # @api private
         
     | 
| 
       326 
326 
     | 
    
         
             
                    #
         
     | 
| 
       327 
327 
     | 
    
         
             
                    # @since 0.1.1
         
     | 
| 
       328 
     | 
    
         
            -
                     
     | 
| 
      
 328 
     | 
    
         
            +
                    JAVASCRIPT_INLINE_REGEX_REGEX = %r{
         
     | 
| 
       329 
329 
     | 
    
         
             
                      (?# match before the regex to avoid matching division operators )
         
     | 
| 
       330 
330 
     | 
    
         
             
                      (?:[\{\[\(;:,]\s*|=\s*)
         
     | 
| 
       331 
331 
     | 
    
         
             
                      /
         
     | 
| 
         @@ -350,7 +350,7 @@ module Ronin 
     | 
|
| 
       350 
350 
     | 
    
         
             
                    # @api private
         
     | 
| 
       351 
351 
     | 
    
         
             
                    #
         
     | 
| 
       352 
352 
     | 
    
         
             
                    # @since 0.1.1
         
     | 
| 
       353 
     | 
    
         
            -
                     
     | 
| 
      
 353 
     | 
    
         
            +
                    JAVASCRIPT_TEMPLATE_LITERAL_REGEX = /`(?:\\`|[^`])+`/m
         
     | 
| 
       354 
354 
     | 
    
         | 
| 
       355 
355 
     | 
    
         
             
                    #
         
     | 
| 
       356 
356 
     | 
    
         
             
                    # Passes every JavaScript string value to the given block.
         
     | 
| 
         @@ -395,8 +395,8 @@ module Ronin 
     | 
|
| 
       395 
395 
     | 
    
         
             
                              yield string
         
     | 
| 
       396 
396 
     | 
    
         
             
                            end
         
     | 
| 
       397 
397 
     | 
    
         
             
                          else
         
     | 
| 
       398 
     | 
    
         
            -
                            scanner.skip( 
     | 
| 
       399 
     | 
    
         
            -
                              scanner.skip( 
     | 
| 
      
 398 
     | 
    
         
            +
                            scanner.skip(JAVASCRIPT_INLINE_REGEX_REGEX) ||
         
     | 
| 
      
 399 
     | 
    
         
            +
                              scanner.skip(JAVASCRIPT_TEMPLATE_LITERAL_REGEX) ||
         
     | 
| 
       400 
400 
     | 
    
         
             
                              scanner.getch
         
     | 
| 
       401 
401 
     | 
    
         
             
                          end
         
     | 
| 
       402 
402 
     | 
    
         
             
                        end
         
     | 
| 
         @@ -410,7 +410,11 @@ module Ronin 
     | 
|
| 
       410 
410 
     | 
    
         
             
                    # @note
         
     | 
| 
       411 
411 
     | 
    
         
             
                    #   This matches `foo/bar`, `foo/bar.ext`, `../foo`, and `foo.ext`,
         
     | 
| 
       412 
412 
     | 
    
         
             
                    #   but *not* `/foo`, `foo`, or `foo.`.
         
     | 
| 
       413 
     | 
    
         
            -
                     
     | 
| 
      
 413 
     | 
    
         
            +
                    #
         
     | 
| 
      
 414 
     | 
    
         
            +
                    # @api private
         
     | 
| 
      
 415 
     | 
    
         
            +
                    #
         
     | 
| 
      
 416 
     | 
    
         
            +
                    # @since 0.2.0
         
     | 
| 
      
 417 
     | 
    
         
            +
                    JAVASCRIPT_RELATIVE_PATH_REGEX = %r{
         
     | 
| 
       414 
418 
     | 
    
         
             
                      \A
         
     | 
| 
       415 
419 
     | 
    
         
             
                        (?:
         
     | 
| 
       416 
420 
     | 
    
         
             
                           [^/\\. ]+\.[a-z0-9]+ (?# filename.ext)
         
     | 
| 
         @@ -450,7 +454,7 @@ module Ronin 
     | 
|
| 
       450 
454 
     | 
    
         
             
                    #
         
     | 
| 
       451 
455 
     | 
    
         
             
                    def every_javascript_relative_path_string(&block)
         
     | 
| 
       452 
456 
     | 
    
         
             
                      every_javascript_string do |string,page|
         
     | 
| 
       453 
     | 
    
         
            -
                        if string =~  
     | 
| 
      
 457 
     | 
    
         
            +
                        if string =~ JAVASCRIPT_RELATIVE_PATH_REGEX
         
     | 
| 
       454 
458 
     | 
    
         
             
                          if block.arity == 2
         
     | 
| 
       455 
459 
     | 
    
         
             
                            yield string, page
         
     | 
| 
       456 
460 
     | 
    
         
             
                          else
         
     | 
| 
         @@ -463,7 +467,11 @@ module Ronin 
     | 
|
| 
       463 
467 
     | 
    
         
             
                    alias every_js_relative_path_string every_javascript_relative_path_string
         
     | 
| 
       464 
468 
     | 
    
         | 
| 
       465 
469 
     | 
    
         
             
                    # Regular expression that matches absolute paths within JavaScript.
         
     | 
| 
       466 
     | 
    
         
            -
                     
     | 
| 
      
 470 
     | 
    
         
            +
                    #
         
     | 
| 
      
 471 
     | 
    
         
            +
                    # @api private
         
     | 
| 
      
 472 
     | 
    
         
            +
                    #
         
     | 
| 
      
 473 
     | 
    
         
            +
                    # @since 0.2.0
         
     | 
| 
      
 474 
     | 
    
         
            +
                    JAVASCRIPT_ABSOLUTE_PATH_REGEX = %r{\A(?:/[^/\\ ]+)+\z}
         
     | 
| 
       467 
475 
     | 
    
         | 
| 
       468 
476 
     | 
    
         
             
                    #
         
     | 
| 
       469 
477 
     | 
    
         
             
                    # Passes every JavaScript absolute path string to the given block.
         
     | 
| 
         @@ -495,7 +503,7 @@ module Ronin 
     | 
|
| 
       495 
503 
     | 
    
         
             
                    #
         
     | 
| 
       496 
504 
     | 
    
         
             
                    def every_javascript_absolute_path_string(&block)
         
     | 
| 
       497 
505 
     | 
    
         
             
                      every_javascript_string do |string,page|
         
     | 
| 
       498 
     | 
    
         
            -
                        if string =~  
     | 
| 
      
 506 
     | 
    
         
            +
                        if string =~ JAVASCRIPT_ABSOLUTE_PATH_REGEX
         
     | 
| 
       499 
507 
     | 
    
         
             
                          if block.arity == 2
         
     | 
| 
       500 
508 
     | 
    
         
             
                            yield string, page
         
     | 
| 
       501 
509 
     | 
    
         
             
                          else
         
     | 
| 
         @@ -541,6 +549,13 @@ module Ronin 
     | 
|
| 
       541 
549 
     | 
    
         | 
| 
       542 
550 
     | 
    
         
             
                    alias every_js_path_string every_javascript_path_string
         
     | 
| 
       543 
551 
     | 
    
         | 
| 
      
 552 
     | 
    
         
            +
                    # Regular expression for identifying URLs.
         
     | 
| 
      
 553 
     | 
    
         
            +
                    #
         
     | 
| 
      
 554 
     | 
    
         
            +
                    # @api private
         
     | 
| 
      
 555 
     | 
    
         
            +
                    #
         
     | 
| 
      
 556 
     | 
    
         
            +
                    # @since 0.2.0
         
     | 
| 
      
 557 
     | 
    
         
            +
                    URL_REGEX = /\A#{Support::Text::Patterns::URL}\z/
         
     | 
| 
      
 558 
     | 
    
         
            +
             
     | 
| 
       544 
559 
     | 
    
         
             
                    #
         
     | 
| 
       545 
560 
     | 
    
         
             
                    # Passes every JavaScript URL string to the given block.
         
     | 
| 
       546 
561 
     | 
    
         
             
                    #
         
     | 
| 
         @@ -570,7 +585,7 @@ module Ronin 
     | 
|
| 
       570 
585 
     | 
    
         
             
                    #
         
     | 
| 
       571 
586 
     | 
    
         
             
                    def every_javascript_url_string(&block)
         
     | 
| 
       572 
587 
     | 
    
         
             
                      every_javascript_string do |string,page|
         
     | 
| 
       573 
     | 
    
         
            -
                        if string =~  
     | 
| 
      
 588 
     | 
    
         
            +
                        if string =~ URL_REGEX
         
     | 
| 
       574 
589 
     | 
    
         
             
                          if block.arity == 2
         
     | 
| 
       575 
590 
     | 
    
         
             
                            yield string, page
         
     | 
| 
       576 
591 
     | 
    
         
             
                          else
         
     |