digger 0.1.9 → 0.2.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/digger/pattern.rb +29 -7
- data/lib/digger/version.rb +1 -1
- data/spec/pattern_spec.rb +10 -3
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 29c3945e9377348e1152eea7f46e0f11aa2e59cc5568fad57d25ecd3d271a9df
         | 
| 4 | 
            +
              data.tar.gz: 1e4862f9939aa9c62e175a39df078fe12a2f51190af1f280f6d418cbab7e6390
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 177e393de76bc35e31d6cc0eeda839d543d13fd81c40eba7d08704a131ec01396872734ea689c49fb1d09ceb4ba604fae76c3c201a2706dc9c889161038e0323
         | 
| 7 | 
            +
              data.tar.gz: da76004a179aaed5cf75a96f90da3ebc739416e0ec162ff95c6aa27625590826fcc179b749156ea96937e364fb6a251515cb335a14b317001f3a47ea30330aeb
         | 
    
        data/lib/digger/pattern.rb
    CHANGED
    
    | @@ -6,7 +6,9 @@ module Digger | |
| 6 6 | 
             
                attr_accessor :type, :value, :block
         | 
| 7 7 |  | 
| 8 8 | 
             
                def initialize(hash = {})
         | 
| 9 | 
            -
                  hash.each_pair  | 
| 9 | 
            +
                  hash.each_pair do |key, value|
         | 
| 10 | 
            +
                    send("#{key}=", value) if %w[type value block].include?(key.to_s)
         | 
| 11 | 
            +
                  end
         | 
| 10 12 | 
             
                end
         | 
| 11 13 |  | 
| 12 14 | 
             
                def safe_block(&default_block)
         | 
| @@ -31,8 +33,9 @@ module Digger | |
| 31 33 | 
             
                TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
         | 
| 32 34 | 
             
                TYPES_CSS = %w[css_one css_many].freeze
         | 
| 33 35 | 
             
                TYPES_JSON = %w[json jsonp].freeze
         | 
| 36 | 
            +
                TYPES_OTHER = %w[cookie plain lines header body].freeze
         | 
| 34 37 |  | 
| 35 | 
            -
                TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON +  | 
| 38 | 
            +
                TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER
         | 
| 36 39 |  | 
| 37 40 | 
             
                def match_page(page)
         | 
| 38 41 | 
             
                  return unless page.success?
         | 
| @@ -43,13 +46,31 @@ module Digger | |
| 43 46 | 
             
                    css_match(page.doc)
         | 
| 44 47 | 
             
                  elsif TYPES_JSON.include?(type)
         | 
| 45 48 | 
             
                    json_match(page)
         | 
| 46 | 
            -
                   | 
| 47 | 
            -
                     | 
| 49 | 
            +
                  elsif TYPES_OTHER.include?(type)
         | 
| 50 | 
            +
                    send("get_#{type}", page)
         | 
| 48 51 | 
             
                  end
         | 
| 49 52 | 
             
                end
         | 
| 50 53 |  | 
| 51 | 
            -
                def  | 
| 52 | 
            -
                   | 
| 54 | 
            +
                def get_header(page)
         | 
| 55 | 
            +
                  header = (page.headers[value.to_s.downcase] || []).first
         | 
| 56 | 
            +
                  safe_block.call(header)
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                def get_body(page)
         | 
| 60 | 
            +
                  safe_block.call(page.body)
         | 
| 61 | 
            +
                end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                def get_plain(page)
         | 
| 64 | 
            +
                  safe_block.call(page.doc.text)
         | 
| 65 | 
            +
                end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                def get_lines(page)
         | 
| 68 | 
            +
                  block = safe_block
         | 
| 69 | 
            +
                  page.body.split("\n").map(&:strip).filter { |line| !line.empty? }.map { |line| block.call(line) }
         | 
| 70 | 
            +
                end
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                def get_cookie(page)
         | 
| 73 | 
            +
                  cookie = page.cookies.find { |c| c.name == value }&.value
         | 
| 53 74 | 
             
                  safe_block.call(cookie)
         | 
| 54 75 | 
             
                end
         | 
| 55 76 |  | 
| @@ -75,7 +96,8 @@ module Digger | |
| 75 96 | 
             
                  block = safe_block(&:strip)
         | 
| 76 97 | 
             
                  # content is String
         | 
| 77 98 | 
             
                  if type == 'match_many'
         | 
| 78 | 
            -
                     | 
| 99 | 
            +
                    regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
         | 
| 100 | 
            +
                    body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
         | 
| 79 101 | 
             
                  else
         | 
| 80 102 | 
             
                    index = TYPES_REGEXP.index(type)
         | 
| 81 103 | 
             
                    matches = body.match(value)
         | 
    
        data/lib/digger/version.rb
    CHANGED
    
    
    
        data/spec/pattern_spec.rb
    CHANGED
    
    | @@ -11,10 +11,17 @@ describe Digger::Pattern do | |
| 11 11 | 
             
              #   expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
         | 
| 12 12 | 
             
              # end
         | 
| 13 13 |  | 
| 14 | 
            -
              it 'parse  | 
| 14 | 
            +
              it 'parse cookie & others' do
         | 
| 15 15 | 
             
                page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
         | 
| 16 | 
            -
                 | 
| 17 | 
            -
                 | 
| 16 | 
            +
                p1 = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
         | 
| 17 | 
            +
                # cookie
         | 
| 18 | 
            +
                result = p1.match_page(page)
         | 
| 18 19 | 
             
                expect(result.length).to eq(42)
         | 
| 20 | 
            +
                # header
         | 
| 21 | 
            +
                p2 = Digger::Pattern.new({ type: 'header', value: 'transfer-encoding' })
         | 
| 22 | 
            +
                expect(p2.match_page(page)).to eq('chunked')
         | 
| 23 | 
            +
                # get_plain
         | 
| 24 | 
            +
                p3 = Digger::Pattern.new({ type: 'plain' })
         | 
| 25 | 
            +
                expect(p3.match_page(page).length).to be > 100
         | 
| 19 26 | 
             
              end
         | 
| 20 27 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: digger
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.9 | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - binz
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2021-12- | 
| 11 | 
            +
            date: 2021-12-31 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rake
         |