digger 0.1.9 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed96af1c5ae92569e1de4885958ac5852864c045f6c6337d5f17d91747d8ed80
4
- data.tar.gz: 82003ae80f54cd3f9b805757e5dcb4c7894bba91c4c376cf08ebd43e6de6e80b
3
+ metadata.gz: 29c3945e9377348e1152eea7f46e0f11aa2e59cc5568fad57d25ecd3d271a9df
4
+ data.tar.gz: 1e4862f9939aa9c62e175a39df078fe12a2f51190af1f280f6d418cbab7e6390
5
5
  SHA512:
6
- metadata.gz: b7aad69fd46c7d1688026ece2e1efe14d7dea29b42f94656d794655e12a92677bd3e1034f0c776bf197bcd75c96bd49377df399433bd2e1c13507520af1addc5
7
- data.tar.gz: 60055a69ec3ad77e80fc4f1b50bb3a6c298e2274827ae21b64d1d82dae53a1d5338ccc99bcdb09c7d2e946abd007a488ff3034a753bc54586e9b651aeb3c5ce7
6
+ metadata.gz: 177e393de76bc35e31d6cc0eeda839d543d13fd81c40eba7d08704a131ec01396872734ea689c49fb1d09ceb4ba604fae76c3c201a2706dc9c889161038e0323
7
+ data.tar.gz: da76004a179aaed5cf75a96f90da3ebc739416e0ec162ff95c6aa27625590826fcc179b749156ea96937e364fb6a251515cb335a14b317001f3a47ea30330aeb
@@ -6,7 +6,9 @@ module Digger
6
6
  attr_accessor :type, :value, :block
7
7
 
8
8
  def initialize(hash = {})
9
- hash.each_pair { |key, value| send("#{key}=", value) if %w[type value block].include?(key.to_s)}
9
+ hash.each_pair do |key, value|
10
+ send("#{key}=", value) if %w[type value block].include?(key.to_s)
11
+ end
10
12
  end
11
13
 
12
14
  def safe_block(&default_block)
@@ -31,8 +33,9 @@ module Digger
31
33
  TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
32
34
  TYPES_CSS = %w[css_one css_many].freeze
33
35
  TYPES_JSON = %w[json jsonp].freeze
36
+ TYPES_OTHER = %w[cookie plain lines header body].freeze
34
37
 
35
- TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + ['cookie']
38
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER
36
39
 
37
40
  def match_page(page)
38
41
  return unless page.success?
@@ -43,13 +46,31 @@ module Digger
43
46
  css_match(page.doc)
44
47
  elsif TYPES_JSON.include?(type)
45
48
  json_match(page)
46
- else
47
- cookie_get(page.cookies)
49
+ elsif TYPES_OTHER.include?(type)
50
+ send("get_#{type}", page)
48
51
  end
49
52
  end
50
53
 
51
- def cookie_get(cookies)
52
- cookie = cookies.find { |c| c.name == value }&.value
54
+ def get_header(page)
55
+ header = (page.headers[value.to_s.downcase] || []).first
56
+ safe_block.call(header)
57
+ end
58
+
59
+ def get_body(page)
60
+ safe_block.call(page.body)
61
+ end
62
+
63
+ def get_plain(page)
64
+ safe_block.call(page.doc.text)
65
+ end
66
+
67
+ def get_lines(page)
68
+ block = safe_block
69
+ page.body.split("\n").map(&:strip).filter { |line| !line.empty? }.map { |line| block.call(line) }
70
+ end
71
+
72
+ def get_cookie(page)
73
+ cookie = page.cookies.find { |c| c.name == value }&.value
53
74
  safe_block.call(cookie)
54
75
  end
55
76
 
@@ -75,7 +96,8 @@ module Digger
75
96
  block = safe_block(&:strip)
76
97
  # content is String
77
98
  if type == 'match_many'
78
- body.gsub(value).to_a.map { |node| block.call(node) }.uniq
99
+ regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
100
+ body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
79
101
  else
80
102
  index = TYPES_REGEXP.index(type)
81
103
  matches = body.match(value)
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = '0.1.9'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
data/spec/pattern_spec.rb CHANGED
@@ -11,10 +11,17 @@ describe Digger::Pattern do
11
11
  # expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
12
12
  # end
13
13
 
14
- it 'parse cookoe' do
14
+ it 'parse cookie & others' do
15
15
  page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
16
- pt = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
- result = pt.match_page(page)
16
+ p1 = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
+ # cookie
18
+ result = p1.match_page(page)
18
19
  expect(result.length).to eq(42)
20
+ # header
21
+ p2 = Digger::Pattern.new({ type: 'header', value: 'transfer-encoding' })
22
+ expect(p2.match_page(page)).to eq('chunked')
23
+ # get_plain
24
+ p3 = Digger::Pattern.new({ type: 'plain' })
25
+ expect(p3.match_page(page).length).to be > 100
19
26
  end
20
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-29 00:00:00.000000000 Z
11
+ date: 2021-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake