digger 0.1.9 → 0.2.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ed96af1c5ae92569e1de4885958ac5852864c045f6c6337d5f17d91747d8ed80
4
- data.tar.gz: 82003ae80f54cd3f9b805757e5dcb4c7894bba91c4c376cf08ebd43e6de6e80b
3
+ metadata.gz: 29c3945e9377348e1152eea7f46e0f11aa2e59cc5568fad57d25ecd3d271a9df
4
+ data.tar.gz: 1e4862f9939aa9c62e175a39df078fe12a2f51190af1f280f6d418cbab7e6390
5
5
  SHA512:
6
- metadata.gz: b7aad69fd46c7d1688026ece2e1efe14d7dea29b42f94656d794655e12a92677bd3e1034f0c776bf197bcd75c96bd49377df399433bd2e1c13507520af1addc5
7
- data.tar.gz: 60055a69ec3ad77e80fc4f1b50bb3a6c298e2274827ae21b64d1d82dae53a1d5338ccc99bcdb09c7d2e946abd007a488ff3034a753bc54586e9b651aeb3c5ce7
6
+ metadata.gz: 177e393de76bc35e31d6cc0eeda839d543d13fd81c40eba7d08704a131ec01396872734ea689c49fb1d09ceb4ba604fae76c3c201a2706dc9c889161038e0323
7
+ data.tar.gz: da76004a179aaed5cf75a96f90da3ebc739416e0ec162ff95c6aa27625590826fcc179b749156ea96937e364fb6a251515cb335a14b317001f3a47ea30330aeb
@@ -6,7 +6,9 @@ module Digger
6
6
  attr_accessor :type, :value, :block
7
7
 
8
8
  def initialize(hash = {})
9
- hash.each_pair { |key, value| send("#{key}=", value) if %w[type value block].include?(key.to_s)}
9
+ hash.each_pair do |key, value|
10
+ send("#{key}=", value) if %w[type value block].include?(key.to_s)
11
+ end
10
12
  end
11
13
 
12
14
  def safe_block(&default_block)
@@ -31,8 +33,9 @@ module Digger
31
33
  TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
32
34
  TYPES_CSS = %w[css_one css_many].freeze
33
35
  TYPES_JSON = %w[json jsonp].freeze
36
+ TYPES_OTHER = %w[cookie plain lines header body].freeze
34
37
 
35
- TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + ['cookie']
38
+ TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER
36
39
 
37
40
  def match_page(page)
38
41
  return unless page.success?
@@ -43,13 +46,31 @@ module Digger
43
46
  css_match(page.doc)
44
47
  elsif TYPES_JSON.include?(type)
45
48
  json_match(page)
46
- else
47
- cookie_get(page.cookies)
49
+ elsif TYPES_OTHER.include?(type)
50
+ send("get_#{type}", page)
48
51
  end
49
52
  end
50
53
 
51
- def cookie_get(cookies)
52
- cookie = cookies.find { |c| c.name == value }&.value
54
+ def get_header(page)
55
+ header = (page.headers[value.to_s.downcase] || []).first
56
+ safe_block.call(header)
57
+ end
58
+
59
+ def get_body(page)
60
+ safe_block.call(page.body)
61
+ end
62
+
63
+ def get_plain(page)
64
+ safe_block.call(page.doc.text)
65
+ end
66
+
67
+ def get_lines(page)
68
+ block = safe_block
69
+ page.body.split("\n").map(&:strip).filter { |line| !line.empty? }.map { |line| block.call(line) }
70
+ end
71
+
72
+ def get_cookie(page)
73
+ cookie = page.cookies.find { |c| c.name == value }&.value
53
74
  safe_block.call(cookie)
54
75
  end
55
76
 
@@ -75,7 +96,8 @@ module Digger
75
96
  block = safe_block(&:strip)
76
97
  # content is String
77
98
  if type == 'match_many'
78
- body.gsub(value).to_a.map { |node| block.call(node) }.uniq
99
+ regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
100
+ body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
79
101
  else
80
102
  index = TYPES_REGEXP.index(type)
81
103
  matches = body.match(value)
@@ -1,3 +1,3 @@
1
1
  module Digger
2
- VERSION = '0.1.9'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
data/spec/pattern_spec.rb CHANGED
@@ -11,10 +11,17 @@ describe Digger::Pattern do
11
11
  # expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
12
12
  # end
13
13
 
14
- it 'parse cookoe' do
14
+ it 'parse cookie & others' do
15
15
  page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
16
- pt = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
- result = pt.match_page(page)
16
+ p1 = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
17
+ # cookie
18
+ result = p1.match_page(page)
18
19
  expect(result.length).to eq(42)
20
+ # header
21
+ p2 = Digger::Pattern.new({ type: 'header', value: 'transfer-encoding' })
22
+ expect(p2.match_page(page)).to eq('chunked')
23
+ # get_plain
24
+ p3 = Digger::Pattern.new({ type: 'plain' })
25
+ expect(p3.match_page(page).length).to be > 100
19
26
  end
20
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.9
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - binz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-29 00:00:00.000000000 Z
11
+ date: 2021-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake