digger 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/digger/pattern.rb +29 -7
- data/lib/digger/version.rb +1 -1
- data/spec/pattern_spec.rb +10 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 29c3945e9377348e1152eea7f46e0f11aa2e59cc5568fad57d25ecd3d271a9df
|
4
|
+
data.tar.gz: 1e4862f9939aa9c62e175a39df078fe12a2f51190af1f280f6d418cbab7e6390
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 177e393de76bc35e31d6cc0eeda839d543d13fd81c40eba7d08704a131ec01396872734ea689c49fb1d09ceb4ba604fae76c3c201a2706dc9c889161038e0323
|
7
|
+
data.tar.gz: da76004a179aaed5cf75a96f90da3ebc739416e0ec162ff95c6aa27625590826fcc179b749156ea96937e364fb6a251515cb335a14b317001f3a47ea30330aeb
|
data/lib/digger/pattern.rb
CHANGED
@@ -6,7 +6,9 @@ module Digger
|
|
6
6
|
attr_accessor :type, :value, :block
|
7
7
|
|
8
8
|
def initialize(hash = {})
|
9
|
-
hash.each_pair
|
9
|
+
hash.each_pair do |key, value|
|
10
|
+
send("#{key}=", value) if %w[type value block].include?(key.to_s)
|
11
|
+
end
|
10
12
|
end
|
11
13
|
|
12
14
|
def safe_block(&default_block)
|
@@ -31,8 +33,9 @@ module Digger
|
|
31
33
|
TYPES_REGEXP = 0.upto(MATCH_MAX).map { |i| "match_#{i}" } + %w[match_many]
|
32
34
|
TYPES_CSS = %w[css_one css_many].freeze
|
33
35
|
TYPES_JSON = %w[json jsonp].freeze
|
36
|
+
TYPES_OTHER = %w[cookie plain lines header body].freeze
|
34
37
|
|
35
|
-
TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON +
|
38
|
+
TYPES = TYPES_REGEXP + TYPES_CSS + TYPES_JSON + TYPES_OTHER
|
36
39
|
|
37
40
|
def match_page(page)
|
38
41
|
return unless page.success?
|
@@ -43,13 +46,31 @@ module Digger
|
|
43
46
|
css_match(page.doc)
|
44
47
|
elsif TYPES_JSON.include?(type)
|
45
48
|
json_match(page)
|
46
|
-
|
47
|
-
|
49
|
+
elsif TYPES_OTHER.include?(type)
|
50
|
+
send("get_#{type}", page)
|
48
51
|
end
|
49
52
|
end
|
50
53
|
|
51
|
-
def
|
52
|
-
|
54
|
+
def get_header(page)
|
55
|
+
header = (page.headers[value.to_s.downcase] || []).first
|
56
|
+
safe_block.call(header)
|
57
|
+
end
|
58
|
+
|
59
|
+
def get_body(page)
|
60
|
+
safe_block.call(page.body)
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_plain(page)
|
64
|
+
safe_block.call(page.doc.text)
|
65
|
+
end
|
66
|
+
|
67
|
+
def get_lines(page)
|
68
|
+
block = safe_block
|
69
|
+
page.body.split("\n").map(&:strip).filter { |line| !line.empty? }.map { |line| block.call(line) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def get_cookie(page)
|
73
|
+
cookie = page.cookies.find { |c| c.name == value }&.value
|
53
74
|
safe_block.call(cookie)
|
54
75
|
end
|
55
76
|
|
@@ -75,7 +96,8 @@ module Digger
|
|
75
96
|
block = safe_block(&:strip)
|
76
97
|
# content is String
|
77
98
|
if type == 'match_many'
|
78
|
-
|
99
|
+
regexp = value.is_a?(Regexp) ? value : Regexp.new(value.to_s)
|
100
|
+
body.gsub(regexp).to_a.map { |node| block.call(node) }.uniq
|
79
101
|
else
|
80
102
|
index = TYPES_REGEXP.index(type)
|
81
103
|
matches = body.match(value)
|
data/lib/digger/version.rb
CHANGED
data/spec/pattern_spec.rb
CHANGED
@@ -11,10 +11,17 @@ describe Digger::Pattern do
|
|
11
11
|
# expect(pt.json_fetch(json, '$[0].b[2]')).to eq(3)
|
12
12
|
# end
|
13
13
|
|
14
|
-
it 'parse
|
14
|
+
it 'parse cookie & others' do
|
15
15
|
page = Digger::HTTP.new.fetch_page('https://xueqiu.com/')
|
16
|
-
|
17
|
-
|
16
|
+
p1 = Digger::Pattern.new({ type: 'cookie', value: 'xq_a_token', block: ->(v) { "!!#{v}" } })
|
17
|
+
# cookie
|
18
|
+
result = p1.match_page(page)
|
18
19
|
expect(result.length).to eq(42)
|
20
|
+
# header
|
21
|
+
p2 = Digger::Pattern.new({ type: 'header', value: 'transfer-encoding' })
|
22
|
+
expect(p2.match_page(page)).to eq('chunked')
|
23
|
+
# get_plain
|
24
|
+
p3 = Digger::Pattern.new({ type: 'plain' })
|
25
|
+
expect(p3.match_page(page).length).to be > 100
|
19
26
|
end
|
20
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.9
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- binz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|