http_crawler 0.3.1.6 → 0.3.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/http_crawler/common/string.rb +1 -1
- data/lib/http_crawler/http/response.rb +33 -27
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d1fd5d70b3339e212db19695ab111c2fdef94f8f76fbf912709ab079e36bf25
|
4
|
+
data.tar.gz: ca1007ea22b501cc30d8209ba1ab55bc7128250cbb71859fe37e55ba470a2e13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee63e0d0c13240c1ca443ca3788ab10dc1393e44d4e6b3f6b907e0cb45a302ac7f5eb38fc1888993b0e8e5db3ae44f0561cb7bfb732907b073d41328e7edfe3c
|
7
|
+
data.tar.gz: 052b85418bfad358af10778e2cbaa1fe90dbce61fea662f713ffc2a58bcc3c69e6271096dda990def0ba0dbceec023388df26b832cbdf99d4f93c6b2fa8d7e39
|
data/lib/http_crawler/http/response.rb
CHANGED
@@ -57,32 +57,38 @@ module HTTP
|
|
57
57
|
|
58
58
|
# 转换html格式
|
59
59
|
# @return [Nokogiri::HTML::Document]
|
60
|
-
def html
|
61
|
-
return @html if defined? @html
|
62
|
-
self.html = self.dec
|
63
|
-
self.html
|
64
|
-
end
|
60
|
+
def html(data = nil)
|
65
61
|
|
66
|
-
|
67
|
-
|
62
|
+
if (data.blank? && defined? @html)
|
63
|
+
# 如果 data 为空 并且 @html 有值,直接返回 @html
|
64
|
+
return @html
|
65
|
+
end
|
66
|
+
|
67
|
+
# 如果data为空初始化为 self.dec
|
68
|
+
data ||= self.dec
|
68
69
|
if (Nokogiri::HTML::Document === data)
|
69
70
|
@html = data
|
70
71
|
else
|
71
72
|
@html = Nokogiri::HTML(data)
|
72
73
|
end
|
73
74
|
@html
|
75
|
+
|
76
|
+
return @html if defined? @html
|
77
|
+
self.html = self.dec
|
78
|
+
self.html
|
74
79
|
end
|
75
80
|
|
81
|
+
|
76
82
|
# 转换json格式
|
77
83
|
# @return [Hash]
|
78
|
-
def json
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
84
|
+
def json(data = nil)
|
85
|
+
if (data.blank? && defined? @json)
|
86
|
+
# 如果 data 为空 并且 @json 有值,直接返回 @json
|
87
|
+
return @json
|
88
|
+
end
|
83
89
|
|
84
|
-
|
85
|
-
|
90
|
+
# 如果data为空初始化为 self.dec
|
91
|
+
data ||= self.dec
|
86
92
|
if (Hash === data)
|
87
93
|
@json = data
|
88
94
|
else
|
@@ -94,14 +100,14 @@ module HTTP
|
|
94
100
|
|
95
101
|
# 通过readability 解析数据
|
96
102
|
# @return [Readability::Document]
|
97
|
-
def readability
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
103
|
+
def readability(data = nil)
|
104
|
+
if (data.blank? && defined? @readability)
|
105
|
+
# 如果 data 为空 并且 @readability 有值,直接返回 @readability
|
106
|
+
return @readability
|
107
|
+
end
|
102
108
|
|
103
|
-
|
104
|
-
|
109
|
+
# 如果data为空初始化为 self.dec
|
110
|
+
data ||= self.dec
|
105
111
|
if (Readability::Document === data)
|
106
112
|
@readability = data
|
107
113
|
else
|
@@ -110,20 +116,20 @@ module HTTP
|
|
110
116
|
@readability
|
111
117
|
end
|
112
118
|
|
113
|
-
|
114
|
-
def content
|
115
|
-
Nokogiri::HTML(
|
119
|
+
# 获取正文内容
|
120
|
+
def content(data = readability.content)
|
121
|
+
Nokogiri::HTML(data).text.del_inter
|
116
122
|
end
|
117
123
|
|
118
124
|
# 解析
|
119
125
|
# 默认使用 json 的值
|
120
|
-
def parsing
|
126
|
+
def parsing(parameter = {})
|
121
127
|
self.json
|
122
128
|
end
|
123
129
|
|
124
130
|
# 获取解析结果
|
125
|
-
def results
|
126
|
-
@results ||= parsing
|
131
|
+
def results(parameter = {})
|
132
|
+
@results ||= parsing(parameter)
|
127
133
|
end
|
128
134
|
|
129
135
|
def get_date(str)
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.6
|
4
|
+
version: 0.3.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|