http_crawler 0.3.1.6 → 0.3.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/http_crawler/common/string.rb +1 -1
- data/lib/http_crawler/http/response.rb +33 -27
- data/lib/http_crawler/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d1fd5d70b3339e212db19695ab111c2fdef94f8f76fbf912709ab079e36bf25
|
4
|
+
data.tar.gz: ca1007ea22b501cc30d8209ba1ab55bc7128250cbb71859fe37e55ba470a2e13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee63e0d0c13240c1ca443ca3788ab10dc1393e44d4e6b3f6b907e0cb45a302ac7f5eb38fc1888993b0e8e5db3ae44f0561cb7bfb732907b073d41328e7edfe3c
|
7
|
+
data.tar.gz: 052b85418bfad358af10778e2cbaa1fe90dbce61fea662f713ffc2a58bcc3c69e6271096dda990def0ba0dbceec023388df26b832cbdf99d4f93c6b2fa8d7e39
|
data/lib/http_crawler/http/response.rb
CHANGED
@@ -57,32 +57,38 @@ module HTTP
|
|
57
57
|
|
58
58
|
# 转换html格式
|
59
59
|
# @return [Nokogiri::HTML::Document]
|
60
|
-
def html
|
61
|
-
return @html if defined? @html
|
62
|
-
self.html = self.dec
|
63
|
-
self.html
|
64
|
-
end
|
60
|
+
def html(data = nil)
|
65
61
|
|
66
|
-
|
67
|
-
|
62
|
+
if (data.blank? && defined? @html)
|
63
|
+
# 如果 data 为空 并且 @json 有值,直接返回 @json
|
64
|
+
return @html
|
65
|
+
end
|
66
|
+
|
67
|
+
# 如果data为空初始化为 self.dec
|
68
|
+
data ||= self.dec
|
68
69
|
if (Nokogiri::HTML::Document === data)
|
69
70
|
@html = data
|
70
71
|
else
|
71
72
|
@html = Nokogiri::HTML(data)
|
72
73
|
end
|
73
74
|
@html
|
75
|
+
|
76
|
+
return @html if defined? @html
|
77
|
+
self.html = self.dec
|
78
|
+
self.html
|
74
79
|
end
|
75
80
|
|
81
|
+
|
76
82
|
# 转换json格式
|
77
83
|
# @return [Hash]
|
78
|
-
def json
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
84
|
+
def json(data = nil)
|
85
|
+
if (data.blank? && defined? @json)
|
86
|
+
# 如果 data 为空 并且 @json 有值,直接返回 @json
|
87
|
+
return @json
|
88
|
+
end
|
83
89
|
|
84
|
-
|
85
|
-
|
90
|
+
# 如果data为空初始化为 self.dec
|
91
|
+
data ||= self.dec
|
86
92
|
if (Hash === data)
|
87
93
|
@json = data
|
88
94
|
else
|
@@ -94,14 +100,14 @@ module HTTP
|
|
94
100
|
|
95
101
|
# 通过readability 解析数据
|
96
102
|
# @return [Readability::Document]
|
97
|
-
def readability
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
103
|
+
def readability(data = nil)
|
104
|
+
if (data.blank? && defined? @readability)
|
105
|
+
# 如果 data 为空 并且 @readability 有值,直接返回 @readability
|
106
|
+
return @readability
|
107
|
+
end
|
102
108
|
|
103
|
-
|
104
|
-
|
109
|
+
# 如果data为空初始化为 self.dec
|
110
|
+
data ||= self.dec
|
105
111
|
if (Readability::Document === data)
|
106
112
|
@readability = data
|
107
113
|
else
|
@@ -110,20 +116,20 @@ module HTTP
|
|
110
116
|
@readability
|
111
117
|
end
|
112
118
|
|
113
|
-
|
114
|
-
def content
|
115
|
-
Nokogiri::HTML(
|
119
|
+
# 获取正文内容
|
120
|
+
def content(data = readability.content)
|
121
|
+
Nokogiri::HTML(data).text.del_inter
|
116
122
|
end
|
117
123
|
|
118
124
|
# 解析
|
119
125
|
# 默认使用 json 的值
|
120
|
-
def parsing
|
126
|
+
def parsing(parameter = {})
|
121
127
|
self.json
|
122
128
|
end
|
123
129
|
|
124
130
|
# 获取解析结果
|
125
|
-
def results
|
126
|
-
@results ||= parsing
|
131
|
+
def results(parameter = {})
|
132
|
+
@results ||= parsing(parameter)
|
127
133
|
end
|
128
134
|
|
129
135
|
def get_date(str)
|
data/lib/http_crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: http_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1.6
|
4
|
+
version: 0.3.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- jagger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-03-
|
11
|
+
date: 2019-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|