http_crawler 0.3.1.6 → 0.3.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 781a489a3c6d8b0e6d09f79c58d09593372a36cb2faf57dc93194e4b265b2021
4
- data.tar.gz: 108cb3d5763cc6ed49b71cdf4526212d4851a314db3440b0d50df4520bd52840
3
+ metadata.gz: 0d1fd5d70b3339e212db19695ab111c2fdef94f8f76fbf912709ab079e36bf25
4
+ data.tar.gz: ca1007ea22b501cc30d8209ba1ab55bc7128250cbb71859fe37e55ba470a2e13
5
5
  SHA512:
6
- metadata.gz: 53c07f32f718d5a86c61988987b3a4a8c32b1b70e75a3e4735c2dc18561c94082cbf537988ca594de65282e697556db769d7256fc1a7a203a289950ae02e12cf
7
- data.tar.gz: e37325d54b10229d39b118e878782cfb3827a370d48280d86615121b03a0feb8b7d2b655e41abc76bf799e2898840c1085fb3ab02e5d6a12988279b618577217
6
+ metadata.gz: ee63e0d0c13240c1ca443ca3788ab10dc1393e44d4e6b3f6b907e0cb45a302ac7f5eb38fc1888993b0e8e5db3ae44f0561cb7bfb732907b073d41328e7edfe3c
7
+ data.tar.gz: 052b85418bfad358af10778e2cbaa1fe90dbce61fea662f713ffc2a58bcc3c69e6271096dda990def0ba0dbceec023388df26b832cbdf99d4f93c6b2fa8d7e39
@@ -4,6 +4,6 @@ class String
4
4
  # 清除包含: 空格,回车
5
5
  #
6
6
  def del_inter
7
- self.gsub(/(?:\n|\t| )/,"")
7
+ self.gsub(/(?:\n|\t|\r| )/,"")
8
8
  end
9
9
  end
@@ -57,32 +57,38 @@ module HTTP
57
57
 
58
58
  # 转换html格式
59
59
  # @return [Nokogiri::HTML::Document]
60
- def html
61
- return @html if defined? @html
62
- self.html = self.dec
63
- self.html
64
- end
60
+ def html(data = nil)
65
61
 
66
- # @return [Nokogiri::HTML::Document]
67
- def html=(data)
62
+ if (data.blank? && defined? @html)
63
+ # 如果 data 为空 并且 @html 有值,直接返回 @html
64
+ return @html
65
+ end
66
+
67
+ # 如果data为空初始化为 self.dec
68
+ data ||= self.dec
68
69
  if (Nokogiri::HTML::Document === data)
69
70
  @html = data
70
71
  else
71
72
  @html = Nokogiri::HTML(data)
72
73
  end
73
74
  @html
75
+
76
+ return @html if defined? @html
77
+ self.html = self.dec
78
+ self.html
74
79
  end
75
80
 
81
+
76
82
  # 转换json格式
77
83
  # @return [Hash]
78
- def json
79
- return @json if defined? @json
80
- self.json = self.dec
81
- self.json
82
- end
84
+ def json(data = nil)
85
+ if (data.blank? && defined? @json)
86
+ # 如果 data 为空 并且 @json 有值,直接返回 @json
87
+ return @json
88
+ end
83
89
 
84
- # @return [Hash]
85
- def json=(data)
90
+ # 如果data为空初始化为 self.dec
91
+ data ||= self.dec
86
92
  if (Hash === data)
87
93
  @json = data
88
94
  else
@@ -94,14 +100,14 @@ module HTTP
94
100
 
95
101
  # 通过readability 解析数据
96
102
  # @return [Readability::Document]
97
- def readability
98
- return @readability if defined? @readability
99
- self.readability = self.dec
100
- self.readability
101
- end
103
+ def readability(data = nil)
104
+ if (data.blank? && defined? @readability)
105
+ # 如果 data 为空 并且 @readability 有值,直接返回 @readability
106
+ return @readability
107
+ end
102
108
 
103
- # @return [Readability::Document]
104
- def readability=(data)
109
+ # 如果data为空初始化为 self.dec
110
+ data ||= self.dec
105
111
  if (Readability::Document === data)
106
112
  @readability = data
107
113
  else
@@ -110,20 +116,20 @@ module HTTP
110
116
  @readability
111
117
  end
112
118
 
113
-
114
- def content
115
- Nokogiri::HTML(readability.content).text.del_inter
119
+ # 获取正文内容
120
+ def content(data = readability.content)
121
+ Nokogiri::HTML(data).text.del_inter
116
122
  end
117
123
 
118
124
  # 解析
119
125
  # 默认使用 json 的值
120
- def parsing
126
+ def parsing(parameter = {})
121
127
  self.json
122
128
  end
123
129
 
124
130
  # 获取解析结果
125
- def results
126
- @results ||= parsing
131
+ def results(parameter = {})
132
+ @results ||= parsing(parameter)
127
133
  end
128
134
 
129
135
  def get_date(str)
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.6"
2
+ VERSION = "0.3.1.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.6
4
+ version: 0.3.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-08 00:00:00.000000000 Z
11
+ date: 2019-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec