http_crawler 0.3.1.6 → 0.3.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 781a489a3c6d8b0e6d09f79c58d09593372a36cb2faf57dc93194e4b265b2021
4
- data.tar.gz: 108cb3d5763cc6ed49b71cdf4526212d4851a314db3440b0d50df4520bd52840
3
+ metadata.gz: 0d1fd5d70b3339e212db19695ab111c2fdef94f8f76fbf912709ab079e36bf25
4
+ data.tar.gz: ca1007ea22b501cc30d8209ba1ab55bc7128250cbb71859fe37e55ba470a2e13
5
5
  SHA512:
6
- metadata.gz: 53c07f32f718d5a86c61988987b3a4a8c32b1b70e75a3e4735c2dc18561c94082cbf537988ca594de65282e697556db769d7256fc1a7a203a289950ae02e12cf
7
- data.tar.gz: e37325d54b10229d39b118e878782cfb3827a370d48280d86615121b03a0feb8b7d2b655e41abc76bf799e2898840c1085fb3ab02e5d6a12988279b618577217
6
+ metadata.gz: ee63e0d0c13240c1ca443ca3788ab10dc1393e44d4e6b3f6b907e0cb45a302ac7f5eb38fc1888993b0e8e5db3ae44f0561cb7bfb732907b073d41328e7edfe3c
7
+ data.tar.gz: 052b85418bfad358af10778e2cbaa1fe90dbce61fea662f713ffc2a58bcc3c69e6271096dda990def0ba0dbceec023388df26b832cbdf99d4f93c6b2fa8d7e39
@@ -4,6 +4,6 @@ class String
4
4
  # 清除包含: 空格,回车
5
5
  #
6
6
  def del_inter
7
- self.gsub(/(?:\n|\t| )/,"")
7
+ self.gsub(/(?:\n|\t|\r| )/,"")
8
8
  end
9
9
  end
@@ -57,32 +57,38 @@ module HTTP
57
57
 
58
58
  # 转换html格式
59
59
  # @return [Nokogiri::HTML::Document]
60
- def html
61
- return @html if defined? @html
62
- self.html = self.dec
63
- self.html
64
- end
60
+ def html(data = nil)
65
61
 
66
- # @return [Nokogiri::HTML::Document]
67
- def html=(data)
62
+ if (data.blank? && defined? @html)
63
+ # 如果 data 为空 并且 @html 有值,直接返回 @html
64
+ return @html
65
+ end
66
+
67
+ # 如果data为空初始化为 self.dec
68
+ data ||= self.dec
68
69
  if (Nokogiri::HTML::Document === data)
69
70
  @html = data
70
71
  else
71
72
  @html = Nokogiri::HTML(data)
72
73
  end
73
74
  @html
75
+
76
+ return @html if defined? @html
77
+ self.html = self.dec
78
+ self.html
74
79
  end
75
80
 
81
+
76
82
  # 转换json格式
77
83
  # @return [Hash]
78
- def json
79
- return @json if defined? @json
80
- self.json = self.dec
81
- self.json
82
- end
84
+ def json(data = nil)
85
+ if (data.blank? && defined? @json)
86
+ # 如果 data 为空 并且 @json 有值,直接返回 @json
87
+ return @json
88
+ end
83
89
 
84
- # @return [Hash]
85
- def json=(data)
90
+ # 如果data为空初始化为 self.dec
91
+ data ||= self.dec
86
92
  if (Hash === data)
87
93
  @json = data
88
94
  else
@@ -94,14 +100,14 @@ module HTTP
94
100
 
95
101
  # 通过readability 解析数据
96
102
  # @return [Readability::Document]
97
- def readability
98
- return @readability if defined? @readability
99
- self.readability = self.dec
100
- self.readability
101
- end
103
+ def readability(data = nil)
104
+ if (data.blank? && defined? @readability)
105
+ # 如果 data 为空 并且 @readability 有值,直接返回 @readability
106
+ return @readability
107
+ end
102
108
 
103
- # @return [Readability::Document]
104
- def readability=(data)
109
+ # 如果data为空初始化为 self.dec
110
+ data ||= self.dec
105
111
  if (Readability::Document === data)
106
112
  @readability = data
107
113
  else
@@ -110,20 +116,20 @@ module HTTP
110
116
  @readability
111
117
  end
112
118
 
113
-
114
- def content
115
- Nokogiri::HTML(readability.content).text.del_inter
119
+ # 获取正文内容
120
+ def content(data = readability.content)
121
+ Nokogiri::HTML(data).text.del_inter
116
122
  end
117
123
 
118
124
  # 解析
119
125
  # 默认使用 json 的值
120
- def parsing
126
+ def parsing(parameter = {})
121
127
  self.json
122
128
  end
123
129
 
124
130
  # 获取解析结果
125
- def results
126
- @results ||= parsing
131
+ def results(parameter = {})
132
+ @results ||= parsing(parameter)
127
133
  end
128
134
 
129
135
  def get_date(str)
@@ -1,3 +1,3 @@
1
1
  module HttpCrawler
2
- VERSION = "0.3.1.6"
2
+ VERSION = "0.3.1.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: http_crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1.6
4
+ version: 0.3.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - jagger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-03-08 00:00:00.000000000 Z
11
+ date: 2019-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec