wx_ext 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/lib/wx_ext/sougou_weixin.rb +27 -21
- data/lib/wx_ext/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d2286ab5475583239a332f578ae2f43054ba264
|
4
|
+
data.tar.gz: 333b03607b64a19cbbb7a8aceac273e7c721533c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4179d378f97c60d3eee82dfbf8c1016123b15207bf07b80dade2bba36606a4e6f954307b4a6153e108ea0512a17fc38f8ea896890c6d49a37c3a6940c44d5131
|
7
|
+
data.tar.gz: 379d657c157449af7db4efd494fc3b074f2072077d2074c8000d7b178bfe387d6a3c9dc78f65b60218bbb769046c1aa2f2a5f292d96e1e81a5bc6b12f96acd27
|
data/README.md
CHANGED
data/lib/wx_ext/sougou_weixin.rb
CHANGED
@@ -5,9 +5,9 @@ require 'json'
|
|
5
5
|
require 'open-uri'
|
6
6
|
module WxExt
|
7
7
|
class SougouWeixin
|
8
|
-
def self.spider_posts_from_sougou(openid, page_index = 1, date_last =
|
9
|
-
|
10
|
-
res = RestClient.get
|
8
|
+
def self.spider_posts_from_sougou(openid, page_index = 1, date_last = (Time.now - 3600 * 24 * 10).strftime("%Y-%m-%d"))
|
9
|
+
json_url = "http://weixin.sogou.com/gzhjs?&openid=#{openid}&page=#{page_index}"
|
10
|
+
res = RestClient.get json_url
|
11
11
|
|
12
12
|
date_last_arr = date_last.to_s.split('-')
|
13
13
|
date_last_to_com = Time.new(date_last_arr[0], date_last_arr[1], date_last_arr[2])
|
@@ -30,38 +30,44 @@ module WxExt
|
|
30
30
|
end
|
31
31
|
spider_posts = []
|
32
32
|
xml_articles.each do |xml|
|
33
|
-
doc = Nokogiri::XML(xml
|
33
|
+
doc = Nokogiri::XML(xml, nil, 'UTF-8')
|
34
34
|
date = doc.at_xpath('//DOCUMENT/item/display/date').text
|
35
|
-
|
36
35
|
spider_post = {}
|
37
36
|
|
38
37
|
date_arr = date.to_s.split('-')
|
39
38
|
date_to_com = Time.new(date_arr[0], date_arr[1], date_arr[2])
|
40
39
|
if date_last_to_com < date_to_com
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
# sourcename = doc.at_xpath('//DOCUMENT/item/display/sourcename').text
|
46
|
-
spider_post[:content_short] = doc.at_xpath('//DOCUMENT/item/display/content168').text
|
40
|
+
title = doc.at_xpath('//DOCUMENT/item/display/title1').text
|
41
|
+
url = doc.at_xpath('//DOCUMENT/item/display/url').text
|
42
|
+
img = doc.at_xpath('//DOCUMENT/item/display/imglink').text
|
43
|
+
content_short = doc.at_xpath('//DOCUMENT/item/display/content168').text
|
47
44
|
|
48
|
-
doc_post = Nokogiri::HTML(open(url), nil,
|
45
|
+
doc_post = Nokogiri::HTML(open(url), nil, 'UTF-8')
|
49
46
|
node_author = doc_post.css('div.rich_media_meta_list > em.rich_media_meta.rich_media_meta_text')[1]
|
50
|
-
|
51
|
-
|
47
|
+
author = node_author ? node_author.content : '无'
|
48
|
+
content = doc_post.css('div#js_content').first.to_s
|
49
|
+
spider_post = {
|
50
|
+
title: title,
|
51
|
+
url: url,
|
52
|
+
img: img,
|
53
|
+
content_short: content_short,
|
54
|
+
author: author,
|
55
|
+
content: content,
|
56
|
+
date: date
|
57
|
+
}
|
52
58
|
spider_posts.push spider_post
|
53
59
|
else
|
54
60
|
break
|
55
61
|
end
|
56
62
|
end
|
57
63
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
64
|
+
total_items: total_items,
|
65
|
+
total_pages: total_pages,
|
66
|
+
page: page,
|
67
|
+
response_time: response_time,
|
68
|
+
spider_posts: spider_posts,
|
69
|
+
original_count: xml_articles.count,
|
70
|
+
count: spider_posts.count
|
65
71
|
}
|
66
72
|
end
|
67
73
|
end
|
data/lib/wx_ext/version.rb
CHANGED