wx_ext 0.1.1 → 0.1.2

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4d4dc7c81727a8fe95cec098764ca3b6ae5a6cb1
4
- data.tar.gz: 455e5391ca5abb4c9d687b3d48d3881cea885157
3
+ metadata.gz: 0d2286ab5475583239a332f578ae2f43054ba264
4
+ data.tar.gz: 333b03607b64a19cbbb7a8aceac273e7c721533c
5
5
  SHA512:
6
- metadata.gz: bc430085462ce84e094c10a7c1e713e45d0e3fa57be0b28467e757a4a3965ddb9cdae7e4d9cc97baf8c53cb95641b6ae91ba66d241ea4026a41a5ec8e1282b62
7
- data.tar.gz: 4969fa8eaa4cbdabf692838c9ba35a8dc09a016197ebf4c3f11039312ee79577cdddf94aff52807b7d30d6aa08541dc2d08481060cd47e23c095206cee5fd455
6
+ metadata.gz: 4179d378f97c60d3eee82dfbf8c1016123b15207bf07b80dade2bba36606a4e6f954307b4a6153e108ea0512a17fc38f8ea896890c6d49a37c3a6940c44d5131
7
+ data.tar.gz: 379d657c157449af7db4efd494fc3b074f2072077d2074c8000d7b178bfe387d6a3c9dc78f65b60218bbb769046c1aa2f2a5f292d96e1e81a5bc6b12f96acd27
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  Add this line to your application's Gemfile:
8
8
 
9
9
  ```ruby
10
- gem 'wx_ext', '~> 0.1.1'
10
+ gem 'wx_ext', '~> 0.1.2'
11
11
  ```
12
12
 
13
13
  And then execute:
@@ -5,9 +5,9 @@ require 'json'
5
5
  require 'open-uri'
6
6
  module WxExt
7
7
  class SougouWeixin
8
- def self.spider_posts_from_sougou(openid, page_index = 1, date_last = '2000-01-01')
9
- url = "http://weixin.sogou.com/gzhjs?openid=#{openid}&page=#{page_index}"
10
- res = RestClient.get url, {:accept => :json}
8
+ def self.spider_posts_from_sougou(openid, page_index = 1, date_last = (Time.now - 3600 * 24 * 10).strftime("%Y-%m-%d"))
9
+ json_url = "http://weixin.sogou.com/gzhjs?&openid=#{openid}&page=#{page_index}"
10
+ res = RestClient.get json_url
11
11
 
12
12
  date_last_arr = date_last.to_s.split('-')
13
13
  date_last_to_com = Time.new(date_last_arr[0], date_last_arr[1], date_last_arr[2])
@@ -30,38 +30,44 @@ module WxExt
30
30
  end
31
31
  spider_posts = []
32
32
  xml_articles.each do |xml|
33
- doc = Nokogiri::XML(xml.to_s, nil, "UTF-8")
33
+ doc = Nokogiri::XML(xml, nil, 'UTF-8')
34
34
  date = doc.at_xpath('//DOCUMENT/item/display/date').text
35
-
36
35
  spider_post = {}
37
36
 
38
37
  date_arr = date.to_s.split('-')
39
38
  date_to_com = Time.new(date_arr[0], date_arr[1], date_arr[2])
40
39
  if date_last_to_com < date_to_com
41
- spider_post[:title] = doc.at_xpath('//DOCUMENT/item/display/title1').text
42
- spider_post[:url] = doc.at_xpath('//DOCUMENT/item/display/url').text
43
- spider_post[:img] = doc.at_xpath('//DOCUMENT/item/display/imglink').text
44
- # logo = doc.at_xpath('//DOCUMENT/item/display/headimage').text
45
- # sourcename = doc.at_xpath('//DOCUMENT/item/display/sourcename').text
46
- spider_post[:content_short] = doc.at_xpath('//DOCUMENT/item/display/content168').text
40
+ title = doc.at_xpath('//DOCUMENT/item/display/title1').text
41
+ url = doc.at_xpath('//DOCUMENT/item/display/url').text
42
+ img = doc.at_xpath('//DOCUMENT/item/display/imglink').text
43
+ content_short = doc.at_xpath('//DOCUMENT/item/display/content168').text
47
44
 
48
- doc_post = Nokogiri::HTML(open(url), nil, "UTF-8")
45
+ doc_post = Nokogiri::HTML(open(url), nil, 'UTF-8')
49
46
  node_author = doc_post.css('div.rich_media_meta_list > em.rich_media_meta.rich_media_meta_text')[1]
50
- spider_post[:author] = node_author ? node_author.content : '无'
51
- spider_post[:content] = doc_post.css('div.rich_media_content').first.to_s
47
+ author = node_author ? node_author.content : '无'
48
+ content = doc_post.css('div#js_content').first.to_s
49
+ spider_post = {
50
+ title: title,
51
+ url: url,
52
+ img: img,
53
+ content_short: content_short,
54
+ author: author,
55
+ content: content,
56
+ date: date
57
+ }
52
58
  spider_posts.push spider_post
53
59
  else
54
60
  break
55
61
  end
56
62
  end
57
63
  {
58
- total_items: total_items,
59
- total_pages: total_pages,
60
- page: page,
61
- response_time: response_time,
62
- spider_posts: spider_posts,
63
- original_count: xml_articles.count,
64
- count: spider_posts.count
64
+ total_items: total_items,
65
+ total_pages: total_pages,
66
+ page: page,
67
+ response_time: response_time,
68
+ spider_posts: spider_posts,
69
+ original_count: xml_articles.count,
70
+ count: spider_posts.count
65
71
  }
66
72
  end
67
73
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module WxExt
3
- VERSION = '0.1.1'
3
+ VERSION = '0.1.2'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wx_ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - flowerwrong