wx_ext 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4d4dc7c81727a8fe95cec098764ca3b6ae5a6cb1
4
- data.tar.gz: 455e5391ca5abb4c9d687b3d48d3881cea885157
3
+ metadata.gz: 0d2286ab5475583239a332f578ae2f43054ba264
4
+ data.tar.gz: 333b03607b64a19cbbb7a8aceac273e7c721533c
5
5
  SHA512:
6
- metadata.gz: bc430085462ce84e094c10a7c1e713e45d0e3fa57be0b28467e757a4a3965ddb9cdae7e4d9cc97baf8c53cb95641b6ae91ba66d241ea4026a41a5ec8e1282b62
7
- data.tar.gz: 4969fa8eaa4cbdabf692838c9ba35a8dc09a016197ebf4c3f11039312ee79577cdddf94aff52807b7d30d6aa08541dc2d08481060cd47e23c095206cee5fd455
6
+ metadata.gz: 4179d378f97c60d3eee82dfbf8c1016123b15207bf07b80dade2bba36606a4e6f954307b4a6153e108ea0512a17fc38f8ea896890c6d49a37c3a6940c44d5131
7
+ data.tar.gz: 379d657c157449af7db4efd494fc3b074f2072077d2074c8000d7b178bfe387d6a3c9dc78f65b60218bbb769046c1aa2f2a5f292d96e1e81a5bc6b12f96acd27
data/README.md CHANGED
@@ -7,7 +7,7 @@
7
7
  Add this line to your application's Gemfile:
8
8
 
9
9
  ```ruby
10
- gem 'wx_ext', '~> 0.1.1'
10
+ gem 'wx_ext', '~> 0.1.2'
11
11
  ```
12
12
 
13
13
  And then execute:
@@ -5,9 +5,9 @@ require 'json'
5
5
  require 'open-uri'
6
6
  module WxExt
7
7
  class SougouWeixin
8
- def self.spider_posts_from_sougou(openid, page_index = 1, date_last = '2000-01-01')
9
- url = "http://weixin.sogou.com/gzhjs?openid=#{openid}&page=#{page_index}"
10
- res = RestClient.get url, {:accept => :json}
8
+ def self.spider_posts_from_sougou(openid, page_index = 1, date_last = (Time.now - 3600 * 24 * 10).strftime("%Y-%m-%d"))
9
+ json_url = "http://weixin.sogou.com/gzhjs?&openid=#{openid}&page=#{page_index}"
10
+ res = RestClient.get json_url
11
11
 
12
12
  date_last_arr = date_last.to_s.split('-')
13
13
  date_last_to_com = Time.new(date_last_arr[0], date_last_arr[1], date_last_arr[2])
@@ -30,38 +30,44 @@ module WxExt
30
30
  end
31
31
  spider_posts = []
32
32
  xml_articles.each do |xml|
33
- doc = Nokogiri::XML(xml.to_s, nil, "UTF-8")
33
+ doc = Nokogiri::XML(xml, nil, 'UTF-8')
34
34
  date = doc.at_xpath('//DOCUMENT/item/display/date').text
35
-
36
35
  spider_post = {}
37
36
 
38
37
  date_arr = date.to_s.split('-')
39
38
  date_to_com = Time.new(date_arr[0], date_arr[1], date_arr[2])
40
39
  if date_last_to_com < date_to_com
41
- spider_post[:title] = doc.at_xpath('//DOCUMENT/item/display/title1').text
42
- spider_post[:url] = doc.at_xpath('//DOCUMENT/item/display/url').text
43
- spider_post[:img] = doc.at_xpath('//DOCUMENT/item/display/imglink').text
44
- # logo = doc.at_xpath('//DOCUMENT/item/display/headimage').text
45
- # sourcename = doc.at_xpath('//DOCUMENT/item/display/sourcename').text
46
- spider_post[:content_short] = doc.at_xpath('//DOCUMENT/item/display/content168').text
40
+ title = doc.at_xpath('//DOCUMENT/item/display/title1').text
41
+ url = doc.at_xpath('//DOCUMENT/item/display/url').text
42
+ img = doc.at_xpath('//DOCUMENT/item/display/imglink').text
43
+ content_short = doc.at_xpath('//DOCUMENT/item/display/content168').text
47
44
 
48
- doc_post = Nokogiri::HTML(open(url), nil, "UTF-8")
45
+ doc_post = Nokogiri::HTML(open(url), nil, 'UTF-8')
49
46
  node_author = doc_post.css('div.rich_media_meta_list > em.rich_media_meta.rich_media_meta_text')[1]
50
- spider_post[:author] = node_author ? node_author.content : '无'
51
- spider_post[:content] = doc_post.css('div.rich_media_content').first.to_s
47
+ author = node_author ? node_author.content : '无'
48
+ content = doc_post.css('div#js_content').first.to_s
49
+ spider_post = {
50
+ title: title,
51
+ url: url,
52
+ img: img,
53
+ content_short: content_short,
54
+ author: author,
55
+ content: content,
56
+ date: date
57
+ }
52
58
  spider_posts.push spider_post
53
59
  else
54
60
  break
55
61
  end
56
62
  end
57
63
  {
58
- total_items: total_items,
59
- total_pages: total_pages,
60
- page: page,
61
- response_time: response_time,
62
- spider_posts: spider_posts,
63
- original_count: xml_articles.count,
64
- count: spider_posts.count
64
+ total_items: total_items,
65
+ total_pages: total_pages,
66
+ page: page,
67
+ response_time: response_time,
68
+ spider_posts: spider_posts,
69
+ original_count: xml_articles.count,
70
+ count: spider_posts.count
65
71
  }
66
72
  end
67
73
  end
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module WxExt
3
- VERSION = '0.1.1'
3
+ VERSION = '0.1.2'
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wx_ext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - flowerwrong