meiriyigua 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ N2VhZDc2ODBjYTliYzkxMTliNzQyNDYyNmZiZGI3NGM3YTJkZmM3MA==
5
+ data.tar.gz: !binary |-
6
+ YmZlZjE0YTE4N2U2ODY1ZDdkNTAwOTRiNjE5YjMyMTMwZWJlMjU1Mg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YWE3NmNlYmIwYjExZTA3MDIxOWRmMTc4NDMxYWEwMmJmNDEzNTE3OTcxYzMy
10
+ ZGViMjNiNGZlZDkyYTBhZDBkZWJkNTgxNGNiYzkyNDMxN2UxZTg5ZmMzODgx
11
+ NDdjYWFmYmYzZjlkYTE4MGExNDQ2N2ZlMzYzNzdhMTRiNjRmZGE=
12
+ data.tar.gz: !binary |-
13
+ ZmI0MTQ2MzJmZDgyOTcyNzRlMGEwNDMwNDQ5OWMwNDRhOTZiMTA0MWM4NTg3
14
+ ODk4NWJmMmVlMzJjMWNkODMxMDZjZmJlZGRkNzg0ZWJkNWJmNGVjNDNmOGFk
15
+ MmMxMjdkNTk1NjFmYWQwYmVlZDBjYWIwNTcwNTk1OTQwNGJjN2U=
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+
19
+ *.db
20
+ set.yml
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in meiriyigua.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 mangege
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Meiriyigua
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'meiriyigua'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install meiriyigua
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,41 @@
1
# -*- encoding : utf-8 -*-
require 'meiriyigua/crawl_client'
require 'meiriyigua/models'

module Meiriyigua
  # Enriches crawled pages with a short introduction scraped from Baidu:
  # searches Baidu for each page title and stores the first result's
  # abstract on the associated PageRecord.
  class BaiduCrawl
    include Meiriyigua::Models

    def initialize
      @agent = CrawlClient.create_agent
    end

    # Process every UrlRecord not yet enriched (baidu_at is nil). On a
    # successful lookup the abstract is saved and baidu_at is stamped so the
    # record is not revisited; an empty abstract is reported as a failure
    # and retried on the next run.
    def run
      UrlRecord.all(:baidu_at => nil).each do |url_record|
        page_record = url_record.page_record
        baidu_intro = get_intro(page_record.title)
        CrawlClient.random_sleep
        if baidu_intro.empty?
          print "抓取百度简介 #{url_record.url} "
          puts "失败"
        else
          page_record.baidu_intro = baidu_intro
          page_record.save
          url_record.baidu_at = Time.now
          url_record.save
          print "抓取百度简介 #{url_record.url} "
          puts "成功"
        end
      end
    end

    # Search Baidu for +title+ and return the abstract text of the first
    # organic result. Returns an empty string when the expected search form
    # is missing (e.g. Baidu changed its markup) instead of crashing with
    # NoMethodError on nil — the caller already treats '' as a miss.
    def get_intro(title)
      page = @agent.get('http://www.baidu.com/')
      sleep 1
      search_form = page.form_with(:name => "f1")
      return '' if search_form.nil? # fail soft when the markup changes
      search_form.field_with(:name => "wd").value = title
      search_results = @agent.submit search_form
      search_results.search('div.c-container:first-of-type div.c-abstract').text
    end
  end
end
@@ -0,0 +1,36 @@
1
# -*- encoding : utf-8 -*-
require 'uri'
require 'yaml'

module Meiriyigua
  # Reads ./set.yml once at load time and exposes its values through
  # class-level reader methods (login_url, username, day_num, ...).
  class Config
    class << self
      # Parse set.yml from the current working directory into the shared
      # cache. Selecting the legacy 'syck' YAML engine is only attempted
      # where it still exists (Ruby 1.9); on later Rubies YAML::ENGINE was
      # removed along with Syck, so the assignment is skipped instead of
      # raising NameError at startup.
      def load
        YAML::ENGINE.yamler = 'syck' if defined?(YAML::ENGINE)
        @@hashs = YAML.load(File.read "#{Dir.pwd}/set.yml")
      end

      # Host part of the configured login URL — i.e. the target site host.
      def site_host
        URI(login_url).host
      end

      # Define one reader per basic setting, mapping each method name to
      # the Chinese key used in the YAML file's '基础' section.
      {login_url: '提交地址',
       username: '用户名',
       password: '密码',
       encode: '编码',
       day_num: '每天发布',
       post_time: '发布延时',
       update_time: '监控延时'}.each do |k, v|
        define_method(k) do
          @@hashs['基础'][v]
        end
      end

      # Hash of source-site host => category name (the '栏目' section).
      def categories
        @@hashs['栏目']
      end
    end

    # Eagerly load the configuration when this file is required.
    self.load
  end
end
@@ -0,0 +1,32 @@
1
# -*- encoding : utf-8 -*-
require 'mechanize'

module Meiriyigua
  # Shared helpers for all crawlers: agent construction, request pacing and
  # page-encoding normalization.
  class CrawlClient
    # Pool of desktop browser User-Agent strings; one is picked at random
    # per agent so the crawler's requests look less uniform.
    USER_AGENTS = [
      "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6",
      "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10",
      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22",
      "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.32 Safari/537.36",
      "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
      "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
      "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
      "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    ].freeze

    # Build a Mechanize agent with a random User-Agent and a minimal page
    # history (keeps memory flat on long crawls).
    def self.create_agent
      Mechanize.new do |a|
        # Array#sample picks one element directly — no throwaway shuffled copy.
        a.user_agent = USER_AGENTS.sample
        a.max_history = 1
      end
    end

    # Sleep a random 1–3 whole seconds to space out requests.
    def self.random_sleep
      sleep(rand(1..3))
    end

    # Pages labelled GB2312 are re-labelled as GBK (a superset) so Mechanize
    # can decode characters outside the GB2312 repertoire.
    def self.set_page_encoding(page)
      page.encoding = 'gbk' if page.encoding.downcase == 'gb2312'
    end
  end
end
@@ -0,0 +1,160 @@
1
# -*- encoding : utf-8 -*-
require 'meiriyigua/crawl_client'
require 'meiriyigua/models'

module Meiriyigua
  # Visits each collected detail URL, dispatches to a per-site parser
  # (handle_<site>) and persists the extracted title, category, content and
  # download links as a UrlRecord + PageRecord pair.
  class DetailCrawl
    include Meiriyigua::Models

    # detail_urls: a Queue of absolute detail-page URLs produced by ListCrawl.
    def initialize(detail_urls)
      @detail_urls = detail_urls
      @agent = CrawlClient.create_agent
    end

    # Drain the queue, handling one URL at a time.
    def run
      while !@detail_urls.empty?
        uri = URI(@detail_urls.pop)
        handle_url(uri)
      end
    end

    # Fetch and persist a single detail page. Site dispatch relies on the
    # second hostname label (www.<name>.com) matching a handle_<name>
    # method. URLs already in the database are skipped silently.
    def handle_url(uri)
      # Skip duplicates without logging.
      return if UrlRecord.exist_url?(uri.to_s)

      page = @agent.get(uri)
      CrawlClient.set_page_encoding(page)
      name = uri.host.to_s.split('.')[1]

      url_record = UrlRecord.new
      url_record.url = uri.to_s
      url_record.detail_at = Time.now

      page_record = PageRecord.new
      url_record.page_record = page_record

      page_record = send("handle_#{name}", page, page_record)
      if page_record.nil?
        print "抓取详情页 #{uri.to_s} "
        puts "失败"
      else
        print "抓取详情页 #{uri.to_s} "
        if url_record.save
          puts "成功"
        else
          puts "保存失败"
        end
      end

      CrawlClient.random_sleep
    end

    # --- per-site parsers --------------------------------------------------
    # Each fills page_record and returns it, or returns nil when the page
    # layout did not match (empty title); handle_url reports nil as failure.

    def handle_1234wg(page, page_record)
      page_record.title = page.search('td[width="583"] > font > strong font').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('body > table[background="/images/hgf-4.gif"] td[style="padding-left:6px;"] a:last-of-type').text
      page_record.content = strip_content(page.search('td#intro'))
      filename = page.search('td[valign="top"] > script:last-of-type').text.split(',')[1][6..-2]
      # URI.escape was removed in Ruby 3.0; URI::DEFAULT_PARSER.escape is the
      # stdlib replacement with the same default unsafe-character set.
      page_record.downloads = "http://dx2down.bugwg.com:801/#{URI::DEFAULT_PARSER.escape(filename)}"
      page_record
    end

    def handle_qh24(page, page_record)
      page_record.title = page.search('//*[@id="sintro"]/h1/text()').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('h2.classname > a:last-of-type').text
      page_record.content = strip_content(page.search('div.cnt'))
      page_record.downloads = join_downloads(page.search('div#intext dd a').collect{|a| a['href']})
      page_record
    end

    def handle_nanawg(page, page_record)
      page_record.title = page.search('div.right_tit').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div#index3 a:last-of-type').text
      page_record.content = page.search('div.rightsum_text4').text
      # Relative hrefs are made absolute against the site root.
      page_record.downloads = join_downloads(page.search('ul.ul2 a').collect{|a| a['href'] =~ /^http/ ? a['href'] : "http://www.nanawg.com#{a['href']}"})
      page_record
    end

    def handle_ucbug(page, page_record)
      page_record.title = page.search('div.spmain_1 a').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div.slhead_1 a:last-of-type').text
      page_record.content = page.search('div.spmain_5').text
      page_record.downloads = join_downloads(page.search('ul.ul_Address a').collect{|a| a['href']})
      page_record
    end

    def handle_gg1z(page, page_record)
      page_record.title = page.search('div.software-info > div.cp-top > h3').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div.nav-breadcrumb a:nth-last-of-type(2)').text
      content = page.search('div.cp-main > div.cp-main')
      content.search('font[color="red"]').remove # strip site boilerplate
      page_record.content = strip_content(content)

      # The listing links to intermediate download pages; fetch the first and
      # last distinct ones and collect the real download links from each.
      downloads = page.search('ul.download-list a').collect{|a| "http://www.gg1z.com#{a['href']}"}
      downloads = [downloads.first, downloads.last].uniq
      final_downloads = []
      downloads.each do |down|
        down_page = @agent.get(down, nil, page.uri.to_s)
        CrawlClient.set_page_encoding(down_page)
        final_downloads.concat( down_page.search('div.downarea a').collect{|a| a['href'] =~ /^http/ ? a['href'] : "http://www.gg1z.com#{a['href']}"} )
      end
      page_record.downloads = join_downloads(final_downloads)
      page_record
    end

    def handle_dongdongwg(page, page_record)
      page_record.title = page.search('//div[@class="pageMainArea"]/h1/text()').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('span.current1 a:last-of-type').text
      content = page.search('div#mainSoftIntro')
      content.search('p:last-of-type').remove # drop trailing boilerplate paragraph
      page_record.content = strip_content(content)
      page_record.downloads = join_downloads(page.search('ul.downlistbox a').collect{|a| a['href']})
      page_record
    end

    def handle_uuuwg(page, page_record)
      page_record.title = page.search('div.spmain_1').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div.slhead_1 a:last-of-type').text
      page_record.content = strip_content(page.search('div.spmain_5'))
      page_record.downloads = join_downloads(page.search('ul.spmain_3_2 > li:last-of-type a').collect{|a| a['href']})
      page_record
    end

    def handle_xixiwg(page, page_record)
      page_record.title = page.search('div.r2 h2').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div.location a:last-of-type').text
      page_record.content = strip_content(page.search('div#intro'))
      filename = page.search('div.xzk script:last-of-type').text.split(',')[1].strip[6..-3]
      # Same URI.escape → URI::DEFAULT_PARSER.escape replacement as above.
      page_record.downloads = "http://dxdown1.xixiwg.com/#{URI::DEFAULT_PARSER.escape(filename)}"
      page_record
    end

    def handle_xiaolinzi(page, page_record)
      page_record.title = page.search('div.dlbt_wz').text.strip
      return if page_record.title.empty?
      page_record.category = page.search('div.head_dh a:last-of-type').text
      page_record.content = strip_content(page.search('div#content_all'))
      page_record.downloads = join_downloads(page.search('div.dl_link_bd a[target="_blank"]').collect{|a| a['href']})
      page_record
    end

    # Flatten a node set to its plain text.
    def strip_content(content)
      content.text
    end

    # De-duplicate download links and join them with the '#!#' separator
    # expected by PostClient.
    def join_downloads(downloads)
      downloads.uniq.join('#!#')
    end
  end
end
@@ -0,0 +1,97 @@
1
# -*- encoding : utf-8 -*-
require 'meiriyigua/crawl_client'

module Meiriyigua
  # Crawls the "new releases" listing page of every supported site and
  # collects the links to individual detail pages into a shared queue.
  class ListCrawl
    attr_reader :detail_urls

    def initialize
      @list_urls   = Queue.new
      @detail_urls = Queue.new
      @agent       = CrawlClient.create_agent

      init_url
    end

    # Process every seeded listing URL in turn.
    def run
      until @list_urls.empty?
        handle_url(URI(@list_urls.pop))
      end
    end

    # Download one listing page, dispatch to the matching handle_<site>
    # parser (keyed on the second hostname label) and enqueue every detail
    # URL it yields.
    def handle_url(uri)
      page = @agent.get(uri)
      CrawlClient.set_page_encoding(page)
      site = uri.host.to_s.split('.')[1]
      links = send("handle_#{site}", page)
      if links.empty?
        print "抓取列表页 #{uri.to_s} "
        puts "失败"
      else
        links.each { |link| @detail_urls << link }
        print "抓取列表页 #{uri.to_s} "
        puts "成功"
      end
      CrawlClient.random_sleep
    end

    # --- per-site listing parsers; each returns an array of absolute URLs.

    def handle_1234wg(page)
      anchors = page.search('td[width="470"] a[href^="/1234/"]')
      anchors.map { |a| "http://www.1234wg.com#{a['href']}" }
    end

    def handle_qh24(page)
      anchors = page.search('div#downhot table a')
      anchors.map { |a| "http://www.qh24.com#{a['href']}" }
    end

    def handle_nanawg(page)
      anchors = page.search('td[width="362"] a:last-of-type')
      anchors.map { |a| "http://www.nanawg.com#{a['href']}" }
    end

    def handle_ucbug(page)
      # ucbug's listing already uses absolute links.
      anchors = page.search('li.slmain2_2_2 a')
      anchors.map { |a| a['href'] }
    end

    def handle_gg1z(page)
      anchors = page.search('span.app-name a')
      anchors.map { |a| "http://www.gg1z.com#{a['href']}" }
    end

    def handle_dongdongwg(page)
      anchors = page.search('span.list_title > a')
      anchors.map { |a| "http://www.dongdongwg.com#{a['href']}" }
    end

    def handle_uuuwg(page)
      anchors = page.search('table.main_table tr > td:nth-child(2) a')
      anchors.map { |a| "http://www.uuuwg.com#{a['href']}" }
    end

    def handle_xixiwg(page)
      anchors = page.search('div.entry > h2 > a')
      anchors.map { |a| "http://www.xixiwg.com#{a['href']}" }
    end

    def handle_xiaolinzi(page)
      anchors = page.search('td.rewid1 > a')
      anchors.map { |a| "http://www.xiaolinzi.com#{a['href']}" }
    end

    private

    # Seed the work queue with each site's "latest" listing page.
    def init_url
      [
        'http://www.1234wg.com/new.html',
        'http://www.qh24.com/new.html',
        'http://www.nanawg.com/soft/html/newlist-1.html',
        'http://www.ucbug.com/new.html',
        'http://www.gg1z.com/soft/html/newlist-1.html',
        'http://www.dongdongwg.com/soft/html/newlist-1.html',
        'http://www.uuuwg.com/newlist.html',
        'http://www.xixiwg.com/new/',
        'http://www.xiaolinzi.com/update/'
      ].each { |url| @list_urls << url }
    end
  end
end
@@ -0,0 +1,47 @@
1
# -*- encoding : utf-8 -*-
require 'data_mapper'

#DataMapper::Logger.new($stdout, :debug)
DataMapper::Model.raise_on_save_failure = true
DataMapper.setup(:default, "sqlite://#{Dir.pwd}/data.db")

module Meiriyigua
  module Models
    # One row per crawled URL. The timestamps double as pipeline state
    # flags: detail_at (detail page scraped), baidu_at (intro fetched),
    # publish_at (posted to the target site).
    class UrlRecord
      include DataMapper::Resource

      property :id,         Serial
      property :url,        String, length: 1024
      property :detail_at,  DateTime
      property :baidu_at,   DateTime
      property :publish_at, DateTime
      property :created_at, DateTime
      property :updated_at, DateTime

      has 1, :page_record

      # True when a record with exactly this URL already exists.
      def self.exist_url?(url)
        count(url: url) > 0
      end
    end

    # Content extracted from a detail page; linked one-to-one to UrlRecord.
    class PageRecord
      include DataMapper::Resource

      property :id,          Serial
      property :title,       String, length: 1024
      property :category,    String, length: 1024
      property :content,     Text
      property :downloads,   Text # download links joined with '#!#'
      property :baidu_intro, Text
      property :created_at,  DateTime
      property :updated_at,  DateTime

      belongs_to :url_record
    end
  end
end

DataMapper.finalize

DataMapper.auto_upgrade!
@@ -0,0 +1,105 @@
1
# -*- encoding : utf-8 -*-
require 'meiriyigua/models'
require 'meiriyigua/config'
require 'meiriyigua/crawl_client'

module Meiriyigua
  # Logs into the target site's admin backend and publishes crawled pages
  # as news posts, honouring the configured daily quota and post delay.
  class PostClient
    include Meiriyigua::Models

    # Unicode codepoint range of common CJK ideographs; used to fabricate
    # random author names.
    ZHS = 0x4e00..0x9fff

    # Maps source-site host => news_type id on the target site.
    attr_reader :category_ids

    def initialize
      @agent = CrawlClient.create_agent
      @category_ids = {}

      login
      check_category
    end

    # Publish pending records (detail crawled, Baidu intro present, not yet
    # published) until the configured per-day quota is reached.
    def run
      today_count = UrlRecord.count(:publish_at.gte => Date.today)

      if today_count >= Meiriyigua::Config.day_num
        return
      end

      # Only post records whose detail crawl is older than the configured
      # delay (minutes).
      time_limit = Time.now - 60 * Meiriyigua::Config.post_time.to_i
      UrlRecord.all(:detail_at.lte => time_limit, :baidu_at.not => nil, :publish_at => nil).each do |url_record|
        today_count += 1
        if today_count >= Meiriyigua::Config.day_num
          return
        end

        page_record = url_record.page_record
        if post_news(page_record, @category_ids[URI(url_record.url).host])
          url_record.publish_at = Time.now
          url_record.save
          print "发布 #{url_record.url} "
          puts "成功"
        else
          print "发布 #{url_record.url} "
          puts "失败"
        end
        sleep 1
      end
    end

    # Authenticate against the admin login form. Returns true when the
    # post-login page links to the backend index.
    def login
      page = @agent.get(Meiriyigua::Config.login_url)
      login_form = page.form_with(name: 'login')
      login_form.field_with(name: 'ad_name').value = Meiriyigua::Config.username
      login_form.field_with(name: 'ad_pwd').value = Meiriyigua::Config.password
      login_results = @agent.submit(login_form)
      login_results.search('a[href="/admin/admin.php/Index/index"]').size > 0
    end

    # Ensure every configured category exists on the target site, creating
    # any that are missing, then cache all category ids.
    def check_category
      page = @agent.get "http://#{Meiriyigua::Config.site_host}/admin/admin.php/News/add"
      options = page.search('select#news_type > option').collect{|o| [o.text.strip, o['value'].to_i]}
      options = Hash[options]
      Meiriyigua::Config.categories.each do |host, name|
        unless options.keys.include?(name)
          # Category missing — create it on the site. (The old code also
          # assigned options[name] to @category_ids[host] here, which was
          # always nil in this branch; the real ids are loaded below.)
          @agent.post("http://#{Meiriyigua::Config.site_host}/admin/admin.php/NewsType/add", news_type: name)
        end
      end
      load_category_ids
    end

    # Submit one news post built from page_record. Returns true when the
    # response script redirects back to the news index (success marker).
    def post_news(page_record, category_id)
      post_params = {}
      post_params['news_tit'] = "#{page_record.category}最新版辅助外挂免费下载 #{page_record.title}"
      post_params['author'] = random_zh
      post_params['click'] = rand(600..13000)
      post_params['biaoqian'] = page_record.category
      # Only the first two download links are published, pipe-separated.
      post_params['download_url'] = page_record.downloads.split('#!#')[0..1].join('|')
      post_params['news_type'] = category_id
      post_params['jianjie'] = page_record.content.to_s[0..16]
      post_params['editorValue'] = "<pre>#{page_record.content} \r\n\r\n 提示一:\r\n#{page_record.baidu_intro}</pre>"
      result_page = @agent.post("http://#{Meiriyigua::Config.site_host}/admin/admin.php/News/insert", post_params)
      !result_page.search('script').text.index('/admin/admin.php/News/index').nil?
    end

    private

    # Build a random 2–4 character string of CJK ideographs.
    def random_zh
      s = ""
      rand(2..4).times do
        s << rand(ZHS) # String#<< with an Integer appends that codepoint
      end
      s
    end

    # Re-read the news_type <select> and cache each configured host's
    # category id.
    def load_category_ids
      page = @agent.get "http://#{Meiriyigua::Config.site_host}/admin/admin.php/News/add"
      options = page.search('select#news_type > option').collect{|o| [o.text.strip, o['value'].to_i]}
      options = Hash[options]
      Meiriyigua::Config.categories.each do |host, name|
        @category_ids[host] = options[name]
      end
    end

  end
end
@@ -0,0 +1,4 @@
1
# -*- encoding : utf-8 -*-

# Gem version constant for meiriyigua.
module Meiriyigua
  VERSION = '0.0.1'
end
data/lib/meiriyigua.rb ADDED
@@ -0,0 +1,43 @@
1
# -*- encoding : utf-8 -*-
require "meiriyigua/version"
require 'meiriyigua/config'
require "meiriyigua/list_crawl"
require "meiriyigua/detail_crawl"
require "meiriyigua/baidu_crawl"
require "meiriyigua/post_client"

module Meiriyigua
  # Main entry point: runs the three pipeline stages concurrently —
  # (1) list + detail crawling, (2) Baidu intro enrichment, (3) publishing —
  # each in its own thread, then sleeps ten minutes before the next cycle.
  # Any StandardError raised during a cycle is logged and the loop continues.
  def run
    # `loop do` is the idiomatic infinite loop (was `while true`).
    loop do
      begin
        threads = []
        threads << Thread.new do
          list_crawl = ListCrawl.new
          list_crawl.run
          detail_crawl = DetailCrawl.new(list_crawl.detail_urls)
          detail_crawl.run
          sleep(Meiriyigua::Config.update_time.to_i * 60)
        end
        threads << Thread.new do
          baidu_crawl = BaiduCrawl.new
          baidu_crawl.run
          sleep(Meiriyigua::Config.update_time.to_i * 60)
        end
        threads << Thread.new do
          post_client = PostClient.new
          post_client.run
          sleep(Meiriyigua::Config.post_time.to_i * 60)
        end
        threads.each { |thr| thr.join }
      rescue StandardError => e
        # A bare `rescue` already caught only StandardError; naming the
        # exception avoids relying on the global $!.
        puts "出错了 #{e.message}"
      end
      sleep 60 * 10
    end
  end
  module_function :run
end

# Start the pipeline as soon as the library is loaded.
Meiriyigua.run
@@ -0,0 +1,28 @@
1
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'meiriyigua/version'

# Gem packaging metadata for the meiriyigua crawler/publisher.
Gem::Specification.new do |spec|
  spec.name          = 'meiriyigua'
  spec.version       = Meiriyigua::VERSION
  spec.authors       = ['mangege']
  spec.email         = ['cxh116@126.com']
  spec.description   = 'nil'
  spec.summary       = 'nil'
  spec.homepage      = 'http://github.com/mangege'
  spec.license       = 'MIT'

  # Package everything tracked by git; expose bin/ entries as executables.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime: HTTP scraping (mechanize) + sqlite-backed DataMapper models.
  spec.add_runtime_dependency 'mechanize', '~> 2.7'
  spec.add_runtime_dependency 'sqlite3', '~> 1.3'
  spec.add_runtime_dependency 'data_mapper', '~> 1.2'
  spec.add_runtime_dependency 'dm-sqlite-adapter', '~> 1.2'

  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
end
data/set.yml.example ADDED
@@ -0,0 +1,22 @@
1
+ 保留头:
2
+ 勿删除: 0
3
+
4
+ 基础:
5
+ 提交地址: http://www.meiriyigua.com/admin/admin.php
6
+ 用户名: admin
7
+ 密码: admin888
8
+ 编码: utf8
9
+ 每天发布: 300
10
+ 发布延时: 60
11
+ 监控延时: 30
12
+
13
+ 栏目:
14
+ www.1234wg.com: 1234外挂网
15
+ www.qh24.com: 艾艾游戏网
16
+ www.nanawg.com: 娜娜网
17
+ www.ucbug.com: ucbug游戏网
18
+ www.gg1z.com: 挂挂一族
19
+ www.dongdongwg.com: 东东游戏网
20
+ www.uuuwg.com: 西西游戏网
21
+ www.xixiwg.com: 西西外挂网
22
+ www.xiaolinzi.com: 小林子软件站
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: meiriyigua
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - mangege
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-04-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: sqlite3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: data_mapper
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.2'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: dm-sqlite-adapter
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.2'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: nil
98
+ email:
99
+ - cxh116@126.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - .gitignore
105
+ - Gemfile
106
+ - LICENSE.txt
107
+ - README.md
108
+ - Rakefile
109
+ - lib/meiriyigua.rb
110
+ - lib/meiriyigua/baidu_crawl.rb
111
+ - lib/meiriyigua/config.rb
112
+ - lib/meiriyigua/crawl_client.rb
113
+ - lib/meiriyigua/detail_crawl.rb
114
+ - lib/meiriyigua/list_crawl.rb
115
+ - lib/meiriyigua/models.rb
116
+ - lib/meiriyigua/post_client.rb
117
+ - lib/meiriyigua/version.rb
118
+ - meiriyigua.gemspec
119
+ - set.yml.example
120
+ homepage: http://github.com/mangege
121
+ licenses:
122
+ - MIT
123
+ metadata: {}
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ! '>='
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ! '>='
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubyforge_project:
140
+ rubygems_version: 2.2.2
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: nil
144
+ test_files: []