spider_rails 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/MIT-LICENSE +20 -0
- data/README.rdoc +3 -0
- data/Rakefile +32 -0
- data/lib/spider_rails.rb +4 -0
- data/lib/spider_rails/bilibili.rb +68 -0
- data/lib/spider_rails/common.rb +62 -0
- data/lib/spider_rails/google_dict.rb +91 -0
- data/lib/spider_rails/hl.rb +13 -0
- data/lib/spider_rails/ji_ying.rb +61 -0
- data/lib/spider_rails/sample_data.rb +10 -0
- data/lib/spider_rails/spread_sheet.rb +32 -0
- data/lib/spider_rails/version.rb +3 -0
- data/test/dummy/README.rdoc +28 -0
- data/test/dummy/Rakefile +6 -0
- data/test/dummy/app/assets/javascripts/anis.js +2 -0
- data/test/dummy/app/assets/javascripts/application.js +13 -0
- data/test/dummy/app/assets/stylesheets/anis.css +4 -0
- data/test/dummy/app/assets/stylesheets/application.css +13 -0
- data/test/dummy/app/assets/stylesheets/scaffold.css +56 -0
- data/test/dummy/app/controllers/anis_controller.rb +58 -0
- data/test/dummy/app/controllers/application_controller.rb +5 -0
- data/test/dummy/app/helpers/anis_helper.rb +2 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/app/models/ani.rb +2 -0
- data/test/dummy/app/views/anis/_form.html.erb +25 -0
- data/test/dummy/app/views/anis/edit.html.erb +6 -0
- data/test/dummy/app/views/anis/index.html.erb +29 -0
- data/test/dummy/app/views/anis/new.html.erb +5 -0
- data/test/dummy/app/views/anis/show.html.erb +14 -0
- data/test/dummy/app/views/layouts/application.html.erb +14 -0
- data/test/dummy/bin/bundle +3 -0
- data/test/dummy/bin/rails +4 -0
- data/test/dummy/bin/rake +4 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/config/application.rb +23 -0
- data/test/dummy/config/boot.rb +5 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +29 -0
- data/test/dummy/config/environments/production.rb +80 -0
- data/test/dummy/config/environments/test.rb +36 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/test/dummy/config/initializers/inflections.rb +16 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +12 -0
- data/test/dummy/config/initializers/session_store.rb +3 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +23 -0
- data/test/dummy/config/routes.rb +56 -0
- data/test/dummy/db/development.sqlite3 +0 -0
- data/test/dummy/db/migrate/20130714091905_create_anis.rb +10 -0
- data/test/dummy/db/schema.rb +23 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +21 -0
- data/test/dummy/log/test.log +160 -0
- data/test/dummy/public/404.html +58 -0
- data/test/dummy/public/422.html +58 -0
- data/test/dummy/public/500.html +57 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/tmp/pids/server.pid +1 -0
- data/test/ji_ying_test.rb +58 -0
- data/test/libpeerconnection.log +0 -0
- data/test/spider_rails_test.rb +7 -0
- data/test/test_helper.rb +18 -0
- metadata +189 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 42525540b17a7f35194db3126b79a8c0183e9090
|
4
|
+
data.tar.gz: a9d09421f94b4555b97dd098093e211d9c33c7b7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 62779738390335964ea5b8f596795b69820619e5cc60b800e604fe19292ac8099a42804e2207903f931afcb7d2b2a27439c8a8811aec4b39121334c820232633
|
7
|
+
data.tar.gz: d5e18b777d1a44726c4093b8b50cd83b9ef6d94fd606581ac302dfd110f5eec692c802bb15c01cda764cf80ce202230c5a426006647add04df31a18a7b8a9606
|
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2013 YOURNAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# Rake tasks for the SpiderRails gem: RDoc generation, Bundler's gem helper
# tasks, and the test suite (which is also the default task).

# A missing Bundler install only prints a hint here; loading continues.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` documents the README and everything under lib/ into ./rdoc.
RDoc::Task.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = 'SpiderRails'
  doc.options.push('--line-numbers')
  doc.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
end

Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |tests|
  tests.libs.push('lib', 'test')
  tests.pattern = 'test/**/*_test.rb'
  tests.verbose = false
end

task :default => :test
|
data/lib/spider_rails.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module Spider
|
2
|
+
# Scrapes the bilibili.tv "part-twoelement" listing pages with Watir and
# Nokogiri and stores every listed entry through +save_record+.
class BiliBili < Common
  # Walks every listing page and saves the title, link and preview image of
  # each <tt>li.l2</tt> entry via #save_raw_data.
  #
  # Returns an empty Hash. Nothing is ever stored in it; the return value is
  # kept only so callers see the same result as before.
  def get_res
    animations = {}

    # The page count is read (and its browser closed) before the scraping
    # browser is opened, so only one Chrome window is open at a time.
    page_count = last_page_number

    # browser for get resources
    browser = Watir::Browser.new :chrome
    begin
      (1..page_count).each do |page|
        browser.goto list_url(page)
        html = Nokogiri::HTML.parse(browser.html)

        get_content(html, 'li.l2') do |li|
          # Fresh locals per entry: an entry without a link or preview must
          # not inherit the values scraped from the previous entry.
          name = res = preview = nil
          get_content(li, 'a.title') { |a| name = a.content }
          get_content(li, 'a.preview') { |a| res = a['href'] }
          get_content(li, 'a.preview img') { |img| preview = img['src'] }

          save_raw_data name, res, preview
        end
      end
    ensure
      # Close the browser even when a page fails to load or parse.
      browser.close
    end

    animations
  end

  alias_method :get_bilibili_res, :get_res

  # Opens a new Chrome browser, navigates it to +url+ and returns it.
  # The caller is responsible for closing the returned browser.
  def goto(url)
    browser1 = Watir::Browser.new :chrome
    browser1.goto url
    browser1
  end

  # Stores one scraped entry as an AniRaw record via +save_record+.
  def save_raw_data(name, res, preview)
    #anis.each do |preview, name|
    #  unless AniRaw.where(preview: preview).exists?
    #    AniRaw.create!(preview: preview, name: name)
    #  end
    #end

    save_record(AniRaw, name: name, preview: preview, res: res)
  end

  #deprecated
  #def save_record(model, *args)
  #  if model.find_by_name(name) && !name.nil?
  #    id = model.find_by_name(name).id
  #    model.update(id, name: name, res: res, preview: preview)
  #  else
  #    model.create(name: name, res: res, preview: preview)
  #  end
  #end

  # Copies every AniRaw record into AniFin via +save_record+.
  def save_handled_data
    AniRaw.all.each do |record|
      save_record AniFin, name: record.name, preview: record.preview, res: record.res
    end
  end

  private

  # URL of listing page number +page+.
  def list_url(page)
    "http://www.bilibili.tv/video/part-twoelement-#{page}.html"
  end

  # Clicks the "endPage" link on the first listing page and reads the page
  # number out of the resulting URL. Returns 0 (so no pages are scraped)
  # when the URL carries no "-<digits>" part.
  def last_page_number
    # browser for get page number
    pager = goto(list_url(1))
    begin
      pager.link(class: 'endPage').click
      pager.url[/\b-(\d+)/, 1].to_i
    ensure
      pager.close
    end
  end
end
|
68
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Spider
|
2
|
+
# Base class for the spiders. Most methods are empty hooks; the spiders in
# this code base (e.g. JiYing) override the ones they need.
class Common
  # Hook: crawl a whole site. Does nothing here.
  def full_site
  end

  # Hook: filter applied to a whole-site crawl. Does nothing here.
  def full_site_filter
  end

  # Default page handler: kills every Chrome window.
  # `close_all_chromes` is defined as a class method of Browser, so it has to
  # be called on that class; calling it on the spider instance raised
  # NameError.
  def full_page
    Browser.close_all_chromes
  end

  # Hook: extract data from +element+. Does nothing here.
  def get(element)
  end

  # Hook: process a single +element+. Does nothing here.
  def single(element)
  end

  # Hook: filter applied to a single element. Does nothing here.
  def single_filter
  end

  # Looks up +selector+ below +element+ (anything responding to #css).
  #
  # With a block:    yields every matching node and returns whatever the
  #                  collection's #each returns.
  # Without a block: returns the #content of the first matching node, or nil
  #                  when nothing matches. (The old code returned the whole
  #                  node collection in that case, and guarded the return
  #                  with `content != 0`, which is always true for a String.)
  def get_content(element, selector, &block)
    matches = element.css(selector)
    return matches.each(&block) if block_given?

    # Just get first element
    matches.each { |node| return node.content }
    nil
  end
end
|
42
|
+
|
43
|
+
# Class-level helpers for launching Chrome through Watir and for killing
# Chrome windows through the `xdotool` command-line tool.
class Browser
  # Chrome profile directory used when the caller does not pass one.
  DEFAULT_USER_DATA_DIR = '/home/zxr/.config/google-chrome'.freeze

  class << self
    # Starts Chrome with the given profile directory, navigates to +url+ and
    # returns the Watir browser (also remembered in @browser).
    #
    # url           - address to open.
    # user_data_dir - Chrome profile directory; defaults to the path that
    #                 used to be hard-coded here.
    def open_browser(url, user_data_dir = DEFAULT_USER_DATA_DIR)
      @browser = Watir::Browser.new :chrome, switches: ["--user-data-dir=#{user_data_dir}"]
      @browser.goto url
      @browser
    end

    # Kills every window that `xdotool search 'google-chrome'` reports.
    # Returns the list of window ids that were passed to `xdotool windowkill`.
    def close_all_chromes
      # String#split without arguments ignores leading whitespace, so no
      # empty id is ever handed to windowkill.
      window_ids = `xdotool search 'google-chrome'`.split
      window_ids.each do |window_id|
        # Argument-list form: the id is never interpreted by a shell.
        system('xdotool', 'windowkill', window_id)
      end
    end
  end
end
|
60
|
+
end
|
61
|
+
|
62
|
+
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Spider
|
2
|
+
# Looks words up through a Google "<word> define" search and stores the
# scraped dictionary card as a Card record.
class GoogleDict < Common
  class << self
    # Makes every name in +args+ except the last an alias of the last one,
    # e.g. `alias_methods :a, :b, :target` aliases :a and :b to :target.
    def alias_methods(*args)
      target = args.last
      args[0...-1].each do |new_name|
        alias_method new_name, target
      end
    end
  end

  # Readable names for Common#get_content. Defined once at load time; they
  # used to be re-created on every #get_card call.
  alias_methods :card, :voice, :word, :get_content

  # Scrapes and saves a card for every keyword that has no Card yet.
  #
  # keywords - collection of words to look up.
  def get_cards(keywords)
    Headless.new.start
    keywords.each do |keyword|
      next if Card.find_by_word(keyword)

      get_card keyword
      # @card stays empty when the result page has no `li.dct` entry; saving
      # it would create a Card whose fields are all nil.
      next if @card.empty?

      save_record(Card, word: @card[:Word],
                        voice: @card[:Voice],
                        verb: @card[:Verb],
                        adj: @card[:Adjective],
                        noun: @card[:Noun],
                        pronoun: @card[:Pronoun],
                        synonyms: @card[:Synonyms],
                        abbr: @card[:Abbreviation],
                        prep: @card[:Preposition],
                        conj: @card[:Conjunction]
                 )
    end
  end

  # Reads the file at +path+ and returns its distinct words, i.e. the
  # non-empty pieces left after splitting on non-word characters.
  def get_keywords(path)
    # File.read closes the file; File.new(path).read left the handle open.
    File.read(path).split(/\W/).reject(&:empty?).uniq
  end

  # Opens the search page for +keyword+, fills @card from every `li.dct`
  # entry and returns @card (an empty Hash when no entry was found).
  def get_card(keyword)
    @card = {}
    # open_browser is a class method of Browser, not of this spider.
    @b = Browser.open_browser "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"

    begin
      doc = Nokogiri::HTML.parse @b.html

      card(doc, 'li.dct') do |c|
        @card[:Word] = keyword.downcase
        @card[:Voice] = voice(c, 'h3+.vk_sh')

        # Get word explanations
        get_explain(c)
      end
    ensure
      # Close the browser even when parsing fails.
      @b.close
    end

    @card
  end

  # Stores the explanations found below node +c+ in @card, keyed by the text
  # of each `div.vk_gy.vk_sh` heading (as a Symbol). The value is an Array of
  # the <li> texts when there are two or more, otherwise the whole text of
  # the div that follows the heading.
  def get_explain(c)
    type_nodes = c.css('div.vk_gy.vk_sh').to_a
    content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
    type_nodes.each_with_index do |type_node, i|
      table = content_nodes[i]
      # A heading without a following content div has nothing to store.
      next unless table

      items = table.css('li')
      @card[type_node.text.to_sym] = items.count >= 2 ? items.map(&:content) : table.content
    end
  end

  #def login(username, password)
  #  element?('a.gbgt#gb_70') { |e| e.click }
  #  @b.text_field(name: 'Email').set username
  #  @b.text_field(name: 'Passwd').set password
  #  element?('input#signIn') { |e| e.click }
  #end

  # Yields the browser element matching the CSS +selector+. Returns nil when
  # the block's result is truthy and the String 'element is nil' otherwise.
  # NOTE(review): the element's existence is never checked before yielding;
  # confirm whether `e.exists?` was the intended condition.
  def element?(selector, &block)
    e = @b.element(css: selector)
    yield(e) ? nil : 'element is nil'
  end
end
|
91
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Lib
|
2
|
+
# Smoke test for headless browsing through a local SOCKS5 proxy.
module Hl
  class << self
    # Starts a Headless display, opens a Google "define" search in Chrome via
    # the proxy at 127.0.0.1:7070, prints the page title and returns it.
    def run
      h = Headless.new
      h.start
      # NOTE(review): the Headless display is left running, as before.
      b = Watir::Browser.new :chrome, switches: %w[--proxy-server=socks5://127.0.0.1:7070]
      begin
        b.goto 'https://www.google.com.hk/search?q=google+define&oq=google+define'
        p b.title
      ensure
        # The browser used to be left open after every run.
        b.close
      end
    end
  end
end
|
13
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Spider
|
2
|
+
# Spider for the bt.ktxp.com search results. Every result whose title
# contains 外挂 is saved as an Ani record (title + torrent URL).
class JiYing < Common
  # Data of the entry currently being processed: a Hash with :title and
  # :torrent, or nil once #single_filter has rejected the entry.
  attr_accessor :ani

  # browser - Watir browser used for all page loads.
  def initialize(browser)
    @ani = {}
    @b = browser
  end

  # Processes result pages 1..final_page.
  #
  # Raises RuntimeError when a page cannot be processed. Only StandardError
  # is translated, so Interrupt/SystemExit are no longer swallowed, and the
  # original error message is kept in the new one.
  def full_site
    fp = final_page
    begin
      (1..fp).each do |page_num|
        full_page page_num
      end
    rescue StandardError => e
      raise "page does not exist: #{e.message}"
    end
  end

  # Number of result pages, computed as count / 100 + 1, where count is read
  # from the parenthesised part of the '.title h2 a' text in the browser.
  # A text without parentheses counts as 0, i.e. one page.
  # NOTE(review): presumably 100 results per page; an exact multiple of 100
  # yields one extra page. Confirm against the site.
  def final_page
    @b.element(css: '.title h2 a').text[/\(.+\)/].to_s.delete('()').to_i / 100 + 1
  end

  # Loads result page +page_num+ and processes every matching node.
  def full_page(page_num)
    @b.goto "http://bt.ktxp.com/search.php?keyword=%E8%AF%B8%E7%A5%9E&sort_id=28&field=title&order=&page=#{page_num}"
    html = Nokogiri::HTML.parse @b.html
    # NOTE(review): 'ltext' selects <ltext> elements; if the cells carry a
    # class of that name the selector should be '.ltext'. Verify on the site.
    html.css('ltext').each do |td|
      single(td)
    end
  end

  # Extracts, filters and saves the entry found in +element+.
  def single(element)
    # Start from a clean Hash so nothing leaks over from the previous entry,
    # then actually read the element. The old code skipped this step and
    # always failed on a nil title.
    @ani = {}
    get(element)
    single_filter
    save Ani if @ani
  end

  # Reads the torrent link (`a.quick-down`) and the title (the link right
  # after it) from +element+ into @ani and returns @ani.
  def get(element)
    element.css('a.quick-down').each do |a|
      @ani[:torrent] = a['href']
    end
    element.css('a.quick-down+a').each do |a|
      @ani[:title] = a.content
    end
    @ani
  end

  # Creates a +model_name+ record from the current entry.
  def save(model_name)
    model_name.create(title: @ani[:title], torrent: @ani[:torrent])
  end

  # Rejects the current entry (sets @ani to nil) unless it has a torrent
  # link and a title containing 外挂; otherwise turns the torrent path into
  # an absolute URL.
  def single_filter
    title = @ani[:title]
    torrent = @ani[:torrent]
    unless title && title[/外挂/] && torrent
      @ani = nil
      # Stop here: the old code went on to index the nil it had just set.
      return
    end

    @ani[:torrent] = "http://bt.ktxp.com/#{torrent}"
  end
end
|
60
|
+
end
|
61
|
+
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module Spider
|
2
|
+
# Fills the database with placeholder records for manual testing.
class SampleData < Common
  # Creates 100 Novel and 100 Card records, numbered 0..99 in their
  # title/word, all sharing the same filler text.
  def generate
    100.times { |index|
      Novel.create(
        title: "やめて#{index}",
        content: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'
      )
      Card.create(
        word: "やめて#{index}",
        voice: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'
      )
    }
  end
end
|
10
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module GoogleDrive
|
2
|
+
# Copies Card records into the first worksheet of a Google spreadsheet.
class SpreadSheet
  # Writes the Card column names into row 1, then writes the attribute values
  # of every Card whose id is at least +start_id+, saving and reloading the
  # worksheet after each card.
  #
  # start_id - lowest Card id to transfer (defaults to the previously
  #            hard-coded 869).
  def transfer(start_id = 869)
    proxy = Net::HTTP.Proxy('127.0.0.1', 8087)
    # SECURITY(review): account name and password are committed in source;
    # move them to configuration and rotate the password.
    session = GoogleDrive.login("zhuxingruotest@gmail.com", "zhuxingruo", proxy)

    ws = session.spreadsheet_by_key("0AiMMAt6U-_eEdFdNbXBfUjRMTlpsdV83OE9UWTRzTUE").worksheets[0]

    # Header row: worksheet columns are addressed starting at 1.
    Card.columns.each.with_index(1) do |column, col|
      ws[1, col] = column.name
    end

    Card.all.each_with_index do |card, i|
      p "card.id:#{card.id}, i: #{i}"

      next unless card.id >= start_id

      # NOTE(review): the row is the card's 0-based position in Card.all,
      # unchanged from the original; rows 0 and 1 would collide with the
      # header if start_id were lowered. Confirm the intended mapping.
      card.attributes.each_value.with_index(1) do |value, col|
        ws[i, col] = value
      end

      ws.save
      ws.reload
    end
  end
end
|
31
|
+
end
|
32
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
== README
|
2
|
+
|
3
|
+
This README would normally document whatever steps are necessary to get the
|
4
|
+
application up and running.
|
5
|
+
|
6
|
+
Things you may want to cover:
|
7
|
+
|
8
|
+
* Ruby version
|
9
|
+
|
10
|
+
* System dependencies
|
11
|
+
|
12
|
+
* Configuration
|
13
|
+
|
14
|
+
* Database creation
|
15
|
+
|
16
|
+
* Database initialization
|
17
|
+
|
18
|
+
* How to run the test suite
|
19
|
+
|
20
|
+
* Services (job queues, cache servers, search engines, etc.)
|
21
|
+
|
22
|
+
* Deployment instructions
|
23
|
+
|
24
|
+
* ...
|
25
|
+
|
26
|
+
|
27
|
+
Please feel free to use a different markup language if you do not plan to run
|
28
|
+
<tt>rake doc:app</tt>.
|