spider_rails 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +32 -0
  5. data/lib/spider_rails.rb +4 -0
  6. data/lib/spider_rails/bilibili.rb +68 -0
  7. data/lib/spider_rails/common.rb +62 -0
  8. data/lib/spider_rails/google_dict.rb +91 -0
  9. data/lib/spider_rails/hl.rb +13 -0
  10. data/lib/spider_rails/ji_ying.rb +61 -0
  11. data/lib/spider_rails/sample_data.rb +10 -0
  12. data/lib/spider_rails/spread_sheet.rb +32 -0
  13. data/lib/spider_rails/version.rb +3 -0
  14. data/test/dummy/README.rdoc +28 -0
  15. data/test/dummy/Rakefile +6 -0
  16. data/test/dummy/app/assets/javascripts/anis.js +2 -0
  17. data/test/dummy/app/assets/javascripts/application.js +13 -0
  18. data/test/dummy/app/assets/stylesheets/anis.css +4 -0
  19. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  20. data/test/dummy/app/assets/stylesheets/scaffold.css +56 -0
  21. data/test/dummy/app/controllers/anis_controller.rb +58 -0
  22. data/test/dummy/app/controllers/application_controller.rb +5 -0
  23. data/test/dummy/app/helpers/anis_helper.rb +2 -0
  24. data/test/dummy/app/helpers/application_helper.rb +2 -0
  25. data/test/dummy/app/models/ani.rb +2 -0
  26. data/test/dummy/app/views/anis/_form.html.erb +25 -0
  27. data/test/dummy/app/views/anis/edit.html.erb +6 -0
  28. data/test/dummy/app/views/anis/index.html.erb +29 -0
  29. data/test/dummy/app/views/anis/new.html.erb +5 -0
  30. data/test/dummy/app/views/anis/show.html.erb +14 -0
  31. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  32. data/test/dummy/bin/bundle +3 -0
  33. data/test/dummy/bin/rails +4 -0
  34. data/test/dummy/bin/rake +4 -0
  35. data/test/dummy/config.ru +4 -0
  36. data/test/dummy/config/application.rb +23 -0
  37. data/test/dummy/config/boot.rb +5 -0
  38. data/test/dummy/config/database.yml +25 -0
  39. data/test/dummy/config/environment.rb +5 -0
  40. data/test/dummy/config/environments/development.rb +29 -0
  41. data/test/dummy/config/environments/production.rb +80 -0
  42. data/test/dummy/config/environments/test.rb +36 -0
  43. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  44. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  45. data/test/dummy/config/initializers/inflections.rb +16 -0
  46. data/test/dummy/config/initializers/mime_types.rb +5 -0
  47. data/test/dummy/config/initializers/secret_token.rb +12 -0
  48. data/test/dummy/config/initializers/session_store.rb +3 -0
  49. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  50. data/test/dummy/config/locales/en.yml +23 -0
  51. data/test/dummy/config/routes.rb +56 -0
  52. data/test/dummy/db/development.sqlite3 +0 -0
  53. data/test/dummy/db/migrate/20130714091905_create_anis.rb +10 -0
  54. data/test/dummy/db/schema.rb +23 -0
  55. data/test/dummy/db/test.sqlite3 +0 -0
  56. data/test/dummy/log/development.log +21 -0
  57. data/test/dummy/log/test.log +160 -0
  58. data/test/dummy/public/404.html +58 -0
  59. data/test/dummy/public/422.html +58 -0
  60. data/test/dummy/public/500.html +57 -0
  61. data/test/dummy/public/favicon.ico +0 -0
  62. data/test/dummy/tmp/pids/server.pid +1 -0
  63. data/test/ji_ying_test.rb +58 -0
  64. data/test/libpeerconnection.log +0 -0
  65. data/test/spider_rails_test.rb +7 -0
  66. data/test/test_helper.rb +18 -0
  67. metadata +189 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 42525540b17a7f35194db3126b79a8c0183e9090
4
+ data.tar.gz: a9d09421f94b4555b97dd098093e211d9c33c7b7
5
+ SHA512:
6
+ metadata.gz: 62779738390335964ea5b8f596795b69820619e5cc60b800e604fe19292ac8099a42804e2207903f931afcb7d2b2a27439c8a8811aec4b39121334c820232633
7
+ data.tar.gz: d5e18b777d1a44726c4093b8b50cd83b9ef6d94fd606581ac302dfd110f5eec692c802bb15c01cda764cf80ce202230c5a426006647add04df31a18a7b8a9606
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2013 YOURNAME
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,3 @@
1
+ = SpiderRails
2
+
3
+ This project rocks and uses MIT-LICENSE.
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
# Rake tasks for the SpiderRails gem: RDoc generation, gem packaging helpers
# and the test suite (which is the default task).
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'SpiderRails'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Bundler may have failed to load above (the LoadError is only reported, not
# re-raised), so register the gem packaging tasks only when it is available
# instead of crashing with a NameError right after the hint was printed.
Bundler::GemHelper.install_tasks if defined?(Bundler::GemHelper)

require 'rake/testtask'

Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,4 @@
1
# -*- encoding : utf-8 -*-
# Requires every Ruby file in the directory named after this file
# (e.g. lib/spider_rails/*.rb next to lib/spider_rails.rb).
#
# The glob result is sorted because Dir[] does not guarantee an order on every
# Ruby version/platform, and common.rb is loaded first because other files in
# the directory subclass Spider::Common at load time.
lib_dir = File.join(File.dirname(File.expand_path(__FILE__)), File.basename(__FILE__, '.rb'))
base_files, other_files = Dir[File.join(lib_dir, '*.rb')].sort.partition do |file|
  File.basename(file) == 'common.rb'
end
(base_files + other_files).each { |file| require file }
4
+
@@ -0,0 +1,68 @@
1
module Spider
  # Spider for the bilibili.tv "part-twoelement" listing pages.
  #
  # It first discovers how many listing pages exist, then walks every page and
  # stores the title, link and preview image of each entry through
  # +save_raw_data+.
  #
  # NOTE(review): +save_record+ is called here but is not defined in this class
  # or in the visible part of Spider::Common - confirm where it comes from.
  class BiliBili < Common

    # Walks every listing page and saves one raw record per <li class="l2">.
    #
    # Returns a Hash; it is never populated by this method (kept because it is
    # the method's existing return value).
    def get_res
      animations = {}
      page_count = last_page_number

      # browser used to fetch the resources
      browser = Watir::Browser.new :chrome
      begin
        1.upto(page_count) do |page|
          browser.goto list_url(page)
          html = Nokogiri::HTML.parse(browser.html)

          get_content(html, 'li.l2') do |li|
            # Locals are reset for every entry so that an entry missing one of
            # the nodes does not silently reuse the previous entry's value.
            name = res = preview = nil
            get_content(li, 'a.title') { |a| name = a.content }
            get_content(li, 'a.preview') { |a| res = a['href'] }
            get_content(li, 'a.preview img') { |img| preview = img['src'] }

            save_raw_data name, res, preview
          end
        end
      ensure
        # Always release the browser, even when a page fails to load or parse.
        browser.close
      end

      animations
    end

    alias_method :get_bilibili_res, :get_res

    # Opens a new Chrome browser, navigates to +url+ and returns the browser.
    # The caller is responsible for closing it.
    def goto url
      browser = Watir::Browser.new :chrome
      browser.goto url
      browser
    end

    # Persists one scraped entry as an AniRaw record.
    def save_raw_data(name, res, preview)
      #anis.each do |preview, name|
      #  unless AniRaw.where(preview: preview).exists?
      #    AniRaw.create!(preview: preview, name: name)
      #  end
      #end

      save_record(AniRaw, name: name, preview: preview, res: res)
    end

    #deprecated
    #def save_record(model, *args)
    #  if model.find_by_name(name) && !name.nil?
    #    id = model.find_by_name(name).id
    #    model.update(id, name: name, res: res, preview: preview)
    #  else
    #    model.create(name: name, res: res, preview: preview)
    #  end
    #end

    # Copies every AniRaw record into AniFin.
    def save_handled_data
      AniRaw.all.each do |record|
        save_record AniFin, name: record.name, preview: record.preview, res: record.res
      end
    end

    private

    # URL of listing page number +page+.
    def list_url(page)
      "http://www.bilibili.tv/video/part-twoelement-#{page}.html"
    end

    # Clicks the "endPage" link on the first listing page and reads the page
    # number out of the resulting URL. Returns 0 when the URL carries no
    # "-<digits>" part, so the caller simply visits no pages instead of raising.
    def last_page_number
      browser = goto(list_url(1))
      begin
        browser.link(class: 'endPage').click
        browser.url[/\b\-(\d+)/, 1].to_i
      ensure
        browser.close
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Spider
  # Base class for the site spiders. Most methods are empty hooks that
  # subclasses override; +get_content+ is the shared CSS-selection helper.
  class Common
    # Hook: crawl a whole site. No-op here.
    def full_site
    end

    # Hook: filter applied to a whole-site crawl. No-op here.
    def full_site_filter
    end

    # Default page handler: closes every Chrome window.
    def full_page
      close_all_chromes
    end

    # Hook: extract data from +element+. No-op here.
    def get element
    end

    # Hook: handle a single +element+. No-op here.
    def single(element)
    end

    # Hook: filter applied to a single element. No-op here.
    def single_filter
    end

    # Selects the nodes of +element+ matching the CSS +selector+.
    #
    # element  - any object responding to +css(selector)+ and returning an
    #            enumerable of nodes that respond to +content+.
    # selector - CSS selector String.
    #
    # With a block, every matching node is yielded and the node collection is
    # returned. Without a block, returns the content of the first matching node
    # whose content is not empty, or nil when there is none.
    def get_content(element, selector, &block)
      nodes = element.css(selector)
      return nodes.each(&block) if block

      # The original compared the content with the Integer 0, which is always
      # "different" for a String; the emptiness check is what takes effect now.
      node = nodes.find { |candidate| !candidate.content.to_s.empty? }
      node && node.content
    end

    # Instance-level shortcut to Browser.open_browser so spiders can open a
    # browser without naming the Browser class.
    def open_browser(url)
      Browser.open_browser(url)
    end

    # Instance-level shortcut to Browser.close_all_chromes (used by +full_page+;
    # previously that call had no method to resolve to).
    def close_all_chromes
      Browser.close_all_chromes
    end

  end

  # Class-level helpers for starting Chrome through Watir and for killing
  # Chrome windows through the xdotool command line program.
  class Browser
    # Chrome profile directory used when the caller does not pass one.
    DEFAULT_USER_DATA_DIR = '/home/zxr/.config/google-chrome'.freeze

    class << self
      # Starts Chrome with the given profile directory and navigates to +url+.
      #
      # url           - address to open.
      # user_data_dir - Chrome --user-data-dir value (defaults to the path that
      #                 used to be hard-coded here).
      #
      # Returns the Watir browser, which is also remembered in @browser.
      def open_browser(url, user_data_dir = DEFAULT_USER_DATA_DIR)
        @browser = Watir::Browser.new :chrome, switches: ["--user-data-dir=#{user_data_dir}"]
        @browser.goto url
        @browser
      end

      # Kills every window xdotool finds for 'google-chrome'.
      # Returns the Array of window ids that were passed to xdotool.
      def close_all_chromes
        # Only purely numeric ids are interpolated into the shell command.
        window_ids = `xdotool search 'google-chrome'`.split.grep(/\A\d+\z/)
        window_ids.each do |window_id|
          `xdotool windowkill #{window_id}`
        end
      end
    end
  end
end
61
+
62
+
@@ -0,0 +1,91 @@
1
module Spider
  # Looks words up through a Google "<word> define" search and stores the
  # parsed dictionary card as a Card record.
  #
  # NOTE(review): +save_record+ is not defined in the visible code - confirm
  # where it is provided.
  class GoogleDict < Common
    class << self
      # Defines aliases for one existing method.
      #
      # args - alias names followed by the name of the method to alias, e.g.
      #        alias_methods :card, :voice, :get_content
      def alias_methods(*args)
        *aliases, target = args
        aliases.each do |alias_name|
          alias_method alias_name, target
        end
      end
    end

    # Readable names for get_content, declared once at class level instead of
    # on every get_card call.
    alias_methods :card, :voice, :word, :get_content

    # Fetches and saves a card for every keyword that has no Card yet.
    #
    # keywords - enumerable of words.
    #
    # Returns +keywords+.
    def get_cards(keywords)
      headless = Headless.new
      headless.start
      keywords.each do |keyword|
        next if Card.find_by_word(keyword)

        entry = get_card(keyword)
        # Nothing was parsed (no 'li.dct' node on the page): do not store a
        # record made only of nils.
        next if entry.empty?

        save_record(Card, word: entry[:Word],
                    voice: entry[:Voice],
                    verb: entry[:Verb],
                    adj: entry[:Adjective],
                    noun: entry[:Noun],
                    pronoun: entry[:Pronoun],
                    synonyms: entry[:Synonyms],
                    abbr: entry[:Abbreviation],
                    prep: entry[:Preposition],
                    conj: entry[:Conjunction]
        )
      end
    ensure
      # Release the virtual display even when a lookup raises.
      headless.destroy if headless
    end

    # Reads the file at +path+ and returns its unique, non-empty tokens
    # obtained by splitting on non-word characters.
    def get_keywords(path)
      # File.read closes the file handle; File.new(...).read left it open.
      File.read(path).split(/\W/).reject(&:empty?).uniq
    end

    # Opens the search page for +keyword+ and parses it into @card.
    #
    # Returns the @card Hash; it is empty when the page has no 'li.dct' node.
    def get_card keyword
      @card = {}
      # open_browser lives on Spider::Browser's singleton, so it is called there.
      @b = Browser.open_browser "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"

      begin
        doc = Nokogiri::HTML.parse @b.html

        card(doc, 'li.dct') do |c|
          @card[:Word] = keyword.downcase
          @card[:Voice] = voice(c, 'h3+.vk_sh')

          # Get word explanations
          get_explain(c)
        end
      ensure
        # Close the browser even when parsing fails.
        @b.close
      end

      @card
    end

    # Stores the explanations found under node +c+ in @card, keyed by the text
    # of each 'div.vk_gy.vk_sh' node converted to a Symbol. The value is an
    # Array of <li> contents when there are two or more <li> nodes, otherwise
    # the whole content String of the block.
    def get_explain(c)
      type_nodes = c.css('div.vk_gy.vk_sh').to_a
      content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
      type_nodes.each_with_index do |t, i|
        table = content_nodes[i]
        # A type heading without a following content block has nothing to store.
        next unless table

        items = table.css('li')
        fin_content = items.count >= 2 ? items.map { |l| l.content } : table.content
        @card[t.text.to_sym] = fin_content
      end
    end

    #def login(username, password)
    #  element?('a.gbgt#gb_70') { |e| e.click }
    #  @b.text_field(name: 'Email').set username
    #  @b.text_field(name: 'Passwd').set password
    #  element?('input#signIn') { |e| e.click }
    #end

    # Yields the element matching the CSS +selector+ in the current browser.
    #
    # Returns the block's result, or the String 'element is nil' when the
    # element does not exist on the page.
    def element?(selector, &block)
      e = @b.element(css: selector)
      return 'element is nil' unless e.exists?

      yield e
    end
  end
end
@@ -0,0 +1,13 @@
1
module Lib
  # Smoke check: opens a Google search page in Chrome through a SOCKS5 proxy
  # inside a Headless display and prints the page title.
  module Hl
    class << self
      # Prints (and returns, via +p+) the title of the search page.
      def run
        headless = Headless.new
        headless.start
        browser = Watir::Browser.new :chrome, switches: %w[--proxy-server=socks5://127.0.0.1:7070]
        browser.goto 'https://www.google.com.hk/search?q=google+define&oq=google+define'
        p browser.title
      ensure
        # Release the browser and the virtual display; previously both were
        # left running after every call.
        browser.close if browser
        headless.destroy if headless
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Spider
  # Spider for the bt.ktxp.com search result pages. Each result's title and
  # torrent link are collected into @ani and stored through the Ani model.
  class JiYing < Common
    # Hash describing the entry being processed (:title, :torrent).
    attr_accessor :ani

    # browser - an already opened browser object (responds to goto, html and
    #           element).
    def initialize browser
      @ani = {}
      @b = browser
    end

    # Processes every result page from 1 to +final_page+.
    #
    # Raises RuntimeError when processing a page fails; the message carries
    # the class and message of the underlying error.
    def full_site
      fp = final_page
      begin
        (1..fp).each do |page_num|
          full_page page_num
        end
      rescue StandardError => e
        # StandardError instead of Exception so Interrupt/SystemExit still
        # propagate untouched.
        raise %Q(page does not exist (#{e.class}: #{e.message}))
      end
    end

    # Number of result pages: the number shown in parentheses in the
    # '.title h2 a' element, divided by 100, plus one. Returns 1 when no
    # parenthesised part is present.
    def final_page
      counter = @b.element(css: '.title h2 a').text[/\(.+\)/].to_s
      counter.delete('()').to_i / 100 + 1
    end

    # Loads result page +page_num+ and processes every title cell on it.
    def full_page page_num
      @b.goto "http://bt.ktxp.com/search.php?keyword=%E8%AF%B8%E7%A5%9E&sort_id=28&field=title&order=&page=#{page_num}"
      html = Nokogiri::HTML.parse @b.html
      # NOTE(review): the original selector was the bare tag name 'ltext',
      # which cannot match table cells; the class selector is assumed to be
      # what was meant - confirm against the live markup.
      html.css('td.ltext').each do |td|
        single(td)
      end
    end

    # Extracts one entry from +element+ and saves it when it passes the filter.
    def single element
      @ani = {}
      get(element)
      save Ani if single_filter
    end

    # Fills @ani with the torrent href and the title found in +element+.
    # Returns @ani.
    def get(element)
      element.css('a.quick-down').each do |a|
        @ani[:torrent] = a['href']
      end
      element.css('a.quick-down+a').each do |a|
        @ani[:title] = a.content
      end
      @ani
    end

    # Creates a +model_name+ record from the current @ani.
    def save model_name
      model_name.create(title: @ani[:title], torrent: @ani[:torrent])
    end

    # Keeps only entries whose title contains "外挂" and turns the torrent
    # path into an absolute URL.
    #
    # Returns true when the entry should be saved. Otherwise @ani is reset to
    # an empty Hash and false is returned (the original set @ani to nil and
    # then crashed on the next line).
    def single_filter
      title = @ani[:title]
      torrent = @ani[:torrent]
      unless title && title[/外挂/] && torrent
        @ani = {}
        return false
      end

      @ani[:torrent] = "http://bt.ktxp.com/#{torrent}" unless torrent.start_with?('http://bt.ktxp.com/')
      true
    end
  end
end
61
+
@@ -0,0 +1,10 @@
1
module Spider
  # Fills the database with placeholder Novel and Card rows.
  class SampleData < Common
    # Placeholder body used for every generated Novel.
    NOVEL_CONTENT = 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'.freeze
    # Placeholder voice text used for every generated Card.
    CARD_VOICE = 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'.freeze

    # Creates +count+ Novel rows and +count+ Card rows whose title/word is
    # "やめて<n>" for n from 0 to count - 1.
    #
    # count - number of rows of each kind (defaults to 100).
    #
    # Returns +count+.
    def generate(count = 100)
      count.times do |n|
        Novel.create(title: "やめて#{n}", content: NOVEL_CONTENT)
        Card.create(word: "やめて#{n}", voice: CARD_VOICE)
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module GoogleDrive
  # Copies Card records into the first worksheet of a Google spreadsheet.
  class SpreadSheet
    # Writes the Card column names into row 1, then one row per Card whose id
    # is at least +start_id+. The worksheet is saved and reloaded after every
    # written row.
    #
    # start_id - smallest Card id to export (defaults to 869).
    #
    # Returns the result of Card.all.
    def transfer(start_id = 869)
      proxy = Net::HTTP.Proxy('127.0.0.1', 8087)
      # FIXME(security): account credentials are committed in source; they
      # should be moved to configuration and the password rotated.
      session = GoogleDrive.login("zhuxingruotest@gmail.com", "zhuxingruo", proxy)

      ws = session.spreadsheet_by_key("0AiMMAt6U-_eEdFdNbXBfUjRMTlpsdV83OE9UWTRzTUE").worksheets[0]
      #ws[2, 1] = "BakaBaka"
      #ws[2, 2] = "Fuck you"
      Card.columns.each_with_index do |c, i|
        ws[1, i + 1] = c.name
      end

      Card.all.each_with_index do |card, i|
        p "card.id:#{card.id}, i: #{i}"

        next unless card.id >= start_id

        # Worksheet cells are addressed from 1 and row 1 holds the header, so
        # the card at index i goes to row i + 2. Writing to row i addressed
        # row 0 for the first card and overwrote the header for the second.
        row = i + 2
        card.attributes.each_with_index do |attr, j|
          #attr[0] is attr name, attr[1] is attr value
          ws[row, j + 1] = attr[1]
        end

        ws.save()
        ws.reload()
      end
    end
  end
end
32
+
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version number.
module SpiderRails
  # Current gem version; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.