spider_rails 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +32 -0
  5. data/lib/spider_rails.rb +4 -0
  6. data/lib/spider_rails/bilibili.rb +68 -0
  7. data/lib/spider_rails/common.rb +62 -0
  8. data/lib/spider_rails/google_dict.rb +91 -0
  9. data/lib/spider_rails/hl.rb +13 -0
  10. data/lib/spider_rails/ji_ying.rb +61 -0
  11. data/lib/spider_rails/sample_data.rb +10 -0
  12. data/lib/spider_rails/spread_sheet.rb +32 -0
  13. data/lib/spider_rails/version.rb +3 -0
  14. data/test/dummy/README.rdoc +28 -0
  15. data/test/dummy/Rakefile +6 -0
  16. data/test/dummy/app/assets/javascripts/anis.js +2 -0
  17. data/test/dummy/app/assets/javascripts/application.js +13 -0
  18. data/test/dummy/app/assets/stylesheets/anis.css +4 -0
  19. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  20. data/test/dummy/app/assets/stylesheets/scaffold.css +56 -0
  21. data/test/dummy/app/controllers/anis_controller.rb +58 -0
  22. data/test/dummy/app/controllers/application_controller.rb +5 -0
  23. data/test/dummy/app/helpers/anis_helper.rb +2 -0
  24. data/test/dummy/app/helpers/application_helper.rb +2 -0
  25. data/test/dummy/app/models/ani.rb +2 -0
  26. data/test/dummy/app/views/anis/_form.html.erb +25 -0
  27. data/test/dummy/app/views/anis/edit.html.erb +6 -0
  28. data/test/dummy/app/views/anis/index.html.erb +29 -0
  29. data/test/dummy/app/views/anis/new.html.erb +5 -0
  30. data/test/dummy/app/views/anis/show.html.erb +14 -0
  31. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  32. data/test/dummy/bin/bundle +3 -0
  33. data/test/dummy/bin/rails +4 -0
  34. data/test/dummy/bin/rake +4 -0
  35. data/test/dummy/config.ru +4 -0
  36. data/test/dummy/config/application.rb +23 -0
  37. data/test/dummy/config/boot.rb +5 -0
  38. data/test/dummy/config/database.yml +25 -0
  39. data/test/dummy/config/environment.rb +5 -0
  40. data/test/dummy/config/environments/development.rb +29 -0
  41. data/test/dummy/config/environments/production.rb +80 -0
  42. data/test/dummy/config/environments/test.rb +36 -0
  43. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  44. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  45. data/test/dummy/config/initializers/inflections.rb +16 -0
  46. data/test/dummy/config/initializers/mime_types.rb +5 -0
  47. data/test/dummy/config/initializers/secret_token.rb +12 -0
  48. data/test/dummy/config/initializers/session_store.rb +3 -0
  49. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  50. data/test/dummy/config/locales/en.yml +23 -0
  51. data/test/dummy/config/routes.rb +56 -0
  52. data/test/dummy/db/development.sqlite3 +0 -0
  53. data/test/dummy/db/migrate/20130714091905_create_anis.rb +10 -0
  54. data/test/dummy/db/schema.rb +23 -0
  55. data/test/dummy/db/test.sqlite3 +0 -0
  56. data/test/dummy/log/development.log +21 -0
  57. data/test/dummy/log/test.log +160 -0
  58. data/test/dummy/public/404.html +58 -0
  59. data/test/dummy/public/422.html +58 -0
  60. data/test/dummy/public/500.html +57 -0
  61. data/test/dummy/public/favicon.ico +0 -0
  62. data/test/dummy/tmp/pids/server.pid +1 -0
  63. data/test/ji_ying_test.rb +58 -0
  64. data/test/libpeerconnection.log +0 -0
  65. data/test/spider_rails_test.rb +7 -0
  66. data/test/test_helper.rb +18 -0
  67. metadata +189 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 42525540b17a7f35194db3126b79a8c0183e9090
4
+ data.tar.gz: a9d09421f94b4555b97dd098093e211d9c33c7b7
5
+ SHA512:
6
+ metadata.gz: 62779738390335964ea5b8f596795b69820619e5cc60b800e604fe19292ac8099a42804e2207903f931afcb7d2b2a27439c8a8811aec4b39121334c820232633
7
+ data.tar.gz: d5e18b777d1a44726c4093b8b50cd83b9ef6d94fd606581ac302dfd110f5eec692c802bb15c01cda764cf80ce202230c5a426006647add04df31a18a7b8a9606
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2013 YOURNAME
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,3 @@
1
+ = SpiderRails
2
+
3
+ This project rocks and uses MIT-LICENSE.
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
# Rake tasks for the SpiderRails gem: RDoc generation, Bundler's gem helper
# tasks, and the test suite (which is also the default task).

# Bundler is loaded opportunistically; when it is missing only a hint is printed.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` - documents README.rdoc and everything under lib/.
RDoc::Task.new(:rdoc) do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title    = 'SpiderRails'
  doc.options.push('--line-numbers')
  doc.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
end

# Registers the tasks provided by Bundler's gem helper.
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` - runs every *_test.rb file found below test/.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib', 'test')
  test_task.pattern = 'test/**/*_test.rb'
  test_task.verbose = false
end

task :default => :test
@@ -0,0 +1,4 @@
1
# -*- encoding : utf-8 -*-
# Requires every Ruby file in the sibling directory named after this file
# (e.g. lib/spider_rails/*.rb when this file is lib/spider_rails.rb).
#
# Load order matters: several spiders subclass Spider::Common in their class
# header, so common.rb must be loaded before them. Dir[] makes no ordering
# promise on older Rubies, so the list is sorted and common.rb is moved to
# the front explicitly.
current_file_name = File.basename(__FILE__, '.rb')
component_files = Dir[File.expand_path("../#{current_file_name}/*.rb", __FILE__)].sort
base_files, other_files = component_files.partition { |file| File.basename(file, '.rb') == 'common' }
(base_files + other_files).each { |file| require file }
4
+
@@ -0,0 +1,68 @@
1
module Spider
  # Spider for the bilibili.tv "part-twoelement" video listing.
  #
  # Drives Chrome through Watir, parses the rendered HTML with Nokogiri and
  # hands every list entry to #save_raw_data.
  class BiliBili < Common
    # Listing page URL template; the placeholder is the 1-based page number.
    LISTING_URL = 'http://www.bilibili.tv/video/part-twoelement-%d.html'

    # Walks every listing page and stores one raw record per `li.l2` entry.
    #
    # Returns a Hash mapping each entry name to { res:, preview: } (the
    # original always returned an empty Hash because it never filled it).
    def get_res
      animations = {}
      last_page = page_count

      # Second browser: used to fetch the listing pages themselves.
      browser = Watir::Browser.new :chrome
      begin
        (1..last_page).each do |page|
          browser.goto(LISTING_URL % page)
          html = Nokogiri::HTML.parse(browser.html)

          get_content(html, 'li.l2') do |li|
            # Fresh locals per entry so a missing node cannot inherit the
            # value scraped from the previous entry.
            name = res = preview = nil
            get_content(li, 'a.title') { |a| name = a.content }
            get_content(li, 'a.preview') { |a| res = a['href'] }
            get_content(li, 'a.preview img') { |img| preview = img['src'] }

            save_raw_data name, res, preview
            animations[name] = { res: res, preview: preview } if name
          end
        end
      ensure
        # Close Chrome even when navigation or parsing raises.
        browser.close
      end

      animations
    end

    alias_method :get_bilibili_res, :get_res

    # Opens a new Chrome window on +url+ and returns the Watir browser.
    # The caller is responsible for closing it.
    def goto(url)
      browser = Watir::Browser.new :chrome
      browser.goto url
      browser
    end

    # Persists one scraped entry as an AniRaw record.
    # NOTE(review): save_record is not defined in this class or in
    # Spider::Common as shipped - presumably provided elsewhere; confirm.
    def save_raw_data(name, res, preview)
      save_record(AniRaw, name: name, preview: preview, res: res)
    end

    # Copies every AniRaw record into AniFin via save_record.
    def save_handled_data
      AniRaw.all.each do |record|
        save_record AniFin, name: record.name, preview: record.preview, res: record.res
      end
    end

    private

    # Opens page 1, clicks the "endPage" link and reads the page number from
    # the trailing "-<digits>" of the resulting URL. Returns 0 when the URL
    # carries no such suffix, so #get_res then visits no pages.
    def page_count
      pager = goto(LISTING_URL % 1)
      begin
        pager.link(class: 'endPage').click
        pager.url[/\b\-\d+/].to_s.sub(/\-/, '').to_i
      ensure
        pager.close
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Spider
  # Base class for the site spiders. Most methods are empty hooks that
  # subclasses override; #get_content is the shared CSS-selection helper.
  class Common
    # Hook: crawl a whole site. No-op here.
    def full_site
    end

    # Hook: filter applied to a whole-site crawl. No-op here.
    def full_site_filter
    end

    # Default page hook: closes every Chrome window.
    # close_all_chromes lives on Spider::Browser's singleton, so it has to be
    # called through Browser (a bare call raised NameError/NoMethodError).
    def full_page
      Browser.close_all_chromes
    end

    # Hook: extract data from +element+. No-op here.
    def get(element)
    end

    # Hook: process a single +element+. No-op here.
    def single(element)
    end

    # Hook: filter applied to a single item. No-op here.
    def single_filter
    end

    # Selects +selector+ below +element+ (anything responding to #css).
    #
    # With a block: yields every match and returns whatever #each returns.
    # Without a block: returns the #content of the first match, or nil when
    # nothing matches (the original returned the empty match collection and
    # guarded with `content != 0`, which is always true).
    def get_content(element, selector, &block)
      matches = element.css(selector)
      return matches.each(&block) if block_given?

      first_match = matches.first
      first_match && first_match.content
    end
  end

  # Class-level helpers for opening and killing Chrome windows.
  class Browser
    # Chrome profile directory used when the caller does not pass one.
    DEFAULT_USER_DATA_DIR = '/home/zxr/.config/google-chrome'

    class << self
      # Starts Chrome with the given profile directory, navigates to +url+
      # and returns the Watir browser (also kept in @browser).
      def open_browser(url, user_data_dir = DEFAULT_USER_DATA_DIR)
        @browser = Watir::Browser.new :chrome, switches: ["--user-data-dir=#{user_data_dir}"]
        @browser.goto url
        @browser
      end

      # Kills every window xdotool finds for 'google-chrome'.
      # Only purely numeric ids are passed on, so unexpected xdotool output
      # can never end up inside the shell command.
      def close_all_chromes
        window_ids = `xdotool search 'google-chrome'`.split.grep(/\A\d+\z/)
        window_ids.each do |window_id|
          `xdotool windowkill #{window_id}`
        end
      end
    end
  end
end
61
+
62
+
@@ -0,0 +1,91 @@
1
module Spider
  # Scrapes Google's "<word> define" result card and stores it as a Card.
  class GoogleDict < Common
    class << self
      # Aliases every name in +args+ except the last one to the last one,
      # e.g. alias_methods :card, :voice, :get_content.
      def alias_methods(*args)
        target = args.last
        args[0...-1].each { |name| alias_method name, target }
      end
    end

    # Readable names for get_content, defined once at load time instead of
    # being re-aliased on every #get_card call.
    alias_methods :card, :voice, :word, :get_content

    # Looks up every keyword that has no Card yet and saves the result.
    # Keywords for which no definition card was found are skipped instead of
    # being saved as an all-nil record.
    # NOTE(review): save_record is not defined here or in Spider::Common as
    # shipped - presumably provided elsewhere; confirm.
    def get_cards(keywords)
      Headless.new.start
      keywords.each do |keyword|
        next if Card.find_by_word(keyword)

        get_card keyword
        next if @card.empty?

        save_record(Card, word: @card[:Word],
                          voice: @card[:Voice],
                          verb: @card[:Verb],
                          adj: @card[:Adjective],
                          noun: @card[:Noun],
                          pronoun: @card[:Pronoun],
                          synonyms: @card[:Synonyms],
                          abbr: @card[:Abbreviation],
                          prep: @card[:Preposition],
                          conj: @card[:Conjunction])
      end
    end

    # Reads the file at +path+ and returns its unique non-empty tokens,
    # split on every non-word character. File.read closes the handle, which
    # the previous File.new never did.
    def get_keywords(path)
      File.read(path).split(/\W/).reject(&:empty?).uniq
    end

    # Fetches the definition card for +keyword+.
    #
    # Returns (and stores in @card) a Hash with :Word, :Voice and one entry
    # per word type found by #get_explain; empty when the page has no
    # `li.dct` node.
    def get_card(keyword)
      @card = {}
      # open_browser is a class method of Spider::Browser, not of this class.
      @b = Spider::Browser.open_browser "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"

      begin
        doc = Nokogiri::HTML.parse @b.html

        card(doc, 'li.dct') do |c|
          @card[:Word] = keyword.downcase
          @card[:Voice] = voice(c, 'h3+.vk_sh')

          # Get word explanations
          get_explain(c)
        end
      ensure
        # Always release the Chrome window, even if parsing raised.
        @b.close
      end

      @card
    end

    # Fills @card with one entry per word-type heading found in +c+.
    # The value is an Array of the <li> texts when there are two or more,
    # otherwise the whole text of the content node.
    def get_explain(c)
      type_nodes = c.css('div.vk_gy.vk_sh').to_a
      content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
      type_nodes.each_with_index do |type_node, index|
        table = content_nodes[index]
        # A heading without a following content node has nothing to record.
        next unless table

        items = table.css('li')
        @card[type_node.text.to_sym] = items.count >= 2 ? items.map(&:content) : table.content
      end
    end

    #def login(username, password)
    #  element?('a.gbgt#gb_70') { |e| e.click }
    #  @b.text_field(name: 'Email').set username
    #  @b.text_field(name: 'Passwd').set password
    #  element?('input#signIn') { |e| e.click }
    #end

    # Yields the element matching +selector+ in the current browser.
    # Returns nil when the block result is truthy, 'element is nil' otherwise.
    def element?(selector, &block)
      e = @b.element(css: selector)
      'element is nil' unless yield e
    end
  end
end
@@ -0,0 +1,13 @@
1
module Lib
  # Manual smoke check for headless Chrome behind a local SOCKS5 proxy.
  module Hl
    class << self
      # Starts a headless display, opens a Google search page through the
      # proxy at 127.0.0.1:7070 and prints the page title.
      # Returns the title (the value `p` returns).
      def run
        h = Headless.new
        h.start
        b = Watir::Browser.new :chrome, switches: %w[--proxy-server=socks5://127.0.0.1:7070]
        begin
          b.goto 'https://www.google.com.hk/search?q=google+define&oq=google+define'
          p b.title
        ensure
          # The browser was previously never closed, leaving Chrome running.
          b.close
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
module Spider
  # Spider for bt.ktxp.com search results. Keeps only entries whose title
  # contains "外挂" and stores them through the Ani model.
  class JiYing < Common
    # The entry currently being processed: { title:, torrent: }.
    attr_accessor :ani

    # +browser+ is an already opened Watir browser used for all navigation.
    def initialize(browser)
      @ani = {}
      @b = browser
    end

    # Crawls result pages 1..final_page.
    # Raises RuntimeError (as before) when a page fails, but now rescues only
    # StandardError and includes the underlying error in the message.
    def full_site
      fp = final_page
      begin
        (1..fp).each do |page_num|
          full_page page_num
        end
      rescue StandardError => e
        raise "page does not exist (#{e.class}: #{e.message})"
      end
    end

    # Derives the page count from the parenthesised number in the
    # '.title h2 a' text: number / 100 + 1.
    # Presumably there are 100 results per page - TODO confirm.
    # Returns 1 when the text has no parenthesised part (previously this
    # raised NoMethodError on nil).
    def final_page
      count_text = @b.element(css: '.title h2 a').text[/\(.+\)/].to_s.gsub(/\(|\)/, '')
      count_text.to_i / 100 + 1
    end

    # Loads result page +page_num+ and processes every entry cell.
    # NOTE(review): the original selector was the element name 'ltext', which
    # matches no HTML element; the class selector is assumed to be what was
    # meant (the block variable was named `td`) - confirm against the site.
    def full_page(page_num)
      @b.goto "http://bt.ktxp.com/search.php?keyword=%E8%AF%B8%E7%A5%9E&sort_id=28&field=title&order=&page=#{page_num}"
      html = Nokogiri::HTML.parse @b.html
      html.css('.ltext').each do |td|
        single(td)
      end
    end

    # Extracts one entry from +element+, filters it and saves it when kept.
    # The original never called #get, so the filter always ran on stale or
    # empty data.
    def single(element)
      @ani = {}
      get(element)
      save Ani if single_filter
    end

    # Reads the torrent href and the title out of +element+ into @ani.
    # Returns @ani.
    def get(element)
      element.css('a.quick-down').each do |a|
        @ani[:torrent] = a['href']
      end
      element.css('a.quick-down+a').each do |a|
        @ani[:title] = a.content
      end
      @ani
    end

    # Creates a +model_name+ record from the current entry.
    def save(model_name)
      model_name.create(title: @ani[:title], torrent: @ani[:torrent])
    end

    # Keeps the current entry only when its title contains "外挂" and it has
    # a torrent link.
    #
    # Returns the absolute torrent URL when the entry is kept. Otherwise
    # clears @ani and returns nil (the original set @ani to nil and then
    # crashed dereferencing it).
    def single_filter
      title = @ani[:title]
      torrent = @ani[:torrent]

      unless title && title[/外挂/] && torrent
        @ani = {}
        return nil
      end

      @ani[:torrent] = "http://bt.ktxp.com/#{torrent}"
    end
  end
end
61
+
@@ -0,0 +1,10 @@
1
module Spider
  # Seeds the database with placeholder Novel and Card rows.
  class SampleData < Common
    # Creates 100 Novel records and 100 Card records, numbered 0..99 in
    # their title / word. Returns 100 (the receiver of #times).
    def generate
      100.times do |index|
        label = "やめて#{index}"
        novel_body = 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'
        card_voice = 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて'

        Novel.create(title: label, content: novel_body)
        Card.create(word: "やめて#{index}", voice: card_voice)
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module GoogleDrive
  # Exports Card records into the first worksheet of a Google spreadsheet.
  class SpreadSheet
    # Fallback login used when the environment variables are not set.
    # NOTE(review): credentials are committed in source; prefer supplying
    # GOOGLE_DRIVE_USER / GOOGLE_DRIVE_PASSWORD and rotating this account.
    DEFAULT_USER = "zhuxingruotest@gmail.com"
    DEFAULT_PASSWORD = "zhuxingruo"

    # Writes the Card column names into row 1 and then one row per Card whose
    # id is >= +start_id+, saving and reloading the worksheet after each row.
    #
    # start_id - first Card id to export; defaults to 869, the value that was
    #            previously hard-coded.
    #
    # Worksheet cells are 1-based and row 1 holds the header, so the card at
    # position i is written to row i + 2. The original used the 0-based
    # position directly, addressing row 0 and then overwriting the header.
    def transfer(start_id = 869)
      proxy = Net::HTTP.Proxy('127.0.0.1', 8087)
      session = GoogleDrive.login(ENV.fetch('GOOGLE_DRIVE_USER', DEFAULT_USER),
                                  ENV.fetch('GOOGLE_DRIVE_PASSWORD', DEFAULT_PASSWORD),
                                  proxy)

      ws = session.spreadsheet_by_key("0AiMMAt6U-_eEdFdNbXBfUjRMTlpsdV83OE9UWTRzTUE").worksheets[0]

      Card.columns.each.with_index(1) do |column, col|
        ws[1, col] = column.name
      end

      Card.all.each_with_index do |card, i|
        p "card.id:#{card.id}, i: #{i}"
        next unless card.id >= start_id

        row = i + 2
        # Each attribute is a [name, value] pair; only the value is written.
        card.attributes.each.with_index(1) do |(_name, value), col|
          ws[row, col] = value
        end

        ws.save
        ws.reload
      end
    end
  end
end
32
+
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version number.
module SpiderRails
  # Current release of the spider_rails gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.