spider_rails 4.0.2 → 4.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e182b6084575d3b2ac7e4cc9dcaf9c07b7018fc
4
- data.tar.gz: 060df4f018e1bf486c0ac03d923fde72f5cd3f96
3
+ metadata.gz: e9ae14d4aa7c198c8e42c9075b9cf76457c1d6a9
4
+ data.tar.gz: 1177b0a8dfe1c3715050a9bd5dcb25f14daaf32d
5
5
  SHA512:
6
- metadata.gz: 386ffce6067639a68a264d4fe73d9d2eb398874d4c68f7290fbbf1bfd4163206e3c1e9754f7891b43b390dea38fefcd0674cbac92b7c3fb6bae55ea3fc94544e
7
- data.tar.gz: 463612cd49d92066f7c349f4188a9cff34e3e31a69ebeb9ce1ce874e006e1aae8586b3d41a21a323138bcee6fa69a7c706d08105d5762c21a80a86bca1653436
6
+ metadata.gz: 3b1573e5ebed6b5bce30e04f715a0058d59d3eeaf8fad387a93fe8fecd71334a471cd946aee1d7f150f865ad8bc8c6ace4672b9baa2ceaaeff302eb5b2b44778
7
+ data.tar.gz: 29b0d0bcccd465fc61f11e704e655e505d024d2ac9646d415398cfea67c93a665ff33af37ba6b2f73133b9a1830346d1013a21e7d4ea99f3c862b9f13e61a321
data/Rakefile CHANGED
@@ -19,3 +19,5 @@ Bundler::GemHelper.install_tasks
19
19
  require 'rspec/core/rake_task'
20
20
 
21
21
  RSpec::Core::RakeTask.new(:spec)
22
+
23
+ task default: :spec
@@ -1,28 +1,5 @@
1
- module Spider
1
+ module Common
2
2
  class Common
3
- def full_site
4
-
5
- end
6
-
7
- def full_site_filter
8
-
9
- end
10
-
11
- def full_page
12
- close_all_chromes
13
- end
14
-
15
- def get element
16
-
17
- end
18
-
19
- def single(element)
20
-
21
- end
22
-
23
- def single_filter
24
- end
25
-
26
3
  def get_content(element, selector, &block)
27
4
  begin
28
5
  if block_given?
@@ -37,11 +14,10 @@ module Spider
37
14
  end
38
15
  end
39
16
  end
40
-
41
17
  end
42
18
 
43
19
  class << self
44
- def open_browser driver, url
20
+ def start driver, url
45
21
  #@browser = Watir::Browser.new :chrome, switches: %w( --user-data-dir=/home/zxr/.config/google-chrome)
46
22
  @browser = Watir::Browser.new driver
47
23
  @browser.goto url
@@ -0,0 +1,32 @@
1
+ require 'watir-webdriver'
2
+ require 'nokogiri'
3
+ module DSL
4
+ # Add a visit method to ::Watir::Browser
5
+ class Browser < ::Watir::Browser
6
+ def visit(relative_url = nil, base_url = 'http://localhost:3000/')
7
+ goto("#{base_url}#{relative_url}")
8
+ end
9
+
10
+ def initialize(browser = :phantomjs, *args)
11
+ super
12
+ end
13
+
14
+ def dsl_enable
15
+ @doc = Nokogiri::HTML.parse(self.html)
16
+ ::String.class_variable_set(:@@doc, @doc)
17
+ eval <<-RUBY
18
+ class ::String
19
+ def ctn
20
+ if block_given?
21
+ @@doc.css(self) &block
22
+ else
23
+ @@doc.css(self).each do |e|
24
+ return e.text
25
+ end
26
+ end
27
+ end
28
+ end
29
+ RUBY
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,5 @@
1
+ require File.expand_path('../dsl', __FILE__)
2
+
3
+ RSpec.configure do |config|
4
+
5
+ end
@@ -1,5 +1,5 @@
1
- module Spider
2
- class BiliBili < Common
1
+ module Specific
2
+ class BiliBili
3
3
 
4
4
  def get_res
5
5
  animations = Hash.new
@@ -0,0 +1,91 @@
1
+ module Specific
2
+ class GoogleDict
3
+ def get_cards(keywords)
4
+ Headless.new.start
5
+ keywords.each do |keyword|
6
+ unless Card.find_by_word(keyword)
7
+ get_card keyword
8
+ save_record(Card, word: @card[:Word],
9
+ voice: @card[:Voice],
10
+ verb: @card[:Verb],
11
+ adj: @card[:Adjective],
12
+ noun: @card[:Noun],
13
+ pronoun: @card[:Pronoun],
14
+ synonyms: @card[:Synonyms],
15
+ abbr: @card[:Abbreviation],
16
+ prep: @card[:Preposition],
17
+ conj: @card[:Conjunction]
18
+ )
19
+ end
20
+ end
21
+ end
22
+
23
+ def get_keywords(path)
24
+ f = File.new(path)
25
+ dict = f.read.split(/\W/)
26
+ dict.delete("")
27
+ dict.uniq!
28
+ dict
29
+ end
30
+
31
+ class << self
32
+ def alias_methods(*args)
33
+ args.each do |arg|
34
+ alias_method arg, args.last
35
+ end
36
+ end
37
+ end
38
+
39
+ def get_card keyword
40
+ @card = Hash.new
41
+ @page = start "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"
42
+
43
+ doc = Nokogiri::HTML.parse @page.html
44
+
45
+ GoogleDict.alias_methods :card, :voice, :word, :get_content
46
+ card(doc, 'li.dct') do |c|
47
+ @card[:Word] = keyword.downcase
48
+ @card[:Voice] = voice(c, 'h3+.vk_sh')
49
+
50
+ # Get word explainations
51
+ get_explain(c)
52
+ end
53
+
54
+ @page.close
55
+ @card.delete(0)
56
+ @card
57
+ end
58
+
59
+ def get_explain(c)
60
+ type_nodes = c.css('div.vk_gy.vk_sh').to_a
61
+ content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
62
+ type_nodes.each_with_index do |t, i|
63
+ table = content_nodes[i]
64
+ if table.css('li').count >= 2
65
+ fin_content = Array.new
66
+ table.css('li').each do |l|
67
+ fin_content << l.content
68
+ end
69
+ else
70
+ fin_content = table.content
71
+ end
72
+ @card[t.text.to_sym] = fin_content
73
+ end
74
+ end
75
+
76
+ #def login(username, password)
77
+ # element?('a.gbgt#gb_70') { |e| e.click }
78
+ # @b.text_field(name: 'Email').set username
79
+ # @b.text_field(name: 'Passwd').set password
80
+ # element?('input#signIn') { |e| e.click }
81
+ #end
82
+
83
+ def element?(selector, &block)
84
+ e = @page.element(css: selector)
85
+ if yield e
86
+ else
87
+ 'element is nil'
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,5 +1,10 @@
1
- module Spider
2
- class JiYing < Common
1
+ module Specific
2
+ # Download JiYing resources
3
+ # example:
4
+ # @page = ::Spider.open_browser(:phantomjs, 'http://bt.ktxp.com/sort-50-1.html')
5
+ # jy = ::Spider::JiYing.new(@page)
6
+ # jy.full_site
7
+ class JiYing
3
8
  attr_accessor :ani, :anis, :page
4
9
 
5
10
  def initialize page
@@ -21,19 +26,23 @@ module Spider
21
26
  (1..fp).each do |page_num|
22
27
  full_page page_num
23
28
  end
29
+ p_anis
24
30
  rescue Exception
25
31
  raise %Q(page isn't not exist)
26
32
  end
27
33
  end
28
34
 
29
35
  def multi_pages final_page_num
30
- begin
31
- (1..final_page_num).each do |page_num|
32
- full_page page_num
33
- end
36
+ (1..final_page_num).each do |page_num|
37
+ full_page page_num
38
+ p_anis
34
39
  end
35
40
  end
36
41
 
42
+ def p_anis
43
+ p "@anis is #{@anis}"
44
+ end
45
+
37
46
  def final_page
38
47
  if @mode == 'search'
39
48
  fp = @page.element(css: '.title h2 a').text[/\(.+\)/].gsub!(/\(|\)/, '').to_i/100 + 1
@@ -51,7 +60,7 @@ module Spider
51
60
  when 'normal'
52
61
  @page.goto "#{@base_url}#{page_num}.html"
53
62
  end
54
- html = Nokogiri::HTML.parse @page.html
63
+ html = ::Nokogiri::HTML.parse @page.html
55
64
 
56
65
  html.css('.ltext').each do |td|
57
66
  single(td)
@@ -70,11 +79,13 @@ module Spider
70
79
  @ani[:title],
71
80
  @ani[:size],
72
81
  @ani[:finish] = get_content(element, 'a.quick-down+a', 'td.ltext+td', 'td.ltext+td+td+td+td')
73
- @anis << @ani
82
+ p "Get Animation: #{@ani[:title]}"
83
+
84
+ @anis << @ani.dup
74
85
  end
75
86
 
76
- def count
77
- self.anis.count
87
+ def ani_count
88
+ @anis.uniq.count if @anis
78
89
  end
79
90
 
80
91
  def get_content(element, *selectors)
@@ -1,3 +1,3 @@
1
1
  module SpiderRails
2
- VERSION = "4.0.2"
2
+ VERSION = "4.0.3"
3
3
  end
data/lib/spider_rails.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  current_file_name = __FILE__.split('/').last.gsub('.rb', '')
3
- Dir[File.expand_path("../#{current_file_name}/*.rb", __FILE__)].each { |file| require file }
3
+ Dir[File.expand_path("../#{current_file_name}/**/*.rb", __FILE__)].each { |file| require file }
4
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spider_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - zhuxingruo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-20 00:00:00.000000000 Z
11
+ date: 2013-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -38,6 +38,104 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: watir
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: headless
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: guard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: guard-rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: spork
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
41
139
  description: nil
42
140
  email:
43
141
  - zhuxingruo3@gmail.com
@@ -45,14 +143,14 @@ executables: []
45
143
  extensions: []
46
144
  extra_rdoc_files: []
47
145
  files:
48
- - lib/spider_rails/common.rb
49
146
  - lib/spider_rails/version.rb
50
- - lib/spider_rails/bilibili.rb
51
- - lib/spider_rails/sample_data.rb
52
- - lib/spider_rails/spread_sheet.rb
53
- - lib/spider_rails/google_dict.rb
54
- - lib/spider_rails/ji_ying.rb
55
- - lib/spider_rails/hl.rb
147
+ - lib/spider_rails/specific/bilibili.rb
148
+ - lib/spider_rails/specific/spread_sheet.rb
149
+ - lib/spider_rails/specific/google_dict.rb
150
+ - lib/spider_rails/specific/ji_ying.rb
151
+ - lib/spider_rails/common/common.rb
152
+ - lib/spider_rails/common/rspec.rb
153
+ - lib/spider_rails/common/dsl.rb
56
154
  - lib/spider_rails.rb
57
155
  - MIT-LICENSE
58
156
  - Rakefile
@@ -1,91 +0,0 @@
1
- module Spider
2
- class GoogleDict < Common
3
- def get_cards(keywords)
4
- Headless.new.start
5
- keywords.each do |keyword|
6
- unless Card.find_by_word(keyword)
7
- get_card keyword
8
- save_record(Card, word: @card[:Word],
9
- voice: @card[:Voice],
10
- verb: @card[:Verb],
11
- adj: @card[:Adjective],
12
- noun: @card[:Noun],
13
- pronoun: @card[:Pronoun],
14
- synonyms: @card[:Synonyms],
15
- abbr: @card[:Abbreviation],
16
- prep: @card[:Preposition],
17
- conj: @card[:Conjunction]
18
- )
19
- end
20
- end
21
- end
22
-
23
- def get_keywords(path)
24
- f = File.new(path)
25
- dict = f.read.split(/\W/)
26
- dict.delete("")
27
- dict.uniq!
28
- dict
29
- end
30
-
31
- class << self
32
- def alias_methods(*args)
33
- args.each do |arg|
34
- alias_method arg, args.last
35
- end
36
- end
37
- end
38
-
39
- def get_card keyword
40
- @card = Hash.new
41
- @page = open_browser "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"
42
-
43
- doc = Nokogiri::HTML.parse @page.html
44
-
45
- GoogleDict.alias_methods :card, :voice, :word, :get_content
46
- card(doc, 'li.dct') do |c|
47
- @card[:Word] = keyword.downcase
48
- @card[:Voice] = voice(c, 'h3+.vk_sh')
49
-
50
- # Get word explainations
51
- get_explain(c)
52
- end
53
-
54
- @page.close
55
- @card.delete(0)
56
- @card
57
- end
58
-
59
- def get_explain(c)
60
- type_nodes = c.css('div.vk_gy.vk_sh').to_a
61
- content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
62
- type_nodes.each_with_index do |t, i|
63
- table = content_nodes[i]
64
- if table.css('li').count >= 2
65
- fin_content = Array.new
66
- table.css('li').each do |l|
67
- fin_content << l.content
68
- end
69
- else
70
- fin_content = table.content
71
- end
72
- @card[t.text.to_sym] = fin_content
73
- end
74
- end
75
-
76
- #def login(username, password)
77
- # element?('a.gbgt#gb_70') { |e| e.click }
78
- # @b.text_field(name: 'Email').set username
79
- # @b.text_field(name: 'Passwd').set password
80
- # element?('input#signIn') { |e| e.click }
81
- #end
82
-
83
- def element?(selector, &block)
84
- e = @page.element(css: selector)
85
- if yield e
86
- else
87
- 'element is nil'
88
- end
89
- end
90
- end
91
- end
@@ -1,13 +0,0 @@
1
- module Lib
2
- module Hl
3
- class << self
4
- def run
5
- h = Headless.new
6
- h.start
7
- b = Watir::Browser.new :chrome, switches: %w[--proxy-server=socks5://127.0.0.1:7070]
8
- b.goto 'https://www.google.com.hk/search?q=google+define&oq=google+define'
9
- p b.title
10
- end
11
- end
12
- end
13
- end
@@ -1,10 +0,0 @@
1
- module Spider
2
- class SampleData < Common
3
- def generate
4
- 100.times do |n|
5
- Novel.create(title: "やめて#{n}", content: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて')
6
- Card.create(word: "やめて#{n}", voice: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて')
7
- end
8
- end
9
- end
10
- end