spider_rails 4.0.2 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e182b6084575d3b2ac7e4cc9dcaf9c07b7018fc
4
- data.tar.gz: 060df4f018e1bf486c0ac03d923fde72f5cd3f96
3
+ metadata.gz: e9ae14d4aa7c198c8e42c9075b9cf76457c1d6a9
4
+ data.tar.gz: 1177b0a8dfe1c3715050a9bd5dcb25f14daaf32d
5
5
  SHA512:
6
- metadata.gz: 386ffce6067639a68a264d4fe73d9d2eb398874d4c68f7290fbbf1bfd4163206e3c1e9754f7891b43b390dea38fefcd0674cbac92b7c3fb6bae55ea3fc94544e
7
- data.tar.gz: 463612cd49d92066f7c349f4188a9cff34e3e31a69ebeb9ce1ce874e006e1aae8586b3d41a21a323138bcee6fa69a7c706d08105d5762c21a80a86bca1653436
6
+ metadata.gz: 3b1573e5ebed6b5bce30e04f715a0058d59d3eeaf8fad387a93fe8fecd71334a471cd946aee1d7f150f865ad8bc8c6ace4672b9baa2ceaaeff302eb5b2b44778
7
+ data.tar.gz: 29b0d0bcccd465fc61f11e704e655e505d024d2ac9646d415398cfea67c93a665ff33af37ba6b2f73133b9a1830346d1013a21e7d4ea99f3c862b9f13e61a321
data/Rakefile CHANGED
@@ -19,3 +19,5 @@ Bundler::GemHelper.install_tasks
19
19
  require 'rspec/core/rake_task'
20
20
 
21
21
  RSpec::Core::RakeTask.new(:spec)
22
+
23
+ task default: :spec
@@ -1,28 +1,5 @@
1
- module Spider
1
+ module Common
2
2
  class Common
3
- def full_site
4
-
5
- end
6
-
7
- def full_site_filter
8
-
9
- end
10
-
11
- def full_page
12
- close_all_chromes
13
- end
14
-
15
- def get element
16
-
17
- end
18
-
19
- def single(element)
20
-
21
- end
22
-
23
- def single_filter
24
- end
25
-
26
3
  def get_content(element, selector, &block)
27
4
  begin
28
5
  if block_given?
@@ -37,11 +14,10 @@ module Spider
37
14
  end
38
15
  end
39
16
  end
40
-
41
17
  end
42
18
 
43
19
  class << self
44
- def open_browser driver, url
20
+ def start driver, url
45
21
  #@browser = Watir::Browser.new :chrome, switches: %w( --user-data-dir=/home/zxr/.config/google-chrome)
46
22
  @browser = Watir::Browser.new driver
47
23
  @browser.goto url
@@ -0,0 +1,32 @@
1
+ require 'watir-webdriver'
2
+ require 'nokogiri'
3
+ module DSL
4
+ # Add a visit method to ::Watir::Browser
5
+ class Browser < ::Watir::Browser
6
+ def visit(relative_url = nil, base_url = 'http://localhost:3000/')
7
+ goto("#{base_url}#{relative_url}")
8
+ end
9
+
10
+ def initialize(browser = :phantomjs, *args)
11
+ super
12
+ end
13
+
14
+ def dsl_enable
15
+ @doc = Nokogiri::HTML.parse(self.html)
16
+ ::String.class_variable_set(:@@doc, @doc)
17
+ eval <<-RUBY
18
+ class ::String
19
+ def ctn
20
+ if block_given?
21
+ @@doc.css(self) &block
22
+ else
23
+ @@doc.css(self).each do |e|
24
+ return e.text
25
+ end
26
+ end
27
+ end
28
+ end
29
+ RUBY
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,5 @@
1
+ require File.expand_path('../dsl', __FILE__)
2
+
3
+ RSpec.configure do |config|
4
+
5
+ end
@@ -1,5 +1,5 @@
1
- module Spider
2
- class BiliBili < Common
1
+ module Specific
2
+ class BiliBili
3
3
 
4
4
  def get_res
5
5
  animations = Hash.new
@@ -0,0 +1,91 @@
1
+ module Specific
2
+ class GoogleDict
3
+ def get_cards(keywords)
4
+ Headless.new.start
5
+ keywords.each do |keyword|
6
+ unless Card.find_by_word(keyword)
7
+ get_card keyword
8
+ save_record(Card, word: @card[:Word],
9
+ voice: @card[:Voice],
10
+ verb: @card[:Verb],
11
+ adj: @card[:Adjective],
12
+ noun: @card[:Noun],
13
+ pronoun: @card[:Pronoun],
14
+ synonyms: @card[:Synonyms],
15
+ abbr: @card[:Abbreviation],
16
+ prep: @card[:Preposition],
17
+ conj: @card[:Conjunction]
18
+ )
19
+ end
20
+ end
21
+ end
22
+
23
+ def get_keywords(path)
24
+ f = File.new(path)
25
+ dict = f.read.split(/\W/)
26
+ dict.delete("")
27
+ dict.uniq!
28
+ dict
29
+ end
30
+
31
+ class << self
32
+ def alias_methods(*args)
33
+ args.each do |arg|
34
+ alias_method arg, args.last
35
+ end
36
+ end
37
+ end
38
+
39
+ def get_card keyword
40
+ @card = Hash.new
41
+ @page = start "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"
42
+
43
+ doc = Nokogiri::HTML.parse @page.html
44
+
45
+ GoogleDict.alias_methods :card, :voice, :word, :get_content
46
+ card(doc, 'li.dct') do |c|
47
+ @card[:Word] = keyword.downcase
48
+ @card[:Voice] = voice(c, 'h3+.vk_sh')
49
+
50
+ # Get word explainations
51
+ get_explain(c)
52
+ end
53
+
54
+ @page.close
55
+ @card.delete(0)
56
+ @card
57
+ end
58
+
59
+ def get_explain(c)
60
+ type_nodes = c.css('div.vk_gy.vk_sh').to_a
61
+ content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
62
+ type_nodes.each_with_index do |t, i|
63
+ table = content_nodes[i]
64
+ if table.css('li').count >= 2
65
+ fin_content = Array.new
66
+ table.css('li').each do |l|
67
+ fin_content << l.content
68
+ end
69
+ else
70
+ fin_content = table.content
71
+ end
72
+ @card[t.text.to_sym] = fin_content
73
+ end
74
+ end
75
+
76
+ #def login(username, password)
77
+ # element?('a.gbgt#gb_70') { |e| e.click }
78
+ # @b.text_field(name: 'Email').set username
79
+ # @b.text_field(name: 'Passwd').set password
80
+ # element?('input#signIn') { |e| e.click }
81
+ #end
82
+
83
+ def element?(selector, &block)
84
+ e = @page.element(css: selector)
85
+ if yield e
86
+ else
87
+ 'element is nil'
88
+ end
89
+ end
90
+ end
91
+ end
@@ -1,5 +1,10 @@
1
- module Spider
2
- class JiYing < Common
1
+ module Specific
2
+ # Download JiYing resources
3
+ # example:
4
+ # @page = ::Spider.open_browser(:phantomjs, 'http://bt.ktxp.com/sort-50-1.html')
5
+ # jy = ::Spider::JiYing.new(@page)
6
+ # jy.full_site
7
+ class JiYing
3
8
  attr_accessor :ani, :anis, :page
4
9
 
5
10
  def initialize page
@@ -21,19 +26,23 @@ module Spider
21
26
  (1..fp).each do |page_num|
22
27
  full_page page_num
23
28
  end
29
+ p_anis
24
30
  rescue Exception
25
31
  raise %Q(page isn't not exist)
26
32
  end
27
33
  end
28
34
 
29
35
  def multi_pages final_page_num
30
- begin
31
- (1..final_page_num).each do |page_num|
32
- full_page page_num
33
- end
36
+ (1..final_page_num).each do |page_num|
37
+ full_page page_num
38
+ p_anis
34
39
  end
35
40
  end
36
41
 
42
+ def p_anis
43
+ p "@anis is #{@anis}"
44
+ end
45
+
37
46
  def final_page
38
47
  if @mode == 'search'
39
48
  fp = @page.element(css: '.title h2 a').text[/\(.+\)/].gsub!(/\(|\)/, '').to_i/100 + 1
@@ -51,7 +60,7 @@ module Spider
51
60
  when 'normal'
52
61
  @page.goto "#{@base_url}#{page_num}.html"
53
62
  end
54
- html = Nokogiri::HTML.parse @page.html
63
+ html = ::Nokogiri::HTML.parse @page.html
55
64
 
56
65
  html.css('.ltext').each do |td|
57
66
  single(td)
@@ -70,11 +79,13 @@ module Spider
70
79
  @ani[:title],
71
80
  @ani[:size],
72
81
  @ani[:finish] = get_content(element, 'a.quick-down+a', 'td.ltext+td', 'td.ltext+td+td+td+td')
73
- @anis << @ani
82
+ p "Get Animation: #{@ani[:title]}"
83
+
84
+ @anis << @ani.dup
74
85
  end
75
86
 
76
- def count
77
- self.anis.count
87
+ def ani_count
88
+ @anis.uniq.count if @anis
78
89
  end
79
90
 
80
91
  def get_content(element, *selectors)
@@ -1,3 +1,3 @@
1
1
  module SpiderRails
2
- VERSION = "4.0.2"
2
+ VERSION = "4.0.3"
3
3
  end
data/lib/spider_rails.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # -*- encoding : utf-8 -*-
2
2
  current_file_name = __FILE__.split('/').last.gsub('.rb', '')
3
- Dir[File.expand_path("../#{current_file_name}/*.rb", __FILE__)].each { |file| require file }
3
+ Dir[File.expand_path("../#{current_file_name}/**/*.rb", __FILE__)].each { |file| require file }
4
4
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spider_rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.2
4
+ version: 4.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - zhuxingruo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-20 00:00:00.000000000 Z
11
+ date: 2013-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -38,6 +38,104 @@ dependencies:
38
38
  - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: watir
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: nokogiri
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: headless
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: guard
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: guard-rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: spork
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
41
139
  description: nil
42
140
  email:
43
141
  - zhuxingruo3@gmail.com
@@ -45,14 +143,14 @@ executables: []
45
143
  extensions: []
46
144
  extra_rdoc_files: []
47
145
  files:
48
- - lib/spider_rails/common.rb
49
146
  - lib/spider_rails/version.rb
50
- - lib/spider_rails/bilibili.rb
51
- - lib/spider_rails/sample_data.rb
52
- - lib/spider_rails/spread_sheet.rb
53
- - lib/spider_rails/google_dict.rb
54
- - lib/spider_rails/ji_ying.rb
55
- - lib/spider_rails/hl.rb
147
+ - lib/spider_rails/specific/bilibili.rb
148
+ - lib/spider_rails/specific/spread_sheet.rb
149
+ - lib/spider_rails/specific/google_dict.rb
150
+ - lib/spider_rails/specific/ji_ying.rb
151
+ - lib/spider_rails/common/common.rb
152
+ - lib/spider_rails/common/rspec.rb
153
+ - lib/spider_rails/common/dsl.rb
56
154
  - lib/spider_rails.rb
57
155
  - MIT-LICENSE
58
156
  - Rakefile
@@ -1,91 +0,0 @@
1
- module Spider
2
- class GoogleDict < Common
3
- def get_cards(keywords)
4
- Headless.new.start
5
- keywords.each do |keyword|
6
- unless Card.find_by_word(keyword)
7
- get_card keyword
8
- save_record(Card, word: @card[:Word],
9
- voice: @card[:Voice],
10
- verb: @card[:Verb],
11
- adj: @card[:Adjective],
12
- noun: @card[:Noun],
13
- pronoun: @card[:Pronoun],
14
- synonyms: @card[:Synonyms],
15
- abbr: @card[:Abbreviation],
16
- prep: @card[:Preposition],
17
- conj: @card[:Conjunction]
18
- )
19
- end
20
- end
21
- end
22
-
23
- def get_keywords(path)
24
- f = File.new(path)
25
- dict = f.read.split(/\W/)
26
- dict.delete("")
27
- dict.uniq!
28
- dict
29
- end
30
-
31
- class << self
32
- def alias_methods(*args)
33
- args.each do |arg|
34
- alias_method arg, args.last
35
- end
36
- end
37
- end
38
-
39
- def get_card keyword
40
- @card = Hash.new
41
- @page = open_browser "https://www.google.com.hk/search?newwindow=1&safe=strict&q=#{keyword}+define&oq=#{keyword}+define"
42
-
43
- doc = Nokogiri::HTML.parse @page.html
44
-
45
- GoogleDict.alias_methods :card, :voice, :word, :get_content
46
- card(doc, 'li.dct') do |c|
47
- @card[:Word] = keyword.downcase
48
- @card[:Voice] = voice(c, 'h3+.vk_sh')
49
-
50
- # Get word explainations
51
- get_explain(c)
52
- end
53
-
54
- @page.close
55
- @card.delete(0)
56
- @card
57
- end
58
-
59
- def get_explain(c)
60
- type_nodes = c.css('div.vk_gy.vk_sh').to_a
61
- content_nodes = c.css('div.vk_gy.vk_sh+div').to_a
62
- type_nodes.each_with_index do |t, i|
63
- table = content_nodes[i]
64
- if table.css('li').count >= 2
65
- fin_content = Array.new
66
- table.css('li').each do |l|
67
- fin_content << l.content
68
- end
69
- else
70
- fin_content = table.content
71
- end
72
- @card[t.text.to_sym] = fin_content
73
- end
74
- end
75
-
76
- #def login(username, password)
77
- # element?('a.gbgt#gb_70') { |e| e.click }
78
- # @b.text_field(name: 'Email').set username
79
- # @b.text_field(name: 'Passwd').set password
80
- # element?('input#signIn') { |e| e.click }
81
- #end
82
-
83
- def element?(selector, &block)
84
- e = @page.element(css: selector)
85
- if yield e
86
- else
87
- 'element is nil'
88
- end
89
- end
90
- end
91
- end
@@ -1,13 +0,0 @@
1
- module Lib
2
- module Hl
3
- class << self
4
- def run
5
- h = Headless.new
6
- h.start
7
- b = Watir::Browser.new :chrome, switches: %w[--proxy-server=socks5://127.0.0.1:7070]
8
- b.goto 'https://www.google.com.hk/search?q=google+define&oq=google+define'
9
- p b.title
10
- end
11
- end
12
- end
13
- end
@@ -1,10 +0,0 @@
1
- module Spider
2
- class SampleData < Common
3
- def generate
4
- 100.times do |n|
5
- Novel.create(title: "やめて#{n}", content: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて')
6
- Card.create(word: "やめて#{n}", voice: 'やめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめてやめて やめて')
7
- end
8
- end
9
- end
10
- end