fetch_news 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/fetch_news/configuration.rb +5 -0
- data/lib/fetch_news/dynamic_page.rb +29 -0
- data/lib/fetch_news/static_page.rb +20 -0
- data/lib/fetch_news.rb +28 -0
- metadata +75 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a70ed2e1e968f7d3bddda5494e166e13e5440ce2
|
4
|
+
data.tar.gz: 62eb6f62a5e32817095af66084cdcf711a6cce91
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1f9cf7a25ac8b3a31e90f6df1947659c6e6853a722f9a8a9d7900fbef8d2711c82a33e8a16dc224cd7bae52a3ee6ffc7ee1c7f27fb5c7e62efc9a21454c45515
|
7
|
+
data.tar.gz: f83935885b4ab32a4eac94fb3514efebbb89a02fa895440f251869bf012a08261667f3f3119805f24922d9b71066c0998db7c7a8374e17c3aee63a28b4da8fff
|
data/lib/fetch_news/dynamic_page.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module FetchNews
  # Fetcher that drives a browser through Watir.
  #
  # NOTE(review): the class name suggests this is meant for pages whose
  # content is rendered by JavaScript — confirm with the gem author.
  class FetchDynamicContent
    # The Watir::Browser instance created in #initialize.
    attr_reader :browser

    # Starts a browser using the browser name and headless flag read from
    # FetchNews.configuration.
    #
    # NOTE(review): FetchNews.configuration appears to be assigned only by
    # FetchNews.configure, so configure presumably has to be called before
    # this constructor; otherwise the lookup below fails on nil — confirm.
    def initialize
      settings = FetchNews.configuration
      @browser = Watir::Browser.new(settings.browser.to_sym, headless: settings.headless)
    end

    # Navigates the browser to +url+.
    #
    # @param url the address handed to Watir's +goto+
    # @return whatever +browser.goto+ returns
    def go(url)
      browser.goto(url)
    end

    # Applies the given block to every element of +target_html+ and collects
    # the results.
    #
    # @param target_html [Enumerable] the elements to transform
    # @yield [element] called once per element, in order
    # @return [Array] the block's results; an empty array when no block is
    #   given (in that case +target_html+ is not iterated at all)
    def get_content(target_html, &block)
      # Decide once, up front, instead of re-checking for every element.
      return [] unless block

      target_html.map { |element| block.call(element) }
    end

    # Prints a notice to stdout and closes the browser.
    #
    # @return whatever +browser.close+ returns
    def quit
      puts "browser exist with code 0"
      browser.close
    end
  end
end
|
data/lib/fetch_news/static_page.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module FetchNews
  # Fetcher that retrieves pages with a Mechanize agent.
  class FetchStaticContent
    # The Mechanize agent created in #initialize.
    attr_reader :agent
    # The result of the most recent #go call (nil until #go is called).
    attr_accessor :page

    # Creates the Mechanize agent used for all requests.
    def initialize
      @agent = Mechanize.new
    end

    # Fetches +url+ with the agent and remembers the result in #page.
    #
    # @param url the address handed to the agent's +get+
    # @return whatever +agent.get+ returns (also stored in #page)
    def go(url)
      @page = @agent.get(url)
    end

    # Applies the given block to every element of +target_html+ and collects
    # the results.
    #
    # @param target_html [Enumerable] the elements to transform
    # @yield [element] called once per element, in order
    # @return [Array] the block's results; an empty array when no block is
    #   given, rather than an array of nils
    def get_content(target_html, &block)
      # Without a block there is nothing to collect, so return an empty
      # result instead of one nil per element.
      return [] unless block

      target_html.map { |element| block.call(element) }
    end
  end
end
|
data/lib/fetch_news.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'watir'
|
3
|
+
require_relative './fetch_news/static_page.rb'
|
4
|
+
require_relative './fetch_news/dynamic_page.rb'
|
5
|
+
require_relative './fetch_news/configuration.rb'
|
6
|
+
|
7
|
+
module FetchNews
  class << self
    # The FetchNews::Configuration set by .configure (nil until then).
    attr_accessor :configuration

    # Builds a fetcher of the requested kind.
    #
    # @param type [String, Symbol] "dynamic" or "static"; the value is
    #   compared by its string form, so symbols work as well
    # @return [FetchNews::FetchDynamicContent, FetchNews::FetchStaticContent]
    # @raise [RuntimeError] "invalid type" for any other value
    def init(type)
      case type.to_s
      when "dynamic" then FetchNews::FetchDynamicContent.new
      when "static" then FetchNews::FetchStaticContent.new
      else raise "invalid type"
      end
    end

    # Replaces the current configuration with a fresh
    # FetchNews::Configuration and yields it so the caller can set options.
    #
    # @yield [configuration] the newly created configuration object
    # @return the block's value, or nil when no block is given
    def configure
      @configuration = FetchNews::Configuration.new

      # A bare `yield` would raise LocalJumpError when no block is passed.
      yield configuration if block_given?
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fetch_news
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jim He
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-06-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: watir
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: A simple spider gem
|
42
|
+
email: 6756971174@qq.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- lib/fetch_news.rb
|
48
|
+
- lib/fetch_news/configuration.rb
|
49
|
+
- lib/fetch_news/dynamic_page.rb
|
50
|
+
- lib/fetch_news/static_page.rb
|
51
|
+
homepage:
|
52
|
+
licenses:
|
53
|
+
- MIT
|
54
|
+
metadata: {}
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubyforge_project:
|
71
|
+
rubygems_version: 2.6.14
|
72
|
+
signing_key:
|
73
|
+
specification_version: 4
|
74
|
+
summary: fetch page content from page!
|
75
|
+
test_files: []
|