pagedump 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/pagedump.rb +38 -10
- data/lib/pagedump/driver.rb +18 -6
- data/lib/pagedump/driver_error.rb +5 -0
- data/lib/pagedump/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9943288b16e64f01ce860e21ea2b2fe8bbea2236
|
4
|
+
data.tar.gz: 72ee28830d7a60def93b728472a18cadaf68af5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 796f81e0fa89052028e84d564486e87f7410c460fbb46f240a6b36c4378c8a63648654e7d5770edfc47b399acfe16f635c76031dfe122ee03a2efe99fd2b70f2
|
7
|
+
data.tar.gz: 78373b80f9dbca802a006bacaf5a783d74643778b0e2a19ddde1fa9b5617380b5e8578915db8837b68a0aa4b0229c8ea65c0305461488cfb7d3f9bb281ee2037
|
data/.gitignore
CHANGED
data/lib/pagedump.rb
CHANGED
@@ -2,29 +2,57 @@ require 'logging'
|
|
2
2
|
require 'mechanize'
|
3
3
|
require "pagedump/version"
|
4
4
|
require "pagedump/driver"
|
5
|
+
require "pagedump/driver_error"
|
5
6
|
|
6
7
|
module Pagedump
|
7
8
|
class << self
|
8
9
|
def logger
|
9
10
|
Logging.logger[Pagedump]
|
10
11
|
end
|
12
|
+
|
11
13
|
def load_drivers path
|
12
|
-
|
13
|
-
|
14
|
+
if(File.directory?(path))
|
15
|
+
Dir[File.join(path, "**/*.rb")].each do |p|
|
16
|
+
load_driver p
|
17
|
+
end
|
18
|
+
elsif(File.file? path)
|
19
|
+
load_driver path
|
20
|
+
else
|
21
|
+
raise "Not a directory: #{path}"
|
14
22
|
end
|
15
23
|
end
|
24
|
+
|
25
|
+
def driver_exist? driver
|
26
|
+
drivers.any?{|d| d == driver || d.name == driver}
|
27
|
+
end
|
28
|
+
|
29
|
+
def load_driver path
|
30
|
+
require path
|
31
|
+
puts "DRIVERs: #{drivers}"
|
32
|
+
drivers[-1]
|
33
|
+
end
|
34
|
+
|
16
35
|
def register_driver driver_cls
|
17
|
-
|
36
|
+
unless driver_cls.is_a?(Class) && driver_cls.superclass == Pagedump::Driver
|
37
|
+
raise("Not a driver class: #{driver_cls}")
|
38
|
+
end
|
39
|
+
logger.debug "Registering driver #{driver_cls}."
|
40
|
+
drivers << driver_cls
|
41
|
+
driver_cls
|
18
42
|
end
|
43
|
+
|
44
|
+
alias :register :register_driver
|
45
|
+
|
19
46
|
def drivers
|
20
|
-
@drivers ||=
|
47
|
+
@drivers ||= []
|
21
48
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
else
|
26
|
-
cls.new
|
27
|
-
end
|
49
|
+
|
50
|
+
def driver dr
|
51
|
+
drivers.find{|cls| c.name == dr.to_s} || raise("No such driver: #{dr}")
|
28
52
|
end
|
53
|
+
alias :"driver[]" :driver
|
54
|
+
|
55
|
+
private
|
56
|
+
|
29
57
|
end
|
30
58
|
end
|
data/lib/pagedump/driver.rb
CHANGED
@@ -1,16 +1,21 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
1
3
|
module Pagedump
|
2
|
-
#
|
3
|
-
#
|
4
|
+
# WARNING !!
|
5
|
+
# Not Thread-Safe
|
4
6
|
class Driver
|
5
|
-
attr_reader :headlines
|
6
|
-
|
7
7
|
def self.inherited(subclass)
|
8
8
|
Pagedump.register_driver subclass
|
9
|
-
puts "New driver: #{subclass}"
|
10
9
|
end
|
11
10
|
|
12
11
|
def initialize
|
13
12
|
@wlinks = {}
|
13
|
+
@data = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
def data key, value
|
17
|
+
@data[key] ||= []
|
18
|
+
@data[key] << value
|
14
19
|
end
|
15
20
|
|
16
21
|
def link weight, href
|
@@ -24,7 +29,14 @@ module Pagedump
|
|
24
29
|
agent = Mechanize.new
|
25
30
|
page = agent.get(url)
|
26
31
|
self.links page
|
27
|
-
|
32
|
+
self.check page
|
33
|
+
result = OpenStruct.new
|
34
|
+
result.links = @wlinks
|
35
|
+
result.data = @data
|
36
|
+
result
|
37
|
+
end
|
38
|
+
|
39
|
+
def check page
|
28
40
|
end
|
29
41
|
|
30
42
|
def url
|
data/lib/pagedump/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pagedump
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damien Cram
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -98,6 +98,7 @@ files:
|
|
98
98
|
- bin/setup
|
99
99
|
- lib/pagedump.rb
|
100
100
|
- lib/pagedump/driver.rb
|
101
|
+
- lib/pagedump/driver_error.rb
|
101
102
|
- lib/pagedump/version.rb
|
102
103
|
- pagedump.gemspec
|
103
104
|
homepage: http://github.com/pompadour/pagedump/
|