pagedump 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/lib/pagedump.rb +38 -10
- data/lib/pagedump/driver.rb +18 -6
- data/lib/pagedump/driver_error.rb +5 -0
- data/lib/pagedump/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9943288b16e64f01ce860e21ea2b2fe8bbea2236
|
4
|
+
data.tar.gz: 72ee28830d7a60def93b728472a18cadaf68af5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 796f81e0fa89052028e84d564486e87f7410c460fbb46f240a6b36c4378c8a63648654e7d5770edfc47b399acfe16f635c76031dfe122ee03a2efe99fd2b70f2
|
7
|
+
data.tar.gz: 78373b80f9dbca802a006bacaf5a783d74643778b0e2a19ddde1fa9b5617380b5e8578915db8837b68a0aa4b0229c8ea65c0305461488cfb7d3f9bb281ee2037
|
data/.gitignore
CHANGED
data/lib/pagedump.rb
CHANGED
@@ -2,29 +2,57 @@ require 'logging'
|
|
2
2
|
require 'mechanize'
|
3
3
|
require "pagedump/version"
|
4
4
|
require "pagedump/driver"
|
5
|
+
require "pagedump/driver_error"
|
5
6
|
|
6
7
|
module Pagedump
|
7
8
|
class << self
|
8
9
|
def logger
|
9
10
|
Logging.logger[Pagedump]
|
10
11
|
end
|
12
|
+
|
11
13
|
def load_drivers path
|
12
|
-
|
13
|
-
|
14
|
+
if(File.directory?(path))
|
15
|
+
Dir[File.join(path, "**/*.rb")].each do |p|
|
16
|
+
load_driver p
|
17
|
+
end
|
18
|
+
elsif(File.file? path)
|
19
|
+
load_driver path
|
20
|
+
else
|
21
|
+
raise "Not a directory: #{path}"
|
14
22
|
end
|
15
23
|
end
|
24
|
+
|
25
|
+
def driver_exist? driver
|
26
|
+
drivers.any?{|d| d == driver || d.name == driver}
|
27
|
+
end
|
28
|
+
|
29
|
+
def load_driver path
|
30
|
+
require path
|
31
|
+
puts "DRIVERs: #{drivers}"
|
32
|
+
drivers[-1]
|
33
|
+
end
|
34
|
+
|
16
35
|
def register_driver driver_cls
|
17
|
-
|
36
|
+
unless driver_cls.is_a?(Class) && driver_cls.superclass == Pagedump::Driver
|
37
|
+
raise("Not a driver class: #{driver_cls}")
|
38
|
+
end
|
39
|
+
logger.debug "Registering driver #{driver_cls}."
|
40
|
+
drivers << driver_cls
|
41
|
+
driver_cls
|
18
42
|
end
|
43
|
+
|
44
|
+
alias :register :register_driver
|
45
|
+
|
19
46
|
def drivers
|
20
|
-
@drivers ||=
|
47
|
+
@drivers ||= []
|
21
48
|
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
else
|
26
|
-
cls.new
|
27
|
-
end
|
49
|
+
|
50
|
+
def driver dr
|
51
|
+
drivers.find{|cls| c.name == dr.to_s} || raise("No such driver: #{dr}")
|
28
52
|
end
|
53
|
+
alias :"driver[]" :driver
|
54
|
+
|
55
|
+
private
|
56
|
+
|
29
57
|
end
|
30
58
|
end
|
data/lib/pagedump/driver.rb
CHANGED
@@ -1,16 +1,21 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
1
3
|
module Pagedump
|
2
|
-
#
|
3
|
-
#
|
4
|
+
# WARNING !!
|
5
|
+
# Not Thread-Safe
|
4
6
|
class Driver
|
5
|
-
attr_reader :headlines
|
6
|
-
|
7
7
|
def self.inherited(subclass)
|
8
8
|
Pagedump.register_driver subclass
|
9
|
-
puts "New driver: #{subclass}"
|
10
9
|
end
|
11
10
|
|
12
11
|
def initialize
|
13
12
|
@wlinks = {}
|
13
|
+
@data = {}
|
14
|
+
end
|
15
|
+
|
16
|
+
def data key, value
|
17
|
+
@data[key] ||= []
|
18
|
+
@data[key] << value
|
14
19
|
end
|
15
20
|
|
16
21
|
def link weight, href
|
@@ -24,7 +29,14 @@ module Pagedump
|
|
24
29
|
agent = Mechanize.new
|
25
30
|
page = agent.get(url)
|
26
31
|
self.links page
|
27
|
-
|
32
|
+
self.check page
|
33
|
+
result = OpenStruct.new
|
34
|
+
result.links = @wlinks
|
35
|
+
result.data = @data
|
36
|
+
result
|
37
|
+
end
|
38
|
+
|
39
|
+
def check page
|
28
40
|
end
|
29
41
|
|
30
42
|
def url
|
data/lib/pagedump/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pagedump
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damien Cram
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-05-
|
11
|
+
date: 2016-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -98,6 +98,7 @@ files:
|
|
98
98
|
- bin/setup
|
99
99
|
- lib/pagedump.rb
|
100
100
|
- lib/pagedump/driver.rb
|
101
|
+
- lib/pagedump/driver_error.rb
|
101
102
|
- lib/pagedump/version.rb
|
102
103
|
- pagedump.gemspec
|
103
104
|
homepage: http://github.com/pompadour/pagedump/
|