spidr_epg_gem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/spidr.rb +3 -0
- data/lib/spidr_epg/actions/actions.rb +83 -0
- data/lib/spidr_epg/actions/exceptions/action.rb +9 -0
- data/lib/spidr_epg/actions/exceptions/paused.rb +11 -0
- data/lib/spidr_epg/actions/exceptions/skip_link.rb +12 -0
- data/lib/spidr_epg/actions/exceptions/skip_page.rb +12 -0
- data/lib/spidr_epg/actions/exceptions.rb +4 -0
- data/lib/spidr_epg/actions.rb +2 -0
- data/lib/spidr_epg/agent.rb +747 -0
- data/lib/spidr_epg/auth_credential.rb +28 -0
- data/lib/spidr_epg/auth_store.rb +161 -0
- data/lib/spidr_epg/body.rb +98 -0
- data/lib/spidr_epg/cookie_jar.rb +202 -0
- data/lib/spidr_epg/events.rb +537 -0
- data/lib/spidr_epg/extensions/uri.rb +52 -0
- data/lib/spidr_epg/extensions.rb +1 -0
- data/lib/spidr_epg/filters.rb +539 -0
- data/lib/spidr_epg/headers.rb +370 -0
- data/lib/spidr_epg/links.rb +229 -0
- data/lib/spidr_epg/page.rb +108 -0
- data/lib/spidr_epg/rules.rb +79 -0
- data/lib/spidr_epg/sanitizers.rb +56 -0
- data/lib/spidr_epg/session_cache.rb +145 -0
- data/lib/spidr_epg/spidr.rb +98 -0
- data/lib/spidr_epg/version.rb +4 -0
- data/lib/spidr_epg.rb +3 -0
- data/lib/spidr_epg_gem.rb~ +3 -0
- data/lib/spidr_epg_gem~ +7 -0
- data/spidr_epg_gem.gemspec +17 -0
- metadata +72 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NzdmOTg4MGRhNTFkZDQwOGY4NGQ2ZjY2ZjExYjI3NzE4M2Y0NDVjOQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YjY2Yjg4MTMxMTU5MDRiMDdiNzU5MTg1Yzc1ZmZkOGMzYTY4YTZkZA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
Y2NkNTBkMjRkODk3NGY1MWJjZWZmYzcxNGU2Y2E0MTgyYWE0ZmFkODBmZjk5
|
10
|
+
MDk0MjE2MGQ1ZDRlYWU1MGU4ZmFmOGM4ZDViYjlkNDFlOTlmMTIxNjM0NDVi
|
11
|
+
YTViNjFmNzU5YmVkMThkNGZjMThhYjU1YzQ4MTI4OGJjMDU5ZDA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
OTA0OWI4OGYwZDBkOWM4ZjFlYmU1YzkyYWMwMzhjNjc1NTUxZDEyZTRkYjgy
|
14
|
+
NjZjMTljNTYwYWNhZDNhMTNiYzhlMTU5NzY2ZmIzOGZhMDM1Zjc1YWE5MDJl
|
15
|
+
ZGFjMmY2ZTY2Zjc0ODM5M2Y0MjM4NjI0YmIxN2Y1OWNjZDcxMzQ=
|
data/lib/spidr.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'spidr/actions/exceptions/paused'
|
2
|
+
require 'spidr/actions/exceptions/skip_link'
|
3
|
+
require 'spidr/actions/exceptions/skip_page'
|
4
|
+
|
5
|
+
module Spidr
  #
  # The {Actions} module adds methods to {Agent} for controlling the
  # spidering of links.
  #
  # The module keeps a single piece of state, `@paused`, on the object it
  # is mixed into. It relies on the including class to provide `run`, and
  # on the `Paused`, `SkipLink` and `SkipPage` exception classes being
  # loaded (they are not defined in this module).
  #
  module Actions
    #
    # Clears the pause flag and resumes spidering by delegating to `run`.
    #
    # @yield [page]
    #   The block, if given, is forwarded unchanged to `run`.
    #
    # @yieldparam [Page] page
    #   Presumably the page being visited; determined by `run`, which is
    #   defined by the including class.
    #
    # @return
    #   Whatever `run` returns.
    #
    def continue!(&block)
      @paused = false
      run(&block)
    end

    #
    # Sets the pause state of the agent.
    #
    # @param [Boolean] state
    #   The new pause state of the agent. Note that {#paused?} only
    #   reports `true` when this value is exactly `true`.
    #
    def pause=(state)
      @paused = state
    end

    #
    # Marks the agent as paused and interrupts the current operation.
    #
    # @raise [Paused]
    #   Always raised, after the pause flag has been set, to signal that
    #   spidering should stop.
    #
    def pause!
      @paused = true
      raise Paused
    end

    #
    # Determines whether the agent is paused.
    #
    # @return [Boolean]
    #   `true` only when the pause flag is exactly `true`; `false` for
    #   `nil`, `false`, or any other value stored via {#pause=}.
    #
    def paused?
      # Strict comparison on purpose: guarantees a real Boolean result even
      # when the flag is unset (nil) or holds a non-Boolean value.
      @paused == true
    end

    #
    # Causes the agent to skip the link being enqueued.
    #
    # @raise [SkipLink]
    #   Always raised, to signal that the current link should be skipped.
    #
    def skip_link!
      raise SkipLink
    end

    #
    # Causes the agent to skip the page being visited.
    #
    # @raise [SkipPage]
    #   Always raised, to signal that the current page should be skipped.
    #
    def skip_page!
      raise SkipPage
    end

    protected

    #
    # Initializes the state used by the action methods.
    #
    # @param [Hash] options
    #   Accepted for signature compatibility; currently not read.
    #
    def initialize_actions(options = {})
      @paused = false
    end
  end
end
|