apollo-crawler 0.0.34 → 0.0.35
Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'iconv'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', '..', 'plugin')
|
4
|
+
|
5
|
+
module Apollo
|
6
|
+
module Crawler
|
7
|
+
module Plugins
|
8
|
+
# PARAMETRIZE: Plugin class name
|
9
|
+
class Xkcd < Plugin
|
10
|
+
@@URL = "http://xkcd.com/"
|
11
|
+
|
12
|
+
@@MATCHER_ITEM = "//div[@id = 'comic']/img"
|
13
|
+
|
14
|
+
def name()
|
15
|
+
return "Xkcd"
|
16
|
+
end
|
17
|
+
|
18
|
+
def run()
|
19
|
+
ic = Iconv.new("UTF-8//IGNORE", "UTF-8")
|
20
|
+
|
21
|
+
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
22
|
+
doc = Nokogiri::HTML(ic.iconv(open(@@URL).read))
|
23
|
+
|
24
|
+
res = doc.xpath(@@MATCHER_ITEM).map { |node|
|
25
|
+
{
|
26
|
+
:text => node['title'],
|
27
|
+
:link => URI.join(@@URL, node['src'])
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
31
|
+
return {
|
32
|
+
:plugin => self.class.name,
|
33
|
+
:title => doc.title,
|
34
|
+
:res => res
|
35
|
+
}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end # Plugins
|
39
|
+
end # Crawler
|
40
|
+
end # Apollo
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.34
|
4
|
+
version: 0.0.35
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -59,6 +59,22 @@ dependencies:
|
|
59
59
|
- - ! '>='
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: mime-types
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
- !ruby/object:Gem::Dependency
|
63
79
|
name: nokogiri
|
64
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -75,6 +91,38 @@ dependencies:
|
|
75
91
|
- - ! '>='
|
76
92
|
- !ruby/object:Gem::Version
|
77
93
|
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: openurl
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: parallel
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
78
126
|
- !ruby/object:Gem::Dependency
|
79
127
|
name: thor
|
80
128
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +149,7 @@ files:
|
|
101
149
|
- ./lib/apollo_crawler/version.rb
|
102
150
|
- ./lib/apollo_crawler/crawler.rb
|
103
151
|
- ./lib/apollo_crawler/plugin_template.rb
|
152
|
+
- ./lib/apollo_crawler/plugins/xkcd_com/xkcd.rb
|
104
153
|
- ./lib/apollo_crawler/plugins/slashdot_org/slashdot.rb
|
105
154
|
- ./lib/apollo_crawler/plugins/firmy_cz/firmy.rb
|
106
155
|
- ./lib/apollo_crawler/plugins/alexa_com/alexa.rb
|