apollo-crawler 0.1.2 → 0.1.3

@@ -1,24 +1,24 @@
- module Apollo
-   module Crawlers
-     class CRAWLER_CLASS_NAME < Crawler
-       @@MATCHER_ITEM = "CRAWLER_MATCHER"
-
-       def name()
-         return "CRAWLER_NAME"
-       end
-
-       def url()
-         return "CRAWLER_URL"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end # CRAWLER_CLASS_NAME
-   end # Crawlers
- end # Apollo
+ module Apollo
+   module Crawlers
+     class CRAWLER_CLASS_NAME < Crawler
+       @@MATCHER_ITEM = "CRAWLER_MATCHER"
+
+       def name()
+         return "CRAWLER_NAME"
+       end
+
+       def url()
+         return "CRAWLER_URL"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end # CRAWLER_CLASS_NAME
+   end # Crawlers
+ end # Apollo
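
The template hunk above defines the interface every crawler in this release follows: name, url, and extract_data over a parsed document (the doc.xpath calls imply a Nokogiri-style node). A minimal usage sketch follows, using the Alexa crawler from a later hunk; it assumes the base Crawler class can be instantiated without arguments and that pages are fetched with open-uri, neither of which is shown in this diff.

# Illustrative only: exercises the crawler interface from this changeset.
# The fetch step and the no-argument constructor are assumptions; the gem's
# own crawl pipeline is not visible in this diff.
require 'nokogiri'
require 'open-uri'
require 'uri'

crawler = Apollo::Crawlers::Alexa.new
doc     = Nokogiri::HTML(URI.open(crawler.url))
items   = crawler.extract_data(doc)   # => [{ :text => "...", :link => #<URI> }, ...]
items.each { |item| puts "#{item[:text]} -> #{item[:link]}" }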
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class Alexa < Crawler
-       @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
-
-       def name()
-         return "Alexa Rank"
-       end
-
-       def url()
-         return "http://www.alexa.com/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class Alexa < Crawler
+       @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
+
+       def name()
+         return "Alexa Rank"
+       end
+
+       def url()
+         return "http://www.alexa.com/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class Firmy < Crawler
-       @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
-
-       def name()
-         return "Firmy.cz"
-       end
-
-       def url()
-         return "http://www.firmy.cz/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class Firmy < Crawler
+       @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
+
+       def name()
+         return "Firmy.cz"
+       end
+
+       def url()
+         return "http://www.firmy.cz/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class Google < Crawler
-       @@MATCHER_ITEM = "//h3/a"
-
-       def name()
-         return "Google"
-       end
-
-       def url()
-         return "http://www.google.com/search?q=ruby"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class Google < Crawler
+       @@MATCHER_ITEM = "//h3/a"
+
+       def name()
+         return "Google"
+       end
+
+       def url()
+         return "http://www.google.com/search?q=ruby"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class Slashdot < Crawler
-       @@MATCHER_ITEM = "//article/header/h2/span/a"
-
-       def name()
-         return "Slashdot"
-       end
-
-       def url()
-         return "http://slashdot.org/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class Slashdot < Crawler
+       @@MATCHER_ITEM = "//article/header/h2/span/a"
+
+       def name()
+         return "Slashdot"
+       end
+
+       def url()
+         return "http://slashdot.org/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class StackOverflow < Crawler
-       @@MATCHER_ITEM = "//div[@class = 'summary']/h3/a"
-
-       def name()
-         return "Stackoverflow"
-       end
-
-       def url()
-         return "http://stackoverflow.com/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class StackOverflow < Crawler
+       @@MATCHER_ITEM = "//div[@class = 'summary']/h3/a"
+
+       def name()
+         return "Stackoverflow"
+       end
+
+       def url()
+         return "http://stackoverflow.com/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,35 +1,35 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class Xkcd < Crawler
-       @@MATCHER_ITEM = "//div[@id = 'comic']/img"
-
-       def name()
-         return "Xkcd"
-       end
-
-       def url()
-         return "http://xkcd.com/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |node|
-           {
-             :text => node['title'],
-             :link => URI.join(self.url, node['src']),
-           }
-         }
-       end
-
-       def extract_links(doc)
-         res = doc.xpath("//ul[@class = 'comicNav']/li/a[@accesskey = 'p']").map { |node|
-           {
-             :link => URI.join(self.url, node['href']),
-           }
-         }
-         res.uniq
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class Xkcd < Crawler
+       @@MATCHER_ITEM = "//div[@id = 'comic']/img"
+
+       def name()
+         return "Xkcd"
+       end
+
+       def url()
+         return "http://xkcd.com/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |node|
+           {
+             :text => node['title'],
+             :link => URI.join(self.url, node['src']),
+           }
+         }
+       end
+
+       def extract_links(doc)
+         res = doc.xpath("//ul[@class = 'comicNav']/li/a[@accesskey = 'p']").map { |node|
+           {
+             :link => URI.join(self.url, node['href']),
+           }
+         }
+         res.uniq
+       end
+     end
+   end # Crawlers
+ end # Apollo
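
The Xkcd crawler above is the only one in this changeset that also implements extract_links, returning the link to the previous comic so a crawler can walk backwards through the archive. Below is a hedged sketch of a traversal loop built on that hook; the fetch step, the no-argument constructor, and the fixed depth are assumptions, since the gem's own driver loop is not part of this diff.

# Illustrative traversal over extract_links; not the gem's own crawl loop.
require 'nokogiri'
require 'open-uri'
require 'uri'

crawler = Apollo::Crawlers::Xkcd.new
url = crawler.url
3.times do                             # arbitrary depth, for illustration only
  doc   = Nokogiri::HTML(URI.open(url))
  data  = crawler.extract_data(doc)    # [{ :text => comic title, :link => image URI }]
  links = crawler.extract_links(doc)   # [{ :link => previous-comic URI }]
  data.each { |d| puts "#{d[:text]} -> #{d[:link]}" }
  break if links.empty?
  url = links.first[:link].to_s        # follow the 'previous' link
end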
@@ -1,26 +1,26 @@
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
-
- module Apollo
-   module Crawlers
-     class HackerNews < Crawler
-       @@MATCHER_ITEM = "//td[@class = 'title']/a"
-
-       def name()
-         return "Hacker News"
-       end
-
-       def url()
-         return "http://news.ycombinator.com/"
-       end
-
-       def extract_data(doc)
-         res = doc.xpath(@@MATCHER_ITEM).map { |i|
-           {
-             :text => i.text,
-             :link => URI.join(self.url, i['href'])
-           }
-         }
-       end
-     end
-   end # Crawlers
- end # Apollo
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
+
+ module Apollo
+   module Crawlers
+     class HackerNews < Crawler
+       @@MATCHER_ITEM = "//td[@class = 'title']/a"
+
+       def name()
+         return "Hacker News"
+       end
+
+       def url()
+         return "http://news.ycombinator.com/"
+       end
+
+       def extract_data(doc)
+         res = doc.xpath(@@MATCHER_ITEM).map { |i|
+           {
+             :text => i.text,
+             :link => URI.join(self.url, i['href'])
+           }
+         }
+       end
+     end
+   end # Crawlers
+ end # Apollo
@@ -1,6 +1,6 @@
- module Apollo
-   module Formatters
-     class Formatter
-     end # Formatter
-   end # Formatters
- end # Apollo
+ module Apollo
+   module Formatters
+     class Formatter
+     end # Formatter
+   end # Formatters
+ end # Apollo