apollo-crawler 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,24 +1,24 @@
1
- module Apollo
2
- module Crawlers
3
- class CRAWLER_CLASS_NAME < Crawler
4
- @@MATCHER_ITEM = "CRAWLER_MATCHER"
5
-
6
- def name()
7
- return "CRAWLER_NAME"
8
- end
9
-
10
- def url()
11
- return "CRAWLER_URL"
12
- end
13
-
14
- def extract_data(doc)
15
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
16
- {
17
- :text => i.text,
18
- :link => URI.join(self.url, i['href'])
19
- }
20
- }
21
- end
22
- end # CRAWLER_CLASS_NAME
23
- end # Crawlers
24
- end # Apollo
1
+ module Apollo
2
+ module Crawlers
3
+ class CRAWLER_CLASS_NAME < Crawler
4
+ @@MATCHER_ITEM = "CRAWLER_MATCHER"
5
+
6
+ def name()
7
+ return "CRAWLER_NAME"
8
+ end
9
+
10
+ def url()
11
+ return "CRAWLER_URL"
12
+ end
13
+
14
+ def extract_data(doc)
15
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
16
+ {
17
+ :text => i.text,
18
+ :link => URI.join(self.url, i['href'])
19
+ }
20
+ }
21
+ end
22
+ end # CRAWLER_CLASS_NAME
23
+ end # Crawlers
24
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class Alexa < Crawler
6
- @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
7
-
8
- def name()
9
- return "Alexa Rank"
10
- end
11
-
12
- def url()
13
- return "http://www.alexa.com/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Alexa < Crawler
6
+ @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
7
+
8
+ def name()
9
+ return "Alexa Rank"
10
+ end
11
+
12
+ def url()
13
+ return "http://www.alexa.com/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class Firmy < Crawler
6
- @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
7
-
8
- def name()
9
- return "Firmy.cz"
10
- end
11
-
12
- def url()
13
- return "http://www.firmy.cz/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Firmy < Crawler
6
+ @@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
7
+
8
+ def name()
9
+ return "Firmy.cz"
10
+ end
11
+
12
+ def url()
13
+ return "http://www.firmy.cz/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class Google < Crawler
6
- @@MATCHER_ITEM = "//h3/a"
7
-
8
- def name()
9
- return "Google"
10
- end
11
-
12
- def url()
13
- return "http://www.google.com/search?q=ruby"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Google < Crawler
6
+ @@MATCHER_ITEM = "//h3/a"
7
+
8
+ def name()
9
+ return "Google"
10
+ end
11
+
12
+ def url()
13
+ return "http://www.google.com/search?q=ruby"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class Slashdot < Crawler
6
- @@MATCHER_ITEM = "//article/header/h2/span/a"
7
-
8
- def name()
9
- return "Slashdot"
10
- end
11
-
12
- def url()
13
- return"http://slashdot.org/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Slashdot < Crawler
6
+ @@MATCHER_ITEM = "//article/header/h2/span/a"
7
+
8
+ def name()
9
+ return "Slashdot"
10
+ end
11
+
12
+ def url()
13
+ return"http://slashdot.org/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class StackOverflow < Crawler
6
- @@MATCHER_ITEM = "//div[@class = 'summary']/h3/a"
7
-
8
- def name()
9
- return "Stackoverflow"
10
- end
11
-
12
- def url()
13
- return "http://stackoverflow.com/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class StackOverflow < Crawler
6
+ @@MATCHER_ITEM = "//div[@class = 'summary']/h3/a"
7
+
8
+ def name()
9
+ return "Stackoverflow"
10
+ end
11
+
12
+ def url()
13
+ return "http://stackoverflow.com/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,35 +1,35 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class Xkcd < Crawler
6
- @@MATCHER_ITEM = "//div[@id = 'comic']/img"
7
-
8
- def name()
9
- return "Xkcd"
10
- end
11
-
12
- def url()
13
- return "http://xkcd.com/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |node|
18
- {
19
- :text => node['title'],
20
- :link => URI.join(self.url, node['src']),
21
- }
22
- }
23
- end
24
-
25
- def extract_links(doc)
26
- res = doc.xpath("//ul[@class = 'comicNav']/li/a[@accesskey = 'p']").map { |node|
27
- {
28
- :link => URI.join(self.url, node['href']),
29
- }
30
- }
31
- res.uniq
32
- end
33
- end
34
- end # Crawlers
35
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class Xkcd < Crawler
6
+ @@MATCHER_ITEM = "//div[@id = 'comic']/img"
7
+
8
+ def name()
9
+ return "Xkcd"
10
+ end
11
+
12
+ def url()
13
+ return "http://xkcd.com/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |node|
18
+ {
19
+ :text => node['title'],
20
+ :link => URI.join(self.url, node['src']),
21
+ }
22
+ }
23
+ end
24
+
25
+ def extract_links(doc)
26
+ res = doc.xpath("//ul[@class = 'comicNav']/li/a[@accesskey = 'p']").map { |node|
27
+ {
28
+ :link => URI.join(self.url, node['href']),
29
+ }
30
+ }
31
+ res.uniq
32
+ end
33
+ end
34
+ end # Crawlers
35
+ end # Apollo
@@ -1,26 +1,26 @@
1
- require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
-
3
- module Apollo
4
- module Crawlers
5
- class HackerNews < Crawler
6
- @@MATCHER_ITEM = "//td[@class = 'title']/a"
7
-
8
- def name()
9
- return "Hacker News"
10
- end
11
-
12
- def url()
13
- return "http://news.ycombinator.com/"
14
- end
15
-
16
- def extract_data(doc)
17
- res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
- {
19
- :text => i.text,
20
- :link => URI.join(self.url, i['href'])
21
- }
22
- }
23
- end
24
- end
25
- end # Crawlers
26
- end # Apollo
1
+ require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
2
+
3
+ module Apollo
4
+ module Crawlers
5
+ class HackerNews < Crawler
6
+ @@MATCHER_ITEM = "//td[@class = 'title']/a"
7
+
8
+ def name()
9
+ return "Hacker News"
10
+ end
11
+
12
+ def url()
13
+ return "http://news.ycombinator.com/"
14
+ end
15
+
16
+ def extract_data(doc)
17
+ res = doc.xpath(@@MATCHER_ITEM).map { |i|
18
+ {
19
+ :text => i.text,
20
+ :link => URI.join(self.url, i['href'])
21
+ }
22
+ }
23
+ end
24
+ end
25
+ end # Crawlers
26
+ end # Apollo
@@ -1,6 +1,6 @@
1
- module Apollo
2
- module Formatters
3
- class Formatter
4
- end # Formatter
5
- end # Formatters
6
- end # Apollo
1
+ module Apollo
2
+ module Formatters
3
+ class Formatter
4
+ end # Formatter
5
+ end # Formatters
6
+ end # Apollo