apollo-crawler 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/apollo-crawler +410 -405
- data/lib/apollo_crawler.rb +20 -20
- data/lib/apollo_crawler/cache.rb +34 -34
- data/lib/apollo_crawler/caches/factory.rb +30 -30
- data/lib/apollo_crawler/caches/filesystem_cache.rb +34 -34
- data/lib/apollo_crawler/caches/memory_cache.rb +43 -43
- data/lib/apollo_crawler/caches/null_cache.rb +30 -30
- data/lib/apollo_crawler/crawler.rb +154 -127
- data/lib/apollo_crawler/crawler_template.rb +24 -24
- data/lib/apollo_crawler/crawlers/google_com/google.rb +40 -26
- data/lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb +40 -26
- data/lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb +44 -26
- data/lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb +35 -35
- data/lib/apollo_crawler/crawlers/ycombinator_com/hacker_news.rb +44 -26
- data/lib/apollo_crawler/formatter.rb +6 -6
- data/lib/apollo_crawler/formatters/formatter_json.rb +17 -17
- data/lib/apollo_crawler/formatters/formatter_plain.rb +17 -17
- data/lib/apollo_crawler/formatters/formatter_table.rb +35 -33
- data/lib/apollo_crawler/version.rb +2 -2
- metadata +12 -14
- data/lib/apollo_crawler/crawlers/alexa_com/alexa.rb +0 -26
- data/lib/apollo_crawler/crawlers/firmy_cz/firmy.rb +0 -26
@@ -1,33 +1,35 @@
|
|
1
|
-
require 'terminal-table'
|
2
|
-
|
3
|
-
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
-
|
5
|
-
module Apollo
|
6
|
-
module Formatters
|
7
|
-
class Table < Formatter
|
8
|
-
def format(obj)
|
9
|
-
return Table.format(obj)
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.format(obj)
|
13
|
-
headings = []
|
14
|
-
if(obj[:data].length > 0)
|
15
|
-
headings = obj[:data][0].keys
|
16
|
-
end
|
17
|
-
|
18
|
-
rows = []
|
19
|
-
obj[:data].each do |line|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
1
|
+
require 'terminal-table'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
|
6
|
+
module Formatters
|
7
|
+
class Table < Formatter
|
8
|
+
def format(obj)
|
9
|
+
return Table.format(obj)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.format(obj)
|
13
|
+
headings = []
|
14
|
+
if(obj[:data].length > 0)
|
15
|
+
headings = obj[:data][0].keys
|
16
|
+
end
|
17
|
+
|
18
|
+
rows = []
|
19
|
+
obj[:data].each do |line|
|
20
|
+
next if (line.nil? || line.empty?)
|
21
|
+
|
22
|
+
data = []
|
23
|
+
headings.each do |column|
|
24
|
+
data << line[column]
|
25
|
+
end
|
26
|
+
|
27
|
+
rows << data
|
28
|
+
end
|
29
|
+
|
30
|
+
table = Terminal::Table.new :headings => headings, :rows => rows
|
31
|
+
return table
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end # Formatters
|
35
|
+
end # Apollo
|
@@ -1,3 +1,3 @@
|
|
1
|
-
module Apollo
|
2
|
-
VERSION = '0.1.4'
|
1
|
+
module Apollo
|
2
|
+
VERSION = '0.1.5'
|
3
3
|
end # Apollo
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-02-
|
11
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amqp
|
@@ -199,25 +199,23 @@ executables:
|
|
199
199
|
extensions: []
|
200
200
|
extra_rdoc_files: []
|
201
201
|
files:
|
202
|
-
- ./lib/apollo_crawler/
|
203
|
-
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
204
|
-
- ./lib/apollo_crawler/formatters/formatter_table.rb
|
205
|
-
- ./lib/apollo_crawler/version.rb
|
202
|
+
- ./lib/apollo_crawler/cache.rb
|
206
203
|
- ./lib/apollo_crawler/caches/factory.rb
|
207
|
-
- ./lib/apollo_crawler/caches/null_cache.rb
|
208
|
-
- ./lib/apollo_crawler/caches/memory_cache.rb
|
209
204
|
- ./lib/apollo_crawler/caches/filesystem_cache.rb
|
210
|
-
- ./lib/apollo_crawler/
|
205
|
+
- ./lib/apollo_crawler/caches/memory_cache.rb
|
206
|
+
- ./lib/apollo_crawler/caches/null_cache.rb
|
211
207
|
- ./lib/apollo_crawler/crawler.rb
|
212
|
-
- ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
|
213
|
-
- ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
|
214
208
|
- ./lib/apollo_crawler/crawlers/google_com/google.rb
|
215
209
|
- ./lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb
|
216
|
-
- ./lib/apollo_crawler/crawlers/
|
217
|
-
- ./lib/apollo_crawler/crawlers/
|
210
|
+
- ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
|
211
|
+
- ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
|
218
212
|
- ./lib/apollo_crawler/crawlers/ycombinator_com/hacker_news.rb
|
213
|
+
- ./lib/apollo_crawler/crawler_template.rb
|
219
214
|
- ./lib/apollo_crawler/formatter.rb
|
220
|
-
- ./lib/apollo_crawler/
|
215
|
+
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
216
|
+
- ./lib/apollo_crawler/formatters/formatter_plain.rb
|
217
|
+
- ./lib/apollo_crawler/formatters/formatter_table.rb
|
218
|
+
- ./lib/apollo_crawler/version.rb
|
221
219
|
- ./lib/apollo_crawler.rb
|
222
220
|
- bin/apollo-crawler
|
223
221
|
homepage: https://github.com/korczis/apollo-crawler
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
|
2
|
-
|
3
|
-
module Apollo
|
4
|
-
module Crawlers
|
5
|
-
class Alexa < Crawler
|
6
|
-
@@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
|
7
|
-
|
8
|
-
def name()
|
9
|
-
return "Alexa Rank"
|
10
|
-
end
|
11
|
-
|
12
|
-
def url()
|
13
|
-
return "http://www.alexa.com/"
|
14
|
-
end
|
15
|
-
|
16
|
-
def extract_data(doc)
|
17
|
-
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
18
|
-
{
|
19
|
-
:text => i.text,
|
20
|
-
:link => URI.join(self.url, i['href'])
|
21
|
-
}
|
22
|
-
}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end # Crawlers
|
26
|
-
end # Apollo
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
|
2
|
-
|
3
|
-
module Apollo
|
4
|
-
module Crawlers
|
5
|
-
class Firmy < Crawler
|
6
|
-
@@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
|
7
|
-
|
8
|
-
def name()
|
9
|
-
return "Firmy.cz"
|
10
|
-
end
|
11
|
-
|
12
|
-
def url()
|
13
|
-
return "http://www.firmy.cz/"
|
14
|
-
end
|
15
|
-
|
16
|
-
def extract_data(doc)
|
17
|
-
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
18
|
-
{
|
19
|
-
:text => i.text,
|
20
|
-
:link => URI.join(self.url, i['href'])
|
21
|
-
}
|
22
|
-
}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end # Crawlers
|
26
|
-
end # Apollo
|