apollo-crawler 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/bin/apollo-crawler +410 -405
- data/lib/apollo_crawler.rb +20 -20
- data/lib/apollo_crawler/cache.rb +34 -34
- data/lib/apollo_crawler/caches/factory.rb +30 -30
- data/lib/apollo_crawler/caches/filesystem_cache.rb +34 -34
- data/lib/apollo_crawler/caches/memory_cache.rb +43 -43
- data/lib/apollo_crawler/caches/null_cache.rb +30 -30
- data/lib/apollo_crawler/crawler.rb +154 -127
- data/lib/apollo_crawler/crawler_template.rb +24 -24
- data/lib/apollo_crawler/crawlers/google_com/google.rb +40 -26
- data/lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb +40 -26
- data/lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb +44 -26
- data/lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb +35 -35
- data/lib/apollo_crawler/crawlers/ycombinator_com/hacker_news.rb +44 -26
- data/lib/apollo_crawler/formatter.rb +6 -6
- data/lib/apollo_crawler/formatters/formatter_json.rb +17 -17
- data/lib/apollo_crawler/formatters/formatter_plain.rb +17 -17
- data/lib/apollo_crawler/formatters/formatter_table.rb +35 -33
- data/lib/apollo_crawler/version.rb +2 -2
- metadata +12 -14
- data/lib/apollo_crawler/crawlers/alexa_com/alexa.rb +0 -26
- data/lib/apollo_crawler/crawlers/firmy_cz/firmy.rb +0 -26
@@ -1,33 +1,35 @@
|
|
1
|
-
require 'terminal-table'
|
2
|
-
|
3
|
-
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
-
|
5
|
-
module Apollo
|
6
|
-
module Formatters
|
7
|
-
class Table < Formatter
|
8
|
-
def format(obj)
|
9
|
-
return Table.format(obj)
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.format(obj)
|
13
|
-
headings = []
|
14
|
-
if(obj[:data].length > 0)
|
15
|
-
headings = obj[:data][0].keys
|
16
|
-
end
|
17
|
-
|
18
|
-
rows = []
|
19
|
-
obj[:data].each do |line|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
end
|
1
|
+
require 'terminal-table'
|
2
|
+
|
3
|
+
require File.join(File.dirname(__FILE__), '..', 'formatter')
|
4
|
+
|
5
|
+
module Apollo
|
6
|
+
module Formatters
|
7
|
+
class Table < Formatter
|
8
|
+
def format(obj)
|
9
|
+
return Table.format(obj)
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.format(obj)
|
13
|
+
headings = []
|
14
|
+
if(obj[:data].length > 0)
|
15
|
+
headings = obj[:data][0].keys
|
16
|
+
end
|
17
|
+
|
18
|
+
rows = []
|
19
|
+
obj[:data].each do |line|
|
20
|
+
next if (line.nil? || line.empty?)
|
21
|
+
|
22
|
+
data = []
|
23
|
+
headings.each do |column|
|
24
|
+
data << line[column]
|
25
|
+
end
|
26
|
+
|
27
|
+
rows << data
|
28
|
+
end
|
29
|
+
|
30
|
+
table = Terminal::Table.new :headings => headings, :rows => rows
|
31
|
+
return table
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end # Formatters
|
35
|
+
end # Apollo
|
@@ -1,3 +1,3 @@
|
|
1
|
-
module Apollo
|
2
|
-
VERSION = '0.1.4'
|
1
|
+
module Apollo
|
2
|
+
VERSION = '0.1.5'
|
3
3
|
end # Apollo
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-02-
|
11
|
+
date: 2013-02-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amqp
|
@@ -199,25 +199,23 @@ executables:
|
|
199
199
|
extensions: []
|
200
200
|
extra_rdoc_files: []
|
201
201
|
files:
|
202
|
-
- ./lib/apollo_crawler/
|
203
|
-
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
204
|
-
- ./lib/apollo_crawler/formatters/formatter_table.rb
|
205
|
-
- ./lib/apollo_crawler/version.rb
|
202
|
+
- ./lib/apollo_crawler/cache.rb
|
206
203
|
- ./lib/apollo_crawler/caches/factory.rb
|
207
|
-
- ./lib/apollo_crawler/caches/null_cache.rb
|
208
|
-
- ./lib/apollo_crawler/caches/memory_cache.rb
|
209
204
|
- ./lib/apollo_crawler/caches/filesystem_cache.rb
|
210
|
-
- ./lib/apollo_crawler/
|
205
|
+
- ./lib/apollo_crawler/caches/memory_cache.rb
|
206
|
+
- ./lib/apollo_crawler/caches/null_cache.rb
|
211
207
|
- ./lib/apollo_crawler/crawler.rb
|
212
|
-
- ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
|
213
|
-
- ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
|
214
208
|
- ./lib/apollo_crawler/crawlers/google_com/google.rb
|
215
209
|
- ./lib/apollo_crawler/crawlers/slashdot_org/slashdot.rb
|
216
|
-
- ./lib/apollo_crawler/crawlers/
|
217
|
-
- ./lib/apollo_crawler/crawlers/
|
210
|
+
- ./lib/apollo_crawler/crawlers/stackoverflow_com/stackoverflow.rb
|
211
|
+
- ./lib/apollo_crawler/crawlers/xkcd_com/xkcd.rb
|
218
212
|
- ./lib/apollo_crawler/crawlers/ycombinator_com/hacker_news.rb
|
213
|
+
- ./lib/apollo_crawler/crawler_template.rb
|
219
214
|
- ./lib/apollo_crawler/formatter.rb
|
220
|
-
- ./lib/apollo_crawler/
|
215
|
+
- ./lib/apollo_crawler/formatters/formatter_json.rb
|
216
|
+
- ./lib/apollo_crawler/formatters/formatter_plain.rb
|
217
|
+
- ./lib/apollo_crawler/formatters/formatter_table.rb
|
218
|
+
- ./lib/apollo_crawler/version.rb
|
221
219
|
- ./lib/apollo_crawler.rb
|
222
220
|
- bin/apollo-crawler
|
223
221
|
homepage: https://github.com/korczis/apollo-crawler
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
|
2
|
-
|
3
|
-
module Apollo
|
4
|
-
module Crawlers
|
5
|
-
class Alexa < Crawler
|
6
|
-
@@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
|
7
|
-
|
8
|
-
def name()
|
9
|
-
return "Alexa Rank"
|
10
|
-
end
|
11
|
-
|
12
|
-
def url()
|
13
|
-
return "http://www.alexa.com/"
|
14
|
-
end
|
15
|
-
|
16
|
-
def extract_data(doc)
|
17
|
-
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
18
|
-
{
|
19
|
-
:text => i.text,
|
20
|
-
:link => URI.join(self.url, i['href'])
|
21
|
-
}
|
22
|
-
}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end # Crawlers
|
26
|
-
end # Apollo
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), '..', '..', 'crawler')
|
2
|
-
|
3
|
-
module Apollo
|
4
|
-
module Crawlers
|
5
|
-
class Firmy < Crawler
|
6
|
-
@@MATCHER_ITEM = "//div[@id = 'alphabetically']/ul/li/a"
|
7
|
-
|
8
|
-
def name()
|
9
|
-
return "Firmy.cz"
|
10
|
-
end
|
11
|
-
|
12
|
-
def url()
|
13
|
-
return "http://www.firmy.cz/"
|
14
|
-
end
|
15
|
-
|
16
|
-
def extract_data(doc)
|
17
|
-
res = doc.xpath(@@MATCHER_ITEM).map { |i|
|
18
|
-
{
|
19
|
-
:text => i.text,
|
20
|
-
:link => URI.join(self.url, i['href'])
|
21
|
-
}
|
22
|
-
}
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end # Crawlers
|
26
|
-
end # Apollo
|