apollo-crawler 0.1.11 → 0.1.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/apollo_crawler.rb +1 -0
- data/lib/apollo_crawler/cache/mongo_cache.rb +54 -0
- data/lib/apollo_crawler/crawler/base_crawler.rb +17 -7
- data/lib/apollo_crawler/fetcher/base_fetcher.rb +5 -3
- data/lib/apollo_crawler/fetcher/simple_fetcher.rb +1 -1
- data/lib/apollo_crawler/fetcher/smart_fetcher.rb +2 -2
- data/lib/apollo_crawler/lib.rb +1 -0
- data/lib/apollo_crawler/program.rb +110 -75
- data/lib/apollo_crawler/version.rb +1 -1
- metadata +87 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MjkxYjlmY2NjMDYwODcxN2JmMzA1MDM3NzM5NzQ1ZWVhNDNiYWQ0MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2ViMTdhNmQ1OGM3ZTczYjIxZWU1Y2NlY2NlYWMxMDM1MDkwZjBjYg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NmNiNjMzZGFjY2ZmOTNjOTU4NTAxMGQzODZlNGYyOWI2MGQzY2YwM2Q4ZmMw
|
10
|
+
NDMzYzM2OTNkYmU2MzJiYzNhYzMwNGNmZDI0OWZiM2ZiZjJiYjFkMWExY2Rh
|
11
|
+
NTA3MDkxNTA1OTk1NWE5ZWMyNGFiZjY5ODhiMDMxZDU5NjgwZDU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWI1YWI3NjcwYzQ1NWVkZDI2ZjM3NjY5MGRjMzZkMDQxMThlMWE2MGU3MzAx
|
14
|
+
ZWE5ZWIyYWExNjdjMjYyYjUxNTU5MWZlNGI5MWUzOWYwZGI2NjQ2YTNkMTIy
|
15
|
+
YmRiNzIzNGY5ZThlNTdkMzIwODJkNjc2ZWUyNzQ5MWNlOWZlM2I=
|
data/lib/apollo_crawler.rb
CHANGED
@@ -29,6 +29,7 @@ require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/base_cache')
|
|
29
29
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/factory')
|
30
30
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/memcached_cache')
|
31
31
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/memory_cache')
|
32
|
+
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/mongo_cache')
|
32
33
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/null_cache')
|
33
34
|
|
34
35
|
# Crawlers
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require File.join(File.dirname(__FILE__), 'base_cache')
|
22
|
+
|
23
|
+
require 'mongo'
|
24
|
+
|
25
|
+
module Apollo
|
26
|
+
module Cache
|
27
|
+
class MongoCache < BaseCache
|
28
|
+
def initialize
|
29
|
+
@mongo_client = Mongo::MongoClient.new('localhost', 27017, :pool_size => 5, :pool_timeout => 5)
|
30
|
+
@db = @mongo_client['apollo-crawler']
|
31
|
+
end
|
32
|
+
|
33
|
+
# Get value associated with key from cache
|
34
|
+
def get(key, *args)
|
35
|
+
res = @db['docs'].find({:url => key})
|
36
|
+
|
37
|
+
# Not found: create, cache and return
|
38
|
+
if res.nil? || res.count < 1 && block_given?
|
39
|
+
res = yield args
|
40
|
+
return self.set(key, res)
|
41
|
+
end
|
42
|
+
|
43
|
+
return res.to_a[0]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Set value associated with key
|
47
|
+
# Return cached value
|
48
|
+
def set(key, value)
|
49
|
+
@db['docs'].insert(value)
|
50
|
+
return value
|
51
|
+
end
|
52
|
+
end # MongoCache
|
53
|
+
end # Cache
|
54
|
+
end # Apollo
|
@@ -150,7 +150,6 @@ module Apollo
|
|
150
150
|
# Format ETL result
|
151
151
|
res = {
|
152
152
|
:crawler => self.class.name,
|
153
|
-
:title => doc.title,
|
154
153
|
:data => data,
|
155
154
|
:links => links
|
156
155
|
}
|
@@ -158,6 +157,17 @@ module Apollo
|
|
158
157
|
return res
|
159
158
|
end
|
160
159
|
|
160
|
+
def self.create_metadoc(url, doc)
|
161
|
+
return {
|
162
|
+
'url' => url,
|
163
|
+
'doc' => doc.encode('utf-8'),
|
164
|
+
'hash' => Digest::SHA256.new.update(doc).hexdigest,
|
165
|
+
'created_at' => Time.now.utc,
|
166
|
+
'expires_at' => nil,
|
167
|
+
'version' => 0
|
168
|
+
}
|
169
|
+
end
|
170
|
+
|
161
171
|
# Fetch document
|
162
172
|
def fetch_document(url)
|
163
173
|
# TODO: Refactor following idiom
|
@@ -171,15 +181,15 @@ module Apollo
|
|
171
181
|
|
172
182
|
# TODO: Use some (custom-made) low-level HTTP protocol cache - just to be sure
|
173
183
|
cache = Apollo::Cache::Factory.instance.construct
|
174
|
-
|
184
|
+
metadoc = cache.get(url) do
|
175
185
|
max_attempts = 3
|
176
186
|
attempt_no = 0
|
177
187
|
success = false
|
178
188
|
|
179
|
-
|
189
|
+
doc = nil
|
180
190
|
while(attempt_no < max_attempts && success == false) do
|
181
191
|
begin
|
182
|
-
|
192
|
+
doc = BaseCrawler.fetch(url)
|
183
193
|
success = true
|
184
194
|
rescue Exception => e
|
185
195
|
puts "EXCEPTION: Unable to fetch '#{url}', reason: '#{e.to_s}'"
|
@@ -190,12 +200,12 @@ module Apollo
|
|
190
200
|
end
|
191
201
|
end
|
192
202
|
|
193
|
-
|
203
|
+
# Create metadata
|
204
|
+
BaseCrawler.create_metadoc(url, doc)
|
194
205
|
end
|
195
206
|
|
196
207
|
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so on
|
197
|
-
|
198
|
-
return doc
|
208
|
+
return Nokogiri::HTML(metadoc['doc'])
|
199
209
|
end
|
200
210
|
|
201
211
|
# Extracts data from document
|
@@ -18,14 +18,16 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
+
require "open-uri"
|
22
|
+
require "nokogiri"
|
23
|
+
|
21
24
|
module Apollo
|
22
25
|
module Fetcher
|
23
26
|
class BaseFetcher
|
24
27
|
def self.fetch(url)
|
25
28
|
# TODO: Throw exception ???
|
26
|
-
return
|
27
|
-
end
|
28
|
-
|
29
|
+
return open(url).read
|
30
|
+
end
|
29
31
|
end # class BaseFetcher
|
30
32
|
end # module Fetcher
|
31
33
|
end # module Apollo
|
@@ -38,11 +38,11 @@ module Apollo
|
|
38
38
|
sleep(diff)
|
39
39
|
end
|
40
40
|
|
41
|
-
res =
|
41
|
+
res = BaseFetcher::fetch(url)
|
42
42
|
|
43
43
|
@@LAST_FETCH = DateTime.now
|
44
44
|
return res
|
45
45
|
end
|
46
46
|
end # class SimpleFetcher
|
47
47
|
end # module SmartFetcher
|
48
|
-
end # module Apollo
|
48
|
+
end # module Apollo
|
data/lib/apollo_crawler/lib.rb
CHANGED
@@ -26,6 +26,7 @@ require File.join(File.dirname(__FILE__), 'cache/base_cache')
|
|
26
26
|
require File.join(File.dirname(__FILE__), 'cache/factory')
|
27
27
|
require File.join(File.dirname(__FILE__), 'cache/memcached_cache')
|
28
28
|
require File.join(File.dirname(__FILE__), 'cache/memory_cache')
|
29
|
+
require File.join(File.dirname(__FILE__), 'cache/mongo_cache')
|
29
30
|
require File.join(File.dirname(__FILE__), 'cache/null_cache')
|
30
31
|
|
31
32
|
# Crawlers
|
@@ -65,18 +65,23 @@ module Apollo
|
|
65
65
|
# Initialize command-line options
|
66
66
|
def init_options
|
67
67
|
@options = {}
|
68
|
+
|
68
69
|
@options[:doc_limit] = nil
|
69
70
|
@options[:verbose] = false
|
70
71
|
@options[:version] = false
|
72
|
+
|
71
73
|
@options[:cache_dirs] = [
|
72
74
|
RbConfig::CACHES_DIR
|
73
75
|
]
|
76
|
+
|
74
77
|
@options[:crawler_dirs] = [
|
75
78
|
RbConfig::CRAWLERS_DIR
|
76
79
|
]
|
80
|
+
|
77
81
|
@options[:formatter_dirs] = [
|
78
82
|
RbConfig::FORMATTERS_DIR
|
79
83
|
]
|
84
|
+
|
80
85
|
@options[:generate_crawler] = nil
|
81
86
|
|
82
87
|
@optparser = OptionParser.new do | opts |
|
@@ -139,6 +144,56 @@ module Apollo
|
|
139
144
|
@optparser.parse!(args)
|
140
145
|
end
|
141
146
|
|
147
|
+
def process_options(args)
|
148
|
+
if(@options[:version])
|
149
|
+
puts Apollo::VERSION
|
150
|
+
return 0
|
151
|
+
end
|
152
|
+
|
153
|
+
if(@options[:show_help])
|
154
|
+
puts @optparser
|
155
|
+
return 0
|
156
|
+
end
|
157
|
+
|
158
|
+
if(@options[:generate_crawler])
|
159
|
+
name = @options[:generate_crawler]
|
160
|
+
url = args.length > 0 ? args[0] : nil
|
161
|
+
matcher = args.length > 1 ? args[1] : nil
|
162
|
+
|
163
|
+
return self.generate_crawler(name, url, matcher)
|
164
|
+
end
|
165
|
+
|
166
|
+
if(@options[:list_formatters])
|
167
|
+
list_formatters()
|
168
|
+
return 0
|
169
|
+
end
|
170
|
+
|
171
|
+
if(@options[:list_crawlers])
|
172
|
+
list_crawlers()
|
173
|
+
return 0
|
174
|
+
end
|
175
|
+
|
176
|
+
return nil
|
177
|
+
end
|
178
|
+
|
179
|
+
def init_formatter()
|
180
|
+
# Set default formatter here
|
181
|
+
formatter_name = "json"
|
182
|
+
if(@options[:formatter])
|
183
|
+
formatter_name = @options[:formatter]
|
184
|
+
end
|
185
|
+
|
186
|
+
# Look for specified formatter
|
187
|
+
f = @formatters.select { |k, v|
|
188
|
+
name = formatter_name.gsub(Apollo::Formatter::BaseFormatter::name_re, "")
|
189
|
+
k.downcase == name
|
190
|
+
}
|
191
|
+
|
192
|
+
if(f)
|
193
|
+
@formatter = f[f.keys[0]]
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
142
197
|
# Load global options first
|
143
198
|
# Merge it with local options (if they exist)
|
144
199
|
def load_config_file()
|
@@ -328,96 +383,31 @@ module Apollo
|
|
328
383
|
return 0
|
329
384
|
end
|
330
385
|
|
386
|
+
# Show tabular data in form of CLI table
|
331
387
|
def self.console_table(headings, rows)
|
332
388
|
table = Terminal::Table.new :headings => headings, :rows => rows
|
333
389
|
puts table
|
334
390
|
end
|
335
391
|
|
392
|
+
# List available crawlers
|
336
393
|
def list_crawlers()
|
337
394
|
CrawlerProgram.console_table(['name', 'class'], @crawlers)
|
338
395
|
return
|
339
396
|
end
|
340
397
|
|
398
|
+
# List available formatters
|
341
399
|
def list_formatters()
|
342
400
|
CrawlerProgram.console_table(['name', 'class'], @formatters)
|
343
401
|
return
|
344
402
|
end
|
345
403
|
|
346
|
-
def
|
347
|
-
|
348
|
-
|
349
|
-
init_options()
|
350
|
-
|
351
|
-
parse_options(args)
|
352
|
-
|
353
|
-
if(@options[:version])
|
354
|
-
puts Apollo::VERSION
|
355
|
-
return 0
|
356
|
-
end
|
357
|
-
|
358
|
-
if(@options[:show_help])
|
359
|
-
puts @optparser
|
360
|
-
return 0
|
361
|
-
end
|
362
|
-
|
363
|
-
load_config_file()
|
364
|
-
|
365
|
-
if(@options[:generate_crawler])
|
366
|
-
name = @options[:generate_crawler]
|
367
|
-
url = args.length > 0 ? args[0] : nil
|
368
|
-
matcher = args.length > 1 ? args[1] : nil
|
369
|
-
|
370
|
-
return self.generate_crawler(name, url, matcher)
|
371
|
-
end
|
372
|
-
|
373
|
-
register_modules()
|
374
|
-
|
375
|
-
# Set default formatter here
|
376
|
-
formatter_name = "json"
|
377
|
-
if(@options[:formatter])
|
378
|
-
formatter_name = @options[:formatter]
|
379
|
-
end
|
380
|
-
|
381
|
-
# Look for specified formatter
|
382
|
-
f = @formatters.select { |k, v|
|
383
|
-
name = formatter_name.gsub(Apollo::Formatter::BaseFormatter::name_re, "")
|
384
|
-
k.downcase == name
|
385
|
-
}
|
386
|
-
|
387
|
-
if(f)
|
388
|
-
@formatter = f[f.keys[0]]
|
389
|
-
end
|
390
|
-
|
391
|
-
if(@options[:list_formatters])
|
392
|
-
list_formatters()
|
393
|
-
return 0
|
394
|
-
end
|
395
|
-
|
396
|
-
if(@options[:list_crawlers])
|
397
|
-
list_crawlers()
|
398
|
-
return 0
|
399
|
-
end
|
400
|
-
|
401
|
-
crawlers = []
|
402
|
-
if(args.length > 0)
|
403
|
-
crawlers << args.shift
|
404
|
-
end
|
405
|
-
|
406
|
-
if(@options[:run_all])
|
407
|
-
crawlers = @crawlers.keys
|
408
|
-
end
|
409
|
-
|
410
|
-
if(crawlers.empty?)
|
411
|
-
puts @optparser
|
412
|
-
return 0
|
413
|
-
end
|
414
|
-
|
415
|
-
crawlers.each do |crawler|
|
416
|
-
crawler_name = crawler.downcase.gsub(Apollo::Crawler::BaseCrawler.name_re, "")
|
404
|
+
def run_crawlers(crawlers, args)
|
405
|
+
crawlers.each do |name|
|
406
|
+
crawler_name = name.downcase.gsub(Apollo::Crawler::BaseCrawler.name_re, "")
|
417
407
|
|
418
|
-
|
419
|
-
if(
|
420
|
-
puts "Invalid crawler name - '#{
|
408
|
+
crawler = @crawlers[crawler_name]
|
409
|
+
if(crawler == nil)
|
410
|
+
puts "Invalid crawler name - '#{name}'"
|
421
411
|
puts "See program help"
|
422
412
|
return 0
|
423
413
|
end
|
@@ -430,7 +420,7 @@ module Apollo
|
|
430
420
|
:doc_limit => @options[:doc_limit]
|
431
421
|
}
|
432
422
|
|
433
|
-
res =
|
423
|
+
res = crawler.new.etl(args, opts) { | docs |
|
434
424
|
if(docs.nil?)
|
435
425
|
next
|
436
426
|
end
|
@@ -447,5 +437,50 @@ module Apollo
|
|
447
437
|
|
448
438
|
return 0
|
449
439
|
end
|
440
|
+
|
441
|
+
# Get crawlers passed on the command line
|
442
|
+
def get_crawlers(args)
|
443
|
+
crawlers = []
|
444
|
+
if(args.length > 0)
|
445
|
+
crawlers << args.shift
|
446
|
+
end
|
447
|
+
|
448
|
+
if(@options[:run_all])
|
449
|
+
crawlers = @crawlers.keys
|
450
|
+
end
|
451
|
+
|
452
|
+
return crawlers
|
453
|
+
end
|
454
|
+
|
455
|
+
# Init program
|
456
|
+
def init_program(args)
|
457
|
+
init_options()
|
458
|
+
|
459
|
+
parse_options(args)
|
460
|
+
|
461
|
+
load_config_file()
|
462
|
+
|
463
|
+
register_modules()
|
464
|
+
|
465
|
+
res = process_options(args)
|
466
|
+
if res != nil
|
467
|
+
return res
|
468
|
+
end
|
469
|
+
|
470
|
+
init_formatter()
|
471
|
+
end
|
472
|
+
|
473
|
+
# Run Program
|
474
|
+
def run(args = ARGV)
|
475
|
+
init_program(args)
|
476
|
+
|
477
|
+
crawlers = get_crawlers(args)
|
478
|
+
if(crawlers.empty?)
|
479
|
+
puts @optparser
|
480
|
+
return 0
|
481
|
+
end
|
482
|
+
|
483
|
+
return run_crawlers(crawlers, args)
|
484
|
+
end
|
450
485
|
end
|
451
486
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.11
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amqp
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 1.5.5
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: ffi
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: eventmachine
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +150,34 @@ dependencies:
|
|
136
150
|
- - ! '>='
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: mongo
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ! '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: mongoid
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ! '>='
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
139
181
|
- !ruby/object:Gem::Dependency
|
140
182
|
name: mime-types
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +262,48 @@ dependencies:
|
|
220
262
|
- - ! '>='
|
221
263
|
- !ruby/object:Gem::Version
|
222
264
|
version: '0'
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: guard
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - ! '>='
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '0'
|
272
|
+
type: :development
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - ! '>='
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '0'
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: guard-rake
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - ! '>='
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '0'
|
286
|
+
type: :development
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - ! '>='
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: '0'
|
293
|
+
- !ruby/object:Gem::Dependency
|
294
|
+
name: guard-rspec
|
295
|
+
requirement: !ruby/object:Gem::Requirement
|
296
|
+
requirements:
|
297
|
+
- - ! '>='
|
298
|
+
- !ruby/object:Gem::Version
|
299
|
+
version: '0'
|
300
|
+
type: :development
|
301
|
+
prerelease: false
|
302
|
+
version_requirements: !ruby/object:Gem::Requirement
|
303
|
+
requirements:
|
304
|
+
- - ! '>='
|
305
|
+
- !ruby/object:Gem::Version
|
306
|
+
version: '0'
|
223
307
|
description: Gem for crawling data from external sources
|
224
308
|
email: korczis@gmail.com
|
225
309
|
executables:
|
@@ -240,6 +324,7 @@ files:
|
|
240
324
|
- ./lib/apollo_crawler/cache/null_cache.rb
|
241
325
|
- ./lib/apollo_crawler/cache/memory_cache.rb
|
242
326
|
- ./lib/apollo_crawler/cache/base_cache.rb
|
327
|
+
- ./lib/apollo_crawler/cache/mongo_cache.rb
|
243
328
|
- ./lib/apollo_crawler/cache/memcached_cache.rb
|
244
329
|
- ./lib/apollo_crawler/crawler/xkcd_crawler.rb
|
245
330
|
- ./lib/apollo_crawler/crawler/google_crawler.rb
|