apollo-crawler 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/apollo_crawler.rb +1 -0
- data/lib/apollo_crawler/cache/mongo_cache.rb +54 -0
- data/lib/apollo_crawler/crawler/base_crawler.rb +17 -7
- data/lib/apollo_crawler/fetcher/base_fetcher.rb +5 -3
- data/lib/apollo_crawler/fetcher/simple_fetcher.rb +1 -1
- data/lib/apollo_crawler/fetcher/smart_fetcher.rb +2 -2
- data/lib/apollo_crawler/lib.rb +1 -0
- data/lib/apollo_crawler/program.rb +110 -75
- data/lib/apollo_crawler/version.rb +1 -1
- metadata +87 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MjkxYjlmY2NjMDYwODcxN2JmMzA1MDM3NzM5NzQ1ZWVhNDNiYWQ0MQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2ViMTdhNmQ1OGM3ZTczYjIxZWU1Y2NlY2NlYWMxMDM1MDkwZjBjYg==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NmNiNjMzZGFjY2ZmOTNjOTU4NTAxMGQzODZlNGYyOWI2MGQzY2YwM2Q4ZmMw
|
10
|
+
NDMzYzM2OTNkYmU2MzJiYzNhYzMwNGNmZDI0OWZiM2ZiZjJiYjFkMWExY2Rh
|
11
|
+
NTA3MDkxNTA1OTk1NWE5ZWMyNGFiZjY5ODhiMDMxZDU5NjgwZDU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWI1YWI3NjcwYzQ1NWVkZDI2ZjM3NjY5MGRjMzZkMDQxMThlMWE2MGU3MzAx
|
14
|
+
ZWE5ZWIyYWExNjdjMjYyYjUxNTU5MWZlNGI5MWUzOWYwZGI2NjQ2YTNkMTIy
|
15
|
+
YmRiNzIzNGY5ZThlNTdkMzIwODJkNjc2ZWUyNzQ5MWNlOWZlM2I=
|
data/lib/apollo_crawler.rb
CHANGED
@@ -29,6 +29,7 @@ require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/base_cache')
|
|
29
29
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/factory')
|
30
30
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/memcached_cache')
|
31
31
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/memory_cache')
|
32
|
+
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/mongo_cache')
|
32
33
|
require File.join(File.dirname(__FILE__), 'apollo_crawler/cache/null_cache')
|
33
34
|
|
34
35
|
# Crawlers
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
21
|
+
require File.join(File.dirname(__FILE__), 'base_cache')
|
22
|
+
|
23
|
+
require 'mongo'
|
24
|
+
|
25
|
+
module Apollo
|
26
|
+
module Cache
|
27
|
+
class MongoCache < BaseCache
|
28
|
+
def initialize
|
29
|
+
@mongo_client = Mongo::MongoClient.new('localhost', 27017, :pool_size => 5, :pool_timeout => 5)
|
30
|
+
@db = @mongo_client['apollo-crawler']
|
31
|
+
end
|
32
|
+
|
33
|
+
# Get value associated with key from cache
|
34
|
+
def get(key, *args)
|
35
|
+
res = @db['docs'].find({:url => key})
|
36
|
+
|
37
|
+
# Not found, Create, cache and return
|
38
|
+
if res.nil? || res.count < 1 && block_given?
|
39
|
+
res = yield args
|
40
|
+
return self.set(key, res)
|
41
|
+
end
|
42
|
+
|
43
|
+
return res.to_a[0]
|
44
|
+
end
|
45
|
+
|
46
|
+
# Set value associated with key
|
47
|
+
# Return cached value
|
48
|
+
def set(key, value)
|
49
|
+
@db['docs'].insert(value)
|
50
|
+
return value
|
51
|
+
end
|
52
|
+
end # MongoCache
|
53
|
+
end # Cache
|
54
|
+
end # Apollo
|
@@ -150,7 +150,6 @@ module Apollo
|
|
150
150
|
# Format ETL result
|
151
151
|
res = {
|
152
152
|
:crawler => self.class.name,
|
153
|
-
:title => doc.title,
|
154
153
|
:data => data,
|
155
154
|
:links => links
|
156
155
|
}
|
@@ -158,6 +157,17 @@ module Apollo
|
|
158
157
|
return res
|
159
158
|
end
|
160
159
|
|
160
|
+
def self.create_metadoc(url, doc)
|
161
|
+
return {
|
162
|
+
'url' => url,
|
163
|
+
'doc' => doc.encode('utf-8'),
|
164
|
+
'hash' => Digest::SHA256.new.update(doc).hexdigest,
|
165
|
+
'created_at' => Time.now.utc,
|
166
|
+
'expires_at' => nil,
|
167
|
+
'version' => 0
|
168
|
+
}
|
169
|
+
end
|
170
|
+
|
161
171
|
# Fetch document
|
162
172
|
def fetch_document(url)
|
163
173
|
# TODO: Refactor following idiom
|
@@ -171,15 +181,15 @@ module Apollo
|
|
171
181
|
|
172
182
|
# TODO: Use some (custom-made) low-level HTTTP Protocol cache - just for sure
|
173
183
|
cache = Apollo::Cache::Factory.instance.construct
|
174
|
-
|
184
|
+
metadoc = cache.get(url) do
|
175
185
|
max_attempts = 3
|
176
186
|
attempt_no = 0
|
177
187
|
success = false
|
178
188
|
|
179
|
-
|
189
|
+
doc = nil
|
180
190
|
while(attempt_no < max_attempts && success == false) do
|
181
191
|
begin
|
182
|
-
|
192
|
+
doc = BaseCrawler.fetch(url)
|
183
193
|
success = true
|
184
194
|
rescue Exception => e
|
185
195
|
puts "EXCEPTION: Unable to fetch '#{url}', reason: '#{e.to_s}'"
|
@@ -190,12 +200,12 @@ module Apollo
|
|
190
200
|
end
|
191
201
|
end
|
192
202
|
|
193
|
-
|
203
|
+
# Create metadata
|
204
|
+
BaseCrawler.create_metadoc(url, doc)
|
194
205
|
end
|
195
206
|
|
196
207
|
# TODO: Encapsulate and make more robust => invalid hostname, timeouts and so
|
197
|
-
|
198
|
-
return doc
|
208
|
+
return Nokogiri::HTML(metadoc['doc'])
|
199
209
|
end
|
200
210
|
|
201
211
|
# Extracts data from document
|
@@ -18,14 +18,16 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
19
|
# THE SOFTWARE.
|
20
20
|
|
21
|
+
require "open-uri"
|
22
|
+
require "nokogiri"
|
23
|
+
|
21
24
|
module Apollo
|
22
25
|
module Fetcher
|
23
26
|
class BaseFetcher
|
24
27
|
def self.fetch(url)
|
25
28
|
# TODO: Throw exception ???
|
26
|
-
return
|
27
|
-
end
|
28
|
-
|
29
|
+
return open(url).read
|
30
|
+
end
|
29
31
|
end # class BaseFetcher
|
30
32
|
end # module Fetcher
|
31
33
|
end # module Apollo
|
@@ -38,11 +38,11 @@ module Apollo
|
|
38
38
|
sleep(diff)
|
39
39
|
end
|
40
40
|
|
41
|
-
res =
|
41
|
+
res = BaseFetcher::fetch(url)
|
42
42
|
|
43
43
|
@@LAST_FETCH = DateTime.now
|
44
44
|
return res
|
45
45
|
end
|
46
46
|
end # class SimpleFetcher
|
47
47
|
end # module SmartFetcher
|
48
|
-
end # module Apollo
|
48
|
+
end # module Apollo
|
data/lib/apollo_crawler/lib.rb
CHANGED
@@ -26,6 +26,7 @@ require File.join(File.dirname(__FILE__), 'cache/base_cache')
|
|
26
26
|
require File.join(File.dirname(__FILE__), 'cache/factory')
|
27
27
|
require File.join(File.dirname(__FILE__), 'cache/memcached_cache')
|
28
28
|
require File.join(File.dirname(__FILE__), 'cache/memory_cache')
|
29
|
+
require File.join(File.dirname(__FILE__), 'cache/mongo_cache')
|
29
30
|
require File.join(File.dirname(__FILE__), 'cache/null_cache')
|
30
31
|
|
31
32
|
# Crawlers
|
@@ -65,18 +65,23 @@ module Apollo
|
|
65
65
|
# Initialize command-line options
|
66
66
|
def init_options
|
67
67
|
@options = {}
|
68
|
+
|
68
69
|
@options[:doc_limit] = nil
|
69
70
|
@options[:verbose] = false
|
70
71
|
@options[:version] = false
|
72
|
+
|
71
73
|
@options[:cache_dirs] = [
|
72
74
|
RbConfig::CACHES_DIR
|
73
75
|
]
|
76
|
+
|
74
77
|
@options[:crawler_dirs] = [
|
75
78
|
RbConfig::CRAWLERS_DIR
|
76
79
|
]
|
80
|
+
|
77
81
|
@options[:formatter_dirs] = [
|
78
82
|
RbConfig::FORMATTERS_DIR
|
79
83
|
]
|
84
|
+
|
80
85
|
@options[:generate_crawler] = nil
|
81
86
|
|
82
87
|
@optparser = OptionParser.new do | opts |
|
@@ -139,6 +144,56 @@ module Apollo
|
|
139
144
|
@optparser.parse!(args)
|
140
145
|
end
|
141
146
|
|
147
|
+
def process_options(args)
|
148
|
+
if(@options[:version])
|
149
|
+
puts Apollo::VERSION
|
150
|
+
return 0
|
151
|
+
end
|
152
|
+
|
153
|
+
if(@options[:show_help])
|
154
|
+
puts @optparser
|
155
|
+
return 0
|
156
|
+
end
|
157
|
+
|
158
|
+
if(@options[:generate_crawler])
|
159
|
+
name = @options[:generate_crawler]
|
160
|
+
url = args.length > 0 ? args[0] : nil
|
161
|
+
matcher = args.length > 1 ? args[1] : nil
|
162
|
+
|
163
|
+
return self.generate_crawler(name, url, matcher)
|
164
|
+
end
|
165
|
+
|
166
|
+
if(@options[:list_formatters])
|
167
|
+
list_formatters()
|
168
|
+
return 0
|
169
|
+
end
|
170
|
+
|
171
|
+
if(@options[:list_crawlers])
|
172
|
+
list_crawlers()
|
173
|
+
return 0
|
174
|
+
end
|
175
|
+
|
176
|
+
return nil
|
177
|
+
end
|
178
|
+
|
179
|
+
def init_formatter()
|
180
|
+
# Set default formatter here
|
181
|
+
formatter_name = "json"
|
182
|
+
if(@options[:formatter])
|
183
|
+
formatter_name = @options[:formatter]
|
184
|
+
end
|
185
|
+
|
186
|
+
# Look for specified formatter
|
187
|
+
f = @formatters.select { |k, v|
|
188
|
+
name = formatter_name.gsub(Apollo::Formatter::BaseFormatter::name_re, "")
|
189
|
+
k.downcase == name
|
190
|
+
}
|
191
|
+
|
192
|
+
if(f)
|
193
|
+
@formatter = f[f.keys[0]]
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
142
197
|
# Load global options first
|
143
198
|
# Merge it with local options (if they exists)
|
144
199
|
def load_config_file()
|
@@ -328,96 +383,31 @@ module Apollo
|
|
328
383
|
return 0
|
329
384
|
end
|
330
385
|
|
386
|
+
# Show tabular data in form of CLI table
|
331
387
|
def self.console_table(headings, rows)
|
332
388
|
table = Terminal::Table.new :headings => headings, :rows => rows
|
333
389
|
puts table
|
334
390
|
end
|
335
391
|
|
392
|
+
# List available crawlers
|
336
393
|
def list_crawlers()
|
337
394
|
CrawlerProgram.console_table(['name', 'class'], @crawlers)
|
338
395
|
return
|
339
396
|
end
|
340
397
|
|
398
|
+
# List available formatters
|
341
399
|
def list_formatters()
|
342
400
|
CrawlerProgram.console_table(['name', 'class'], @formatters)
|
343
401
|
return
|
344
402
|
end
|
345
403
|
|
346
|
-
def
|
347
|
-
|
348
|
-
|
349
|
-
init_options()
|
350
|
-
|
351
|
-
parse_options(args)
|
352
|
-
|
353
|
-
if(@options[:version])
|
354
|
-
puts Apollo::VERSION
|
355
|
-
return 0
|
356
|
-
end
|
357
|
-
|
358
|
-
if(@options[:show_help])
|
359
|
-
puts @optparser
|
360
|
-
return 0
|
361
|
-
end
|
362
|
-
|
363
|
-
load_config_file()
|
364
|
-
|
365
|
-
if(@options[:generate_crawler])
|
366
|
-
name = @options[:generate_crawler]
|
367
|
-
url = args.length > 0 ? args[0] : nil
|
368
|
-
matcher = args.length > 1 ? args[1] : nil
|
369
|
-
|
370
|
-
return self.generate_crawler(name, url, matcher)
|
371
|
-
end
|
372
|
-
|
373
|
-
register_modules()
|
374
|
-
|
375
|
-
# Set default formatter here
|
376
|
-
formatter_name = "json"
|
377
|
-
if(@options[:formatter])
|
378
|
-
formatter_name = @options[:formatter]
|
379
|
-
end
|
380
|
-
|
381
|
-
# Look for specified formatter
|
382
|
-
f = @formatters.select { |k, v|
|
383
|
-
name = formatter_name.gsub(Apollo::Formatter::BaseFormatter::name_re, "")
|
384
|
-
k.downcase == name
|
385
|
-
}
|
386
|
-
|
387
|
-
if(f)
|
388
|
-
@formatter = f[f.keys[0]]
|
389
|
-
end
|
390
|
-
|
391
|
-
if(@options[:list_formatters])
|
392
|
-
list_formatters()
|
393
|
-
return 0
|
394
|
-
end
|
395
|
-
|
396
|
-
if(@options[:list_crawlers])
|
397
|
-
list_crawlers()
|
398
|
-
return 0
|
399
|
-
end
|
400
|
-
|
401
|
-
crawlers = []
|
402
|
-
if(args.length > 0)
|
403
|
-
crawlers << args.shift
|
404
|
-
end
|
405
|
-
|
406
|
-
if(@options[:run_all])
|
407
|
-
crawlers = @crawlers.keys
|
408
|
-
end
|
409
|
-
|
410
|
-
if(crawlers.empty?)
|
411
|
-
puts @optparser
|
412
|
-
return 0
|
413
|
-
end
|
414
|
-
|
415
|
-
crawlers.each do |crawler|
|
416
|
-
crawler_name = crawler.downcase.gsub(Apollo::Crawler::BaseCrawler.name_re, "")
|
404
|
+
def run_crawlers(crawlers, args)
|
405
|
+
crawlers.each do |name|
|
406
|
+
crawler_name = name.downcase.gsub(Apollo::Crawler::BaseCrawler.name_re, "")
|
417
407
|
|
418
|
-
|
419
|
-
if(
|
420
|
-
puts "Invalid crawler name - '#{
|
408
|
+
crawler = @crawlers[crawler_name]
|
409
|
+
if(crawler == nil)
|
410
|
+
puts "Invalid crawler name - '#{name}'"
|
421
411
|
puts "See program help"
|
422
412
|
return 0
|
423
413
|
end
|
@@ -430,7 +420,7 @@ module Apollo
|
|
430
420
|
:doc_limit => @options[:doc_limit]
|
431
421
|
}
|
432
422
|
|
433
|
-
res =
|
423
|
+
res = crawler.new.etl(args, opts) { | docs |
|
434
424
|
if(docs.nil?)
|
435
425
|
next
|
436
426
|
end
|
@@ -447,5 +437,50 @@ module Apollo
|
|
447
437
|
|
448
438
|
return 0
|
449
439
|
end
|
440
|
+
|
441
|
+
# Get crawlers passd to cmd-line
|
442
|
+
def get_crawlers(args)
|
443
|
+
crawlers = []
|
444
|
+
if(args.length > 0)
|
445
|
+
crawlers << args.shift
|
446
|
+
end
|
447
|
+
|
448
|
+
if(@options[:run_all])
|
449
|
+
crawlers = @crawlers.keys
|
450
|
+
end
|
451
|
+
|
452
|
+
return crawlers
|
453
|
+
end
|
454
|
+
|
455
|
+
# Init program
|
456
|
+
def init_program(args)
|
457
|
+
init_options()
|
458
|
+
|
459
|
+
parse_options(args)
|
460
|
+
|
461
|
+
load_config_file()
|
462
|
+
|
463
|
+
register_modules()
|
464
|
+
|
465
|
+
res = process_options(args)
|
466
|
+
if res != nil
|
467
|
+
return res
|
468
|
+
end
|
469
|
+
|
470
|
+
init_formatter()
|
471
|
+
end
|
472
|
+
|
473
|
+
# Run Program
|
474
|
+
def run(args = ARGV)
|
475
|
+
init_program(args)
|
476
|
+
|
477
|
+
crawlers = get_crawlers(args)
|
478
|
+
if(crawlers.empty?)
|
479
|
+
puts @optparser
|
480
|
+
return 0
|
481
|
+
end
|
482
|
+
|
483
|
+
return run_crawlers(crawlers, args)
|
484
|
+
end
|
450
485
|
end
|
451
486
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: amqp
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 1.5.5
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: ffi
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: eventmachine
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,6 +150,34 @@ dependencies:
|
|
136
150
|
- - ! '>='
|
137
151
|
- !ruby/object:Gem::Version
|
138
152
|
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: mongo
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ! '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ! '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: mongoid
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ! '>='
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ! '>='
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
139
181
|
- !ruby/object:Gem::Dependency
|
140
182
|
name: mime-types
|
141
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -220,6 +262,48 @@ dependencies:
|
|
220
262
|
- - ! '>='
|
221
263
|
- !ruby/object:Gem::Version
|
222
264
|
version: '0'
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: guard
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - ! '>='
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '0'
|
272
|
+
type: :development
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - ! '>='
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '0'
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: guard-rake
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - ! '>='
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: '0'
|
286
|
+
type: :development
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - ! '>='
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: '0'
|
293
|
+
- !ruby/object:Gem::Dependency
|
294
|
+
name: guard-rspec
|
295
|
+
requirement: !ruby/object:Gem::Requirement
|
296
|
+
requirements:
|
297
|
+
- - ! '>='
|
298
|
+
- !ruby/object:Gem::Version
|
299
|
+
version: '0'
|
300
|
+
type: :development
|
301
|
+
prerelease: false
|
302
|
+
version_requirements: !ruby/object:Gem::Requirement
|
303
|
+
requirements:
|
304
|
+
- - ! '>='
|
305
|
+
- !ruby/object:Gem::Version
|
306
|
+
version: '0'
|
223
307
|
description: Gem for crawling data from external sources
|
224
308
|
email: korczis@gmail.com
|
225
309
|
executables:
|
@@ -240,6 +324,7 @@ files:
|
|
240
324
|
- ./lib/apollo_crawler/cache/null_cache.rb
|
241
325
|
- ./lib/apollo_crawler/cache/memory_cache.rb
|
242
326
|
- ./lib/apollo_crawler/cache/base_cache.rb
|
327
|
+
- ./lib/apollo_crawler/cache/mongo_cache.rb
|
243
328
|
- ./lib/apollo_crawler/cache/memcached_cache.rb
|
244
329
|
- ./lib/apollo_crawler/crawler/xkcd_crawler.rb
|
245
330
|
- ./lib/apollo_crawler/crawler/google_crawler.rb
|