apollo-crawler 0.1.21 → 0.1.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/apollo-crawler +0 -4
- data/bin/apollo-platform +30 -0
- data/config/amqp.yml +18 -0
- data/config/amqp.yml.default +18 -0
- data/config/apollo.yml +13 -0
- data/config/apollo.yml.default +10 -0
- data/config/memcached.yml +14 -0
- data/config/memcached.yml.default +14 -0
- data/config/mongo.yml +19 -0
- data/config/mongo.yml.default +19 -0
- data/config/mongoid.yml +23 -0
- data/config/mongoid.yml.default +59 -0
- data/lib/apollo_crawler.rb +12 -3
- data/lib/apollo_crawler/adapter/adapters.rb +22 -0
- data/lib/apollo_crawler/adapter/amqp_adapter.rb +26 -0
- data/lib/apollo_crawler/adapter/mongo_adapter.rb +26 -0
- data/lib/apollo_crawler/{cli/cli.rb → agent/agents.rb} +2 -0
- data/lib/apollo_crawler/agent/base_agent.rb +26 -0
- data/lib/apollo_crawler/cache/caches.rb +20 -0
- data/lib/apollo_crawler/cache/sqlite_cache.rb +9 -0
- data/lib/apollo_crawler/config.rb +82 -72
- data/lib/apollo_crawler/crawler/crawlers.rb +20 -0
- data/lib/apollo_crawler/crawler/google_crawler.rb +2 -2
- data/lib/apollo_crawler/crawler/hacker_news_crawler.rb +2 -2
- data/lib/apollo_crawler/crawler/slashdot_crawler.rb +2 -2
- data/lib/apollo_crawler/crawler/stackoverflow_crawler.rb +2 -2
- data/lib/apollo_crawler/crawler/xkcd_crawler.rb +2 -2
- data/lib/apollo_crawler/crawler/youjizz_crawler.rb +2 -2
- data/lib/apollo_crawler/env.rb +24 -0
- data/lib/apollo_crawler/fetcher/base_fetcher.rb +1 -1
- data/lib/apollo_crawler/fetcher/fetchers.rb +20 -0
- data/lib/apollo_crawler/fetcher/simple_fetcher.rb +1 -1
- data/lib/apollo_crawler/fetcher/smart_fetcher.rb +1 -1
- data/lib/apollo_crawler/formatter/formatters.rb +20 -0
- data/lib/apollo_crawler/formatter/json_formatter.rb +5 -1
- data/lib/apollo_crawler/formatter/plain_formatter.rb +4 -0
- data/lib/apollo_crawler/formatter/table_formatter.rb +4 -0
- data/lib/apollo_crawler/helper/amqp_helper.rb +26 -0
- data/lib/apollo_crawler/helper/core_helper.rb +24 -4
- data/lib/apollo_crawler/helper/helpers.rb +23 -1
- data/lib/apollo_crawler/helper/mongo_helper.rb +26 -0
- data/lib/apollo_crawler/lib.rb +12 -3
- data/lib/apollo_crawler/logger/loggers.rb +20 -0
- data/lib/apollo_crawler/planner/base_planner.rb +26 -0
- data/lib/apollo_crawler/planner/planners.rb +22 -0
- data/lib/apollo_crawler/planner/smart_planner.rb +28 -0
- data/lib/apollo_crawler/program/base_program.rb +130 -0
- data/lib/apollo_crawler/program/console_program.rb +177 -0
- data/lib/apollo_crawler/program/crawler_program.rb +130 -183
- data/lib/apollo_crawler/program/platform_program.rb +137 -0
- data/lib/apollo_crawler/program/programs.rb +23 -1
- data/lib/apollo_crawler/store/stores.rb +20 -0
- data/lib/apollo_crawler/version.rb +2 -2
- metadata +55 -3
@@ -1,2 +1,24 @@
|
|
1
|
+
# Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
1
21
|
require File.join(File.dirname(__FILE__), 'base_program')
|
2
|
-
require File.join(File.dirname(__FILE__), 'crawler_program')
|
22
|
+
require File.join(File.dirname(__FILE__), 'console_program')
|
23
|
+
require File.join(File.dirname(__FILE__), 'crawler_program')
|
24
|
+
require File.join(File.dirname(__FILE__), 'platform_program')
|
@@ -1 +1,21 @@
|
|
1
|
+
# Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in
|
11
|
+
# all copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
# THE SOFTWARE.
|
20
|
+
|
1
21
|
require File.join(File.dirname(__FILE__), 'base_store')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: apollo-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.21
|
4
|
+
version: 0.1.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomas Korcak
|
@@ -262,6 +262,34 @@ dependencies:
|
|
262
262
|
- - ! '>='
|
263
263
|
- !ruby/object:Gem::Version
|
264
264
|
version: 1.5.2
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: right_aws
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - ! '>='
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: 3.0.5
|
272
|
+
type: :runtime
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - ! '>='
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: 3.0.5
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: right_http_connection
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - ! '>='
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: 1.3.0
|
286
|
+
type: :runtime
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - ! '>='
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: 1.3.0
|
265
293
|
- !ruby/object:Gem::Dependency
|
266
294
|
name: terminal-table
|
267
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -364,15 +392,31 @@ description: Gem for crawling data from external sources
|
|
364
392
|
email: korczis@gmail.com
|
365
393
|
executables:
|
366
394
|
- apollo-crawler
|
395
|
+
- apollo-platform
|
367
396
|
extensions: []
|
368
397
|
extra_rdoc_files: []
|
369
398
|
files:
|
399
|
+
- ./config/mongo.yml.default
|
400
|
+
- ./config/mongo.yml
|
401
|
+
- ./config/memcached.yml
|
402
|
+
- ./config/mongoid.yml
|
403
|
+
- ./config/apollo.yml.default
|
404
|
+
- ./config/amqp.yml.default
|
405
|
+
- ./config/memcached.yml.default
|
406
|
+
- ./config/amqp.yml
|
407
|
+
- ./config/mongoid.yml.default
|
408
|
+
- ./config/apollo.yml
|
370
409
|
- ./lib/apollo_crawler/fetcher/smart_fetcher.rb
|
371
410
|
- ./lib/apollo_crawler/fetcher/fetchers.rb
|
372
411
|
- ./lib/apollo_crawler/fetcher/simple_fetcher.rb
|
373
412
|
- ./lib/apollo_crawler/fetcher/base_fetcher.rb
|
413
|
+
- ./lib/apollo_crawler/planner/base_planner.rb
|
414
|
+
- ./lib/apollo_crawler/planner/planners.rb
|
415
|
+
- ./lib/apollo_crawler/planner/smart_planner.rb
|
374
416
|
- ./lib/apollo_crawler/lib.rb
|
375
417
|
- ./lib/apollo_crawler/version.rb
|
418
|
+
- ./lib/apollo_crawler/program/console_program.rb
|
419
|
+
- ./lib/apollo_crawler/program/platform_program.rb
|
376
420
|
- ./lib/apollo_crawler/program/crawler_program.rb
|
377
421
|
- ./lib/apollo_crawler/program/base_program.rb
|
378
422
|
- ./lib/apollo_crawler/program/programs.rb
|
@@ -380,8 +424,12 @@ files:
|
|
380
424
|
- ./lib/apollo_crawler/logger/base_logger.rb
|
381
425
|
- ./lib/apollo_crawler/logger/loggers.rb
|
382
426
|
- ./lib/apollo_crawler/helper/core_helper.rb
|
427
|
+
- ./lib/apollo_crawler/helper/amqp_helper.rb
|
383
428
|
- ./lib/apollo_crawler/helper/helpers.rb
|
384
|
-
- ./lib/apollo_crawler/cli/cli.rb
|
429
|
+
- ./lib/apollo_crawler/helper/mongo_helper.rb
|
430
|
+
- ./lib/apollo_crawler/adapter/adapters.rb
|
431
|
+
- ./lib/apollo_crawler/adapter/mongo_adapter.rb
|
432
|
+
- ./lib/apollo_crawler/adapter/amqp_adapter.rb
|
385
433
|
- ./lib/apollo_crawler/config.rb
|
386
434
|
- ./lib/apollo_crawler/cache/sqlite_cache.rb
|
387
435
|
- ./lib/apollo_crawler/cache/factory.rb
|
@@ -399,6 +447,9 @@ files:
|
|
399
447
|
- ./lib/apollo_crawler/crawler/base_crawler.rb
|
400
448
|
- ./lib/apollo_crawler/crawler/crawlers.rb
|
401
449
|
- ./lib/apollo_crawler/crawler/stackoverflow_crawler.rb
|
450
|
+
- ./lib/apollo_crawler/env.rb
|
451
|
+
- ./lib/apollo_crawler/agent/agents.rb
|
452
|
+
- ./lib/apollo_crawler/agent/base_agent.rb
|
402
453
|
- ./lib/apollo_crawler/formatter/table_formatter.rb
|
403
454
|
- ./lib/apollo_crawler/formatter/base_formatter.rb
|
404
455
|
- ./lib/apollo_crawler/formatter/json_formatter.rb
|
@@ -408,7 +459,8 @@ files:
|
|
408
459
|
- ./lib/apollo_crawler/store/base_store.rb
|
409
460
|
- ./lib/apollo_crawler.rb
|
410
461
|
- bin/apollo-crawler
|
411
|
-
|
462
|
+
- bin/apollo-platform
|
463
|
+
homepage: http://apollocrawler.com/
|
412
464
|
licenses:
|
413
465
|
- MIT
|
414
466
|
metadata: {}
|