apollo-crawler 0.1.22 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- MjNkN2YzMjExMDc4Mjg5ZDNhNGMxYTY0MTY1NjczMDQ0MzE4YmFlOQ==
5
- data.tar.gz: !binary |-
6
- ZWVlM2UyZjhhZDM4Y2ExZGExNzkzNjNjNjNkNTI4OTRkOTEzY2Y5NA==
7
- !binary "U0hBNTEy":
8
- metadata.gz: !binary |-
9
- ZDc0YmI0MjEyNTgzNTM0NmY2NGE2YTY0Y2MxNTdiNTBkN2M3ZDM2MDdmNjZm
10
- MjNhZDVlNmRkNDdkMDVhNzhjNzg0ZDQ2ZjRkYThhNzJlODMxNjE3NmE4MjVm
11
- ZDg0ZjFlZWFjM2I0NWQ0ZmIzYmM5ZmY0MTRiOGE2YTMwZDVmYzE=
12
- data.tar.gz: !binary |-
13
- N2I5ZGM1NjI2M2QxN2FmMDZkMThkOGU5NDE5MTEyOTNlZWFkNGQ1N2FlZjFi
14
- MTI5ZGNhNjdmZTAyZjAyYTVkZWFlNGJmZDk5YzA3ZjlhN2Q5MTc1NTIyNGVi
15
- M2VjODgxNzQyYTAxNzQ5NWQ5MTQzZjUxNWY5MWZlNDQzNjg2YmQ=
2
+ SHA1:
3
+ metadata.gz: 71fb379b6ae32ceb79e40cce451c8a3646278d32
4
+ data.tar.gz: 08fdec629298945a86993b91be3a743b880ad4a0
5
+ SHA512:
6
+ metadata.gz: d0789d2ef99358144c90d148c378d9bf53e3084b326ede425ed7ff171ab450a4ed9de7a86494dbf49992ed235807f92e6a795cc52676bd61280a006408fc4e90
7
+ data.tar.gz: 16ac99fc7e192fe137348c6d364e730c9c0071f274f200b395746c4247bb3958c7238b315b571a730014b5f8286602bbc67eb9f26db30c217440e15401d3ac95
@@ -0,0 +1,30 @@
1
+ #! /usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
24
+ require "rubygems"
25
+ require "bundler/setup"
26
+
27
+ require File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler")
28
+
29
+ Apollo::ConsoleProgram.new.run(ARGV)
30
+
data/config/amqp.yml CHANGED
@@ -2,17 +2,18 @@ default: &default
2
2
  host: localhost
3
3
  username: guest
4
4
  password: guest
5
+ port: 5672
5
6
 
6
7
  development:
7
8
  <<: *default
8
- vhost: apollo-crawler-development
9
+ vhost: /apollo-crawler-development
9
10
 
10
11
  test:
11
12
  <<: *default
12
13
  host: apollo-crawler.no-ip.org
13
- vhost: apollo-crawler-test
14
+ vhost: /apollo-crawler-test
14
15
 
15
16
  production:
16
17
  <<: *default
17
18
  host: apollo-crawler.no-ip.org
18
- vhost: apollo-crawler-production
19
+ vhost: /apollo-crawler-production
data/config/deploy.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'capistrano'
2
+
3
+ require "rubygems"
4
+ require "bundler/setup"
5
+ require "bundler/capistrano"
6
+
7
+ # RVM integration
8
+ require "rvm/capistrano"
9
+
10
+ # Target ruby version
11
+ set :rvm_ruby_string, '2.0.0'
12
+
13
+ set :domain, "apollo-crawler.no-ip.org"
14
+ set :application, "apollo_platform"
15
+ # set :deploy_to, File.join(File.expand_path("~"), "/apps/#{application}")
16
+ set :deploy_to, "/home/ubuntu/apps/#{application}"
17
+
18
+ ssh_options[:keys] = [File.join(ENV["HOME"], ".ssh", "key-webs.pem")]
19
+
20
+ set :user, "ubuntu"
21
+ set :use_sudo, false
22
+
23
+ set :scm, :git
24
+ set :repository, "https://github.com/korczis/apollo-crawler.git"
25
+ set :branch, 'master'
26
+ set :git_shallow_clone, 1
27
+
28
+ role :web, domain
29
+ role :app, domain
30
+ role :db, domain, :primary => true
31
+
32
+ set :deploy_via, :remote_cache
33
+
34
+ namespace :deploy do
35
+ def remote_cmd(cmd)
36
+ run "cd #{deploy_to}/current && #{cmd}"
37
+ end
38
+
39
+ task :start, :roles => [:web, :app] do
40
+ puts "Starting.."
41
+ remote_cmd "./bin/apollo-platform -V"
42
+ end
43
+
44
+ task :stop, :roles => [:web, :app] do
45
+ puts "Stopping.."
46
+ end
47
+
48
+ task :status, :roles => [:web, :app] do
49
+ puts "Statusing.."
50
+ end
51
+
52
+ task :restart, :roles => [:web, :app] do
53
+ puts "Restarting.."
54
+ end
55
+
56
+ # This will make sure that Capistrano doesn't try to run rake:migrate (this is not a Rails project!)
57
+ task :cold do
58
+ deploy.update
59
+ deploy.start
60
+ end
61
+ end
data/config/mongoid.yml CHANGED
@@ -1,23 +1,26 @@
1
- default: &default
1
+ default: &default_options
2
2
  sessions:
3
3
  default:
4
4
  hosts:
5
5
  - apollo-crawler.no-ip.org:27017
6
6
 
7
7
  development:
8
- <<: *default
9
8
  sessions:
10
9
  default:
10
+ hosts:
11
+ - localhost:27017
11
12
  database: apollo-crawler-development
12
13
 
13
14
  test:
14
- <<: *default
15
15
  sessions:
16
16
  default:
17
- database: apollo-crawler-test
17
+ hosts:
18
+ - apollo-crawler.no-ip.org:27017
19
+ database: apollo-crawler-test
18
20
 
19
21
  production:
20
- <<: *default
21
22
  sessions:
22
23
  default:
23
- database: apollo-crawler-production
24
+ hosts:
25
+ - apollo-crawler.no-ip.org:27017
26
+ database: apollo-crawler-production
@@ -48,6 +48,9 @@ require File.join(File.dirname(__FILE__), 'apollo_crawler/helper/helpers')
48
48
  # Loggers
49
49
  require File.join(File.dirname(__FILE__), 'apollo_crawler/logger/loggers')
50
50
 
51
+ # Models
52
+ require File.join(File.dirname(__FILE__), 'apollo_crawler/model/models')
53
+
51
54
  # Planner
52
55
  require File.join(File.dirname(__FILE__), 'apollo_crawler/planner/planners')
53
56
 
@@ -18,4 +18,5 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
- require File.join(File.dirname(__FILE__), 'base_agent')
21
+ require File.join(File.dirname(__FILE__), 'base_agent')
22
+ require File.join(File.dirname(__FILE__), 'fetcher_agent')
@@ -21,6 +21,9 @@
21
21
  module Apollo
22
22
  module Agent
23
23
  class BaseAgent
24
+ def run(amqp)
25
+ return 0
26
+ end
24
27
  end # class BaseAgent
25
28
  end # module Agent
26
29
  end # module Apollo
@@ -0,0 +1,55 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_agent')
22
+ require File.join(File.dirname(__FILE__), '../fetcher/fetchers')
23
+
24
+ module Apollo
25
+ module Agent
26
+ class FetcherAgent < BaseAgent
27
+ attr_accessor :fetcher
28
+
29
+ def initialize(amqp, opts={})
30
+ self.fetcher = Apollo::Fetcher::SmartFetcher.new
31
+
32
+ if(opts[:verbose])
33
+ puts "Initializing fetcher agent..."
34
+ end
35
+
36
+ ch = amqp.create_channel
37
+ q = ch.queue("fetcher", :auto_delete => false, :durable => true)
38
+ x = ch.default_exchange
39
+
40
+ q.subscribe do |delivery_info, metadata, payload|
41
+ res = nil
42
+
43
+ puts "Received #{payload}" if opts[:verbose]
44
+
45
+ Thread.new do |t|
46
+ queued_url = JSON.parse(payload)
47
+ # puts queued_url["url"]
48
+ # res = Apollo::Fetcher::SmartFetcher::fetch(queued_url["url"])
49
+ # puts "#{queued_url['url']} - " + res.inspect
50
+ end
51
+ end
52
+ end
53
+ end # class FetcherAgent
54
+ end # module Agent
55
+ end # module Apollo
@@ -87,8 +87,8 @@ module Apollo
87
87
  }
88
88
 
89
89
  # Caching mechanism used by default
90
- CACHE_CLASS = Apollo::Cache::SqliteCache
91
- CACHE_CLASS_OPTIONS = CACHE_CLASS_OPTIONS_MONGO
90
+ CACHE_CLASS = Apollo::Cache::MemcachedCache
91
+ CACHE_CLASS_OPTIONS = nil
92
92
 
93
93
  ############################################################
94
94
  # Crawlers - Built-in out-of box working crawlers
@@ -18,9 +18,30 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require 'amqp'
22
+ require 'bunny'
23
+ require 'thread'
24
+
21
25
  module Apollo
22
26
  module Helper
23
27
  module Amqp
28
+ def self.connect(conn, opts={})
29
+ res = nil
30
+
31
+ if(opts[:verbose])
32
+ puts "AMQP Connecting - #{conn.inspect}"
33
+ end
34
+
35
+ res = Bunny.new(:host => conn['host'], :user => conn['username'], :password => conn['password'], :vhost => conn['vhost'], :port => conn['port'])
36
+ res.start
37
+
38
+ sleep(0.001) until res
39
+ if(opts[:verbose])
40
+ puts "AMQP connected - #{res.inspect}"
41
+ end
42
+
43
+ return res
44
+ end
24
45
  end # Amqp
25
46
  end # module Helper
26
47
  end # module Apollo
@@ -24,6 +24,14 @@ class Class
24
24
  end
25
25
  end
26
26
 
27
+ class String
28
+ def to_class
29
+ self.split('::').inject(Object) do |mod, class_name|
30
+ mod.const_get(class_name)
31
+ end
32
+ end
33
+ end
34
+
27
35
  module Apollo
28
36
  module Helper
29
37
  module Core
@@ -18,9 +18,25 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require 'mongo'
22
+ require 'mongoid'
23
+
21
24
  module Apollo
22
25
  module Helper
23
26
  module Mongo
27
+ def self.connect(conn, opts={})
28
+ if(opts[:verbose])
29
+ puts "MongoDB connecting - '#{conn.inspect}"
30
+ end
31
+
32
+ res = ::Mongo::Connection.new(conn['host'])
33
+
34
+ if(opts[:verbose])
35
+ puts "MongoDB connected: #{res.inspect}"
36
+ end
37
+
38
+ return res
39
+ end
24
40
  end # Mongo
25
41
  end # module Helper
26
42
  end # module Apollo
@@ -45,6 +45,9 @@ require File.join(File.dirname(__FILE__), 'helper/helpers')
45
45
  # Loggers
46
46
  require File.join(File.dirname(__FILE__), 'logger/loggers')
47
47
 
48
+ # Models
49
+ require File.join(File.dirname(__FILE__), 'model/models')
50
+
48
51
  # Planner
49
52
  require File.join(File.dirname(__FILE__), 'planner/planners')
50
53
 
@@ -0,0 +1,29 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require 'mongo'
22
+ require 'mongoid'
23
+
24
+ module Apollo
25
+ module Model
26
+ class BaseModel
27
+ end # class BaseModel
28
+ end # module Model
29
+ end # module Apollo
@@ -0,0 +1,39 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class Crawler < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "crawlers"
30
+
31
+ field :name
32
+ field :class_name
33
+ field :source
34
+
35
+ # Indexes
36
+ index({ created_at: 1, updated_at: 1, name: 1, class_name: 1 })
37
+ end # class Crawler
38
+ end # module Model
39
+ end # module Apollo
@@ -0,0 +1,24 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+ require File.join(File.dirname(__FILE__), 'crawler')
23
+ require File.join(File.dirname(__FILE__), 'queued_url')
24
+ require File.join(File.dirname(__FILE__), 'raw_document')
@@ -0,0 +1,38 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class QueuedUrl < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "queued_urls"
30
+
31
+ field :url
32
+ field :state
33
+
34
+ # Indexes
35
+ index({ created_at: 1, updated_at: 1 })
36
+ end # class QueuedUrl
37
+ end # module Model
38
+ end # module Apollo
@@ -0,0 +1,37 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class RawDocument < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "raw_docs"
30
+
31
+ field :body
32
+
33
+ # Indexes
34
+ index({ created_at: 1, updated_at: 1 })
35
+ end # class RawDocument
36
+ end # module Model
37
+ end # module Apollo
@@ -20,7 +20,10 @@
20
20
 
21
21
  module Apollo
22
22
  module Planner
23
- class BasePlanner
23
+ class BasePlanner
24
+ def run(amqp, mongo)
25
+ return 0
26
+ end
24
27
  end # class BasePlanner
25
28
  end # module Planner
26
29
  end # module Apollo
@@ -20,9 +20,58 @@
20
20
 
21
21
  require File.join(File.dirname(__FILE__),'base_planner')
22
22
 
23
+ require File.join(File.dirname(__FILE__),'../model/models.rb')
24
+
23
25
  module Apollo
24
26
  module Planner
25
27
  class SmartPlanner < BasePlanner
28
+ attr_accessor :amqp
29
+ attr_accessor :mongo
30
+
31
+ def initialize(amqp=nil, mongo=nil)
32
+ self.amqp = amqp
33
+ self.mongo = mongo
34
+ end
35
+
36
+ def fetch_url(url, opts={})
37
+ puts "AMQP fetching '#{url.inspect}'"
38
+
39
+ ch = amqp.create_channel
40
+ x = ch.default_exchange
41
+ x.publish(url.to_json, :routing_key => "fetcher")
42
+
43
+ end
44
+
45
+ def fetch_queued_urls(opts={})
46
+ urls = Apollo::Model::QueuedUrl.where({:state => :queued})
47
+ return if urls.count < 1
48
+
49
+ if(opts[:verbose])
50
+ puts "Fetching Queued URLS"
51
+ end
52
+
53
+ puts "Count of URLs in Queue: #{urls.count}" if opts[:verbose]
54
+
55
+ urls.each do |url|
56
+ url.state = :fetching
57
+ url.save
58
+
59
+ fetch_url(url, opts)
60
+
61
+ # puts "Removing URL from Queue '#{url.inspect}'" if opts[:verbose]
62
+ end
63
+ end
64
+
65
+ def run(opts={})
66
+ request_exit = false
67
+
68
+ while request_exit == false
69
+ fetch_queued_urls(opts)
70
+ sleep 1
71
+ end
72
+
73
+ return 0
74
+ end
26
75
  end # class SmartPlanner
27
76
  end # module Planner
28
77
  end # module Apollo
@@ -20,6 +20,8 @@
20
20
 
21
21
  require 'yaml'
22
22
 
23
+ require File.join(File.dirname(__FILE__), "../model/models.rb")
24
+
23
25
  module Apollo
24
26
  class BaseProgram
25
27
  CONFIG_DIR = File.join(Apollo::BASE_DIR, "config")
@@ -33,16 +35,16 @@ module Apollo
33
35
  attr_accessor :options
34
36
  attr_accessor :optparser
35
37
 
38
+ attr_accessor :amqp
36
39
  attr_accessor :mongo
37
- attr_accessor :mongo_db
38
40
 
39
41
  def initialize
40
42
  self.config = {}
41
43
  self.options = DEFAULT_OPTIONS
42
44
  self.optparser = nil
43
45
 
46
+ self.amqp = nil
44
47
  self.mongo = nil
45
- self.mongo_db = nil
46
48
  end
47
49
 
48
50
  def self.get_config_path(config)
@@ -113,6 +115,55 @@ module Apollo
113
115
  return nil
114
116
  end
115
117
 
118
+ def init_amqp()
119
+ conn_opts = self.config["amqp"]
120
+ if(conn_opts)
121
+ self.amqp = Apollo::Helper::Amqp::connect(conn_opts, self.options)
122
+ end
123
+
124
+ return self.amqp
125
+ end
126
+
127
+ def init_mongo()
128
+ conn_opts = self.config["mongo"]
129
+ if(conn_opts)
130
+ self.mongo = Apollo::Helper::Mongo::connect(conn_opts, self.options)
131
+
132
+ # Init Mongoid
133
+ path = File.join(Apollo::BASE_DIR, "config/mongoid.yml")
134
+ Mongoid.load!(path, @options[:env])
135
+ end
136
+
137
+ return self.mongo
138
+ end
139
+
140
+ def init_seeds_crawlers(opts={})
141
+ objs = Apollo::Crawler::BaseCrawler.subclasses
142
+ objs.each do |o|
143
+ crawler = Apollo::Model::Crawler.new
144
+ i = o.new
145
+ crawler.name = i.name
146
+ crawler.class_name = o.to_s
147
+
148
+ res = Apollo::Model::Crawler.where(class_name: crawler.class_name)
149
+ # puts "RES: '#{res.inspect}'"
150
+ if(res.nil? || res.count < 1)
151
+ crawler.save
152
+ if(opts[:verbose])
153
+ puts "Adding new crawler - '#{crawler.inspect}'"
154
+ end
155
+ else
156
+ if(opts[:verbose])
157
+ puts "Using crawler - '#{res[0].inspect}'"
158
+ end
159
+ end
160
+ end
161
+ end
162
+
163
+ def init_seeds(opts={})
164
+ init_seeds_crawlers(opts)
165
+ end
166
+
116
167
  # Init program
117
168
  def init_program(args)
118
169
  res = nil
@@ -134,6 +185,12 @@ module Apollo
134
185
  # Init Mongo Connection
135
186
  init_mongo()
136
187
 
188
+ # Init AMQP
189
+ init_amqp()
190
+
191
+ # Init Seed data
192
+ init_seeds(@options)
193
+
137
194
  return nil
138
195
  end
139
196
 
@@ -45,16 +45,6 @@ require File.join(File.dirname(__FILE__), '..', 'version')
45
45
 
46
46
  require File.join(File.dirname(__FILE__),'base_program')
47
47
 
48
-
49
- # Hack
50
- class String
51
- def to_class
52
- self.split('::').inject(Object) do |mod, class_name|
53
- mod.const_get(class_name)
54
- end
55
- end
56
- end
57
-
58
48
  module Apollo
59
49
  # Apollo Crawler Base Directory
60
50
  APOLLO_CRAWLER_BASE_DIR = File.join(File.dirname(__FILE__), "..")
@@ -174,6 +164,8 @@ module Apollo
174
164
 
175
165
  opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
176
166
  @options[:crawler_dirs] << path
167
+
168
+ init_additional_crawlers([path])
177
169
  end
178
170
 
179
171
  opts.on('-n', '--doc-limit [NUM]', 'Limit count of documents to be processed') do |count|
@@ -406,6 +398,22 @@ module Apollo
406
398
  end
407
399
  end
408
400
 
401
+ def init_additional_crawlers(dirs)
402
+ # puts "Initializing additional crawlers ..."
403
+ dirs.each do |dir|
404
+ if(@options[:verbose])
405
+ puts "Registering additional crawler dir '#{dir}'"
406
+ end
407
+
408
+ Dir.glob("#{dir}/*.rb").each do |f|
409
+ if(@options[:verbose])
410
+ puts "Registering crawler '#{f}'"
411
+ end
412
+ require f
413
+ end
414
+ end
415
+ end
416
+
409
417
  # Init program
410
418
  def init_program(args)
411
419
  init_options()
@@ -80,6 +80,10 @@ module Apollo
80
80
  self.options[:env] = name
81
81
  end
82
82
 
83
+ opts.on('-d', '--daemon', 'Run Apollo Platform daemon') do
84
+ self.options[:daemon] = true
85
+ end
86
+
83
87
  opts.on('-v', '--verbose', 'Enable verbose output') do
84
88
  self.options[:verbose] = true
85
89
  end
@@ -90,6 +94,40 @@ module Apollo
90
94
  end
91
95
  end
92
96
 
97
+ def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
98
+ crawlers.each do |crawler|
99
+ i = crawler.new
100
+ puts "Queuying Crawler base URL: '#{i.url}'" if opts[:verbose]
101
+ qu = Apollo::Model::QueuedUrl.new(:url => i.url, :state => :queued)
102
+ qu.save
103
+ end
104
+ end
105
+
106
+ def init_fetchers(amqp, opts={})
107
+ fetchers = []
108
+ fetchers << Apollo::Agent::FetcherAgent.new(amqp, self.options)
109
+
110
+ enqueue_crawlers_urls(amqp, Apollo::Crawler::BaseCrawler.subclasses, opts)
111
+
112
+ # ch = self.amqp.create_channel
113
+ # x = ch.default_exchange
114
+ # x.publish("Hello!", :routing_key => "fetcher")
115
+ end
116
+
117
+ def init_agents(amqp, opts={})
118
+ puts "Initializing agents"
119
+
120
+ init_fetchers(amqp, opts)
121
+ end
122
+
123
+ def init_program(args)
124
+ res = super(args)
125
+ return res unless res.nil?
126
+
127
+ init_agents(self.amqp, self.options)
128
+ return nil
129
+ end
130
+
93
131
  def process_options(args)
94
132
  if(self.options[:version])
95
133
  puts Apollo::VERSION
@@ -105,30 +143,21 @@ module Apollo
105
143
  return nil
106
144
  end
107
145
 
108
- def init_mongo()
109
- self.mongo = Mongo::Connection.new(self.config['mongo']['host'])
110
- self.mongo_db = self.mongo.db(self.config['mongo']['db'])
111
-
112
- if(self.options[:verbose])
113
- puts "(Mongo) Connection Inited: #{self.mongo.inspect}"
114
- puts "(Mongo) Database Inited: #{self.mongo_db.inspect}"
115
- end
116
-
117
- return self.mongo
118
- end
119
-
120
146
  # Run Program
121
147
  def run(args = ARGV)
122
148
  res = super(args)
123
149
  return res unless res.nil?
124
-
125
- # Print classes
126
- # puts Apollo::Crawler::BaseCrawler.subclasses.inspect
127
150
 
128
151
  # Here we start
129
- if(ARGV.length < 1)
130
- puts optparser
131
- return 0
152
+ # if(ARGV.length < 1)
153
+ # puts optparser
154
+ # return 0
155
+ # end
156
+
157
+ res_code = 0
158
+ if(self.options[:daemon])
159
+ planner = Apollo::Planner::SmartPlanner.new(self.amqp, self.mongo)
160
+ res_code = planner.run(self.options)
132
161
  end
133
162
 
134
163
  return request_exit(res_code)
@@ -19,5 +19,5 @@
19
19
  # THE SOFTWARE.
20
20
 
21
21
  module Apollo
22
- VERSION = '0.1.22'
22
+ VERSION = '0.1.24'
23
23
  end # Apollo
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.22
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-04 00:00:00.000000000 Z
11
+ date: 2013-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print
@@ -28,28 +28,28 @@ dependencies:
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: 3.2.12
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.2.12
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: dalli
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ! '>='
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: 2.6.2
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ! '>='
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.6.2
55
55
  - !ruby/object:Gem::Dependency
@@ -70,42 +70,42 @@ dependencies:
70
70
  name: eventmachine
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ! '>='
73
+ - - '>='
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ! '>='
80
+ - - '>='
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: em-http-request
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ! '>='
94
+ - - '>='
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: em-synchrony
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ! '>='
101
+ - - '>='
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ! '>='
108
+ - - '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
@@ -154,56 +154,56 @@ dependencies:
154
154
  name: memcache-client
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - ! '>='
157
+ - - '>='
158
158
  - !ruby/object:Gem::Version
159
159
  version: '0'
160
160
  type: :runtime
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - ! '>='
164
+ - - '>='
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: mongo
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
- - - ! '>='
171
+ - - '>='
172
172
  - !ruby/object:Gem::Version
173
173
  version: 1.8.2
174
174
  type: :runtime
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
- - - ! '>='
178
+ - - '>='
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.8.2
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: mongoid
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - ! '>='
185
+ - - '>='
186
186
  - !ruby/object:Gem::Version
187
187
  version: 3.1.2
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - ! '>='
192
+ - - '>='
193
193
  - !ruby/object:Gem::Version
194
194
  version: 3.1.2
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: mime-types
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
- - - ! '>='
199
+ - - '>='
200
200
  - !ruby/object:Gem::Version
201
201
  version: '0'
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
- - - ! '>='
206
+ - - '>='
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
209
  - !ruby/object:Gem::Dependency
@@ -224,70 +224,70 @@ dependencies:
224
224
  name: openurl
225
225
  requirement: !ruby/object:Gem::Requirement
226
226
  requirements:
227
- - - ! '>='
227
+ - - '>='
228
228
  - !ruby/object:Gem::Version
229
229
  version: 0.4.2
230
230
  type: :runtime
231
231
  prerelease: false
232
232
  version_requirements: !ruby/object:Gem::Requirement
233
233
  requirements:
234
- - - ! '>='
234
+ - - '>='
235
235
  - !ruby/object:Gem::Version
236
236
  version: 0.4.2
237
237
  - !ruby/object:Gem::Dependency
238
238
  name: parallel
239
239
  requirement: !ruby/object:Gem::Requirement
240
240
  requirements:
241
- - - ! '>='
241
+ - - '>='
242
242
  - !ruby/object:Gem::Version
243
243
  version: 0.6.2
244
244
  type: :runtime
245
245
  prerelease: false
246
246
  version_requirements: !ruby/object:Gem::Requirement
247
247
  requirements:
248
- - - ! '>='
248
+ - - '>='
249
249
  - !ruby/object:Gem::Version
250
250
  version: 0.6.2
251
251
  - !ruby/object:Gem::Dependency
252
252
  name: rack
253
253
  requirement: !ruby/object:Gem::Requirement
254
254
  requirements:
255
- - - ! '>='
255
+ - - '>='
256
256
  - !ruby/object:Gem::Version
257
257
  version: 1.5.2
258
258
  type: :runtime
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
- - - ! '>='
262
+ - - '>='
263
263
  - !ruby/object:Gem::Version
264
264
  version: 1.5.2
265
265
  - !ruby/object:Gem::Dependency
266
266
  name: right_aws
267
267
  requirement: !ruby/object:Gem::Requirement
268
268
  requirements:
269
- - - ! '>='
269
+ - - '>='
270
270
  - !ruby/object:Gem::Version
271
271
  version: 3.0.5
272
272
  type: :runtime
273
273
  prerelease: false
274
274
  version_requirements: !ruby/object:Gem::Requirement
275
275
  requirements:
276
- - - ! '>='
276
+ - - '>='
277
277
  - !ruby/object:Gem::Version
278
278
  version: 3.0.5
279
279
  - !ruby/object:Gem::Dependency
280
280
  name: right_http_connection
281
281
  requirement: !ruby/object:Gem::Requirement
282
282
  requirements:
283
- - - ! '>='
283
+ - - '>='
284
284
  - !ruby/object:Gem::Version
285
285
  version: 1.3.0
286
286
  type: :runtime
287
287
  prerelease: false
288
288
  version_requirements: !ruby/object:Gem::Requirement
289
289
  requirements:
290
- - - ! '>='
290
+ - - '>='
291
291
  - !ruby/object:Gem::Version
292
292
  version: 1.3.0
293
293
  - !ruby/object:Gem::Dependency
@@ -336,146 +336,155 @@ dependencies:
336
336
  name: ffi
337
337
  requirement: !ruby/object:Gem::Requirement
338
338
  requirements:
339
- - - ! '>='
339
+ - - '>='
340
340
  - !ruby/object:Gem::Version
341
341
  version: 1.4.0
342
342
  type: :runtime
343
343
  prerelease: false
344
344
  version_requirements: !ruby/object:Gem::Requirement
345
345
  requirements:
346
- - - ! '>='
346
+ - - '>='
347
347
  - !ruby/object:Gem::Version
348
348
  version: 1.4.0
349
349
  - !ruby/object:Gem::Dependency
350
350
  name: guard
351
351
  requirement: !ruby/object:Gem::Requirement
352
352
  requirements:
353
- - - ! '>='
353
+ - - '>='
354
354
  - !ruby/object:Gem::Version
355
355
  version: 1.6.2
356
356
  type: :development
357
357
  prerelease: false
358
358
  version_requirements: !ruby/object:Gem::Requirement
359
359
  requirements:
360
- - - ! '>='
360
+ - - '>='
361
361
  - !ruby/object:Gem::Version
362
362
  version: 1.6.2
363
363
  - !ruby/object:Gem::Dependency
364
364
  name: guard-rake
365
365
  requirement: !ruby/object:Gem::Requirement
366
366
  requirements:
367
- - - ! '>='
367
+ - - '>='
368
368
  - !ruby/object:Gem::Version
369
369
  version: 0.0.7
370
370
  type: :development
371
371
  prerelease: false
372
372
  version_requirements: !ruby/object:Gem::Requirement
373
373
  requirements:
374
- - - ! '>='
374
+ - - '>='
375
375
  - !ruby/object:Gem::Version
376
376
  version: 0.0.7
377
377
  - !ruby/object:Gem::Dependency
378
378
  name: guard-rspec
379
379
  requirement: !ruby/object:Gem::Requirement
380
380
  requirements:
381
- - - ! '>='
381
+ - - '>='
382
382
  - !ruby/object:Gem::Version
383
383
  version: 2.5.0
384
384
  type: :development
385
385
  prerelease: false
386
386
  version_requirements: !ruby/object:Gem::Requirement
387
387
  requirements:
388
- - - ! '>='
388
+ - - '>='
389
389
  - !ruby/object:Gem::Version
390
390
  version: 2.5.0
391
391
  description: Gem for crawling data from external sources
392
392
  email: korczis@gmail.com
393
393
  executables:
394
+ - apollo-console
394
395
  - apollo-crawler
395
396
  - apollo-platform
396
397
  extensions: []
397
398
  extra_rdoc_files: []
398
399
  files:
399
- - ./config/mongo.yml.default
400
- - ./config/mongo.yml
401
- - ./config/memcached.yml
402
- - ./config/mongoid.yml
403
- - ./config/apollo.yml.default
400
+ - ./config/amqp.yml
404
401
  - ./config/amqp.yml.default
402
+ - ./config/apollo.yml
403
+ - ./config/apollo.yml.default
404
+ - ./config/deploy.rb
405
+ - ./config/memcached.yml
405
406
  - ./config/memcached.yml.default
406
- - ./config/amqp.yml
407
+ - ./config/mongo.yml
408
+ - ./config/mongo.yml.default
409
+ - ./config/mongoid.yml
407
410
  - ./config/mongoid.yml.default
408
- - ./config/apollo.yml
409
- - ./lib/apollo_crawler/fetcher/smart_fetcher.rb
410
- - ./lib/apollo_crawler/fetcher/fetchers.rb
411
- - ./lib/apollo_crawler/fetcher/simple_fetcher.rb
412
- - ./lib/apollo_crawler/fetcher/base_fetcher.rb
413
- - ./lib/apollo_crawler/planner/base_planner.rb
414
- - ./lib/apollo_crawler/planner/planners.rb
415
- - ./lib/apollo_crawler/planner/smart_planner.rb
416
- - ./lib/apollo_crawler/lib.rb
417
- - ./lib/apollo_crawler/version.rb
418
- - ./lib/apollo_crawler/program/console_program.rb
419
- - ./lib/apollo_crawler/program/platform_program.rb
420
- - ./lib/apollo_crawler/program/crawler_program.rb
421
- - ./lib/apollo_crawler/program/base_program.rb
422
- - ./lib/apollo_crawler/program/programs.rb
423
- - ./lib/apollo_crawler/logger/console_logger.rb
424
- - ./lib/apollo_crawler/logger/base_logger.rb
425
- - ./lib/apollo_crawler/logger/loggers.rb
426
- - ./lib/apollo_crawler/helper/core_helper.rb
427
- - ./lib/apollo_crawler/helper/amqp_helper.rb
428
- - ./lib/apollo_crawler/helper/helpers.rb
429
- - ./lib/apollo_crawler/helper/mongo_helper.rb
411
+ - ./lib/apollo_crawler.rb
430
412
  - ./lib/apollo_crawler/adapter/adapters.rb
431
- - ./lib/apollo_crawler/adapter/mongo_adapter.rb
432
413
  - ./lib/apollo_crawler/adapter/amqp_adapter.rb
433
- - ./lib/apollo_crawler/config.rb
434
- - ./lib/apollo_crawler/cache/sqlite_cache.rb
414
+ - ./lib/apollo_crawler/adapter/mongo_adapter.rb
415
+ - ./lib/apollo_crawler/agent/agents.rb
416
+ - ./lib/apollo_crawler/agent/base_agent.rb
417
+ - ./lib/apollo_crawler/agent/fetcher_agent.rb
418
+ - ./lib/apollo_crawler/cache/base_cache.rb
419
+ - ./lib/apollo_crawler/cache/caches.rb
435
420
  - ./lib/apollo_crawler/cache/factory.rb
436
- - ./lib/apollo_crawler/cache/null_cache.rb
421
+ - ./lib/apollo_crawler/cache/memcached_cache.rb
437
422
  - ./lib/apollo_crawler/cache/memory_cache.rb
438
- - ./lib/apollo_crawler/cache/base_cache.rb
439
423
  - ./lib/apollo_crawler/cache/mongo_cache.rb
440
- - ./lib/apollo_crawler/cache/memcached_cache.rb
441
- - ./lib/apollo_crawler/cache/caches.rb
442
- - ./lib/apollo_crawler/crawler/xkcd_crawler.rb
443
- - ./lib/apollo_crawler/crawler/google_crawler.rb
444
- - ./lib/apollo_crawler/crawler/youjizz_crawler.rb
445
- - ./lib/apollo_crawler/crawler/slashdot_crawler.rb
446
- - ./lib/apollo_crawler/crawler/hacker_news_crawler.rb
424
+ - ./lib/apollo_crawler/cache/null_cache.rb
425
+ - ./lib/apollo_crawler/cache/sqlite_cache.rb
426
+ - ./lib/apollo_crawler/config.rb
447
427
  - ./lib/apollo_crawler/crawler/base_crawler.rb
448
428
  - ./lib/apollo_crawler/crawler/crawlers.rb
429
+ - ./lib/apollo_crawler/crawler/google_crawler.rb
430
+ - ./lib/apollo_crawler/crawler/hacker_news_crawler.rb
431
+ - ./lib/apollo_crawler/crawler/slashdot_crawler.rb
449
432
  - ./lib/apollo_crawler/crawler/stackoverflow_crawler.rb
433
+ - ./lib/apollo_crawler/crawler/xkcd_crawler.rb
434
+ - ./lib/apollo_crawler/crawler/youjizz_crawler.rb
450
435
  - ./lib/apollo_crawler/env.rb
451
- - ./lib/apollo_crawler/agent/agents.rb
452
- - ./lib/apollo_crawler/agent/base_agent.rb
453
- - ./lib/apollo_crawler/formatter/table_formatter.rb
436
+ - ./lib/apollo_crawler/fetcher/base_fetcher.rb
437
+ - ./lib/apollo_crawler/fetcher/fetchers.rb
438
+ - ./lib/apollo_crawler/fetcher/simple_fetcher.rb
439
+ - ./lib/apollo_crawler/fetcher/smart_fetcher.rb
454
440
  - ./lib/apollo_crawler/formatter/base_formatter.rb
441
+ - ./lib/apollo_crawler/formatter/formatters.rb
455
442
  - ./lib/apollo_crawler/formatter/json_formatter.rb
456
443
  - ./lib/apollo_crawler/formatter/plain_formatter.rb
457
- - ./lib/apollo_crawler/formatter/formatters.rb
458
- - ./lib/apollo_crawler/store/stores.rb
444
+ - ./lib/apollo_crawler/formatter/table_formatter.rb
445
+ - ./lib/apollo_crawler/helper/amqp_helper.rb
446
+ - ./lib/apollo_crawler/helper/core_helper.rb
447
+ - ./lib/apollo_crawler/helper/helpers.rb
448
+ - ./lib/apollo_crawler/helper/mongo_helper.rb
449
+ - ./lib/apollo_crawler/lib.rb
450
+ - ./lib/apollo_crawler/logger/base_logger.rb
451
+ - ./lib/apollo_crawler/logger/console_logger.rb
452
+ - ./lib/apollo_crawler/logger/loggers.rb
453
+ - ./lib/apollo_crawler/model/base_model.rb
454
+ - ./lib/apollo_crawler/model/crawler.rb
455
+ - ./lib/apollo_crawler/model/models.rb
456
+ - ./lib/apollo_crawler/model/queued_url.rb
457
+ - ./lib/apollo_crawler/model/raw_document.rb
458
+ - ./lib/apollo_crawler/planner/base_planner.rb
459
+ - ./lib/apollo_crawler/planner/planners.rb
460
+ - ./lib/apollo_crawler/planner/smart_planner.rb
461
+ - ./lib/apollo_crawler/program/base_program.rb
462
+ - ./lib/apollo_crawler/program/console_program.rb
463
+ - ./lib/apollo_crawler/program/crawler_program.rb
464
+ - ./lib/apollo_crawler/program/platform_program.rb
465
+ - ./lib/apollo_crawler/program/programs.rb
459
466
  - ./lib/apollo_crawler/store/base_store.rb
460
- - ./lib/apollo_crawler.rb
467
+ - ./lib/apollo_crawler/store/stores.rb
468
+ - ./lib/apollo_crawler/version.rb
469
+ - bin/apollo-console
461
470
  - bin/apollo-crawler
462
471
  - bin/apollo-platform
463
472
  homepage: http://apollocrawler.com/
464
473
  licenses:
465
474
  - MIT
466
475
  metadata: {}
467
- post_install_message: Thanks for installing Apollo Crawler!
476
+ post_install_message: Thank you for installing Apollo Crawler!
468
477
  rdoc_options: []
469
478
  require_paths:
470
479
  - lib
471
480
  required_ruby_version: !ruby/object:Gem::Requirement
472
481
  requirements:
473
- - - ! '>='
482
+ - - '>='
474
483
  - !ruby/object:Gem::Version
475
484
  version: 1.9.3
476
485
  required_rubygems_version: !ruby/object:Gem::Requirement
477
486
  requirements:
478
- - - ! '>='
487
+ - - '>='
479
488
  - !ruby/object:Gem::Version
480
489
  version: 1.8.11
481
490
  requirements: []