apollo-crawler 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- MjNkN2YzMjExMDc4Mjg5ZDNhNGMxYTY0MTY1NjczMDQ0MzE4YmFlOQ==
5
- data.tar.gz: !binary |-
6
- ZWVlM2UyZjhhZDM4Y2ExZGExNzkzNjNjNjNkNTI4OTRkOTEzY2Y5NA==
7
- !binary "U0hBNTEy":
8
- metadata.gz: !binary |-
9
- ZDc0YmI0MjEyNTgzNTM0NmY2NGE2YTY0Y2MxNTdiNTBkN2M3ZDM2MDdmNjZm
10
- MjNhZDVlNmRkNDdkMDVhNzhjNzg0ZDQ2ZjRkYThhNzJlODMxNjE3NmE4MjVm
11
- ZDg0ZjFlZWFjM2I0NWQ0ZmIzYmM5ZmY0MTRiOGE2YTMwZDVmYzE=
12
- data.tar.gz: !binary |-
13
- N2I5ZGM1NjI2M2QxN2FmMDZkMThkOGU5NDE5MTEyOTNlZWFkNGQ1N2FlZjFi
14
- MTI5ZGNhNjdmZTAyZjAyYTVkZWFlNGJmZDk5YzA3ZjlhN2Q5MTc1NTIyNGVi
15
- M2VjODgxNzQyYTAxNzQ5NWQ5MTQzZjUxNWY5MWZlNDQzNjg2YmQ=
2
+ SHA1:
3
+ metadata.gz: 71fb379b6ae32ceb79e40cce451c8a3646278d32
4
+ data.tar.gz: 08fdec629298945a86993b91be3a743b880ad4a0
5
+ SHA512:
6
+ metadata.gz: d0789d2ef99358144c90d148c378d9bf53e3084b326ede425ed7ff171ab450a4ed9de7a86494dbf49992ed235807f92e6a795cc52676bd61280a006408fc4e90
7
+ data.tar.gz: 16ac99fc7e192fe137348c6d364e730c9c0071f274f200b395746c4247bb3958c7238b315b571a730014b5f8286602bbc67eb9f26db30c217440e15401d3ac95
@@ -0,0 +1,30 @@
1
+ #! /usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ # of this software and associated documentation files (the "Software"), to deal
8
+ # in the Software without restriction, including without limitation the rights
9
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ # copies of the Software, and to permit persons to whom the Software is
11
+ # furnished to do so, subject to the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be included in
14
+ # all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ # THE SOFTWARE.
23
+
24
+ require "rubygems"
25
+ require "bundler/setup"
26
+
27
+ require File.join(File.dirname(__FILE__), "..", "lib", "apollo_crawler")
28
+
29
+ Apollo::ConsoleProgram.new.run(ARGV)
30
+
data/config/amqp.yml CHANGED
@@ -2,17 +2,18 @@ default: &default
2
2
  host: localhost
3
3
  username: guest
4
4
  password: guest
5
+ port: 5672
5
6
 
6
7
  development:
7
8
  <<: *default
8
- vhost: apollo-crawler-development
9
+ vhost: /apollo-crawler-development
9
10
 
10
11
  test:
11
12
  <<: *default
12
13
  host: apollo-crawler.no-ip.org
13
- vhost: apollo-crawler-test
14
+ vhost: /apollo-crawler-test
14
15
 
15
16
  production:
16
17
  <<: *default
17
18
  host: apollo-crawler.no-ip.org
18
- vhost: apollo-crawler-production
19
+ vhost: /apollo-crawler-production
data/config/deploy.rb ADDED
@@ -0,0 +1,61 @@
1
+ require 'capistrano'
2
+
3
+ require "rubygems"
4
+ require "bundler/setup"
5
+ require "bundler/capistrano"
6
+
7
+ # RVM integration
8
+ require "rvm/capistrano"
9
+
10
+ # Target ruby version
11
+ set :rvm_ruby_string, '2.0.0'
12
+
13
+ set :domain, "apollo-crawler.no-ip.org"
14
+ set :application, "apollo_platform"
15
+ # set :deploy_to, File.join(File.expand_path("~"), "/apps/#{application}")
16
+ set :deploy_to, "/home/ubuntu/apps/#{application}"
17
+
18
+ ssh_options[:keys] = [File.join(ENV["HOME"], ".ssh", "key-webs.pem")]
19
+
20
+ set :user, "ubuntu"
21
+ set :use_sudo, false
22
+
23
+ set :scm, :git
24
+ set :repository, "https://github.com/korczis/apollo-crawler.git"
25
+ set :branch, 'master'
26
+ set :git_shallow_clone, 1
27
+
28
+ role :web, domain
29
+ role :app, domain
30
+ role :db, domain, :primary => true
31
+
32
+ set :deploy_via, :remote_cache
33
+
34
+ namespace :deploy do
35
+ def remote_cmd(cmd)
36
+ run "cd #{deploy_to}/current && #{cmd}"
37
+ end
38
+
39
+ task :start, :roles => [:web, :app] do
40
+ puts "Starting.."
41
+ remote_cmd "./bin/apollo-platform -V"
42
+ end
43
+
44
+ task :stop, :roles => [:web, :app] do
45
+ puts "Stopping.."
46
+ end
47
+
48
+ task :status, :roles => [:web, :app] do
49
+ puts "Statusing.."
50
+ end
51
+
52
+ task :restart, :roles => [:web, :app] do
53
+ puts "Restarting.."
54
+ end
55
+
56
+ # This will make sure that Capistrano doesn't try to run rake:migrate (this is not a Rails project!)
57
+ task :cold do
58
+ deploy.update
59
+ deploy.start
60
+ end
61
+ end
data/config/mongoid.yml CHANGED
@@ -1,23 +1,26 @@
1
- default: &default
1
+ default: &default_options
2
2
  sessions:
3
3
  default:
4
4
  hosts:
5
5
  - apollo-crawler.no-ip.org:27017
6
6
 
7
7
  development:
8
- <<: *default
9
8
  sessions:
10
9
  default:
10
+ hosts:
11
+ - localhost:27017
11
12
  database: apollo-crawler-development
12
13
 
13
14
  test:
14
- <<: *default
15
15
  sessions:
16
16
  default:
17
- database: apollo-crawler-test
17
+ hosts:
18
+ - apollo-crawler.no-ip.org:27017
19
+ database: apollo-crawler-test
18
20
 
19
21
  production:
20
- <<: *default
21
22
  sessions:
22
23
  default:
23
- database: apollo-crawler-production
24
+ hosts:
25
+ - apollo-crawler.no-ip.org:27017
26
+ database: apollo-crawler-production
@@ -48,6 +48,9 @@ require File.join(File.dirname(__FILE__), 'apollo_crawler/helper/helpers')
48
48
  # Loggers
49
49
  require File.join(File.dirname(__FILE__), 'apollo_crawler/logger/loggers')
50
50
 
51
+ # Models
52
+ require File.join(File.dirname(__FILE__), 'apollo_crawler/model/models')
53
+
51
54
  # Planner
52
55
  require File.join(File.dirname(__FILE__), 'apollo_crawler/planner/planners')
53
56
 
@@ -18,4 +18,5 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
- require File.join(File.dirname(__FILE__), 'base_agent')
21
+ require File.join(File.dirname(__FILE__), 'base_agent')
22
+ require File.join(File.dirname(__FILE__), 'fetcher_agent')
@@ -21,6 +21,9 @@
21
21
  module Apollo
22
22
  module Agent
23
23
  class BaseAgent
24
+ def run(amqp)
25
+ return 0
26
+ end
24
27
  end # class BaseAgent
25
28
  end # module Agent
26
29
  end # module Apollo
@@ -0,0 +1,55 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_agent')
22
+ require File.join(File.dirname(__FILE__), '../fetcher/fetchers')
23
+
24
module Apollo
  module Agent
    # Agent that consumes messages from the durable "fetcher" AMQP queue.
    class FetcherAgent < BaseAgent
      attr_accessor :fetcher

      # Creates the fetcher and subscribes to the "fetcher" queue.
      #
      # Each delivery is handled on its own thread, where the payload is
      # parsed as JSON. The parsed value is not used yet: the actual fetch
      # call is still commented out below.
      #
      # @param amqp [Object] AMQP session; must respond to #create_channel
      # @param opts [Hash] when opts[:verbose] is truthy, progress is printed
      def initialize(amqp, opts={})
        self.fetcher = Apollo::Fetcher::SmartFetcher.new

        puts "Initializing fetcher agent..." if opts[:verbose]

        channel = amqp.create_channel
        queue = channel.queue("fetcher", :auto_delete => false, :durable => true)

        queue.subscribe do |delivery_info, metadata, payload|
          puts "Received #{payload}" if opts[:verbose]

          Thread.new do
            begin
              queued_url = JSON.parse(payload)
              # puts queued_url["url"]
              # res = Apollo::Fetcher::SmartFetcher::fetch(queued_url["url"])
              # puts "#{queued_url['url']} - " + res.inspect
            rescue JSON::ParserError => e
              # Report a malformed message here; nothing outside this thread
              # would otherwise handle the error.
              warn "Fetcher agent skipped malformed payload: #{e.message}"
            end
          end
        end
      end
    end # class FetcherAgent
  end # module Agent
end # module Apollo
@@ -87,8 +87,8 @@ module Apollo
87
87
  }
88
88
 
89
89
  # Used caching mechanism by default
90
- CACHE_CLASS = Apollo::Cache::SqliteCache
91
- CACHE_CLASS_OPTIONS = CACHE_CLASS_OPTIONS_MONGO
90
+ CACHE_CLASS = Apollo::Cache::MemcachedCache
91
+ CACHE_CLASS_OPTIONS = nil
92
92
 
93
93
  ############################################################
94
94
  # Crawlers - Built-in out-of box working crawlers
@@ -18,9 +18,30 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require 'amqp'
22
+ require 'bunny'
23
+ require 'thread'
24
+
21
25
  module Apollo
22
26
  module Helper
23
27
  module Amqp
28
+ def self.connect(conn, opts={})
29
+ res = nil
30
+
31
+ if(opts[:verbose])
32
+ puts "AMQP Connecting - #{conn.inspect}"
33
+ end
34
+
35
+ res = Bunny.new(:host => conn['host'], :user => conn['username'], :password => conn['password'], :vhost => conn['vhost'], :port => conn['port'])
36
+ res.start
37
+
38
+ sleep(0.001) until res
39
+ if(opts[:verbose])
40
+ puts "AMQP connected - #{res.inspect}"
41
+ end
42
+
43
+ return res
44
+ end
24
45
  end # Amqp
25
46
  end # module Helper
26
47
  end # module Apollo
@@ -24,6 +24,14 @@ class Class
24
24
  end
25
25
  end
26
26
 
27
# Core extension used to turn class names stored as text into the constants
# they name.
class String
  # Resolves this string as a "::"-separated constant path, starting the
  # lookup at Object.
  #
  # Empty path segments are skipped, so a fully qualified name with a leading
  # "::" (for example "::File::Stat") resolves the same way as the unqualified
  # form instead of failing on an empty constant name.
  #
  # @return [Object] the constant the path names (Object for an empty string)
  # @raise [NameError] if a segment does not name a defined constant
  def to_class
    split('::').reject(&:empty?).inject(Object) do |mod, class_name|
      mod.const_get(class_name)
    end
  end
end
34
+
27
35
  module Apollo
28
36
  module Helper
29
37
  module Core
@@ -18,9 +18,25 @@
18
18
  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
19
  # THE SOFTWARE.
20
20
 
21
+ require 'mongo'
22
+ require 'mongoid'
23
+
21
24
  module Apollo
22
25
  module Helper
23
26
  module Mongo
27
+ def self.connect(conn, opts={})
28
+ if(opts[:verbose])
29
+ puts "MongoDB connecting - '#{conn.inspect}"
30
+ end
31
+
32
+ res = ::Mongo::Connection.new(conn['host'])
33
+
34
+ if(opts[:verbose])
35
+ puts "MongoDB connected: #{res.inspect}"
36
+ end
37
+
38
+ return res
39
+ end
24
40
  end # Mongo
25
41
  end # module Helper
26
42
  end # module Apollo
@@ -45,6 +45,9 @@ require File.join(File.dirname(__FILE__), 'helper/helpers')
45
45
  # Loggers
46
46
  require File.join(File.dirname(__FILE__), 'logger/loggers')
47
47
 
48
+ # Models
49
+ require File.join(File.dirname(__FILE__), 'model/models')
50
+
48
51
  # Programs
49
52
  require File.join(File.dirname(__FILE__), 'planner/planners')
50
53
 
@@ -0,0 +1,29 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require 'mongo'
22
+ require 'mongoid'
23
+
24
+ module Apollo
25
+ module Model
26
+ class BaseModel
27
+ end # class BaseModel
28
+ end # module Model
29
+ end # module Apollo
@@ -0,0 +1,39 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class Crawler < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "crawlers"
30
+
31
+ field :name
32
+ field :class_name
33
+ field :source
34
+
35
+ # Indexes
36
+ index({ created_at: 1, updated_at: 1, name: 1, class_name: 1 })
37
+ end # class Crawler
38
+ end # module Model
39
+ end # module Apollo
@@ -0,0 +1,24 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+ require File.join(File.dirname(__FILE__), 'crawler')
23
+ require File.join(File.dirname(__FILE__), 'queued_url')
24
+ require File.join(File.dirname(__FILE__), 'raw_document')
@@ -0,0 +1,38 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class QueuedUrl < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "queued_urls"
30
+
31
+ field :url
32
+ field :state
33
+
34
+ # Indexes
35
+ index({ created_at: 1, updated_at: 1 })
36
+ end # class QueuedUrl
37
+ end # module Model
38
+ end # module Apollo
@@ -0,0 +1,37 @@
1
+ # Copyright, 2013, by Tomas Korcak. <korczis@gmail.com>
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ require File.join(File.dirname(__FILE__), 'base_model')
22
+
23
+ module Apollo
24
+ module Model
25
+ class RawDocument < BaseModel
26
+ include Mongoid::Document
27
+ include Mongoid::Timestamps
28
+
29
+ store_in collection: "raw_docs"
30
+
31
+ field :body
32
+
33
+ # Indexes
34
+ index({ created_at: 1, updated_at: 1 })
35
+ end # class RawDocument
36
+ end # module Model
37
+ end # module Apollo
@@ -20,7 +20,10 @@
20
20
 
21
21
  module Apollo
22
22
  module Planner
23
- class BasePlanner
23
+ class BasePlanner
24
+ def run(amqp, mongo)
25
+ return 0
26
+ end
24
27
  end # class BasePlanner
25
28
  end # module Planner
26
29
  end # module Apollo
@@ -20,9 +20,58 @@
20
20
 
21
21
  require File.join(File.dirname(__FILE__),'base_planner')
22
22
 
23
+ require File.join(File.dirname(__FILE__),'../model/models.rb')
24
+
23
25
  module Apollo
24
26
  module Planner
25
27
  class SmartPlanner < BasePlanner
28
+ attr_accessor :amqp
29
+ attr_accessor :mongo
30
+
31
+ def initialize(amqp=nil, mongo=nil)
32
+ self.amqp = amqp
33
+ self.mongo = mongo
34
+ end
35
+
36
+ def fetch_url(url, opts={})
37
+ puts "AMQP fetching '#{url.inspect}'"
38
+
39
+ ch = amqp.create_channel
40
+ x = ch.default_exchange
41
+ x.publish(url.to_json, :routing_key => "fetcher")
42
+
43
+ end
44
+
45
+ def fetch_queued_urls(opts={})
46
+ urls = Apollo::Model::QueuedUrl.where({:state => :queued})
47
+ return if urls.count < 1
48
+
49
+ if(opts[:verbose])
50
+ puts "Fetching Queued URLS"
51
+ end
52
+
53
+ puts "Count of URLs in Queue: #{urls.count}" if opts[:verbose]
54
+
55
+ urls.each do |url|
56
+ url.state = :fetching
57
+ url.save
58
+
59
+ fetch_url(url, opts)
60
+
61
+ # puts "Removing URL from Queue '#{url.inspect}'" if opts[:verbose]
62
+ end
63
+ end
64
+
65
+ def run(opts={})
66
+ request_exit = false
67
+
68
+ while request_exit == false
69
+ fetch_queued_urls(opts)
70
+ sleep 1
71
+ end
72
+
73
+ return 0
74
+ end
26
75
  end # class SmartPlanner
27
76
  end # module Planner
28
77
  end # module Apollo
@@ -20,6 +20,8 @@
20
20
 
21
21
  require 'yaml'
22
22
 
23
+ require File.join(File.dirname(__FILE__), "../model/models.rb")
24
+
23
25
  module Apollo
24
26
  class BaseProgram
25
27
  CONFIG_DIR = File.join(Apollo::BASE_DIR, "config")
@@ -33,16 +35,16 @@ module Apollo
33
35
  attr_accessor :options
34
36
  attr_accessor :optparser
35
37
 
38
+ attr_accessor :amqp
36
39
  attr_accessor :mongo
37
- attr_accessor :mongo_db
38
40
 
39
41
  def initialize
40
42
  self.config = {}
41
43
  self.options = DEFAULT_OPTIONS
42
44
  self.optparser = nil
43
45
 
46
+ self.amqp = nil
44
47
  self.mongo = nil
45
- self.mongo_db = nil
46
48
  end
47
49
 
48
50
  def self.get_config_path(config)
@@ -113,6 +115,55 @@ module Apollo
113
115
  return nil
114
116
  end
115
117
 
118
+ def init_amqp()
119
+ conn_opts = self.config["amqp"]
120
+ if(conn_opts)
121
+ self.amqp = Apollo::Helper::Amqp::connect(conn_opts, self.options)
122
+ end
123
+
124
+ return self.amqp
125
+ end
126
+
127
+ def init_mongo()
128
+ conn_opts = self.config["mongo"]
129
+ if(conn_opts)
130
+ self.mongo = Apollo::Helper::Mongo::connect(conn_opts, self.options)
131
+
132
+ # Init Mongoid
133
+ path = File.join(Apollo::BASE_DIR, "config/mongoid.yml")
134
+ Mongoid.load!(path, @options[:env])
135
+ end
136
+
137
+ return self.mongo
138
+ end
139
+
140
+ def init_seeds_crawlers(opts={})
141
+ objs = Apollo::Crawler::BaseCrawler.subclasses
142
+ objs.each do |o|
143
+ crawler = Apollo::Model::Crawler.new
144
+ i = o.new
145
+ crawler.name = i.name
146
+ crawler.class_name = o.to_s
147
+
148
+ res = Apollo::Model::Crawler.where(class_name: crawler.class_name)
149
+ # puts "RES: '#{res.inspect}'"
150
+ if(res.nil? || res.count < 1)
151
+ crawler.save
152
+ if(opts[:verbose])
153
+ puts "Adding new crawler - '#{crawler.inspect}'"
154
+ end
155
+ else
156
+ if(opts[:verbose])
157
+ puts "Using crawler - '#{res[0].inspect}'"
158
+ end
159
+ end
160
+ end
161
+ end
162
+
163
+ def init_seeds(opts={})
164
+ init_seeds_crawlers(opts)
165
+ end
166
+
116
167
  # Init program
117
168
  def init_program(args)
118
169
  res = nil
@@ -134,6 +185,12 @@ module Apollo
134
185
  # Init Mongo Connection
135
186
  init_mongo()
136
187
 
188
+ # Init AMQP
189
+ init_amqp()
190
+
191
+ # Init Seed data
192
+ init_seeds(@options)
193
+
137
194
  return nil
138
195
  end
139
196
 
@@ -45,16 +45,6 @@ require File.join(File.dirname(__FILE__), '..', 'version')
45
45
 
46
46
  require File.join(File.dirname(__FILE__),'base_program')
47
47
 
48
-
49
- # Hack
50
- class String
51
- def to_class
52
- self.split('::').inject(Object) do |mod, class_name|
53
- mod.const_get(class_name)
54
- end
55
- end
56
- end
57
-
58
48
  module Apollo
59
49
  # Apollo Crawler Base Directory
60
50
  APOLLO_CRAWLER_BASE_DIR = File.join(File.dirname(__FILE__), "..")
@@ -174,6 +164,8 @@ module Apollo
174
164
 
175
165
  opts.on('-i', '--include [PATH]', 'Include additional crawler or crawler directory') do |path|
176
166
  @options[:crawler_dirs] << path
167
+
168
+ init_additional_crawlers([path])
177
169
  end
178
170
 
179
171
  opts.on('-n', '--doc-limit [NUM]', 'Limit count of documents to be processed') do |count|
@@ -406,6 +398,22 @@ module Apollo
406
398
  end
407
399
  end
408
400
 
401
# Loads additional crawler source files.
#
# Each entry may be a directory, whose *.rb files are loaded in sorted order,
# or a single crawler file. Nil and empty entries are skipped; the optional
# `-i [PATH]` argument can yield nil, which would otherwise glob "/*.rb".
#
# @param dirs [Array<String, nil>] crawler directories or files
# @return [Array] the +dirs+ argument
def init_additional_crawlers(dirs)
  dirs.each do |dir|
    next if dir.to_s.empty?

    puts "Registering additional crawler dir '#{dir}'" if @options[:verbose]

    files = File.file?(dir) ? [dir] : Dir.glob("#{dir}/*.rb").sort
    files.each do |f|
      puts "Registering crawler '#{f}'" if @options[:verbose]
      # Expand to an absolute path: a bare relative path would be resolved
      # against $LOAD_PATH instead of the current working directory.
      require File.expand_path(f)
    end
  end
end
416
+
409
417
  # Init program
410
418
  def init_program(args)
411
419
  init_options()
@@ -80,6 +80,10 @@ module Apollo
80
80
  self.options[:env] = name
81
81
  end
82
82
 
83
+ opts.on('-d', '--daemon', 'Run Apollo Platform daemon') do
84
+ self.options[:daemon] = true
85
+ end
86
+
83
87
  opts.on('-v', '--verbose', 'Enable verbose output') do
84
88
  self.options[:verbose] = true
85
89
  end
@@ -90,6 +94,40 @@ module Apollo
90
94
  end
91
95
  end
92
96
 
97
+ def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
98
+ crawlers.each do |crawler|
99
+ i = crawler.new
100
+ puts "Queuying Crawler base URL: '#{i.url}'" if opts[:verbose]
101
+ qu = Apollo::Model::QueuedUrl.new(:url => i.url, :state => :queued)
102
+ qu.save
103
+ end
104
+ end
105
+
106
+ def init_fetchers(amqp, opts={})
107
+ fetchers = []
108
+ fetchers << Apollo::Agent::FetcherAgent.new(amqp, self.options)
109
+
110
+ enqueue_crawlers_urls(amqp, Apollo::Crawler::BaseCrawler.subclasses, opts)
111
+
112
+ # ch = self.amqp.create_channel
113
+ # x = ch.default_exchange
114
+ # x.publish("Hello!", :routing_key => "fetcher")
115
+ end
116
+
117
+ def init_agents(amqp, opts={})
118
+ puts "Initializing agents"
119
+
120
+ init_fetchers(amqp, opts)
121
+ end
122
+
123
+ def init_program(args)
124
+ res = super(args)
125
+ return res unless res.nil?
126
+
127
+ init_agents(self.amqp, self.options)
128
+ return nil
129
+ end
130
+
93
131
  def process_options(args)
94
132
  if(self.options[:version])
95
133
  puts Apollo::VERSION
@@ -105,30 +143,21 @@ module Apollo
105
143
  return nil
106
144
  end
107
145
 
108
- def init_mongo()
109
- self.mongo = Mongo::Connection.new(self.config['mongo']['host'])
110
- self.mongo_db = self.mongo.db(self.config['mongo']['db'])
111
-
112
- if(self.options[:verbose])
113
- puts "(Mongo) Connection Inited: #{self.mongo.inspect}"
114
- puts "(Mongo) Database Inited: #{self.mongo_db.inspect}"
115
- end
116
-
117
- return self.mongo
118
- end
119
-
120
146
  # Run Program
121
147
  def run(args = ARGV)
122
148
  res = super(args)
123
149
  return res unless res.nil?
124
-
125
- # Print classes
126
- # puts Apollo::Crawler::BaseCrawler.subclasses.inspect
127
150
 
128
151
  # Here we start
129
- if(ARGV.length < 1)
130
- puts optparser
131
- return 0
152
+ # if(ARGV.length < 1)
153
+ # puts optparser
154
+ # return 0
155
+ # end
156
+
157
+ res_code = 0
158
+ if(self.options[:daemon])
159
+ planner = Apollo::Planner::SmartPlanner.new(self.amqp, self.mongo)
160
+ res_code = planner.run(self.options)
132
161
  end
133
162
 
134
163
  return request_exit(res_code)
@@ -19,5 +19,5 @@
19
19
  # THE SOFTWARE.
20
20
 
21
21
  module Apollo
22
- VERSION = '0.1.22'
22
+ VERSION = '0.1.24'
23
23
  end # Apollo
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: apollo-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.22
4
+ version: 0.1.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomas Korcak
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-03-04 00:00:00.000000000 Z
11
+ date: 2013-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: awesome_print
@@ -28,28 +28,28 @@ dependencies:
28
28
  name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: 3.2.12
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.2.12
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: dalli
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ! '>='
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: 2.6.2
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ! '>='
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.6.2
55
55
  - !ruby/object:Gem::Dependency
@@ -70,42 +70,42 @@ dependencies:
70
70
  name: eventmachine
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ! '>='
73
+ - - '>='
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ! '>='
80
+ - - '>='
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: em-http-request
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - '>='
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ! '>='
94
+ - - '>='
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: em-synchrony
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ! '>='
101
+ - - '>='
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ! '>='
108
+ - - '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
@@ -154,56 +154,56 @@ dependencies:
154
154
  name: memcache-client
155
155
  requirement: !ruby/object:Gem::Requirement
156
156
  requirements:
157
- - - ! '>='
157
+ - - '>='
158
158
  - !ruby/object:Gem::Version
159
159
  version: '0'
160
160
  type: :runtime
161
161
  prerelease: false
162
162
  version_requirements: !ruby/object:Gem::Requirement
163
163
  requirements:
164
- - - ! '>='
164
+ - - '>='
165
165
  - !ruby/object:Gem::Version
166
166
  version: '0'
167
167
  - !ruby/object:Gem::Dependency
168
168
  name: mongo
169
169
  requirement: !ruby/object:Gem::Requirement
170
170
  requirements:
171
- - - ! '>='
171
+ - - '>='
172
172
  - !ruby/object:Gem::Version
173
173
  version: 1.8.2
174
174
  type: :runtime
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
- - - ! '>='
178
+ - - '>='
179
179
  - !ruby/object:Gem::Version
180
180
  version: 1.8.2
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: mongoid
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - ! '>='
185
+ - - '>='
186
186
  - !ruby/object:Gem::Version
187
187
  version: 3.1.2
188
188
  type: :runtime
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - ! '>='
192
+ - - '>='
193
193
  - !ruby/object:Gem::Version
194
194
  version: 3.1.2
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: mime-types
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
- - - ! '>='
199
+ - - '>='
200
200
  - !ruby/object:Gem::Version
201
201
  version: '0'
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
- - - ! '>='
206
+ - - '>='
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0'
209
209
  - !ruby/object:Gem::Dependency
@@ -224,70 +224,70 @@ dependencies:
224
224
  name: openurl
225
225
  requirement: !ruby/object:Gem::Requirement
226
226
  requirements:
227
- - - ! '>='
227
+ - - '>='
228
228
  - !ruby/object:Gem::Version
229
229
  version: 0.4.2
230
230
  type: :runtime
231
231
  prerelease: false
232
232
  version_requirements: !ruby/object:Gem::Requirement
233
233
  requirements:
234
- - - ! '>='
234
+ - - '>='
235
235
  - !ruby/object:Gem::Version
236
236
  version: 0.4.2
237
237
  - !ruby/object:Gem::Dependency
238
238
  name: parallel
239
239
  requirement: !ruby/object:Gem::Requirement
240
240
  requirements:
241
- - - ! '>='
241
+ - - '>='
242
242
  - !ruby/object:Gem::Version
243
243
  version: 0.6.2
244
244
  type: :runtime
245
245
  prerelease: false
246
246
  version_requirements: !ruby/object:Gem::Requirement
247
247
  requirements:
248
- - - ! '>='
248
+ - - '>='
249
249
  - !ruby/object:Gem::Version
250
250
  version: 0.6.2
251
251
  - !ruby/object:Gem::Dependency
252
252
  name: rack
253
253
  requirement: !ruby/object:Gem::Requirement
254
254
  requirements:
255
- - - ! '>='
255
+ - - '>='
256
256
  - !ruby/object:Gem::Version
257
257
  version: 1.5.2
258
258
  type: :runtime
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
- - - ! '>='
262
+ - - '>='
263
263
  - !ruby/object:Gem::Version
264
264
  version: 1.5.2
265
265
  - !ruby/object:Gem::Dependency
266
266
  name: right_aws
267
267
  requirement: !ruby/object:Gem::Requirement
268
268
  requirements:
269
- - - ! '>='
269
+ - - '>='
270
270
  - !ruby/object:Gem::Version
271
271
  version: 3.0.5
272
272
  type: :runtime
273
273
  prerelease: false
274
274
  version_requirements: !ruby/object:Gem::Requirement
275
275
  requirements:
276
- - - ! '>='
276
+ - - '>='
277
277
  - !ruby/object:Gem::Version
278
278
  version: 3.0.5
279
279
  - !ruby/object:Gem::Dependency
280
280
  name: right_http_connection
281
281
  requirement: !ruby/object:Gem::Requirement
282
282
  requirements:
283
- - - ! '>='
283
+ - - '>='
284
284
  - !ruby/object:Gem::Version
285
285
  version: 1.3.0
286
286
  type: :runtime
287
287
  prerelease: false
288
288
  version_requirements: !ruby/object:Gem::Requirement
289
289
  requirements:
290
- - - ! '>='
290
+ - - '>='
291
291
  - !ruby/object:Gem::Version
292
292
  version: 1.3.0
293
293
  - !ruby/object:Gem::Dependency
@@ -336,146 +336,155 @@ dependencies:
336
336
  name: ffi
337
337
  requirement: !ruby/object:Gem::Requirement
338
338
  requirements:
339
- - - ! '>='
339
+ - - '>='
340
340
  - !ruby/object:Gem::Version
341
341
  version: 1.4.0
342
342
  type: :runtime
343
343
  prerelease: false
344
344
  version_requirements: !ruby/object:Gem::Requirement
345
345
  requirements:
346
- - - ! '>='
346
+ - - '>='
347
347
  - !ruby/object:Gem::Version
348
348
  version: 1.4.0
349
349
  - !ruby/object:Gem::Dependency
350
350
  name: guard
351
351
  requirement: !ruby/object:Gem::Requirement
352
352
  requirements:
353
- - - ! '>='
353
+ - - '>='
354
354
  - !ruby/object:Gem::Version
355
355
  version: 1.6.2
356
356
  type: :development
357
357
  prerelease: false
358
358
  version_requirements: !ruby/object:Gem::Requirement
359
359
  requirements:
360
- - - ! '>='
360
+ - - '>='
361
361
  - !ruby/object:Gem::Version
362
362
  version: 1.6.2
363
363
  - !ruby/object:Gem::Dependency
364
364
  name: guard-rake
365
365
  requirement: !ruby/object:Gem::Requirement
366
366
  requirements:
367
- - - ! '>='
367
+ - - '>='
368
368
  - !ruby/object:Gem::Version
369
369
  version: 0.0.7
370
370
  type: :development
371
371
  prerelease: false
372
372
  version_requirements: !ruby/object:Gem::Requirement
373
373
  requirements:
374
- - - ! '>='
374
+ - - '>='
375
375
  - !ruby/object:Gem::Version
376
376
  version: 0.0.7
377
377
  - !ruby/object:Gem::Dependency
378
378
  name: guard-rspec
379
379
  requirement: !ruby/object:Gem::Requirement
380
380
  requirements:
381
- - - ! '>='
381
+ - - '>='
382
382
  - !ruby/object:Gem::Version
383
383
  version: 2.5.0
384
384
  type: :development
385
385
  prerelease: false
386
386
  version_requirements: !ruby/object:Gem::Requirement
387
387
  requirements:
388
- - - ! '>='
388
+ - - '>='
389
389
  - !ruby/object:Gem::Version
390
390
  version: 2.5.0
391
391
  description: Gem for crawling data from external sources
392
392
  email: korczis@gmail.com
393
393
  executables:
394
+ - apollo-console
394
395
  - apollo-crawler
395
396
  - apollo-platform
396
397
  extensions: []
397
398
  extra_rdoc_files: []
398
399
  files:
399
- - ./config/mongo.yml.default
400
- - ./config/mongo.yml
401
- - ./config/memcached.yml
402
- - ./config/mongoid.yml
403
- - ./config/apollo.yml.default
400
+ - ./config/amqp.yml
404
401
  - ./config/amqp.yml.default
402
+ - ./config/apollo.yml
403
+ - ./config/apollo.yml.default
404
+ - ./config/deploy.rb
405
+ - ./config/memcached.yml
405
406
  - ./config/memcached.yml.default
406
- - ./config/amqp.yml
407
+ - ./config/mongo.yml
408
+ - ./config/mongo.yml.default
409
+ - ./config/mongoid.yml
407
410
  - ./config/mongoid.yml.default
408
- - ./config/apollo.yml
409
- - ./lib/apollo_crawler/fetcher/smart_fetcher.rb
410
- - ./lib/apollo_crawler/fetcher/fetchers.rb
411
- - ./lib/apollo_crawler/fetcher/simple_fetcher.rb
412
- - ./lib/apollo_crawler/fetcher/base_fetcher.rb
413
- - ./lib/apollo_crawler/planner/base_planner.rb
414
- - ./lib/apollo_crawler/planner/planners.rb
415
- - ./lib/apollo_crawler/planner/smart_planner.rb
416
- - ./lib/apollo_crawler/lib.rb
417
- - ./lib/apollo_crawler/version.rb
418
- - ./lib/apollo_crawler/program/console_program.rb
419
- - ./lib/apollo_crawler/program/platform_program.rb
420
- - ./lib/apollo_crawler/program/crawler_program.rb
421
- - ./lib/apollo_crawler/program/base_program.rb
422
- - ./lib/apollo_crawler/program/programs.rb
423
- - ./lib/apollo_crawler/logger/console_logger.rb
424
- - ./lib/apollo_crawler/logger/base_logger.rb
425
- - ./lib/apollo_crawler/logger/loggers.rb
426
- - ./lib/apollo_crawler/helper/core_helper.rb
427
- - ./lib/apollo_crawler/helper/amqp_helper.rb
428
- - ./lib/apollo_crawler/helper/helpers.rb
429
- - ./lib/apollo_crawler/helper/mongo_helper.rb
411
+ - ./lib/apollo_crawler.rb
430
412
  - ./lib/apollo_crawler/adapter/adapters.rb
431
- - ./lib/apollo_crawler/adapter/mongo_adapter.rb
432
413
  - ./lib/apollo_crawler/adapter/amqp_adapter.rb
433
- - ./lib/apollo_crawler/config.rb
434
- - ./lib/apollo_crawler/cache/sqlite_cache.rb
414
+ - ./lib/apollo_crawler/adapter/mongo_adapter.rb
415
+ - ./lib/apollo_crawler/agent/agents.rb
416
+ - ./lib/apollo_crawler/agent/base_agent.rb
417
+ - ./lib/apollo_crawler/agent/fetcher_agent.rb
418
+ - ./lib/apollo_crawler/cache/base_cache.rb
419
+ - ./lib/apollo_crawler/cache/caches.rb
435
420
  - ./lib/apollo_crawler/cache/factory.rb
436
- - ./lib/apollo_crawler/cache/null_cache.rb
421
+ - ./lib/apollo_crawler/cache/memcached_cache.rb
437
422
  - ./lib/apollo_crawler/cache/memory_cache.rb
438
- - ./lib/apollo_crawler/cache/base_cache.rb
439
423
  - ./lib/apollo_crawler/cache/mongo_cache.rb
440
- - ./lib/apollo_crawler/cache/memcached_cache.rb
441
- - ./lib/apollo_crawler/cache/caches.rb
442
- - ./lib/apollo_crawler/crawler/xkcd_crawler.rb
443
- - ./lib/apollo_crawler/crawler/google_crawler.rb
444
- - ./lib/apollo_crawler/crawler/youjizz_crawler.rb
445
- - ./lib/apollo_crawler/crawler/slashdot_crawler.rb
446
- - ./lib/apollo_crawler/crawler/hacker_news_crawler.rb
424
+ - ./lib/apollo_crawler/cache/null_cache.rb
425
+ - ./lib/apollo_crawler/cache/sqlite_cache.rb
426
+ - ./lib/apollo_crawler/config.rb
447
427
  - ./lib/apollo_crawler/crawler/base_crawler.rb
448
428
  - ./lib/apollo_crawler/crawler/crawlers.rb
429
+ - ./lib/apollo_crawler/crawler/google_crawler.rb
430
+ - ./lib/apollo_crawler/crawler/hacker_news_crawler.rb
431
+ - ./lib/apollo_crawler/crawler/slashdot_crawler.rb
449
432
  - ./lib/apollo_crawler/crawler/stackoverflow_crawler.rb
433
+ - ./lib/apollo_crawler/crawler/xkcd_crawler.rb
434
+ - ./lib/apollo_crawler/crawler/youjizz_crawler.rb
450
435
  - ./lib/apollo_crawler/env.rb
451
- - ./lib/apollo_crawler/agent/agents.rb
452
- - ./lib/apollo_crawler/agent/base_agent.rb
453
- - ./lib/apollo_crawler/formatter/table_formatter.rb
436
+ - ./lib/apollo_crawler/fetcher/base_fetcher.rb
437
+ - ./lib/apollo_crawler/fetcher/fetchers.rb
438
+ - ./lib/apollo_crawler/fetcher/simple_fetcher.rb
439
+ - ./lib/apollo_crawler/fetcher/smart_fetcher.rb
454
440
  - ./lib/apollo_crawler/formatter/base_formatter.rb
441
+ - ./lib/apollo_crawler/formatter/formatters.rb
455
442
  - ./lib/apollo_crawler/formatter/json_formatter.rb
456
443
  - ./lib/apollo_crawler/formatter/plain_formatter.rb
457
- - ./lib/apollo_crawler/formatter/formatters.rb
458
- - ./lib/apollo_crawler/store/stores.rb
444
+ - ./lib/apollo_crawler/formatter/table_formatter.rb
445
+ - ./lib/apollo_crawler/helper/amqp_helper.rb
446
+ - ./lib/apollo_crawler/helper/core_helper.rb
447
+ - ./lib/apollo_crawler/helper/helpers.rb
448
+ - ./lib/apollo_crawler/helper/mongo_helper.rb
449
+ - ./lib/apollo_crawler/lib.rb
450
+ - ./lib/apollo_crawler/logger/base_logger.rb
451
+ - ./lib/apollo_crawler/logger/console_logger.rb
452
+ - ./lib/apollo_crawler/logger/loggers.rb
453
+ - ./lib/apollo_crawler/model/base_model.rb
454
+ - ./lib/apollo_crawler/model/crawler.rb
455
+ - ./lib/apollo_crawler/model/models.rb
456
+ - ./lib/apollo_crawler/model/queued_url.rb
457
+ - ./lib/apollo_crawler/model/raw_document.rb
458
+ - ./lib/apollo_crawler/planner/base_planner.rb
459
+ - ./lib/apollo_crawler/planner/planners.rb
460
+ - ./lib/apollo_crawler/planner/smart_planner.rb
461
+ - ./lib/apollo_crawler/program/base_program.rb
462
+ - ./lib/apollo_crawler/program/console_program.rb
463
+ - ./lib/apollo_crawler/program/crawler_program.rb
464
+ - ./lib/apollo_crawler/program/platform_program.rb
465
+ - ./lib/apollo_crawler/program/programs.rb
459
466
  - ./lib/apollo_crawler/store/base_store.rb
460
- - ./lib/apollo_crawler.rb
467
+ - ./lib/apollo_crawler/store/stores.rb
468
+ - ./lib/apollo_crawler/version.rb
469
+ - bin/apollo-console
461
470
  - bin/apollo-crawler
462
471
  - bin/apollo-platform
463
472
  homepage: http://apollocrawler.com/
464
473
  licenses:
465
474
  - MIT
466
475
  metadata: {}
467
- post_install_message: Thanks for installing Apollo Crawler!
476
+ post_install_message: Thank you for installing Apollo Crawler!
468
477
  rdoc_options: []
469
478
  require_paths:
470
479
  - lib
471
480
  required_ruby_version: !ruby/object:Gem::Requirement
472
481
  requirements:
473
- - - ! '>='
482
+ - - '>='
474
483
  - !ruby/object:Gem::Version
475
484
  version: 1.9.3
476
485
  required_rubygems_version: !ruby/object:Gem::Requirement
477
486
  requirements:
478
- - - ! '>='
487
+ - - '>='
479
488
  - !ruby/object:Gem::Version
480
489
  version: 1.8.11
481
490
  requirements: []