aranha 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +37 -0
  5. data/app/assets/javascripts/aranha/application.js +14 -0
  6. data/app/assets/stylesheets/aranha/application.css +16 -0
  7. data/app/controllers/aranha/addresses_controller.rb +9 -0
  8. data/app/controllers/aranha/application_controller.rb +5 -0
  9. data/app/helpers/aranha/application_helper.rb +4 -0
  10. data/app/models/aranha/address.rb +57 -0
  11. data/app/views/layouts/aranha/application.html.erb +14 -0
  12. data/config/routes.rb +3 -0
  13. data/db/migrate/20171201021251_create_aranha_addresses.rb +12 -0
  14. data/lib/aranha.rb +9 -0
  15. data/lib/aranha/engine.rb +11 -0
  16. data/lib/aranha/processor.rb +15 -0
  17. data/lib/aranha/version.rb +4 -0
  18. data/lib/tasks/aranha_tasks.rake +6 -0
  19. data/test/aranha_test.rb +7 -0
  20. data/test/dummy/README.rdoc +28 -0
  21. data/test/dummy/Rakefile +6 -0
  22. data/test/dummy/app/assets/javascripts/application.js +13 -0
  23. data/test/dummy/app/assets/stylesheets/application.css +15 -0
  24. data/test/dummy/app/controllers/application_controller.rb +5 -0
  25. data/test/dummy/app/helpers/application_helper.rb +2 -0
  26. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  27. data/test/dummy/bin/bundle +3 -0
  28. data/test/dummy/bin/rails +4 -0
  29. data/test/dummy/bin/rake +4 -0
  30. data/test/dummy/bin/setup +29 -0
  31. data/test/dummy/config.ru +4 -0
  32. data/test/dummy/config/application.rb +26 -0
  33. data/test/dummy/config/boot.rb +5 -0
  34. data/test/dummy/config/database.yml +25 -0
  35. data/test/dummy/config/environment.rb +5 -0
  36. data/test/dummy/config/environments/development.rb +41 -0
  37. data/test/dummy/config/environments/production.rb +79 -0
  38. data/test/dummy/config/environments/test.rb +42 -0
  39. data/test/dummy/config/initializers/assets.rb +11 -0
  40. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  41. data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
  42. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  43. data/test/dummy/config/initializers/inflections.rb +16 -0
  44. data/test/dummy/config/initializers/mime_types.rb +4 -0
  45. data/test/dummy/config/initializers/session_store.rb +3 -0
  46. data/test/dummy/config/initializers/to_time_preserves_timezone.rb +10 -0
  47. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  48. data/test/dummy/config/locales/en.yml +23 -0
  49. data/test/dummy/config/routes.rb +4 -0
  50. data/test/dummy/config/secrets.yml +22 -0
  51. data/test/dummy/db/schema.rb +24 -0
  52. data/test/dummy/public/404.html +67 -0
  53. data/test/dummy/public/422.html +67 -0
  54. data/test/dummy/public/500.html +66 -0
  55. data/test/dummy/public/favicon.ico +0 -0
  56. data/test/integration/navigation_test.rb +8 -0
  57. data/test/test_helper.rb +22 -0
  58. metadata +181 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7776940b9f7bcb2d542e4993240fd10c18ff2024
4
+ data.tar.gz: d6579b4134a9a4ce5ec98cf335db1d11ab1b7038
5
+ SHA512:
6
+ metadata.gz: 8d4924f70f27bb9a4809c2c56034fcaba0013b50925e8b204391034fa0cb88d5c2a3dc6de0fc365b8da8db5c81f45458c5b87b34ed058ab037dcc604d4f4fe0b
7
+ data.tar.gz: f5774605515bc18a2b57c4c794e1a0d050026dd96d16d13e7eb8683492199a48032281995a773f8b937b8e9c90ae0514fd304759b4a27e830629caefd59e19c4
@@ -0,0 +1,20 @@
1
+ Copyright 2017 Eduardo H. Bogoni
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,3 @@
1
+ = Aranha
2
+
3
+ Rails utilities for web crawling.
@@ -0,0 +1,37 @@
1
+ begin
2
+ require 'bundler/setup'
3
+ rescue LoadError
4
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
5
+ end
6
+
7
+ require 'rdoc/task'
8
+
9
+ RDoc::Task.new(:rdoc) do |rdoc|
10
+ rdoc.rdoc_dir = 'rdoc'
11
+ rdoc.title = 'Aranha'
12
+ rdoc.options << '--line-numbers'
13
+ rdoc.rdoc_files.include('README.rdoc')
14
+ rdoc.rdoc_files.include('lib/**/*.rb')
15
+ end
16
+
17
+ APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
18
+ load 'rails/tasks/engine.rake'
19
+
20
+
21
+ load 'rails/tasks/statistics.rake'
22
+
23
+
24
+
25
+ Bundler::GemHelper.install_tasks
26
+
27
+ require 'rake/testtask'
28
+
29
+ Rake::TestTask.new(:test) do |t|
30
+ t.libs << 'lib'
31
+ t.libs << 'test'
32
+ t.pattern = 'test/**/*_test.rb'
33
+ t.verbose = false
34
+ end
35
+
36
+
37
+ task default: :test
@@ -0,0 +1,14 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
14
+ //= require active_scaffold
@@ -0,0 +1,16 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ *= require active_scaffold
16
+ */
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ require_dependency 'aranha/application_controller'
3
+
4
+ module Aranha
5
+ class AddressesController < ApplicationController
6
+ active_scaffold :'aranha/address' do |_conf|
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,5 @@
1
+ module Aranha
2
+ class ApplicationController < ActionController::Base
3
+ protect_from_forgery with: :exception
4
+ end
5
+ end
@@ -0,0 +1,4 @@
1
+ module Aranha
2
+ module ApplicationHelper
3
+ end
4
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ class Address < ActiveRecord::Base
4
+ include ::Eac::InequalityQueries
5
+
6
+ add_inequality_queries(:created_at)
7
+
8
+ class << self
9
+ def set_start_point(url, processor)
10
+ start_points[url] = processor
11
+ end
12
+
13
+ def add_start_points
14
+ start_points.each do |url, processor|
15
+ add(url, processor)
16
+ end
17
+ end
18
+
19
+ def add(url, processor)
20
+ a = find_or_initialize_by(url: url)
21
+ a.processor = processor
22
+ a.save!
23
+ end
24
+
25
+ def clear_expired
26
+ q = by_created_at_lt(Time.zone.now - 12.hours)
27
+ Rails.logger.info("Addresses expired: #{q.count}")
28
+ q.destroy_all
29
+ end
30
+
31
+ private
32
+
33
+ def start_points
34
+ @start_points ||= {}
35
+ end
36
+ end
37
+
38
+ validates :url, presence: true, uniqueness: true
39
+ validates :processor, presence: true
40
+
41
+ scope :unprocessed, lambda {
42
+ where(processed_at: nil)
43
+ }
44
+
45
+ def to_s
46
+ "#{processor}|#{url}"
47
+ end
48
+
49
+ def process
50
+ ActiveRecord::Base.transaction do
51
+ processor.constantize.new(url).process
52
+ self.processed_at = Time.zone.now
53
+ save!
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Aranha</title>
5
+ <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
+ <%= javascript_include_tag "aranha/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ Aranha::Engine.routes.draw do
2
+ resources(:addresses) { as_routes }
3
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+ class CreateAranhaAddresses < ActiveRecord::Migration
3
+ def change
4
+ create_table :aranha_addresses do |t|
5
+ t.string :url
6
+ t.string :processor
7
+ t.timestamp :processed_at
8
+
9
+ t.timestamps null: false
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ require 'active_support/dependencies'
3
+ require_dependency 'aranha/engine'
4
+ require_dependency 'active_scaffold'
5
+
6
+ module Aranha
7
+ end
8
+
9
+ require_dependency 'aranha/processor'
@@ -0,0 +1,11 @@
1
+ module Aranha
2
+ class Engine < ::Rails::Engine
3
+ isolate_namespace Aranha
4
+
5
+ initializer :append_migrations do |app|
6
+ config.paths['db/migrate'].expanded.each do |expanded_path|
7
+ app.config.paths['db/migrate'] << expanded_path
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ class Processor
4
+ def initialize
5
+ ::Aranha::Address.clear_expired
6
+ ::Aranha::Address.add_start_points
7
+ loop do
8
+ a = ::Aranha::Address.unprocessed.first
9
+ break unless a
10
+ Rails.logger.info("Processing #{a}")
11
+ a.process
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+ module Aranha
3
+ VERSION = '0.0.1'
4
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+ namespace(:aranha) do
3
+ task process: :environment do
4
+ ::Aranha::Processor.new
5
+ end
6
+ end
@@ -0,0 +1,7 @@
1
+ require 'test_helper'
2
+
3
+ class AranhaTest < ActiveSupport::TestCase
4
+ test "truth" do
5
+ assert_kind_of Module, Aranha
6
+ end
7
+ end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
+ # Add your own tasks in files placed in lib/tasks ending in .rake,
2
+ # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
+
4
+ require File.expand_path('../config/application', __FILE__)
5
+
6
+ Rails.application.load_tasks
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,15 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ */
@@ -0,0 +1,5 @@
1
+ class ApplicationController < ActionController::Base
2
+ # Prevent CSRF attacks by raising an exception.
3
+ # For APIs, you may want to use :null_session instead.
4
+ protect_from_forgery with: :exception
5
+ end
@@ -0,0 +1,2 @@
1
+ module ApplicationHelper
2
+ end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
+ <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ APP_PATH = File.expand_path('../../config/application', __FILE__)
3
+ require_relative '../config/boot'
4
+ require 'rails/commands'
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../config/boot'
3
+ require 'rake'
4
+ Rake.application.run
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ require 'pathname'
3
+
4
+ # path to your application root.
5
+ APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
6
+
7
+ Dir.chdir APP_ROOT do
8
+ # This script is a starting point to setup your application.
9
+ # Add necessary setup steps to this file:
10
+
11
+ puts "== Installing dependencies =="
12
+ system "gem install bundler --conservative"
13
+ system "bundle check || bundle install"
14
+
15
+ # puts "\n== Copying sample files =="
16
+ # unless File.exist?("config/database.yml")
17
+ # system "cp config/database.yml.sample config/database.yml"
18
+ # end
19
+
20
+ puts "\n== Preparing database =="
21
+ system "bin/rake db:setup"
22
+
23
+ puts "\n== Removing old logs and tempfiles =="
24
+ system "rm -f log/*"
25
+ system "rm -rf tmp/cache"
26
+
27
+ puts "\n== Restarting application server =="
28
+ system "touch tmp/restart.txt"
29
+ end
@@ -0,0 +1,4 @@
1
+ # This file is used by Rack-based servers to start the application.
2
+
3
+ require ::File.expand_path('../config/environment', __FILE__)
4
+ run Rails.application
@@ -0,0 +1,26 @@
1
+ require File.expand_path('../boot', __FILE__)
2
+
3
+ require 'rails/all'
4
+
5
+ Bundler.require(*Rails.groups)
6
+ require "aranha"
7
+
8
+ module Dummy
9
+ class Application < Rails::Application
10
+ # Settings in config/environments/* take precedence over those specified here.
11
+ # Application configuration should go into files in config/initializers
12
+ # -- all .rb files in that directory are automatically loaded.
13
+
14
+ # Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
15
+ # Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
16
+ # config.time_zone = 'Central Time (US & Canada)'
17
+
18
+ # The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
19
+ # config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
20
+ # config.i18n.default_locale = :de
21
+
22
+ # Do not swallow errors in after_commit/after_rollback callbacks.
23
+ config.active_record.raise_in_transactional_callbacks = true
24
+ end
25
+ end
26
+