aranha 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +37 -0
  5. data/app/assets/javascripts/aranha/application.js +14 -0
  6. data/app/assets/stylesheets/aranha/application.css +16 -0
  7. data/app/controllers/aranha/addresses_controller.rb +9 -0
  8. data/app/controllers/aranha/application_controller.rb +5 -0
  9. data/app/helpers/aranha/application_helper.rb +4 -0
  10. data/app/models/aranha/address.rb +57 -0
  11. data/app/views/layouts/aranha/application.html.erb +14 -0
  12. data/config/routes.rb +3 -0
  13. data/db/migrate/20171201021251_create_aranha_addresses.rb +12 -0
  14. data/lib/aranha.rb +9 -0
  15. data/lib/aranha/engine.rb +11 -0
  16. data/lib/aranha/processor.rb +15 -0
  17. data/lib/aranha/version.rb +4 -0
  18. data/lib/tasks/aranha_tasks.rake +6 -0
  19. data/test/aranha_test.rb +7 -0
  20. data/test/dummy/README.rdoc +28 -0
  21. data/test/dummy/Rakefile +6 -0
  22. data/test/dummy/app/assets/javascripts/application.js +13 -0
  23. data/test/dummy/app/assets/stylesheets/application.css +15 -0
  24. data/test/dummy/app/controllers/application_controller.rb +5 -0
  25. data/test/dummy/app/helpers/application_helper.rb +2 -0
  26. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  27. data/test/dummy/bin/bundle +3 -0
  28. data/test/dummy/bin/rails +4 -0
  29. data/test/dummy/bin/rake +4 -0
  30. data/test/dummy/bin/setup +29 -0
  31. data/test/dummy/config.ru +4 -0
  32. data/test/dummy/config/application.rb +26 -0
  33. data/test/dummy/config/boot.rb +5 -0
  34. data/test/dummy/config/database.yml +25 -0
  35. data/test/dummy/config/environment.rb +5 -0
  36. data/test/dummy/config/environments/development.rb +41 -0
  37. data/test/dummy/config/environments/production.rb +79 -0
  38. data/test/dummy/config/environments/test.rb +42 -0
  39. data/test/dummy/config/initializers/assets.rb +11 -0
  40. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  41. data/test/dummy/config/initializers/cookies_serializer.rb +3 -0
  42. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  43. data/test/dummy/config/initializers/inflections.rb +16 -0
  44. data/test/dummy/config/initializers/mime_types.rb +4 -0
  45. data/test/dummy/config/initializers/session_store.rb +3 -0
  46. data/test/dummy/config/initializers/to_time_preserves_timezone.rb +10 -0
  47. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  48. data/test/dummy/config/locales/en.yml +23 -0
  49. data/test/dummy/config/routes.rb +4 -0
  50. data/test/dummy/config/secrets.yml +22 -0
  51. data/test/dummy/db/schema.rb +24 -0
  52. data/test/dummy/public/404.html +67 -0
  53. data/test/dummy/public/422.html +67 -0
  54. data/test/dummy/public/500.html +66 -0
  55. data/test/dummy/public/favicon.ico +0 -0
  56. data/test/integration/navigation_test.rb +8 -0
  57. data/test/test_helper.rb +22 -0
  58. metadata +181 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7776940b9f7bcb2d542e4993240fd10c18ff2024
4
+ data.tar.gz: d6579b4134a9a4ce5ec98cf335db1d11ab1b7038
5
+ SHA512:
6
+ metadata.gz: 8d4924f70f27bb9a4809c2c56034fcaba0013b50925e8b204391034fa0cb88d5c2a3dc6de0fc365b8da8db5c81f45458c5b87b34ed058ab037dcc604d4f4fe0b
7
+ data.tar.gz: f5774605515bc18a2b57c4c794e1a0d050026dd96d16d13e7eb8683492199a48032281995a773f8b937b8e9c90ae0514fd304759b4a27e830629caefd59e19c4
@@ -0,0 +1,20 @@
1
+ Copyright 2017 Eduardo H. Bogoni
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,3 @@
1
+ = Aranha
2
+
3
+ Rails utilities for web crawling.
@@ -0,0 +1,37 @@
1
# Gem-level Rakefile: loads Bundler, defines the :rdoc task, loads the Rails
# engine and statistics rake files, installs the Bundler gem tasks, and defines
# a :test task that is also the default task.
# NOTE(review): when 'bundler/setup' cannot be loaded only a message is printed
# and execution continues; the later `Bundler::GemHelper.install_tasks` call
# presumably fails in that case — confirm whether an early abort is wanted.
+ begin
2
+ require 'bundler/setup'
3
+ rescue LoadError
4
+ puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
5
+ end
6
+
7
+ require 'rdoc/task'
8
+
# RDoc output goes to ./rdoc and covers README.rdoc plus everything under lib/.
9
+ RDoc::Task.new(:rdoc) do |rdoc|
10
+ rdoc.rdoc_dir = 'rdoc'
11
+ rdoc.title = 'Aranha'
12
+ rdoc.options << '--line-numbers'
13
+ rdoc.rdoc_files.include('README.rdoc')
14
+ rdoc.rdoc_files.include('lib/**/*.rb')
15
+ end
16
+
# APP_RAKEFILE points at the dummy application's Rakefile; presumably it is
# read by rails/tasks/engine.rake, which is loaded right after — confirm.
17
+ APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
18
+ load 'rails/tasks/engine.rake'
19
+
20
+
21
+ load 'rails/tasks/statistics.rake'
22
+
23
+
24
+
25
+ Bundler::GemHelper.install_tasks
26
+
27
+ require 'rake/testtask'
28
+
# The :test task adds lib/ and test/ to the load path and runs every file
# matching test/**/*_test.rb with verbose output disabled.
29
+ Rake::TestTask.new(:test) do |t|
30
+ t.libs << 'lib'
31
+ t.libs << 'test'
32
+ t.pattern = 'test/**/*_test.rb'
33
+ t.verbose = false
34
+ end
35
+
36
+
37
+ task default: :test
@@ -0,0 +1,14 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
14
+ //= require active_scaffold
@@ -0,0 +1,16 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ *= require active_scaffold
16
+ */
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+ require_dependency 'aranha/application_controller'
3
+
4
# Controller for Aranha::Address records. Its only content is the
# `active_scaffold` macro call for the :'aranha/address' model; the
# configuration block is empty (its argument is unused, hence `_conf`), so no
# scaffold settings are overridden here.
# NOTE(review): `active_scaffold` is not defined in this package; presumably it
# comes from the active_scaffold gem required in lib/aranha.rb — confirm.
+ module Aranha
5
+ class AddressesController < ApplicationController
6
+ active_scaffold :'aranha/address' do |_conf|
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,5 @@
1
# Base controller of the Aranha engine. It only enables CSRF protection with
# the :exception strategy.
+ module Aranha
2
+ class ApplicationController < ActionController::Base
3
+ protect_from_forgery with: :exception
4
+ end
5
+ end
@@ -0,0 +1,4 @@
1
# Helper module of the Aranha engine. It is empty: no helper methods are
# defined in this version.
+ module Aranha
2
+ module ApplicationHelper
3
+ end
4
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
# Aranha::Address: a URL queued for crawling, together with the name of the
# class that processes it (`processor`) and the time it was processed
# (`processed_at`, nil while pending).
+ module Aranha
3
+ class Address < ActiveRecord::Base
# NOTE(review): ::Eac::InequalityQueries is not defined in this package.
# `add_inequality_queries(:created_at)` presumably generates the
# `by_created_at_lt` query used by clear_expired below — confirm.
4
+ include ::Eac::InequalityQueries
5
+
6
+ add_inequality_queries(:created_at)
7
+
8
+ class << self
# Registers `processor` as the processor for `url` in an in-memory,
# class-level hash. Registering the same url again overwrites the earlier
# entry. Nothing is written to the database here.
9
+ def set_start_point(url, processor)
10
+ start_points[url] = processor
11
+ end
12
+
# Calls `add` for every registered start point.
13
+ def add_start_points
14
+ start_points.each do |url, processor|
15
+ add(url, processor)
16
+ end
17
+ end
18
+
# Finds the record with this url or builds a new one, assigns `processor`
# and saves it with `save!`. An existing record keeps its `processed_at`
# value, because that attribute is not touched here.
19
+ def add(url, processor)
20
+ a = find_or_initialize_by(url: url)
21
+ a.processor = processor
22
+ a.save!
23
+ end
24
+
# Logs how many records the `by_created_at_lt` query returns for
# "now minus 12 hours", then destroys them. The query has no condition on
# `processed_at`, so processed and unprocessed records are treated alike.
# NOTE(review): presumably selects records created more than 12 hours
# ago — confirm against Eac::InequalityQueries.
25
+ def clear_expired
26
+ q = by_created_at_lt(Time.zone.now - 12.hours)
27
+ Rails.logger.info("Addresses expired: #{q.count}")
28
+ q.destroy_all
29
+ end
30
+
31
+ private
32
+
# Lazily initialised hash of url => processor, stored in a class-level
# instance variable.
33
+ def start_points
34
+ @start_points ||= {}
35
+ end
36
+ end
37
+
# NOTE(review): uniqueness of `url` is only checked by this validation; the
# migration shipped in this package creates no unique index on the column.
38
+ validates :url, presence: true, uniqueness: true
39
+ validates :processor, presence: true
40
+
# Records that have not been processed yet (`processed_at` is nil).
41
+ scope :unprocessed, lambda {
42
+ where(processed_at: nil)
43
+ }
44
+
# Text form used in log messages: "<processor>|<url>".
45
+ def to_s
46
+ "#{processor}|#{url}"
47
+ end
48
+
# Inside one database transaction: resolves the class named by `processor`,
# instantiates it with `url`, calls its `process` method, then stamps
# `processed_at` with the current time and saves the record.
# NOTE(review): `processor` is a database column turned into a class with
# `constantize`; make sure only trusted code can write that column (the
# scaffold controller in this engine manages the same records) — confirm.
49
+ def process
50
+ ActiveRecord::Base.transaction do
51
+ processor.constantize.new(url).process
52
+ self.processed_at = Time.zone.now
53
+ save!
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Aranha</title>
5
+ <%= stylesheet_link_tag "aranha/application", media: "all" %>
6
+ <%= javascript_include_tag "aranha/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
# Engine routes: a single `addresses` resource.
# NOTE(review): `as_routes` is not defined in this package; presumably it is
# the ActiveScaffold routing helper that adds the scaffold actions — confirm.
+ Aranha::Engine.routes.draw do
2
+ resources(:addresses) { as_routes }
3
+ end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
# Creates the `aranha_addresses` table: `url` and `processor` strings, a
# `processed_at` timestamp, and non-null `created_at`/`updated_at` timestamps.
# NOTE(review): `url` and `processor` have no NOT NULL constraint and `url` has
# no unique index, although the Aranha::Address model validates presence and
# uniqueness.
# NOTE(review): the superclass is the unversioned `ActiveRecord::Migration`;
# presumably the gem targets a Rails release that accepts it — confirm before
# using it with newer Rails.
+ class CreateAranhaAddresses < ActiveRecord::Migration
3
+ def change
4
+ create_table :aranha_addresses do |t|
5
+ t.string :url
6
+ t.string :processor
7
+ t.timestamp :processed_at
8
+
9
+ t.timestamps null: false
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
# Entry point of the gem: loads ActiveSupport's dependency loader, then the
# engine and the active_scaffold library, defines the (empty) Aranha namespace
# module, and finally loads Aranha::Processor.
+ require 'active_support/dependencies'
3
+ require_dependency 'aranha/engine'
4
+ require_dependency 'active_scaffold'
5
+
6
+ module Aranha
7
+ end
8
+
9
+ require_dependency 'aranha/processor'
@@ -0,0 +1,11 @@
1
# Rails engine definition for Aranha, with an isolated namespace.
+ module Aranha
2
+ class Engine < ::Rails::Engine
3
+ isolate_namespace Aranha
4
+
# Appends every expanded path of the engine's 'db/migrate' setting to the host
# application's 'db/migrate' paths; presumably so the host runs the engine's
# migrations without copying them — confirm.
5
+ initializer :append_migrations do |app|
6
+ config.paths['db/migrate'].expanded.each do |expanded_path|
7
+ app.config.paths['db/migrate'] << expanded_path
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
# Runs one complete crawl. All of the work happens in the constructor, so
# `Aranha::Processor.new` returns only after the run has finished.
# Steps: clear expired addresses, persist the registered start points, then
# repeatedly take the first unprocessed address, log it and process it until
# none is left.
# NOTE(review): the loop ends only because Address#process sets `processed_at`.
# An exception raised by `a.process` propagates out of the constructor and
# stops the run; the failing address stays unprocessed, so presumably it is
# picked first again on the next run — confirm that this is intended.
+ module Aranha
3
+ class Processor
4
+ def initialize
5
+ ::Aranha::Address.clear_expired
6
+ ::Aranha::Address.add_start_points
7
+ loop do
8
+ a = ::Aranha::Address.unprocessed.first
9
+ break unless a
10
+ Rails.logger.info("Processing #{a}")
11
+ a.process
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
# Version string of the gem.
+ module Aranha
3
+ VERSION = '0.0.1'
4
+ end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
# Defines the `aranha:process` rake task. It depends on the :environment task
# and instantiates Aranha::Processor; the result of `new` is discarded.
+ namespace(:aranha) do
3
+ task process: :environment do
4
+ ::Aranha::Processor.new
5
+ end
6
+ end
@@ -0,0 +1,7 @@
1
+ require 'test_helper'
2
+
3
# Smoke test: only checks that the Aranha constant is a Module. No behaviour
# of the gem is exercised.
+ class AranhaTest < ActiveSupport::TestCase
4
+ test "truth" do
5
+ assert_kind_of Module, Aranha
6
+ end
7
+ end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
+ # Add your own tasks in files placed in lib/tasks ending in .rake,
2
+ # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
+
4
# Loads the dummy application's config/application.rb (path resolved relative
# to this file) and then loads the application's rake tasks.
+ require File.expand_path('../config/application', __FILE__)
5
+
6
+ Rails.application.load_tasks
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,15 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ */
@@ -0,0 +1,5 @@
1
# Base controller of the dummy test application; it only enables CSRF
# protection.
+ class ApplicationController < ActionController::Base
2
+ # Prevent CSRF attacks by raising an exception.
3
+ # For APIs, you may want to use :null_session instead.
4
+ protect_from_forgery with: :exception
5
+ end
@@ -0,0 +1,2 @@
1
# Helper module of the dummy test application; it defines no methods.
+ module ApplicationHelper
2
+ end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag 'application', media: 'all', 'data-turbolinks-track' => true %>
6
+ <%= javascript_include_tag 'application', 'data-turbolinks-track' => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
# Sets BUNDLE_GEMFILE to the Gemfile two directories above this script unless
# the variable is already set, then loads the `bundle` executable of the
# bundler gem.
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
# Defines APP_PATH as the dummy application's config/application path, loads
# config/boot and then requires 'rails/commands'.
+ APP_PATH = File.expand_path('../../config/application', __FILE__)
3
+ require_relative '../config/boot'
4
+ require 'rails/commands'
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
# Loads config/boot, requires rake and runs the rake application.
+ require_relative '../config/boot'
3
+ require 'rake'
4
+ Rake.application.run
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
# Setup script of the dummy application. From the application root it installs
# dependencies, prepares the database, removes logs and the tmp cache, and
# touches tmp/restart.txt.
# NOTE(review): the return values of the `system` calls are not checked, so a
# failing step does not stop the following steps.
+ require 'pathname'
3
+
4
+ # path to your application root.
5
+ APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
6
+
7
+ Dir.chdir APP_ROOT do
8
+ # This script is a starting point to setup your application.
9
+ # Add necessary setup steps to this file:
10
+
11
+ puts "== Installing dependencies =="
12
+ system "gem install bundler --conservative"
13
+ system "bundle check || bundle install"
14
+
15
+ # puts "\n== Copying sample files =="
16
+ # unless File.exist?("config/database.yml")
17
+ # system "cp config/database.yml.sample config/database.yml"
18
+ # end
19
+
20
+ puts "\n== Preparing database =="
21
+ system "bin/rake db:setup"
22
+
23
+ puts "\n== Removing old logs and tempfiles =="
24
+ system "rm -f log/*"
25
+ system "rm -rf tmp/cache"
26
+
27
+ puts "\n== Restarting application server =="
28
+ system "touch tmp/restart.txt"
29
+ end
@@ -0,0 +1,4 @@
1
+ # This file is used by Rack-based servers to start the application.
2
+
3
# Loads config/environment (path resolved relative to this file) and hands the
# Rails application to Rack via `run`.
+ require ::File.expand_path('../config/environment', __FILE__)
4
+ run Rails.application
@@ -0,0 +1,26 @@
1
# Application definition of the dummy test app. It loads config/boot and all
# Rails frameworks, requires the gems of the current Rails groups through
# Bundler, and then requires "aranha" explicitly.
+ require File.expand_path('../boot', __FILE__)
2
+
3
+ require 'rails/all'
4
+
5
+ Bundler.require(*Rails.groups)
6
+ require "aranha"
7
+
# The only active setting in this class is `raise_in_transactional_callbacks`;
# the time zone and i18n settings are left commented out.
8
+ module Dummy
9
+ class Application < Rails::Application
10
+ # Settings in config/environments/* take precedence over those specified here.
11
+ # Application configuration should go into files in config/initializers
12
+ # -- all .rb files in that directory are automatically loaded.
13
+
14
+ # Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
15
+ # Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
16
+ # config.time_zone = 'Central Time (US & Canada)'
17
+
18
+ # The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
19
+ # config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
20
+ # config.i18n.default_locale = :de
21
+
22
+ # Do not swallow errors in after_commit/after_rollback callbacks.
23
+ config.active_record.raise_in_transactional_callbacks = true
24
+ end
25
+ end
26
+