pageflow-chart 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +21 -0
  3. data/CHANGELOG.md +7 -0
  4. data/Gemfile +10 -0
  5. data/README.md +91 -0
  6. data/Rakefile +20 -0
  7. data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
  8. data/app/assets/images/pageflow/chart_pictogram.png +0 -0
  9. data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
  10. data/app/assets/images/pageflow/chart_sprite.png +0 -0
  11. data/app/assets/images/pageflow/ov-chart.png +0 -0
  12. data/app/assets/javascripts/pageflow/chart.js +5 -0
  13. data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
  14. data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
  15. data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
  16. data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
  17. data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
  18. data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
  19. data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
  20. data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
  21. data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
  22. data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
  23. data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
  24. data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
  25. data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
  26. data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
  27. data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
  28. data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
  29. data/app/controllers/pageflow/chart/application_controller.rb +6 -0
  30. data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
  31. data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
  32. data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
  33. data/app/models/pageflow/chart/scraped_site.rb +51 -0
  34. data/app/views/pageflow/chart/page.html +41 -0
  35. data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
  36. data/bin/rails +8 -0
  37. data/chart.gemspec +30 -0
  38. data/config/locales/de.yml +40 -0
  39. data/config/locales/en.yml +22 -0
  40. data/config/routes.rb +3 -0
  41. data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
  42. data/lib/pageflow/chart.rb +21 -0
  43. data/lib/pageflow/chart/configuration.rb +63 -0
  44. data/lib/pageflow/chart/downloader.rb +53 -0
  45. data/lib/pageflow/chart/engine.rb +17 -0
  46. data/lib/pageflow/chart/page_type.rb +15 -0
  47. data/lib/pageflow/chart/scraper.rb +107 -0
  48. data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
  49. data/spec/dummy/README.rdoc +28 -0
  50. data/spec/dummy/Rakefile +6 -0
  51. data/spec/dummy/app/assets/images/.keep +0 -0
  52. data/spec/dummy/app/assets/javascripts/application.js +13 -0
  53. data/spec/dummy/app/assets/stylesheets/application.css +13 -0
  54. data/spec/dummy/app/controllers/application_controller.rb +5 -0
  55. data/spec/dummy/app/controllers/concerns/.keep +0 -0
  56. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  57. data/spec/dummy/app/mailers/.keep +0 -0
  58. data/spec/dummy/app/models/.keep +0 -0
  59. data/spec/dummy/app/models/concerns/.keep +0 -0
  60. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  61. data/spec/dummy/bin/bundle +3 -0
  62. data/spec/dummy/bin/rails +4 -0
  63. data/spec/dummy/bin/rake +4 -0
  64. data/spec/dummy/config.ru +4 -0
  65. data/spec/dummy/config/application.rb +22 -0
  66. data/spec/dummy/config/boot.rb +5 -0
  67. data/spec/dummy/config/database.yml +25 -0
  68. data/spec/dummy/config/environment.rb +5 -0
  69. data/spec/dummy/config/environments/development.rb +29 -0
  70. data/spec/dummy/config/environments/production.rb +80 -0
  71. data/spec/dummy/config/environments/test.rb +36 -0
  72. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  73. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  74. data/spec/dummy/config/initializers/inflections.rb +16 -0
  75. data/spec/dummy/config/initializers/mime_types.rb +5 -0
  76. data/spec/dummy/config/initializers/secret_token.rb +12 -0
  77. data/spec/dummy/config/initializers/session_store.rb +3 -0
  78. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  79. data/spec/dummy/config/locales/en.yml +23 -0
  80. data/spec/dummy/config/routes.rb +4 -0
  81. data/spec/dummy/db/schema.rb +39 -0
  82. data/spec/dummy/lib/assets/.keep +0 -0
  83. data/spec/dummy/public/404.html +58 -0
  84. data/spec/dummy/public/422.html +58 -0
  85. data/spec/dummy/public/500.html +57 -0
  86. data/spec/dummy/public/favicon.ico +0 -0
  87. data/spec/factories/scraped_sites.rb +5 -0
  88. data/spec/fixtures/datawrapper.html +121 -0
  89. data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
  90. data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
  91. data/spec/pageflow/chart/downloader_spec.rb +90 -0
  92. data/spec/pageflow/chart/scraper_spec.rb +179 -0
  93. data/spec/requests/scraping_site_spec.rb +23 -0
  94. data/spec/spec_helper.rb +20 -0
  95. data/spec/support/factory_girl.rb +5 -0
  96. data/spec/support/html_fragment.rb +13 -0
  97. data/spec/support/paperclip.rb +11 -0
  98. data/spec/support/resque.rb +20 -0
  99. data/spec/support/webmock.rb +11 -0
  100. metadata +363 -0
@@ -0,0 +1,53 @@
1
+ require 'uri'
2
+ require 'open-uri'
3
+
4
+ module Pageflow
5
+ module Chart
6
+ class Downloader
7
+ attr_reader :options
8
+
9
+ def initialize(options = {})
10
+ @options = options
11
+ end
12
+
13
+ def load(url)
14
+ file = open(make_absolute(url))
15
+
16
+ begin
17
+ yield(file)
18
+ ensure
19
+ file.close
20
+ end
21
+ end
22
+
23
+ def load_all(urls, options = {})
24
+ file = Tempfile.new(['concatenation', options.fetch(:extension, 'txt')])
25
+ file.binmode
26
+
27
+ begin
28
+ urls.map do |url|
29
+ load(url) do |source|
30
+ while data = source.read(16 * 1024)
31
+ file.write(data)
32
+ end
33
+ end
34
+
35
+ file.write(options.fetch(:separator, "\n"))
36
+ end
37
+
38
+ file.rewind
39
+ yield(file)
40
+ ensure
41
+ file.close
42
+ file.unlink
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def make_absolute(url)
49
+ options[:base_url] ? URI.join(options[:base_url], url) : URI.parse(url)
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,17 @@
1
+ module Pageflow
2
+ module Chart
3
+ class Engine < Rails::Engine
4
+ isolate_namespace Pageflow::Chart
5
+
6
+ config.autoload_paths << File.join(config.root, 'lib')
7
+ config.assets.precompile += ['pageflow/chart/custom.css']
8
+
9
+ config.generators do |g|
10
+ g.test_framework :rspec,:fixture => false
11
+ g.fixture_replacement :factory_girl, :dir => 'spec/factories'
12
+ g.assets false
13
+ g.helper false
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,15 @@
1
+ module Pageflow
2
+ module Chart
3
+ class PageType < Pageflow::PageType
4
+ name 'chart'
5
+
6
+ def view_helpers
7
+ [ScrapedSitesHelper]
8
+ end
9
+
10
+ def json_seed_template
11
+ 'pageflow/chart/page_type.json.jbuilder'
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,107 @@
1
+ require 'nokogiri'
2
+
3
+ module Pageflow
4
+ module Chart
5
+ class Scraper
6
+ attr_reader :document, :options, :javascript_urls, :stylesheet_urls
7
+
8
+ def initialize(html, options = {})
9
+ @document = Nokogiri::HTML(html)
10
+ @options = options
11
+
12
+ parse
13
+ rewrite
14
+ end
15
+
16
+ def html
17
+ document.to_s
18
+ end
19
+
20
+ def csv_url
21
+ end
22
+
23
+ private
24
+
25
+ def parse
26
+ parse_javascript_urls
27
+ parse_stylesheet_urls
28
+ end
29
+
30
+ def parse_javascript_urls
31
+ @javascript_urls = filtered_script_tags_in_head.map do |tag|
32
+ tag[:src]
33
+ end
34
+ end
35
+
36
+ def parse_stylesheet_urls
37
+ @stylesheet_urls = css_link_tags.map do |tag|
38
+ tag[:href]
39
+ end
40
+ end
41
+
42
+ def rewrite
43
+ filter_inline_scripts
44
+ filter_by_selectors
45
+ combine_script_tags_in_head
46
+ combine_css_link_tags
47
+ end
48
+
49
+ def filter_inline_scripts
50
+ document.css('body script').each do |tag|
51
+ if blacklisted_inline_script?(tag)
52
+ tag.remove
53
+ end
54
+ end
55
+ end
56
+
57
+ def blacklisted_inline_script?(tag)
58
+ options.fetch(:inline_script_blacklist, []).any? do |r|
59
+ tag.content =~ r
60
+ end
61
+ end
62
+
63
+ def filter_by_selectors
64
+ options.fetch(:selector_blacklist, []).each do |selector|
65
+ document.css(selector).each(&:remove)
66
+ end
67
+ end
68
+
69
+ def combine_script_tags_in_head
70
+ script_tags_in_head.each(&:remove)
71
+
72
+ all_script_tag = Nokogiri::XML::Node.new('script', document)
73
+ all_script_tag[:src] = 'all.js'
74
+ all_script_tag[:type] = 'text/javascript'
75
+ document.at_css('head') << all_script_tag
76
+ end
77
+
78
+ def combine_css_link_tags
79
+ css_link_tags.each(&:remove)
80
+
81
+ all_css_link_tag = Nokogiri::XML::Node.new('link', document)
82
+ all_css_link_tag[:href] = 'all.css'
83
+ all_css_link_tag[:type] = 'text/css'
84
+ all_css_link_tag[:rel] = 'stylesheet'
85
+ document.at_css('head') << all_css_link_tag
86
+ end
87
+
88
+ def filtered_script_tags_in_head
89
+ script_tags_in_head.reject do |tag|
90
+ options.fetch(:head_script_blacklist, []).any? do |regexp|
91
+ tag[:src] =~ regexp
92
+ end
93
+ end
94
+ end
95
+
96
+ def script_tags_in_head
97
+ document.css('head script[src]')
98
+ end
99
+
100
+ def css_link_tags
101
+ document.css('head link').find_all do |tag|
102
+ tag[:type] == 'text/css'
103
+ end
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ module Pageflow
4
+ module Chart
5
+ describe ScrapedSitesController do
6
+ describe '#create' do
7
+ routes { Pageflow::Chart::Engine.routes }
8
+
9
+ it 'responds with success' do
10
+ post(:create, scraped_site: {url: "http://example.com/chart.html"}, format: 'json')
11
+
12
+ expect(response.status).to eq(201)
13
+ end
14
+
15
+ it 'creates scraped site' do
16
+ expect {
17
+ post(:create, scraped_site: {url: "http://example.com/chart.html"}, format: 'json')
18
+ }.to change { ScrapedSite.count }
19
+ end
20
+ end
21
+
22
+ describe '#show' do
23
+ routes { Pageflow::Chart::Engine.routes }
24
+
25
+ it 'responds with success' do
26
+ scraped_site = create(:scraped_site, state: 'unprocessed')
27
+
28
+ get(:show, id: scraped_site.id, format: 'json')
29
+
30
+ expect(response.status).to eq(200)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
+ # Add your own tasks in files placed in lib/tasks ending in .rake,
2
+ # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
+
4
+ require File.expand_path('../config/application', __FILE__)
5
+
6
+ Dummy::Application.load_tasks
File without changes
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,13 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the top of the
9
+ * compiled file, but it's generally better to create a new file per style scope.
10
+ *
11
+ *= require_self
12
+ *= require_tree .
13
+ */
@@ -0,0 +1,5 @@
1
+ class ApplicationController < ActionController::Base
2
+ # Prevent CSRF attacks by raising an exception.
3
+ # For APIs, you may want to use :null_session instead.
4
+ protect_from_forgery with: :exception
5
+ end
File without changes
@@ -0,0 +1,2 @@
1
+ module ApplicationHelper
2
+ end
File without changes
File without changes
File without changes
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
6
+ <%= javascript_include_tag "application", "data-turbolinks-track" => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ APP_PATH = File.expand_path('../../config/application', __FILE__)
3
+ require_relative '../config/boot'
4
+ require 'rails/commands'
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../config/boot'
3
+ require 'rake'
4
+ Rake.application.run
@@ -0,0 +1,4 @@
1
+ # This file is used by Rack-based servers to start the application.
2
+
3
+ require ::File.expand_path('../config/environment', __FILE__)
4
+ run Rails.application
@@ -0,0 +1,22 @@
1
+ require File.expand_path('../boot', __FILE__)
2
+
3
+ require 'rails/all'
4
+
5
+ Bundler.require(*Rails.groups)
6
+ require "pageflow/chart"
7
+
8
+ module Dummy
9
+ class Application < Rails::Application
10
+ # Settings in config/environments/* take precedence over those specified here.
11
+ # Application configuration should go into files in config/initializers
12
+ # -- all .rb files in that directory are automatically loaded.
13
+
14
+ # Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
15
+ # Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
16
+ # config.time_zone = 'Central Time (US & Canada)'
17
+
18
+ # The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
19
+ # config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
20
+ # config.i18n.default_locale = :de
21
+ end
22
+ end
@@ -0,0 +1,5 @@
1
+ # Set up gems listed in the Gemfile.
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../../Gemfile', __FILE__)
3
+
4
+ require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
5
+ $LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
@@ -0,0 +1,25 @@
1
+ # SQLite version 3.x
2
+ # gem install sqlite3
3
+ #
4
+ # Ensure the SQLite 3 gem is defined in your Gemfile
5
+ # gem 'sqlite3'
6
+ development:
7
+ adapter: sqlite3
8
+ database: db/development.sqlite3
9
+ pool: 5
10
+ timeout: 5000
11
+
12
+ # Warning: The database defined as "test" will be erased and
13
+ # re-generated from your development database when you run "rake".
14
+ # Do not set this db to the same as development or production.
15
+ test:
16
+ adapter: sqlite3
17
+ database: db/test.sqlite3
18
+ pool: 5
19
+ timeout: 5000
20
+
21
+ production:
22
+ adapter: sqlite3
23
+ database: db/production.sqlite3
24
+ pool: 5
25
+ timeout: 5000
@@ -0,0 +1,5 @@
1
+ # Load the Rails application.
2
+ require File.expand_path('../application', __FILE__)
3
+
4
+ # Initialize the Rails application.
5
+ Dummy::Application.initialize!
@@ -0,0 +1,29 @@
1
+ Dummy::Application.configure do
2
+ # Settings specified here will take precedence over those in config/application.rb.
3
+
4
+ # In the development environment your application's code is reloaded on
5
+ # every request. This slows down response time but is perfect for development
6
+ # since you don't have to restart the web server when you make code changes.
7
+ config.cache_classes = false
8
+
9
+ # Do not eager load code on boot.
10
+ config.eager_load = false
11
+
12
+ # Show full error reports and disable caching.
13
+ config.consider_all_requests_local = true
14
+ config.action_controller.perform_caching = false
15
+
16
+ # Don't care if the mailer can't send.
17
+ # config.action_mailer.raise_delivery_errors = false
18
+
19
+ # Print deprecation notices to the Rails logger.
20
+ config.active_support.deprecation = :log
21
+
22
+ # Raise an error on page load if there are pending migrations
23
+ config.active_record.migration_error = :page_load
24
+
25
+ # Debug mode disables concatenation and preprocessing of assets.
26
+ # This option may cause significant delays in view rendering with a large
27
+ # number of complex assets.
28
+ config.assets.debug = true
29
+ end