pageflow-chart 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +15 -0
  2. data/.gitignore +21 -0
  3. data/CHANGELOG.md +7 -0
  4. data/Gemfile +10 -0
  5. data/README.md +91 -0
  6. data/Rakefile +20 -0
  7. data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
  8. data/app/assets/images/pageflow/chart_pictogram.png +0 -0
  9. data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
  10. data/app/assets/images/pageflow/chart_sprite.png +0 -0
  11. data/app/assets/images/pageflow/ov-chart.png +0 -0
  12. data/app/assets/javascripts/pageflow/chart.js +5 -0
  13. data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
  14. data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
  15. data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
  16. data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
  17. data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
  18. data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
  19. data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
  20. data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
  21. data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
  22. data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
  23. data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
  24. data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
  25. data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
  26. data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
  27. data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
  28. data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
  29. data/app/controllers/pageflow/chart/application_controller.rb +6 -0
  30. data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
  31. data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
  32. data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
  33. data/app/models/pageflow/chart/scraped_site.rb +51 -0
  34. data/app/views/pageflow/chart/page.html +41 -0
  35. data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
  36. data/bin/rails +8 -0
  37. data/chart.gemspec +30 -0
  38. data/config/locales/de.yml +40 -0
  39. data/config/locales/en.yml +22 -0
  40. data/config/routes.rb +3 -0
  41. data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
  42. data/lib/pageflow/chart.rb +21 -0
  43. data/lib/pageflow/chart/configuration.rb +63 -0
  44. data/lib/pageflow/chart/downloader.rb +53 -0
  45. data/lib/pageflow/chart/engine.rb +17 -0
  46. data/lib/pageflow/chart/page_type.rb +15 -0
  47. data/lib/pageflow/chart/scraper.rb +107 -0
  48. data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
  49. data/spec/dummy/README.rdoc +28 -0
  50. data/spec/dummy/Rakefile +6 -0
  51. data/spec/dummy/app/assets/images/.keep +0 -0
  52. data/spec/dummy/app/assets/javascripts/application.js +13 -0
  53. data/spec/dummy/app/assets/stylesheets/application.css +13 -0
  54. data/spec/dummy/app/controllers/application_controller.rb +5 -0
  55. data/spec/dummy/app/controllers/concerns/.keep +0 -0
  56. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  57. data/spec/dummy/app/mailers/.keep +0 -0
  58. data/spec/dummy/app/models/.keep +0 -0
  59. data/spec/dummy/app/models/concerns/.keep +0 -0
  60. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  61. data/spec/dummy/bin/bundle +3 -0
  62. data/spec/dummy/bin/rails +4 -0
  63. data/spec/dummy/bin/rake +4 -0
  64. data/spec/dummy/config.ru +4 -0
  65. data/spec/dummy/config/application.rb +22 -0
  66. data/spec/dummy/config/boot.rb +5 -0
  67. data/spec/dummy/config/database.yml +25 -0
  68. data/spec/dummy/config/environment.rb +5 -0
  69. data/spec/dummy/config/environments/development.rb +29 -0
  70. data/spec/dummy/config/environments/production.rb +80 -0
  71. data/spec/dummy/config/environments/test.rb +36 -0
  72. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  73. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  74. data/spec/dummy/config/initializers/inflections.rb +16 -0
  75. data/spec/dummy/config/initializers/mime_types.rb +5 -0
  76. data/spec/dummy/config/initializers/secret_token.rb +12 -0
  77. data/spec/dummy/config/initializers/session_store.rb +3 -0
  78. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  79. data/spec/dummy/config/locales/en.yml +23 -0
  80. data/spec/dummy/config/routes.rb +4 -0
  81. data/spec/dummy/db/schema.rb +39 -0
  82. data/spec/dummy/lib/assets/.keep +0 -0
  83. data/spec/dummy/public/404.html +58 -0
  84. data/spec/dummy/public/422.html +58 -0
  85. data/spec/dummy/public/500.html +57 -0
  86. data/spec/dummy/public/favicon.ico +0 -0
  87. data/spec/factories/scraped_sites.rb +5 -0
  88. data/spec/fixtures/datawrapper.html +121 -0
  89. data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
  90. data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
  91. data/spec/pageflow/chart/downloader_spec.rb +90 -0
  92. data/spec/pageflow/chart/scraper_spec.rb +179 -0
  93. data/spec/requests/scraping_site_spec.rb +23 -0
  94. data/spec/spec_helper.rb +20 -0
  95. data/spec/support/factory_girl.rb +5 -0
  96. data/spec/support/html_fragment.rb +13 -0
  97. data/spec/support/paperclip.rb +11 -0
  98. data/spec/support/resque.rb +20 -0
  99. data/spec/support/webmock.rb +11 -0
  100. metadata +363 -0
@@ -0,0 +1,53 @@
require 'uri'
require 'open-uri'
require 'tempfile'

module Pageflow
  module Chart
    # Fetches remote resources via open-uri and hands them to the caller
    # as IO objects that are cleaned up once the caller's block returns.
    #
    # Supported options:
    #
    # * +:base_url+ - when present, every URL passed to #load or
    #   #load_all is resolved against it with +URI.join+.
    class Downloader
      # Number of bytes copied per read while concatenating downloads.
      CHUNK_SIZE = 16 * 1024

      attr_reader :options

      def initialize(options = {})
        @options = options
      end

      # Opens +url+ and yields the resulting IO. The IO is closed when
      # the block returns or raises.
      #
      # Raises ArgumentError when the resolved URI cannot be opened
      # (for example a relative URL without a +:base_url+).
      def load(url)
        uri = make_absolute(url)

        # Kernel#open stopped accepting URIs in Ruby 3.0, so call the
        # open method that open-uri mixes into the URI object instead.
        unless uri.respond_to?(:open)
          raise ArgumentError, "Cannot download #{url.inspect}: not an openable URL"
        end

        file = uri.open

        begin
          yield(file)
        ensure
          file.close
        end
      end

      # Downloads every entry of +urls+ in order, writes the contents
      # into one binary temp file, and appends the separator after each
      # download. The rewound temp file is yielded; it is closed and
      # deleted when the block returns or raises.
      #
      # Options:
      #
      # * +:extension+ - suffix of the temp file name (default '.txt').
      # * +:separator+ - string written after each download (default "\n").
      #
      # Returns the value of the block.
      def load_all(urls, options = {})
        file = Tempfile.new(['concatenation', options.fetch(:extension, '.txt')])
        file.binmode
        separator = options.fetch(:separator, "\n")

        begin
          urls.each do |url|
            load(url) do |source|
              while (data = source.read(CHUNK_SIZE))
                file.write(data)
              end
            end

            file.write(separator)
          end

          file.rewind
          yield(file)
        ensure
          file.close
          file.unlink
        end
      end

      private

      # Resolves +url+ against the configured base URL, or parses it as
      # is when no base URL was given.
      def make_absolute(url)
        options[:base_url] ? URI.join(options[:base_url], url) : URI.parse(url)
      end
    end
  end
end
@@ -0,0 +1,17 @@
module Pageflow
  module Chart
    # Rails engine of the chart plugin, isolated in the Pageflow::Chart
    # namespace.
    class Engine < Rails::Engine
      isolate_namespace Pageflow::Chart

      # Make the engine's lib directory autoloadable and precompile the
      # custom chart stylesheet.
      config.autoload_paths << File.join(config.root, 'lib')
      config.assets.precompile += ['pageflow/chart/custom.css']

      # Generator defaults: RSpec without fixtures, factory_girl
      # factories under spec/factories, no asset or helper stubs.
      config.generators do |generators|
        generators.test_framework :rspec, fixture: false
        generators.fixture_replacement :factory_girl, dir: 'spec/factories'
        generators.assets false
        generators.helper false
      end
    end
  end
end
@@ -0,0 +1,15 @@
module Pageflow
  module Chart
    # Page type definition for chart pages, declared under the name
    # 'chart'.
    class PageType < Pageflow::PageType
      name 'chart'

      # Helper modules associated with this page type.
      def view_helpers
        [ScrapedSitesHelper]
      end

      # Path of the jbuilder template associated with this page type.
      def json_seed_template
        'pageflow/chart/page_type.json.jbuilder'
      end
    end
  end
end
@@ -0,0 +1,107 @@
require 'nokogiri'

module Pageflow
  module Chart
    # Parses an HTML page, records the script and stylesheet URLs found
    # in its head, and rewrites the document so that all of them are
    # replaced by a single 'all.js' script tag and a single 'all.css'
    # link tag.
    #
    # Supported options:
    #
    # * +:inline_script_blacklist+ - regexps; script tags inside body
    #   whose content matches any of them are removed.
    # * +:selector_blacklist+ - CSS selectors; all matching nodes are
    #   removed.
    # * +:head_script_blacklist+ - regexps; head scripts whose +src+
    #   matches any of them are left out of #javascript_urls.
    class Scraper
      attr_reader :document, :options, :javascript_urls, :stylesheet_urls

      # Parses +html+ and immediately collects URLs and rewrites the
      # document.
      def initialize(html, options = {})
        @document = Nokogiri::HTML(html)
        @options = options

        parse
        rewrite
      end

      # The rewritten document serialized as a string.
      def html
        document.to_s
      end

      # Not implemented; always returns nil.
      def csv_url
      end

      private

      # URLs must be collected before #rewrite removes the tags they
      # are read from.
      def parse
        parse_javascript_urls
        parse_stylesheet_urls
      end

      def parse_javascript_urls
        @javascript_urls = filtered_script_tags_in_head.map do |tag|
          tag[:src]
        end
      end

      def parse_stylesheet_urls
        @stylesheet_urls = css_link_tags.map do |tag|
          tag[:href]
        end
      end

      def rewrite
        filter_inline_scripts
        filter_by_selectors
        combine_script_tags_in_head
        combine_css_link_tags
      end

      def filter_inline_scripts
        document.css('body script').each do |tag|
          tag.remove if blacklisted_inline_script?(tag)
        end
      end

      def blacklisted_inline_script?(tag)
        options.fetch(:inline_script_blacklist, []).any? do |regexp|
          tag.content =~ regexp
        end
      end

      def filter_by_selectors
        options.fetch(:selector_blacklist, []).each do |selector|
          document.css(selector).each(&:remove)
        end
      end

      # Removes every head script with a src attribute (blacklisted or
      # not) and appends one script tag pointing at 'all.js'.
      def combine_script_tags_in_head
        script_tags_in_head.each(&:remove)

        all_script_tag = Nokogiri::XML::Node.new('script', document)
        all_script_tag[:src] = 'all.js'
        all_script_tag[:type] = 'text/javascript'
        head_tag << all_script_tag
      end

      # Removes every CSS link tag from head and appends one link tag
      # pointing at 'all.css'.
      def combine_css_link_tags
        css_link_tags.each(&:remove)

        all_css_link_tag = Nokogiri::XML::Node.new('link', document)
        all_css_link_tag[:href] = 'all.css'
        all_css_link_tag[:type] = 'text/css'
        all_css_link_tag[:rel] = 'stylesheet'
        head_tag << all_css_link_tag
      end

      # The document's head element. Input without a head (for example
      # an HTML fragment) would otherwise make the combine steps fail
      # on nil, so a head is created on demand.
      def head_tag
        document.at_css('head') || insert_head_tag
      end

      # Creates a head element as the first child of the root element,
      # creating an html root first if the document is empty. Returns
      # the inserted head node.
      def insert_head_tag
        root = document.root
        root ||= (document.root = Nokogiri::XML::Node.new('html', document))

        head = Nokogiri::XML::Node.new('head', document)
        first_child = root.children.first

        if first_child
          first_child.add_previous_sibling(head)
        else
          root.add_child(head)
        end
      end

      def filtered_script_tags_in_head
        script_tags_in_head.reject do |tag|
          options.fetch(:head_script_blacklist, []).any? do |regexp|
            tag[:src] =~ regexp
          end
        end
      end

      def script_tags_in_head
        document.css('head script[src]')
      end

      # Only link tags that declare type="text/css" are considered.
      def css_link_tags
        document.css('head link').select do |tag|
          tag[:type] == 'text/css'
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
require 'spec_helper'

module Pageflow
  module Chart
    describe ScrapedSitesController do
      describe '#create' do
        routes { Pageflow::Chart::Engine.routes }

        # Issues the JSON create request shared by the examples below.
        def post_create
          post(:create, scraped_site: {url: "http://example.com/chart.html"}, format: 'json')
        end

        it 'responds with success' do
          post_create

          expect(response.status).to eq(201)
        end

        it 'creates scraped site' do
          expect { post_create }.to change { ScrapedSite.count }
        end
      end

      describe '#show' do
        routes { Pageflow::Chart::Engine.routes }

        it 'responds with success' do
          site = create(:scraped_site, state: 'unprocessed')

          get(:show, id: site.id, format: 'json')

          expect(response.status).to eq(200)
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
+ # Add your own tasks in files placed in lib/tasks ending in .rake,
2
+ # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
+
4
+ require File.expand_path('../config/application', __FILE__)
5
+
6
+ Dummy::Application.load_tasks
File without changes
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,13 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the top of the
9
+ * compiled file, but it's generally better to create a new file per style scope.
10
+ *
11
+ *= require_self
12
+ *= require_tree .
13
+ */
@@ -0,0 +1,5 @@
1
+ class ApplicationController < ActionController::Base
2
+ # Prevent CSRF attacks by raising an exception.
3
+ # For APIs, you may want to use :null_session instead.
4
+ protect_from_forgery with: :exception
5
+ end
File without changes
@@ -0,0 +1,2 @@
1
+ module ApplicationHelper
2
+ end
File without changes
File without changes
File without changes
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
6
+ <%= javascript_include_tag "application", "data-turbolinks-track" => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ APP_PATH = File.expand_path('../../config/application', __FILE__)
3
+ require_relative '../config/boot'
4
+ require 'rails/commands'
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../config/boot'
3
+ require 'rake'
4
+ Rake.application.run
@@ -0,0 +1,4 @@
1
+ # This file is used by Rack-based servers to start the application.
2
+
3
+ require ::File.expand_path('../config/environment', __FILE__)
4
+ run Rails.application
@@ -0,0 +1,22 @@
1
+ require File.expand_path('../boot', __FILE__)
2
+
3
+ require 'rails/all'
4
+
5
+ Bundler.require(*Rails.groups)
6
+ require "pageflow/chart"
7
+
8
+ module Dummy
9
+ class Application < Rails::Application
10
+ # Settings in config/environments/* take precedence over those specified here.
11
+ # Application configuration should go into files in config/initializers
12
+ # -- all .rb files in that directory are automatically loaded.
13
+
14
+ # Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
15
+ # Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
16
+ # config.time_zone = 'Central Time (US & Canada)'
17
+
18
+ # The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
19
+ # config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
20
+ # config.i18n.default_locale = :de
21
+ end
22
+ end
@@ -0,0 +1,5 @@
# Set up gems listed in the Gemfile.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../../../Gemfile', __FILE__)

# File.exists? was deprecated and removed in Ruby 3.2; File.exist? is the
# supported spelling on every Ruby version.
require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])

# Make the gem's own lib directory loadable from the dummy application.
$LOAD_PATH.unshift File.expand_path('../../../../lib', __FILE__)
@@ -0,0 +1,25 @@
1
+ # SQLite version 3.x
2
+ # gem install sqlite3
3
+ #
4
+ # Ensure the SQLite 3 gem is defined in your Gemfile
5
+ # gem 'sqlite3'
6
+ development:
7
+ adapter: sqlite3
8
+ database: db/development.sqlite3
9
+ pool: 5
10
+ timeout: 5000
11
+
12
+ # Warning: The database defined as "test" will be erased and
13
+ # re-generated from your development database when you run "rake".
14
+ # Do not set this db to the same as development or production.
15
+ test:
16
+ adapter: sqlite3
17
+ database: db/test.sqlite3
18
+ pool: 5
19
+ timeout: 5000
20
+
21
+ production:
22
+ adapter: sqlite3
23
+ database: db/production.sqlite3
24
+ pool: 5
25
+ timeout: 5000
@@ -0,0 +1,5 @@
1
+ # Load the Rails application.
2
+ require File.expand_path('../application', __FILE__)
3
+
4
+ # Initialize the Rails application.
5
+ Dummy::Application.initialize!
@@ -0,0 +1,29 @@
1
+ Dummy::Application.configure do
2
+ # Settings specified here will take precedence over those in config/application.rb.
3
+
4
+ # In the development environment your application's code is reloaded on
5
+ # every request. This slows down response time but is perfect for development
6
+ # since you don't have to restart the web server when you make code changes.
7
+ config.cache_classes = false
8
+
9
+ # Do not eager load code on boot.
10
+ config.eager_load = false
11
+
12
+ # Show full error reports and disable caching.
13
+ config.consider_all_requests_local = true
14
+ config.action_controller.perform_caching = false
15
+
16
+ # Don't care if the mailer can't send.
17
+ # config.action_mailer.raise_delivery_errors = false
18
+
19
+ # Print deprecation notices to the Rails logger.
20
+ config.active_support.deprecation = :log
21
+
22
+ # Raise an error on page load if there are pending migrations
23
+ config.active_record.migration_error = :page_load
24
+
25
+ # Debug mode disables concatenation and preprocessing of assets.
26
+ # This option may cause significant delays in view rendering with a large
27
+ # number of complex assets.
28
+ config.assets.debug = true
29
+ end