pageflow-chart 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +21 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +10 -0
- data/README.md +91 -0
- data/Rakefile +20 -0
- data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
- data/app/assets/images/pageflow/chart_sprite.png +0 -0
- data/app/assets/images/pageflow/ov-chart.png +0 -0
- data/app/assets/javascripts/pageflow/chart.js +5 -0
- data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
- data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
- data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
- data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
- data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
- data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
- data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
- data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
- data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
- data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
- data/app/controllers/pageflow/chart/application_controller.rb +6 -0
- data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
- data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
- data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
- data/app/models/pageflow/chart/scraped_site.rb +51 -0
- data/app/views/pageflow/chart/page.html +41 -0
- data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
- data/bin/rails +8 -0
- data/chart.gemspec +30 -0
- data/config/locales/de.yml +40 -0
- data/config/locales/en.yml +22 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
- data/lib/pageflow/chart.rb +21 -0
- data/lib/pageflow/chart/configuration.rb +63 -0
- data/lib/pageflow/chart/downloader.rb +53 -0
- data/lib/pageflow/chart/engine.rb +17 -0
- data/lib/pageflow/chart/page_type.rb +15 -0
- data/lib/pageflow/chart/scraper.rb +107 -0
- data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
- data/spec/dummy/README.rdoc +28 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/app/assets/images/.keep +0 -0
- data/spec/dummy/app/assets/javascripts/application.js +13 -0
- data/spec/dummy/app/assets/stylesheets/application.css +13 -0
- data/spec/dummy/app/controllers/application_controller.rb +5 -0
- data/spec/dummy/app/controllers/concerns/.keep +0 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/mailers/.keep +0 -0
- data/spec/dummy/app/models/.keep +0 -0
- data/spec/dummy/app/models/concerns/.keep +0 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/config/application.rb +22 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +29 -0
- data/spec/dummy/config/environments/production.rb +80 -0
- data/spec/dummy/config/environments/test.rb +36 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/spec/dummy/config/initializers/inflections.rb +16 -0
- data/spec/dummy/config/initializers/mime_types.rb +5 -0
- data/spec/dummy/config/initializers/secret_token.rb +12 -0
- data/spec/dummy/config/initializers/session_store.rb +3 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +23 -0
- data/spec/dummy/config/routes.rb +4 -0
- data/spec/dummy/db/schema.rb +39 -0
- data/spec/dummy/lib/assets/.keep +0 -0
- data/spec/dummy/public/404.html +58 -0
- data/spec/dummy/public/422.html +58 -0
- data/spec/dummy/public/500.html +57 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/factories/scraped_sites.rb +5 -0
- data/spec/fixtures/datawrapper.html +121 -0
- data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
- data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
- data/spec/pageflow/chart/downloader_spec.rb +90 -0
- data/spec/pageflow/chart/scraper_spec.rb +179 -0
- data/spec/requests/scraping_site_spec.rb +23 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/html_fragment.rb +13 -0
- data/spec/support/paperclip.rb +11 -0
- data/spec/support/resque.rb +20 -0
- data/spec/support/webmock.rb +11 -0
- metadata +363 -0
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module Pageflow
  module Chart
    # Downloads remote resources with open-uri and can concatenate several
    # of them into one temporary file.
    #
    # Recognized constructor options:
    #   :base_url - when set, every URL passed to #load / #load_all is
    #               resolved against it with URI.join; otherwise the URL
    #               is parsed as given.
    class Downloader
      attr_reader :options

      # @param options [Hash] see the class comment for recognized keys
      def initialize(options = {})
        @options = options
      end

      # Opens +url+ and yields the resulting IO-like object to the block.
      #
      # URI#open (provided by open-uri for the URI classes it supports) is
      # used instead of Kernel#open: Kernel#open stopped accepting URIs in
      # Ruby 3.0. The block form closes the stream once the block returns
      # or raises.
      #
      # @param url [String] absolute URL, or one relative to options[:base_url]
      # @yield [file] the opened stream
      # @return whatever the block returns
      def load(url)
        make_absolute(url).open { |file| yield(file) }
      end

      # Downloads every URL in +urls+ in order, writes the bodies into a
      # single binary temp file (each body followed by the separator),
      # rewinds the file and yields it. The temp file is closed and deleted
      # when the block returns or raises.
      #
      # @param urls [Enumerable<String>] URLs to download, in output order
      # @param options [Hash]
      #   :extension - suffix for the temp file name, including the leading
      #                dot (default '.txt')
      #   :separator - string written after each downloaded body
      #                (default "\n")
      # @yield [file] the rewound temp file holding the concatenation
      # @return whatever the block returns
      def load_all(urls, options = {})
        separator = options.fetch(:separator, "\n")
        # Tempfile appends the suffix verbatim, so it has to carry the dot.
        file = Tempfile.new(['concatenation', options.fetch(:extension, '.txt')])
        file.binmode

        begin
          urls.each do |url|
            load(url) { |source| IO.copy_stream(source, file) }
            file.write(separator)
          end

          file.rewind
          yield(file)
        ensure
          file.close
          file.unlink
        end
      end

      private

      # Resolves +url+ against options[:base_url] when one is configured,
      # otherwise parses it unchanged. Returns a URI object.
      def make_absolute(url)
        base_url = options[:base_url]
        base_url ? URI.join(base_url, url) : URI.parse(url)
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Pageflow
  module Chart
    # Rails engine for the chart plugin, namespaced under Pageflow::Chart.
    class Engine < Rails::Engine
      isolate_namespace Pageflow::Chart

      # Make the engine's lib directory autoloadable.
      config.autoload_paths << File.join(config.root, 'lib')

      # custom.css is added to the precompile list explicitly.
      config.assets.precompile += %w[pageflow/chart/custom.css]

      # Generator defaults: RSpec without fixtures, factories under
      # spec/factories, no asset or helper stubs.
      config.generators do |generators|
        generators.test_framework(:rspec, :fixture => false)
        generators.fixture_replacement(:factory_girl, :dir => 'spec/factories')
        generators.assets(false)
        generators.helper(false)
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # Parses an HTML page, records the URLs of the external scripts and
    # stylesheets referenced from its head, and rewrites the document so
    # those references are replaced by a single `all.js` script tag and a
    # single `all.css` link tag.
    #
    # Recognized options (each defaults to an empty list):
    #   :inline_script_blacklist - regexps; a script inside body whose
    #                              content matches any of them is removed
    #   :selector_blacklist      - CSS selectors; matching nodes are removed
    #   :head_script_blacklist   - regexps; head scripts whose src matches
    #                              any of them are left out of javascript_urls
    class Scraper
      attr_reader :document, :options, :javascript_urls, :stylesheet_urls

      # @param html the markup handed to Nokogiri::HTML
      # @param options [Hash] see the class comment
      def initialize(html, options = {})
        @document = Nokogiri::HTML(html)
        @options = options

        # URLs are collected before the document is rewritten.
        parse
        rewrite
      end

      # Serialized form of the (rewritten) document.
      def html
        document.to_s
      end

      # Empty body: currently always returns nil.
      def csv_url
      end

      private

      def parse
        parse_javascript_urls
        parse_stylesheet_urls
      end

      # Collects the src of every non-blacklisted head script.
      def parse_javascript_urls
        @javascript_urls = filtered_script_tags_in_head.map { |script| script[:src] }
      end

      # Collects the href of every text/css link in head.
      def parse_stylesheet_urls
        @stylesheet_urls = css_link_tags.map { |link| link[:href] }
      end

      def rewrite
        filter_inline_scripts
        filter_by_selectors
        combine_script_tags_in_head
        combine_css_link_tags
      end

      # Drops scripts inside body whose content matches the blacklist.
      def filter_inline_scripts
        document.css('body script').each do |script|
          script.remove if blacklisted_inline_script?(script)
        end
      end

      def blacklisted_inline_script?(tag)
        patterns = options.fetch(:inline_script_blacklist, [])
        patterns.any? { |pattern| tag.content =~ pattern }
      end

      # Drops every node matched by a blacklisted CSS selector.
      def filter_by_selectors
        selectors = options.fetch(:selector_blacklist, [])

        selectors.each do |selector|
          document.css(selector).each { |node| node.remove }
        end
      end

      # Replaces all head scripts that have a src with one `all.js` tag.
      def combine_script_tags_in_head
        script_tags_in_head.each { |script| script.remove }

        append_to_head('script', :src => 'all.js', :type => 'text/javascript')
      end

      # Replaces all text/css links in head with one `all.css` link.
      def combine_css_link_tags
        css_link_tags.each { |link| link.remove }

        append_to_head('link', :href => 'all.css', :type => 'text/css', :rel => 'stylesheet')
      end

      # Builds a node with the given attributes (set in the order given)
      # and appends it to the document's head element.
      def append_to_head(node_name, attributes)
        node = Nokogiri::XML::Node.new(node_name, document)
        attributes.each { |key, value| node[key] = value }
        document.at_css('head') << node
      end

      # Head scripts minus those whose src matches :head_script_blacklist.
      def filtered_script_tags_in_head
        patterns = options.fetch(:head_script_blacklist, [])

        script_tags_in_head.reject do |script|
          patterns.any? { |pattern| script[:src] =~ pattern }
        end
      end

      def script_tags_in_head
        document.css('head script[src]')
      end

      # Only links that declare type="text/css" are treated as stylesheets.
      def css_link_tags
        document.css('head link').select { |link| link[:type] == 'text/css' }
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    describe ScrapedSitesController do
      describe '#create' do
        routes { Pageflow::Chart::Engine.routes }

        # Request parameters shared by both create examples.
        let(:create_params) do
          {scraped_site: {url: "http://example.com/chart.html"}, format: 'json'}
        end

        it 'responds with success' do
          post(:create, create_params)

          expect(response.status).to eq(201)
        end

        it 'creates scraped site' do
          expect { post(:create, create_params) }.to change { ScrapedSite.count }
        end
      end

      describe '#show' do
        routes { Pageflow::Chart::Engine.routes }

        # A site that has not been processed yet.
        let(:scraped_site) { create(:scraped_site, state: 'unprocessed') }

        it 'responds with success' do
          get(:show, id: scraped_site.id, format: 'json')

          expect(response.status).to eq(200)
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
== README
|
2
|
+
|
3
|
+
This README would normally document whatever steps are necessary to get the
|
4
|
+
application up and running.
|
5
|
+
|
6
|
+
Things you may want to cover:
|
7
|
+
|
8
|
+
* Ruby version
|
9
|
+
|
10
|
+
* System dependencies
|
11
|
+
|
12
|
+
* Configuration
|
13
|
+
|
14
|
+
* Database creation
|
15
|
+
|
16
|
+
* Database initialization
|
17
|
+
|
18
|
+
* How to run the test suite
|
19
|
+
|
20
|
+
* Services (job queues, cache servers, search engines, etc.)
|
21
|
+
|
22
|
+
* Deployment instructions
|
23
|
+
|
24
|
+
* ...
|
25
|
+
|
26
|
+
|
27
|
+
Please feel free to use a different markup language if you do not plan to run
|
28
|
+
<tt>rake doc:app</tt>.
|
data/spec/dummy/Rakefile
ADDED
File without changes
|
@@ -0,0 +1,13 @@
|
|
1
|
+
// This is a manifest file that'll be compiled into application.js, which will include all the files
|
2
|
+
// listed below.
|
3
|
+
//
|
4
|
+
// Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
|
5
|
+
// or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
|
6
|
+
//
|
7
|
+
// It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
|
8
|
+
// compiled file.
|
9
|
+
//
|
10
|
+
// Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
|
11
|
+
// about supported directives.
|
12
|
+
//
|
13
|
+
//= require_tree .
|
@@ -0,0 +1,13 @@
|
|
1
|
+
/*
|
2
|
+
* This is a manifest file that'll be compiled into application.css, which will include all the files
|
3
|
+
* listed below.
|
4
|
+
*
|
5
|
+
* Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
|
6
|
+
* or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
|
7
|
+
*
|
8
|
+
* You're free to add application-wide styles to this file and they'll appear at the top of the
|
9
|
+
* compiled file, but it's generally better to create a new file per style scope.
|
10
|
+
*
|
11
|
+
*= require_self
|
12
|
+
*= require_tree .
|
13
|
+
*/
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,14 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>Dummy</title>
|
5
|
+
<%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
|
6
|
+
<%= javascript_include_tag "application", "data-turbolinks-track" => true %>
|
7
|
+
<%= csrf_meta_tags %>
|
8
|
+
</head>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<%= yield %>
|
12
|
+
|
13
|
+
</body>
|
14
|
+
</html>
|
data/spec/dummy/bin/rake
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.expand_path('../boot', __FILE__)
|
2
|
+
|
3
|
+
require 'rails/all'
|
4
|
+
|
5
|
+
Bundler.require(*Rails.groups)
|
6
|
+
require "pageflow/chart"
|
7
|
+
|
8
|
+
module Dummy
|
9
|
+
class Application < Rails::Application
|
10
|
+
# Settings in config/environments/* take precedence over those specified here.
|
11
|
+
# Application configuration should go into files in config/initializers
|
12
|
+
# -- all .rb files in that directory are automatically loaded.
|
13
|
+
|
14
|
+
# Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
|
15
|
+
# Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
|
16
|
+
# config.time_zone = 'Central Time (US & Canada)'
|
17
|
+
|
18
|
+
# The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
|
19
|
+
# config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
|
20
|
+
# config.i18n.default_locale = :de
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# SQLite version 3.x
|
2
|
+
# gem install sqlite3
|
3
|
+
#
|
4
|
+
# Ensure the SQLite 3 gem is defined in your Gemfile
|
5
|
+
# gem 'sqlite3'
|
6
|
+
development:
|
7
|
+
adapter: sqlite3
|
8
|
+
database: db/development.sqlite3
|
9
|
+
pool: 5
|
10
|
+
timeout: 5000
|
11
|
+
|
12
|
+
# Warning: The database defined as "test" will be erased and
|
13
|
+
# re-generated from your development database when you run "rake".
|
14
|
+
# Do not set this db to the same as development or production.
|
15
|
+
test:
|
16
|
+
adapter: sqlite3
|
17
|
+
database: db/test.sqlite3
|
18
|
+
pool: 5
|
19
|
+
timeout: 5000
|
20
|
+
|
21
|
+
production:
|
22
|
+
adapter: sqlite3
|
23
|
+
database: db/production.sqlite3
|
24
|
+
pool: 5
|
25
|
+
timeout: 5000
|
@@ -0,0 +1,29 @@
|
|
1
|
+
Dummy::Application.configure do
|
2
|
+
# Settings specified here will take precedence over those in config/application.rb.
|
3
|
+
|
4
|
+
# In the development environment your application's code is reloaded on
|
5
|
+
# every request. This slows down response time but is perfect for development
|
6
|
+
# since you don't have to restart the web server when you make code changes.
|
7
|
+
config.cache_classes = false
|
8
|
+
|
9
|
+
# Do not eager load code on boot.
|
10
|
+
config.eager_load = false
|
11
|
+
|
12
|
+
# Show full error reports and disable caching.
|
13
|
+
config.consider_all_requests_local = true
|
14
|
+
config.action_controller.perform_caching = false
|
15
|
+
|
16
|
+
# Don't care if the mailer can't send.
|
17
|
+
# config.action_mailer.raise_delivery_errors = false
|
18
|
+
|
19
|
+
# Print deprecation notices to the Rails logger.
|
20
|
+
config.active_support.deprecation = :log
|
21
|
+
|
22
|
+
# Raise an error on page load if there are pending migrations
|
23
|
+
config.active_record.migration_error = :page_load
|
24
|
+
|
25
|
+
# Debug mode disables concatenation and preprocessing of assets.
|
26
|
+
# This option may cause significant delays in view rendering with a large
|
27
|
+
# number of complex assets.
|
28
|
+
config.assets.debug = true
|
29
|
+
end
|