pageflow-chart 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +21 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +10 -0
- data/README.md +91 -0
- data/Rakefile +20 -0
- data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
- data/app/assets/images/pageflow/chart_sprite.png +0 -0
- data/app/assets/images/pageflow/ov-chart.png +0 -0
- data/app/assets/javascripts/pageflow/chart.js +5 -0
- data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
- data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
- data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
- data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
- data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
- data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
- data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
- data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
- data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
- data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
- data/app/controllers/pageflow/chart/application_controller.rb +6 -0
- data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
- data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
- data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
- data/app/models/pageflow/chart/scraped_site.rb +51 -0
- data/app/views/pageflow/chart/page.html +41 -0
- data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
- data/bin/rails +8 -0
- data/chart.gemspec +30 -0
- data/config/locales/de.yml +40 -0
- data/config/locales/en.yml +22 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
- data/lib/pageflow/chart.rb +21 -0
- data/lib/pageflow/chart/configuration.rb +63 -0
- data/lib/pageflow/chart/downloader.rb +53 -0
- data/lib/pageflow/chart/engine.rb +17 -0
- data/lib/pageflow/chart/page_type.rb +15 -0
- data/lib/pageflow/chart/scraper.rb +107 -0
- data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
- data/spec/dummy/README.rdoc +28 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/app/assets/images/.keep +0 -0
- data/spec/dummy/app/assets/javascripts/application.js +13 -0
- data/spec/dummy/app/assets/stylesheets/application.css +13 -0
- data/spec/dummy/app/controllers/application_controller.rb +5 -0
- data/spec/dummy/app/controllers/concerns/.keep +0 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/mailers/.keep +0 -0
- data/spec/dummy/app/models/.keep +0 -0
- data/spec/dummy/app/models/concerns/.keep +0 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/config/application.rb +22 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +29 -0
- data/spec/dummy/config/environments/production.rb +80 -0
- data/spec/dummy/config/environments/test.rb +36 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/spec/dummy/config/initializers/inflections.rb +16 -0
- data/spec/dummy/config/initializers/mime_types.rb +5 -0
- data/spec/dummy/config/initializers/secret_token.rb +12 -0
- data/spec/dummy/config/initializers/session_store.rb +3 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +23 -0
- data/spec/dummy/config/routes.rb +4 -0
- data/spec/dummy/db/schema.rb +39 -0
- data/spec/dummy/lib/assets/.keep +0 -0
- data/spec/dummy/public/404.html +58 -0
- data/spec/dummy/public/422.html +58 -0
- data/spec/dummy/public/500.html +57 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/factories/scraped_sites.rb +5 -0
- data/spec/fixtures/datawrapper.html +121 -0
- data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
- data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
- data/spec/pageflow/chart/downloader_spec.rb +90 -0
- data/spec/pageflow/chart/scraper_spec.rb +179 -0
- data/spec/requests/scraping_site_spec.rb +23 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/html_fragment.rb +13 -0
- data/spec/support/paperclip.rb +11 -0
- data/spec/support/resque.rb +20 -0
- data/spec/support/webmock.rb +11 -0
- metadata +363 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
require 'uri'
require 'open-uri'
require 'tempfile'
|
|
3
|
+
|
|
4
|
+
module Pageflow
  module Chart
    # Downloads remote resources and hands them to the caller as IO objects.
    #
    # URLs are resolved against `options[:base_url]` when that option is set;
    # otherwise they are parsed as given.
    class Downloader
      # Number of bytes copied per read while concatenating downloads.
      CHUNK_SIZE = 16 * 1024

      attr_reader :options

      # @param options [Hash] supports `:base_url`, the URL that URLs passed to
      #   {#load} and {#load_all} are joined onto.
      def initialize(options = {})
        @options = options
      end

      # Opens `url` and yields the resulting IO to the block.
      #
      # The URI's own `open` method (mixed in by open-uri) is called directly
      # rather than going through `Kernel#open`, which does not accept URI
      # objects on Ruby 3.0 and later. The block form lets open-uri close the
      # IO once the block returns or raises.
      #
      # @param url [String] URL to download, resolved via `options[:base_url]`
      #   when that option is set
      # @yieldparam file [IO] the opened resource
      # @return [Object] the value returned by the block
      # @raise [ArgumentError] if the resolved URI cannot be opened by open-uri
      #   (for example a relative URL without a `:base_url`)
      def load(url)
        uri = make_absolute(url)

        # Only URI classes extended by open-uri respond to a public `open`.
        unless uri.respond_to?(:open)
          raise ArgumentError, "Cannot download #{uri}: unsupported or missing URL scheme"
        end

        uri.open { |file| yield(file) }
      end

      # Downloads every URL in `urls`, concatenates the bodies into one
      # temporary file and yields that file, rewound to its start.
      #
      # The separator is written after every download, including the last one.
      # The temporary file is closed and deleted when the block returns or
      # raises.
      #
      # Note that the `options` parameter shadows the instance-level `options`
      # inside this method; `:base_url` is still honoured through {#load}.
      #
      # @param urls [Enumerable<String>] URLs to download, in output order
      # @param options [Hash]
      #   - `:extension` suffix appended verbatim to the temp file name
      #     (default `'txt'`, i.e. without a leading dot)
      #   - `:separator` string written after each download (default `"\n"`)
      # @yieldparam file [Tempfile] binary-mode file holding the concatenation
      # @return [Object] the value returned by the block
      def load_all(urls, options = {})
        file = Tempfile.new(['concatenation', options.fetch(:extension, 'txt')])
        file.binmode

        begin
          urls.each do |url|
            load(url) do |source|
              while (data = source.read(CHUNK_SIZE))
                file.write(data)
              end
            end

            file.write(options.fetch(:separator, "\n"))
          end

          file.rewind
          yield(file)
        ensure
          file.close
          file.unlink
        end
      end

      private

      # Resolves `url` against `options[:base_url]` if present, otherwise
      # parses it unchanged.
      #
      # @return [URI::Generic]
      def make_absolute(url)
        options[:base_url] ? URI.join(options[:base_url], url) : URI.parse(url)
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Pageflow
  module Chart
    # Rails engine for the Pageflow::Chart namespace.
    class Engine < Rails::Engine
      isolate_namespace Pageflow::Chart

      # Make the engine's lib directory autoloadable.
      config.autoload_paths << File.join(config.root, 'lib')

      # Precompile the custom chart stylesheet in addition to the defaults.
      config.assets.precompile += %w[pageflow/chart/custom.css]

      # Generator defaults: RSpec without fixtures, factories under
      # spec/factories, and no asset or helper stubs.
      config.generators do |generators|
        generators.test_framework(:rspec, fixture: false)
        generators.fixture_replacement(:factory_girl, dir: 'spec/factories')
        generators.assets(false)
        generators.helper(false)
      end
    end
  end
end
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
|
|
3
|
+
module Pageflow
  module Chart
    # Parses an HTML page, records the URLs of the scripts and stylesheets
    # referenced from its head, and rewrites the document so that the head
    # references a single `all.js` and a single `all.css` instead.
    class Scraper
      attr_reader :document, :options, :javascript_urls, :stylesheet_urls

      # Parses `html` and immediately collects URLs and rewrites the document.
      #
      # @param html [String] markup of the page to scrape
      # @param options [Hash] optional filters:
      #   - `:head_script_blacklist` patterns matched against the `src` of
      #     head scripts; matching scripts are left out of `javascript_urls`
      #   - `:inline_script_blacklist` patterns matched against the content of
      #     script tags inside body; matching tags are removed
      #   - `:selector_blacklist` CSS selectors whose matches are removed
      def initialize(html, options = {})
        @document = Nokogiri::HTML(html)
        @options = options

        # URLs must be collected before the rewrite removes the original tags.
        parse
        rewrite
      end

      # @return [String] the rewritten document serialized as HTML
      def html
        document.to_s
      end

      # Not implemented; always returns nil.
      def csv_url
      end

      private

      def parse
        parse_javascript_urls
        parse_stylesheet_urls
      end

      # Stores the `src` of every non-blacklisted head script.
      def parse_javascript_urls
        @javascript_urls = filtered_script_tags_in_head.map { |tag| tag[:src] }
      end

      # Stores the `href` of every stylesheet link in head.
      def parse_stylesheet_urls
        @stylesheet_urls = css_link_tags.map { |tag| tag[:href] }
      end

      def rewrite
        filter_inline_scripts
        filter_by_selectors
        combine_script_tags_in_head
        combine_css_link_tags
      end

      # Removes script tags inside body whose content matches the inline
      # script blacklist.
      def filter_inline_scripts
        document.css('body script').each do |tag|
          tag.remove if blacklisted_inline_script?(tag)
        end
      end

      def blacklisted_inline_script?(tag)
        options.fetch(:inline_script_blacklist, []).any? do |pattern|
          tag.content =~ pattern
        end
      end

      # Removes every element matching one of the blacklisted selectors.
      def filter_by_selectors
        options.fetch(:selector_blacklist, []).each do |selector|
          document.css(selector).each(&:remove)
        end
      end

      # Replaces all head scripts that have a `src` (blacklisted ones
      # included) with a single reference to `all.js`.
      def combine_script_tags_in_head
        script_tags_in_head.each(&:remove)

        all_script_tag = Nokogiri::XML::Node.new('script', document)
        all_script_tag[:src] = 'all.js'
        all_script_tag[:type] = 'text/javascript'
        head_element << all_script_tag
      end

      # Replaces all stylesheet links in head with a single reference to
      # `all.css`.
      def combine_css_link_tags
        css_link_tags.each(&:remove)

        all_css_link_tag = Nokogiri::XML::Node.new('link', document)
        all_css_link_tag[:href] = 'all.css'
        all_css_link_tag[:type] = 'text/css'
        all_css_link_tag[:rel] = 'stylesheet'
        head_element << all_css_link_tag
      end

      # Returns the document's head element, creating it when absent. The
      # parsed document can lack a head (markup without head content, or a
      # selector blacklist entry that removed it); appending to the nil
      # returned by `at_css` would then raise NoMethodError.
      def head_element
        document.at_css('head') || create_head_element
      end

      # Inserts an empty head as the first child of the root element, adding
      # an html root first if the document has none.
      def create_head_element
        root = document.root
        root = document.root = Nokogiri::XML::Node.new('html', document) unless root

        root.prepend_child(Nokogiri::XML::Node.new('head', document))
        document.at_css('head')
      end

      # Head scripts whose `src` matches none of the blacklist patterns.
      def filtered_script_tags_in_head
        script_tags_in_head.reject do |tag|
          options.fetch(:head_script_blacklist, []).any? do |regexp|
            tag[:src] =~ regexp
          end
        end
      end

      def script_tags_in_head
        document.css('head script[src]')
      end

      # Link tags in head declared with `type="text/css"`.
      # NOTE(review): links that only carry rel="stylesheet" without a type
      # attribute are not matched - confirm this is intended.
      def css_link_tags
        document.css('head link').select do |tag|
          tag[:type] == 'text/css'
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
module Pageflow
  module Chart
    describe ScrapedSitesController do
      # All examples are routed through the engine's own route set.
      routes { Pageflow::Chart::Engine.routes }

      describe '#create' do
        # Issues the JSON create request shared by the examples below.
        def create_scraped_site
          post(:create, scraped_site: {url: "http://example.com/chart.html"}, format: 'json')
        end

        it 'responds with success' do
          create_scraped_site

          expect(response.status).to eq(201)
        end

        it 'creates scraped site' do
          expect { create_scraped_site }.to change { ScrapedSite.count }
        end
      end

      describe '#show' do
        it 'responds with success' do
          site = create(:scraped_site, state: 'unprocessed')

          get(:show, id: site.id, format: 'json')

          expect(response.status).to eq(200)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
== README
|
|
2
|
+
|
|
3
|
+
This README would normally document whatever steps are necessary to get the
|
|
4
|
+
application up and running.
|
|
5
|
+
|
|
6
|
+
Things you may want to cover:
|
|
7
|
+
|
|
8
|
+
* Ruby version
|
|
9
|
+
|
|
10
|
+
* System dependencies
|
|
11
|
+
|
|
12
|
+
* Configuration
|
|
13
|
+
|
|
14
|
+
* Database creation
|
|
15
|
+
|
|
16
|
+
* Database initialization
|
|
17
|
+
|
|
18
|
+
* How to run the test suite
|
|
19
|
+
|
|
20
|
+
* Services (job queues, cache servers, search engines, etc.)
|
|
21
|
+
|
|
22
|
+
* Deployment instructions
|
|
23
|
+
|
|
24
|
+
* ...
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Please feel free to use a different markup language if you do not plan to run
|
|
28
|
+
<tt>rake doc:app</tt>.
|
data/spec/dummy/Rakefile
ADDED
|
File without changes
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// This is a manifest file that'll be compiled into application.js, which will include all the files
|
|
2
|
+
// listed below.
|
|
3
|
+
//
|
|
4
|
+
// Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
|
|
5
|
+
// or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
|
|
6
|
+
//
|
|
7
|
+
// It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
|
|
8
|
+
// compiled file.
|
|
9
|
+
//
|
|
10
|
+
// Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
|
|
11
|
+
// about supported directives.
|
|
12
|
+
//
|
|
13
|
+
//= require_tree .
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* This is a manifest file that'll be compiled into application.css, which will include all the files
|
|
3
|
+
* listed below.
|
|
4
|
+
*
|
|
5
|
+
* Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
|
|
6
|
+
* or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
|
|
7
|
+
*
|
|
8
|
+
* You're free to add application-wide styles to this file and they'll appear at the top of the
|
|
9
|
+
* compiled file, but it's generally better to create a new file per style scope.
|
|
10
|
+
*
|
|
11
|
+
*= require_self
|
|
12
|
+
*= require_tree .
|
|
13
|
+
*/
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
<!DOCTYPE html>
<html>
  <head>
    <title>Dummy</title>
    <%# Application-wide stylesheet and script bundles, followed by the CSRF meta tags. %>
    <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
    <%= javascript_include_tag "application", "data-turbolinks-track" => true %>
    <%= csrf_meta_tags %>
  </head>
  <body>

    <%= yield %>

  </body>
</html>
|
data/spec/dummy/bin/rake
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
require File.expand_path('../boot', __FILE__)

require 'rails/all'

# Load the gems of the current Rails groups, then the engine itself.
Bundler.require(*Rails.groups)
require "pageflow/chart"

module Dummy
  # Application class of the dummy app under spec/dummy. It sets no options
  # of its own.
  class Application < Rails::Application
    # Anything configured here is overridden by config/environments/*.
    # Every .rb file in config/initializers is loaded automatically, so
    # application configuration belongs there.

    # Time zone: sets the Time.zone default and makes Active Record convert
    # to that zone. The default is UTC; "rake -D time" lists the tasks for
    # finding zone names.
    # config.time_zone = 'Central Time (US & Canada)'

    # Locale: the default is :en, and translations from
    # config/locales/*.rb,yml are loaded automatically.
    # config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
    # config.i18n.default_locale = :de
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SQLite version 3.x
#   gem install sqlite3
#
#   Ensure the SQLite 3 gem is defined in your Gemfile
#   gem 'sqlite3'
#
# Each environment below is a mapping holding its own connection settings.
development:
  adapter: sqlite3
  database: db/development.sqlite3
  pool: 5
  timeout: 5000

# Warning: The database defined as "test" will be erased and
# re-generated from your development database when you run "rake".
# Do not set this db to the same as development or production.
test:
  adapter: sqlite3
  database: db/test.sqlite3
  pool: 5
  timeout: 5000

production:
  adapter: sqlite3
  database: db/production.sqlite3
  pool: 5
  timeout: 5000
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Dummy::Application.configure do
  # Development overrides; values set here win over config/application.rb.

  # Reload application code on every request, so code changes take effect
  # without restarting the web server (at the cost of slower responses).
  config.cache_classes = false

  # Skip eager loading at boot.
  config.eager_load = false

  # Render full error reports and keep controller caching switched off.
  config.consider_all_requests_local = true
  config.action_controller.perform_caching = false

  # Uncomment to ignore mail delivery failures.
  # config.action_mailer.raise_delivery_errors = false

  # Send deprecation notices to the Rails logger.
  config.active_support.deprecation = :log

  # Fail on page load while migrations are pending.
  config.active_record.migration_error = :page_load

  # Asset debug mode turns off concatenation and preprocessing; with many
  # complex assets this can noticeably delay view rendering.
  config.assets.debug = true
end
|