pageflow-chart 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +21 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +10 -0
- data/README.md +91 -0
- data/Rakefile +20 -0
- data/app/assets/images/pageflow/chart/fs_close_sprite.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram.png +0 -0
- data/app/assets/images/pageflow/chart_pictogram_small.png +0 -0
- data/app/assets/images/pageflow/chart_sprite.png +0 -0
- data/app/assets/images/pageflow/ov-chart.png +0 -0
- data/app/assets/javascripts/pageflow/chart.js +5 -0
- data/app/assets/javascripts/pageflow/chart/asset_urls.js.erb +3 -0
- data/app/assets/javascripts/pageflow/chart/editor.js +9 -0
- data/app/assets/javascripts/pageflow/chart/editor/collections/scraped_sites_collection.js +23 -0
- data/app/assets/javascripts/pageflow/chart/editor/initializers/setup_collections.js +1 -0
- data/app/assets/javascripts/pageflow/chart/editor/models/scraped_site.js +55 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/scraped_site_status.jst.ejs +2 -0
- data/app/assets/javascripts/pageflow/chart/editor/templates/url_input.jst.ejs +7 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/configuration_editor.js +26 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/embedded/iframe_embedded_view.js +47 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/inputs/scraped_url_input_view.js +49 -0
- data/app/assets/javascripts/pageflow/chart/editor/views/scraped_site_status_view.js +18 -0
- data/app/assets/javascripts/pageflow/chart/page_type.js +152 -0
- data/app/assets/stylesheets/pageflow/chart.css.scss +130 -0
- data/app/assets/stylesheets/pageflow/chart/custom.css.scss +209 -0
- data/app/assets/stylesheets/pageflow/chart/editor.css.scss +17 -0
- data/app/assets/stylesheets/pageflow/chart/themes/default.css.scss +10 -0
- data/app/controllers/pageflow/chart/application_controller.rb +6 -0
- data/app/controllers/pageflow/chart/scraped_sites_controller.rb +25 -0
- data/app/helpers/pageflow/chart/scraped_sites_helper.rb +13 -0
- data/app/jobs/pageflow/chart/scrape_site_job.rb +59 -0
- data/app/models/pageflow/chart/scraped_site.rb +51 -0
- data/app/views/pageflow/chart/page.html +41 -0
- data/app/views/pageflow/chart/page_type.json.jbuilder +2 -0
- data/bin/rails +8 -0
- data/chart.gemspec +30 -0
- data/config/locales/de.yml +40 -0
- data/config/locales/en.yml +22 -0
- data/config/routes.rb +3 -0
- data/db/migrate/20140417112724_create_pageflow_chart_scraped_sites.rb +14 -0
- data/lib/pageflow/chart.rb +21 -0
- data/lib/pageflow/chart/configuration.rb +63 -0
- data/lib/pageflow/chart/downloader.rb +53 -0
- data/lib/pageflow/chart/engine.rb +17 -0
- data/lib/pageflow/chart/page_type.rb +15 -0
- data/lib/pageflow/chart/scraper.rb +107 -0
- data/spec/controllers/pageflow/chart/scraped_sites_controller_spec.rb +35 -0
- data/spec/dummy/README.rdoc +28 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/app/assets/images/.keep +0 -0
- data/spec/dummy/app/assets/javascripts/application.js +13 -0
- data/spec/dummy/app/assets/stylesheets/application.css +13 -0
- data/spec/dummy/app/controllers/application_controller.rb +5 -0
- data/spec/dummy/app/controllers/concerns/.keep +0 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/mailers/.keep +0 -0
- data/spec/dummy/app/models/.keep +0 -0
- data/spec/dummy/app/models/concerns/.keep +0 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/config/application.rb +22 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +29 -0
- data/spec/dummy/config/environments/production.rb +80 -0
- data/spec/dummy/config/environments/test.rb +36 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/spec/dummy/config/initializers/inflections.rb +16 -0
- data/spec/dummy/config/initializers/mime_types.rb +5 -0
- data/spec/dummy/config/initializers/secret_token.rb +12 -0
- data/spec/dummy/config/initializers/session_store.rb +3 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +23 -0
- data/spec/dummy/config/routes.rb +4 -0
- data/spec/dummy/db/schema.rb +39 -0
- data/spec/dummy/lib/assets/.keep +0 -0
- data/spec/dummy/public/404.html +58 -0
- data/spec/dummy/public/422.html +58 -0
- data/spec/dummy/public/500.html +57 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/factories/scraped_sites.rb +5 -0
- data/spec/fixtures/datawrapper.html +121 -0
- data/spec/jobs/pageflow/chart/scrape_site_job_spec.rb +22 -0
- data/spec/models/pageflow/chart/scraped_site_spec.rb +19 -0
- data/spec/pageflow/chart/downloader_spec.rb +90 -0
- data/spec/pageflow/chart/scraper_spec.rb +179 -0
- data/spec/requests/scraping_site_spec.rb +23 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/factory_girl.rb +5 -0
- data/spec/support/html_fragment.rb +13 -0
- data/spec/support/paperclip.rb +11 -0
- data/spec/support/resque.rb +20 -0
- data/spec/support/webmock.rb +11 -0
- metadata +363 -0
File without changes
|
@@ -0,0 +1,121 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta http-equiv="X-UA-Compatible" content="IE=Edge"/>
|
5
|
+
<title>[ Insert title here ]</title>
|
6
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
7
|
+
<meta name="viewport" content="width=device-width, initial-scale=0.95, maximum-scale=0.95, user-scalable=0" />
|
8
|
+
<!--[if lt IE 7]>
|
9
|
+
<meta http-equiv="refresh" content="0;URL='static.html'">
|
10
|
+
<![endif]-->
|
11
|
+
<!--[if lt IE 9]>
|
12
|
+
<script src="https://datawrapper.de//static/vendor/json-js/json2.min.js"></script>
|
13
|
+
<script type="text/javascript">
|
14
|
+
window.__ltie9 = true;
|
15
|
+
</script>
|
16
|
+
<![endif]-->
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
<link rel="stylesheet" type="text/css" href="aLrJt.all.css"></link>
|
21
|
+
|
22
|
+
<script type="text/javascript">
|
23
|
+
window.__locale = 'en';
|
24
|
+
</script>
|
25
|
+
|
26
|
+
<script type="text/javascript" src="//assets-datawrapper.s3.amazonaws.com/globalize.min.js"></script>
|
27
|
+
<script type="text/javascript" src="//cdnjs.cloudflare.com/ajax/libs/underscore.js/1.5.2/underscore-min.js"></script>
|
28
|
+
<script type="text/javascript" src="//cdnjs.cloudflare.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script>
|
29
|
+
<script type="text/javascript" src="//assets-datawrapper.s3.amazonaws.com/vendor/d3-light/3.1.7/d3-light.min.js"></script>
|
30
|
+
<script type="text/javascript" src="//assets-datawrapper.s3.amazonaws.com/vendor/chroma-js/0.5.4/chroma.min.js"></script>
|
31
|
+
<script type="text/javascript" src="//assets-datawrapper.s3.amazonaws.com/vendor/raphael-js/2.1.2/raphael-min.js"></script>
|
32
|
+
<script type="text/javascript" src="/lib/vis/line-chart-5fb380e5bc3650f9e9661eb780af2d52.min.js"></script>
|
33
|
+
<script type="text/javascript" src="/lib/theme/default-5baee189e1a2f0bf680f4910cc958098.min.js"></script>
|
34
|
+
<script type="text/javascript" src="/lib/chart-206e22793fae102dcb0bb68278f61581.min.js"></script>
|
35
|
+
|
36
|
+
</head>
|
37
|
+
<body class="chart">
|
38
|
+
|
39
|
+
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
<div id="header">
|
45
|
+
|
46
|
+
<h1 >
|
47
|
+
<span class="chart-title">[ Insert title here ]</span>
|
48
|
+
</h1>
|
49
|
+
|
50
|
+
<p class="chart-intro hidden"></p>
|
51
|
+
|
52
|
+
<a href="http://cf.datawrapper.de/aLrJt/1/?fs=1" target="_blank" class="fs-btn top-right" title="Fullscreen" data-toggle="fullscreen"></a>
|
53
|
+
|
54
|
+
|
55
|
+
</div>
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
<div id="chart"></div>
|
60
|
+
|
61
|
+
<div id="footer">
|
62
|
+
<div class="footer-left">
|
63
|
+
Created with <a href="https://datawrapper.de/" target="_blank" class="lgo">Datawrapper</a>
|
64
|
+
</div>
|
65
|
+
<div class="footer-right">
|
66
|
+
<div class="toggleable-panel">
|
67
|
+
|
68
|
+
<span data-src="Source: " class="source-block">Source: <a target="_blank" class="source" href="http://www.census.gov/foreign-trade/balance/c4120.html">US Census Bureau</a></span>
|
69
|
+
|
70
|
+
<a href="data">Get the data</a>
|
71
|
+
</div>
|
72
|
+
<a class="toggle-panel">About</a>
|
73
|
+
</div>
|
74
|
+
<div class="toggleable-panel-overlay"></div>
|
75
|
+
</div>
|
76
|
+
|
77
|
+
<div class="noscript" style="position: absolute;top: 0;left: 0;z-index: -10;">
|
78
|
+
<a href="static.html"><img src="nojs.png" alt="Please activate JavaScript to see the interactive chart." /></a>
|
79
|
+
</div>
|
80
|
+
<div class="noscript" style="height:95%;position:absolute;top:0;width:98%;z-index:10;overflow:hidden">
|
81
|
+
<div style="position:relative;top:50%;text-align:center;font-size:18px;padding:0 20px"><a style="color: rgba(0,0,0,.2); text-shadow: 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff, 0 0 7px #fff; text-decoration: none;" href="static.html" >Please activate JavaScript to see the interactive chart.</a></div>
|
82
|
+
</div>
|
83
|
+
<script type="text/javascript">
|
84
|
+
|
85
|
+
if (document.getElementsByTagName('script').length > 0) $('body').addClass('js');
|
86
|
+
|
87
|
+
__dw.init({
|
88
|
+
chartJSON: JSON.parse('{\"id\":\"aLrJt\",\"title\":\"[ Insert title here ]\",\"theme\":\"default\",\"createdAt\":\"2014-04-15 19:14:01\",\"lastModifiedAt\":\"2014-04-15 19:15:26\",\"type\":\"line-chart\",\"metadata\":{\"data\":{\"transpose\":false,\"vertical-header\":true,\"horizontal-header\":true},\"visualize\":{\"highlighted-series\":[],\"highlighted-values\":[],\"direct-labeling\":false,\"legend-position\":\"right\",\"show-grid\":false,\"fill-between\":false,\"line-mode\":\"straight\",\"invert-y-axis\":false,\"scale-y1\":\"linear\",\"user-change-scale\":false,\"sort-values\":false},\"describe\":{\"source-name\":\"US Census Bureau\",\"source-url\":\"http:\\/\\/www.census.gov\\/foreign-trade\\/balance\\/c4120.html\",\"number-format\":\"n1\",\"number-divisor\":\"3\",\"number-append\":\" Billion USD\",\"number-prepend\":\"\",\"intro\":\"\"},\"publish\":{\"embed-width\":600,\"embed-height\":400,\"background\":\"#ffffff\",\"text\":\"#000000\"}},\"authorId\":9505,\"showInGallery\":false,\"language\":\"en\",\"guestSession\":null,\"lastEditStep\":3,\"publishedAt\":null,\"publicUrl\":\"http:\\/\\/cf.datawrapper.de\\/aLrJt\\/1\\/\",\"publicVersion\":1,\"organizationId\":null,\"forkedFrom\":null,\"author\":{\"id\":9505,\"email\":\"\",\"name\":\"tf\",\"website\":\"stderr.timfischbach.de\",\"socialmedia\":\"https:\\/\\/github.com\\/tf\"}}'),
|
89
|
+
chartLocale: 'en',
|
90
|
+
themeId: 'default',
|
91
|
+
visId: 'line-chart',
|
92
|
+
visJSON: JSON.parse('\x7B\x22title\x22\x3A\x22Line\x20Chart\x22,\x22id\x22\x3A\x22line\x2Dchart\x22,\x22extends\x22\x3A\x22raphael\x2Dchart\x22,\x22dimensions\x22\x3A2,\x22order\x22\x3A40,\x22axes\x22\x3A\x7B\x22x\x22\x3A\x7B\x22accepts\x22\x3A\x5B\x22text\x22,\x22date\x22\x5D\x7D,\x22y1\x22\x3A\x7B\x22accepts\x22\x3A\x5B\x22number\x22\x5D,\x22multiple\x22\x3Atrue\x7D,\x22y2\x22\x3A\x7B\x22accepts\x22\x3A\x5B\x22number\x22\x5D,\x22multiple\x22\x3Atrue,\x22optional\x22\x3Atrue\x7D\x7D,\x22options\x22\x3A\x7B\x22base\x2Dcolor\x22\x3A\x7B\x22type\x22\x3A\x22base\x2Dcolor\x22,\x22label\x22\x3A\x22Base\x20color\x22\x7D,\x22sep\x2Dlabeling\x22\x3A\x7B\x22type\x22\x3A\x22separator\x22,\x22label\x22\x3A\x22Customize\x20labeling\x22,\x22depends\x2Don\x22\x3A\x7B\x22chart.min_columns\x5By1\x5D\x22\x3A2\x7D\x7D,\x22direct\x2Dlabeling\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Direct\x20labeling\x22,\x22default\x22\x3Afalse,\x22depends\x2Don\x22\x3A\x7B\x22chart.min_columns\x5By1\x5D\x22\x3A2,\x22chart.max_columns\x5By2\x5D\x22\x3A0\x7D,\x22help\x22\x3A\x22Show\x20the\x20labels\x20right\x20nearby\x20the\x20line\x20ends\x20instead\x20of\x20a\x20separate\x20legend\x22\x7D,\x22legend\x2Dposition\x22\x3A\x7B\x22type\x22\x3A\x22radio\x2Dleft\x22,\x22label\x22\x3A\x22Legend\x20position\x22,\x22default\x22\x3A\x22right\x22,\x22depends\x2Don\x22\x3A\x7B\x22direct\x2Dlabeling\x22\x3Afalse,\x22chart.min_columns\x5By1\x5D\x22\x3A2\x7D,\x22options\x22\x3A\x5B\x7B\x22value\x22\x3A\x22right\x22,\x22label\x22\x3A\x22right\x22\x7D,\x7B\x22value\x22\x3A\x22top\x22,\x22label\x22\x3A\x22top\x22\x7D,\x7B\x22value\x22\x3A\x22inside\x22,\x22label\x22\x3A\x22inside\x20left\x22\x7D,\x7B\x22value\x22\x3A\x22inside\x2Dright\x22,\x22label\x22\x3A\x22inside\x20right\x22\x7D\x5D\x7D,\x22sep\x2Dlines\x22\x3A\x7B\x22type\x22\x3A\x22separator\x22,\x22label\x22\x3A\x22Customize\x20lines\x22\x7D,\x22force\x2Dbanking\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22hidden\x22\x3Atrue,\x22lab
el\x22\x3A\x22Bank\x20the\x20lines\x20to\x2045\x20degrees\x22\x7D,\x22show\x2Dgrid\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22hidden\x22\x3Atrue,\x22label\x22\x3A\x22Show\x20grid\x22,\x22default\x22\x3Afalse\x7D,\x22connect\x2Dmissing\x2Dvalues\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Connect\x20lines\x20between\x20missing\x20values\x22\x7D,\x22fill\x2Dbetween\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Fill\x20area\x20between\x20lines\x22,\x22default\x22\x3Afalse,\x22depends\x2Don\x22\x3A\x7B\x22chart.min_columns\x5By1\x5D\x22\x3A2,\x22chart.max_columns\x5By1\x5D\x22\x3A2,\x22chart.max_columns\x5By2\x5D\x22\x3A0\x7D\x7D,\x22fill\x2Dbelow\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Fill\x20area\x20below\x20line\x22,\x22defaut\x22\x3Afalse,\x22depends\x2Don\x22\x3A\x7B\x22chart.max_columns\x5By1\x5D\x22\x3A1,\x22chart.max_columns\x5By2\x5D\x22\x3A0\x7D\x7D,\x22line\x2Dmode\x22\x3A\x7B\x22type\x22\x3A\x22radio\x2Dleft\x22,\x22label\x22\x3A\x22Line\x20interpolation\x22,\x22options\x22\x3A\x5B\x7B\x22label\x22\x3A\x22Straight\x22,\x22value\x22\x3A\x22straight\x22\x7D,\x7B\x22label\x22\x3A\x22Curved\x22,\x22value\x22\x3A\x22curved\x22\x7D,\x7B\x22label\x22\x3A\x22Stepped\x22,\x22value\x22\x3A\x22stepped\x22\x7D\x5D,\x22default\x22\x3A\x22straight\x22\x7D,\x22sep\x2Dy\x2Daxis\x22\x3A\x7B\x22type\x22\x3A\x22separator\x22,\x22label\x22\x3A\x22Customize\x20y\x2DAxis\x22\x7D,\x22baseline\x2Dzero\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Extend\x20to\x20zero\x22\x7D,\x22extend\x2Drange\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Extend\x20to\x20nice\x20ticks\x22,\x22help\x22\x3A\x22Extend\x20the\x20y\x2Daxis\x20range\x20to\x20nice,\x20rounded\x20values\x20instead\x20of\x20the\x20default\x20range\x20from\x20the\x20minimum\x20to\x20maximum\x20value.\x22\x7D,\x22invert\x2Dy\x2Daxis\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Invert\x20direction\x
22,\x22default\x22\x3Afalse\x7D,\x22scale\x2Dy1\x22\x3A\x7B\x22type\x22\x3A\x22radio\x2Dleft\x22,\x22label\x22\x3A\x22Scale\x20\x28y\x2Daxis\x29\x22,\x22options\x22\x3A\x5B\x7B\x22label\x22\x3A\x22linear\x22,\x22value\x22\x3A\x22linear\x22\x7D,\x7B\x22label\x22\x3A\x22logarithmic\x22,\x22value\x22\x3A\x22log\x22\x7D\x5D,\x22default\x22\x3A\x22linear\x22,\x22depends\x2Don\x22\x3A\x7B\x22chart.min_value\x5By1\x5D\x22\x3A\x22\x3E0\x22,\x22chart.magnitude_range\x5By1\x5D\x22\x3A\x22\x3E3\x22\x7D\x7D,\x22user\x2Dchange\x2Dscale\x22\x3A\x7B\x22type\x22\x3A\x22checkbox\x22,\x22label\x22\x3A\x22Let\x20user\x20change\x20scale\x22,\x22default\x22\x3Afalse,\x22depends\x2Don\x22\x3A\x7B\x22chart.min_value\x5By1\x5D\x22\x3A\x22\x3E0\x22,\x22chart.magnitude_range\x5By1\x5D\x22\x3A\x22\x3E3\x22\x7D\x7D\x7D,\x22locale\x22\x3A\x7B\x22tooManyLinesToLabel\x22\x3A\x22Your\x20chart\x20contains\x20\x3Cb\x3Emore\x20lines\x20than\x20we\x20can\x20label\x3C\x5C\x2Fb\x3E,\x20so\x20automatic\x20labeling\x20is\x20turned\x20off.\x20To\x20fix\x20this\x20\x3Cul\x3E\x3Cli\x3Efilter\x20some\x20columns\x20in\x20the\x20data\x20table\x20in\x20the\x20previous\x20step,\x20or\x3C\x5C\x2Fli\x3E\x3Cli\x3Euse\x20direct\x20labeling\x20and\x20the\x20highlight\x20feature\x20to\x20label\x20the\x20lines\x20that\x20are\x20important\x20to\x20your\x20story.\x3C\x5C\x2Fli\x3E\x3C\x5C\x2Ful\x3E\x22,\x22useLogarithmicScale\x22\x3A\x22Use\x20logarithmic\x20scale\x22,\x22couldNotParseAllDates\x22\x3A\x22Some\x20of\x20the\x20\x3Cb\x3Edates\x20in\x20your\x20x\x2Daxis\x20could\x20not\x20be\x20parsed\x3C\x5C\x2Fb\x3E,\x20hence\x20the\x20line\x20chart\x20cannot\x20display\x20a\x20proper\x20date\x20axis.\x20To\x20fix\x20this\x3Cul\x3E\x3Cli\x3Ereturn\x20to\x20the\x20previous\x20step\x20and\x20clean\x20your\x20date\x20column.\x3C\x5C\x2Fli\x3E\x3Cli\x3E\x3Ca\x20href\x3D\x27http\x3A\x5C\x2F\x5C\x2Fblog.datawrapper.de\x5C\x2F2013\x5C\x2Fcleaning\x2Dyour\x2Ddata\x2Din\x2Ddatawrapper\x5C\x2F\x27\x3ERead\x20more\x20about\x20how\x20t
o\x20do\x20this.\x3C\x5C\x2Fa\x3E\x3C\x5C\x2Fli\x3E\x3C\x5C\x2Ful\x3E\x22\x7D,\x22annotations\x22\x3A\x5B\x7B\x22type\x22\x3A\x22axis\x2Drange\x22,\x22axis\x22\x3A\x22x\x22\x7D,\x7B\x22type\x22\x3A\x22axis\x2Dpoint\x22,\x22axis\x22\x3A\x22x\x22\x7D,\x7B\x22type\x22\x3A\x22axis\x2Drange\x22,\x22axis\x22\x3A\x22y\x22\x7D,\x7B\x22type\x22\x3A\x22axis\x2Dpoint\x22,\x22axis\x22\x3A\x22y\x22\x7D,\x7B\x22type\x22\x3A\x22data\x2Dpoint\x22\x7D\x5D,\x22__static_path\x22\x3A\x22assets\x5C\x2Fplugins\x5C\x2Fvisualization\x2Dline\x2Dchart\x5C\x2F\x22,\x22version\x22\x3A\x221.5.0\x22,\x22icon\x22\x3A\x22\x3C\x3Fxml\x20version\x3D\x5C\x221.0\x5C\x22\x20encoding\x3D\x5C\x22utf\x2D8\x5C\x22\x3F\x3E\x5Cn\x3C\x21\x2D\x2D\x20Generator\x3A\x20Adobe\x20Illustrator\x2016.2.1,\x20SVG\x20Export\x20Plug\x2DIn\x20.\x20SVG\x20Version\x3A\x206.00\x20Build\x200\x29\x20\x20\x2D\x2D\x3E\x5Cn\x3C\x21DOCTYPE\x20svg\x20PUBLIC\x20\x5C\x22\x2D\x5C\x2F\x5C\x2FW3C\x5C\x2F\x5C\x2FDTD\x20SVG\x201.1\x20Basic\x5C\x2F\x5C\x2FEN\x5C\x22\x20\x5C\x22http\x3A\x5C\x2F\x5C\x2Fwww.w3.org\x5C\x2FGraphics\x5C\x2FSVG\x5C\x2F1.1\x5C\x2FDTD\x5C\x2Fsvg11\x2Dbasic.dtd\x5C\x22\x3E\x5Cn\x3Csvg\x20version\x3D\x5C\x221.1\x5C\x22\x20baseProfile\x3D\x5C\x22basic\x5C\x22\x20id\x3D\x5C\x22Ebene_1\x5C\x22\x5Cn\x5Ct\x20xmlns\x3D\x5C\x22http\x3A\x5C\x2F\x5C\x2Fwww.w3.org\x5C\x2F2000\x5C\x2Fsvg\x5C\x22\x20xmlns\x3Axlink\x3D\x5C\x22http\x3A\x5C\x2F\x5C\x2Fwww.w3.org\x5C\x2F1999\x5C\x2Fxlink\x5C\x22\x20x\x3D\x5C\x220px\x5C\x22\x20y\x3D\x5C\x220px\x5C\x22\x20width\x3D\x5C\x22100px\x5C\x22\x20height\x3D\x5C\x22100px\x5C\x22\x5Cn\x5Ct\x20viewBox\x3D\x5C\x220\x200\x20100\x20100\x5C\x22\x20xml\x3Aspace\x3D\x5C\x22preserve\x5C\x22\x3E\x5Cn\x3Cg\x3E\x5Cn\x5Ct\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x220.5\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x229\x5C\x22\x20y1\x3D\x5C\x2279.5\x5C\x22\x20x2\x3D\x5C\x2291\x5C\x22\x20y2\x3D\x5C\x2279.5\x5C\x22\x5C\x2
F\x3E\x5Cn\x5Ct\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x220.5\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x229\x5C\x22\x20y1\x3D\x5C\x2259.5\x5C\x22\x20x2\x3D\x5C\x2291\x5C\x22\x20y2\x3D\x5C\x2259.5\x5C\x22\x5C\x2F\x3E\x5Cn\x5Ct\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x220.5\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x229\x5C\x22\x20y1\x3D\x5C\x2239.5\x5C\x22\x20x2\x3D\x5C\x2291\x5C\x22\x20y2\x3D\x5C\x2239.5\x5C\x22\x5C\x2F\x3E\x5Cn\x5Ct\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x220.5\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x229\x5C\x22\x20y1\x3D\x5C\x2219.5\x5C\x22\x20x2\x3D\x5C\x2291\x5C\x22\x20y2\x3D\x5C\x2219.5\x5C\x22\x5C\x2F\x3E\x5Cn\x3C\x5C\x2Fg\x3E\x5Cn\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x224\x5C\x22\x20stroke\x2Dlinecap\x3D\x5C\x22round\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x2210\x5C\x22\x20y1\x3D\x5C\x2275.814\x5C\x22\x20x2\x3D\x5C\x2235.937\x5C\x22\x20y2\x3D\x5C\x2242.674\x5C\x22\x5C\x2F\x3E\x5Cn\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x224\x5C\x22\x20stroke\x2Dlinecap\x3D\x5C\x22round\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x2235.937\x5C\x22\x20y1\x3D\x5C\x2242.674\x5C\x22\x20x2\x3D\x5C\x2262.719\x5C\x22\x20y2\x3D\x5C\x2255.584\x5C\x22\x5C\x2F\x3E\x5Cn\x3Cline\x20fill\x3D\x5C\x22none\x5C\x22\x20stroke\x3D\x5C\x22\x23000000\x5C\x22\x20stroke\x2Dwidth\x3D\x5C\x224\x5C\x22\x20stroke\x2Dlinecap\x3D\x5C\x22round\x5C\x22\x20stroke\x2Dmiterlimit\x3D\x5C\x2210\x5C\x22\x20x1\x3D\x5C\x2262.719\x5C\x22\x20y1\x3D\x5C\x2255.584\x5C\x22\x20x2\x3D\x5C\x2290.562\x5C\x22\x20y2\x3D\x5C\x2223.755\x5C\x22\x5
C\x2F\x3E\x5Cn\x3C\x5C\x2Fsvg\x3E\x5Cn\x22,\x22hasCSS\x22\x3Atrue\x7D'),
|
93
|
+
metricPrefix: JSON.parse('\x7B\x223\x22\x3A\x22k\x22,\x226\x22\x3A\x22m\x22,\x229\x22\x3A\x22b\x22,\x2212\x22\x3A\x22t\x22\x7D'),
|
94
|
+
lang: 'en'
|
95
|
+
});
|
96
|
+
|
97
|
+
</script>
|
98
|
+
|
99
|
+
<!-- Piwik -->
|
100
|
+
<script type="text/javascript">
|
101
|
+
var _paq = _paq || [];
|
102
|
+
_paq.push(["setDocumentTitle", document.domain + "/" + document.title]);
|
103
|
+
_paq.push(["setCookieDomain", "*.www.datawrapper.de"]);
|
104
|
+
_paq.push(["setCustomVariable", 1, "Layout", "default", "page"]);
|
105
|
+
_paq.push(["setCustomVariable", 2, "Author", "9505", "page"]);
|
106
|
+
_paq.push(["setCustomVariable", 3, "Visualization", "line-chart", "page"]);
|
107
|
+
|
108
|
+
_paq.push(["trackPageView"]);
|
109
|
+
_paq.push(["enableLinkTracking"]);
|
110
|
+
|
111
|
+
(function() {
|
112
|
+
var u=(("https:" == document.location.protocol) ? "https" : "http") + "://piwik.datawrapper.de/";
|
113
|
+
_paq.push(["setTrackerUrl", u+"piwik.php"]);
|
114
|
+
_paq.push(["setSiteId", "1"]);
|
115
|
+
var d=document, g=d.createElement("script"), s=d.getElementsByTagName("script")[0]; g.type="text/javascript";
|
116
|
+
g.defer=true; g.async=true; g.src=u+"piwik.js"; s.parentNode.insertBefore(g,s);
|
117
|
+
})();
|
118
|
+
</script>
|
119
|
+
<!-- End Piwik Code -->
|
120
|
+
</body>
|
121
|
+
</html>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # Spec for ScrapeSiteJob. Scraper and the downloader are replaced by
    # doubles, so no HTTP request is made here.
    describe ScrapeSiteJob do
      describe '#perform' do
        let(:site_url) { 'http://example.com' }
        let(:fake_scraper) { double("Scraper", html: '<html>rewritten</html>') }
        let(:fake_downloader) { double("Downloader", load: '<html>original</html>') }

        it 'scrapes html' do
          site = create(:scraped_site, url: site_url)

          # Every Scraper built while the job runs is the canned double.
          allow(Scraper).to receive(:new).and_return(fake_scraper)

          # The expectation under test: the job asks the injected downloader
          # for the url stored on the scraped site.
          expect(fake_downloader).to receive(:load).with(site_url)

          ScrapeSiteJob.new(fake_downloader).perform(site)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # Spec for ScrapedSite#csv_url, which is expected to point at a data.csv
    # file located next to the scraped page.
    describe ScrapedSite do
      describe '#csv_url' do
        # Builds an unsaved ScrapedSite for +url+ and returns its csv_url.
        def csv_url_for(url)
          ScrapedSite.new(url: url).csv_url
        end

        it 'replaces base filename of url with data.csv' do
          expect(csv_url_for('http://example.com/foo/index.html'))
            .to eq('http://example.com/foo/data.csv')
        end

        it 'appends data.csv to directory url' do
          expect(csv_url_for('http://example.com/foo/'))
            .to eq('http://example.com/foo/data.csv')
        end
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # Spec for Downloader. All HTTP traffic is stubbed with stub_request, so
    # each example controls exactly which urls respond and with what body.
    describe Downloader do
      describe '#load' do
        it 'yields io containing downloaded files' do
          downloader = Downloader.new
          result = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')

          downloader.load('http://example.com/a') do |io|
            result = io.read
          end

          expect(result).to eq("aaa")
        end

        it 'derives protocol from base_url' do
          # A protocol-relative url must be fetched with the scheme of
          # base_url (http), while the host still comes from the url itself.
          downloader = Downloader.new(base_url: 'http://someother.com')
          result = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')

          downloader.load('//example.com/a') do |io|
            result = io.read
          end

          expect(result).to eq("aaa")
        end

        it 'handles urls relative to base_url' do
          # '../a' relative to http://example.com/b/ resolves to
          # http://example.com/a, which is the only stubbed url.
          downloader = Downloader.new(base_url: 'http://example.com/b/')
          result = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')

          downloader.load('../a') do |io|
            result = io.read
          end

          expect(result).to eq("aaa")
        end
      end

      describe '#load_all' do
        it 'yields io containing concatenation of downloaded files' do
          downloader = Downloader.new
          result = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')
          stub_request(:get, "http://example.com/b").to_return(status: 200, body: 'bbb')

          downloader.load_all(['http://example.com/a', 'http://example.com/b']) do |io|
            result = io.read
          end

          # Previously this example had no expectation and could never fail.
          # The default separator is not visible from this spec, so only
          # assert that both bodies are present in the order requested.
          expect(result).to match(/aaa.*bbb/m)
        end

        it 'allows to specify temp file extension' do
          downloader = Downloader.new
          path = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')

          downloader.load_all(['http://example.com/a'], extension: '.js') do |file|
            path = file.path
          end

          expect(File.extname(path)).to eq('.js')
        end

        it 'allows to specify separator string' do
          downloader = Downloader.new
          result = ''

          stub_request(:get, "http://example.com/a").to_return(status: 200, body: 'aaa')
          stub_request(:get, "http://example.com/b").to_return(status: 200, body: 'bbb')

          downloader.load_all(['http://example.com/a', 'http://example.com/b'], separator: ';') do |io|
            result = io.read
          end

          # The separator follows every file, including the last one.
          expect(result).to eq("aaa;bbb;")
        end
      end
    end
  end
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # Spec for Scraper: the rewritten html it produces and the script and
    # stylesheet urls it reports for the head of the scraped page.
    describe Scraper do
      describe '#html' do
        # Wraps the scraper's html output for the have_tag expectations.
        def rewritten(scraper)
          HtmlFragment.new(scraper.html)
        end

        it 'returns contents of parsed html' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head></head>
              <body>contents</body>
            </html>
          HTML

          expect(Scraper.new(page).html).to include('contents')
        end

        it 'combines script tags in head' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <script type="text/javascript" src="/some.js"></script>
                <script type="text/javascript" src="/other.js"></script>
              </head>
              <body>
              </body>
            </html>
          HTML
          subject_scraper = Scraper.new(page)

          # Individual script tags are gone; a single all.js tag remains.
          expect(rewritten(subject_scraper)).not_to have_tag('head script[src="/some.js"]')
          expect(rewritten(subject_scraper)).to have_tag('head script[src="all.js"]')
        end

        it 'combines link tags in head' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <link rel="stylesheet" type="text/css" href="/some.css">
                <link rel="stylesheet" type="text/css" href="/other.css">
              </head>
              <body>
              </body>
            </html>
          HTML
          subject_scraper = Scraper.new(page)

          # Individual stylesheet links are gone; a single all.css link remains.
          expect(rewritten(subject_scraper)).not_to have_tag('head link[href="/some.css"]')
          expect(rewritten(subject_scraper)).to have_tag('head link[href="all.css"]')
        end

        it 'filters blacklisted inline scripts' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head></head>
              <body>
                <script id="good">window.ok = true;</script>
                <script id="bad">alert();</script>
              </body>
            </html>
          HTML
          subject_scraper = Scraper.new(page, inline_script_blacklist: [/alert/])

          expect(rewritten(subject_scraper)).to have_tag('body script#good')
          expect(rewritten(subject_scraper)).not_to have_tag('body script#bad')
        end

        it 'filters blacklisted selectors' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head></head>
              <body>
                <div id="bad" class="noscript"></div>
                <div id="good"></div>
              </body>
            </html>
          HTML
          subject_scraper = Scraper.new(page, selector_blacklist: ['body .noscript'])

          expect(rewritten(subject_scraper)).to have_tag('body #good')
          expect(rewritten(subject_scraper)).not_to have_tag('body #bad')
        end
      end

      describe '#javascript_urls' do
        it 'returns list of urls to javascript files' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <script type="text/javascript" src="/some.js"></script>
                <script type="text/javascript" src="/other.js"></script>
              </head>
              <body>
              </body>
            </html>
          HTML

          expect(Scraper.new(page).javascript_urls).to eq(['/some.js', '/other.js'])
        end

        it 'filters by blacklist' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <script type="text/javascript" src="/some.js"></script>
                <script type="text/javascript" src="http://example.com/piwik.js"></script>
              </head>
              <body>
              </body>
            </html>
          HTML
          subject_scraper = Scraper.new(page, head_script_blacklist: [/piwik/])

          expect(subject_scraper.javascript_urls).to eq(['/some.js'])
        end

        it 'ignores inline scripts in head' do
          # A script tag without a src attribute contributes no url.
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <script type="text/javascript"></script>
              </head>
              <body>
              </body>
            </html>
          HTML

          expect(Scraper.new(page).javascript_urls).to eq([])
        end
      end

      describe '#stylesheet_urls' do
        it 'returns list of urls to stylesheet files' do
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <link rel="stylesheet" type="text/css" href="/some.css">
                <link rel="stylesheet" type="text/css" href="/other.css">
              </head>
              <body>
              </body>
            </html>
          HTML

          expect(Scraper.new(page).stylesheet_urls).to eq(['/some.css', '/other.css'])
        end

        it 'ignores non css links' do
          # Only the favicon link is present, so no stylesheet url is expected.
          page = <<-HTML
            <!DOCTYPE html>
            <html>
              <head>
                <link rel="shortcut icon" href="/favicon.ico" />
              </head>
              <body>
              </body>
            </html>
          HTML

          expect(Scraper.new(page).stylesheet_urls).to eq([])
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Pageflow
  module Chart
    # End-to-end request spec: posting a url creates a ScrapedSite whose html,
    # javascript, stylesheet and csv files are all present afterwards. The
    # example is tagged :inline_resque.
    describe 'scraping site', inline_resque: true do
      # Resolve the fixture relative to this file (spec/requests) instead of
      # the current working directory, so the spec does not depend on where
      # rspec is started from.
      let(:chart_html) do
        File.read(File.expand_path('../../fixtures/datawrapper.html', __FILE__))
      end

      before do
        stub_request(:get, "http://example.com/chart.html")
          .to_return(status: 200, body: chart_html)
        # Every dependency of the page (scripts, stylesheets, csv data) gets
        # the same dummy body.
        stub_request(:get, /js$|css$|csv$/)
          .to_return(status: 200, body: 'file')
      end

      it 'downloads html and dependencies' do
        post('/charts/scraped_sites', scraped_site: {url: 'http://example.com/chart.html'}, format: 'json')

        site = ScrapedSite.first

        expect(site.html_file).to be_present
        expect(site.javascript_file).to be_present
        expect(site.stylesheet_file).to be_present
        expect(site.csv_file).to be_present
      end
    end
  end
end
|