staticizer 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/staticizer/crawler.rb +2 -1
- data/lib/staticizer/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 281ee17c4f67f579faa5d1762f9677203696fd0b
|
4
|
+
data.tar.gz: f82db0909cd61819e3f19a43e17eb3ed552111f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc5a3463838030e46d0353bd173154ff37b92ae36294d561b4e37270b2ee4dd252b4f6e7aff36cb428b0cc1c917b02fb2ec1abbc1ce7bba85679aed428af68cb
|
7
|
+
data.tar.gz: 3267e1e063ff7c17c6797c7072b35456fa105f0c9af0dfc905c42e9394f7faacb2615b2829019923a9c2de91bec90a499c64b24e28e43ff3ba542f860511ea27
|
data/README.md
CHANGED
@@ -49,8 +49,7 @@ Or install it yourself as:
|
|
49
49
|
|
50
50
|
## Command line usage
|
51
51
|
|
52
|
-
|
53
|
-
|
52
|
+
Staticizer can be used through the commandline tool or by requiring the library.
|
54
53
|
|
55
54
|
### Crawl a website and write to disk
|
56
55
|
|
@@ -125,11 +124,12 @@ This will only crawl urls in the domain squaremill.com
|
|
125
124
|
## Crawler Options
|
126
125
|
|
127
126
|
* :aws - Hash of connection options passed to aws/sdk gem
|
128
|
-
* :filter_url -
|
127
|
+
* :filter_url - lambda called to see if a discovered URL should be crawled, return the url (can be modified) to crawl, return nil otherwise
|
129
128
|
* :output_dir - if writing a site to disk the directory to write to, will be created if it does not exist
|
130
129
|
* :logger - A logger object responding to the usual Ruby Logger methods.
|
131
130
|
* :log_level - Log level - defaults to INFO.
|
132
|
-
|
131
|
+
* :valid_domains - Array of domains that should be crawled. Domains not in this list will be ignored.
|
132
|
+
* :process_body - lambda called to pre-process body of content before writing it out.
|
133
133
|
|
134
134
|
## Contributing
|
135
135
|
|
data/lib/staticizer/crawler.rb
CHANGED
@@ -75,7 +75,7 @@ module Staticizer
|
|
75
75
|
end
|
76
76
|
|
77
77
|
def extract_css_urls(css, base_uri)
|
78
|
-
css.scan(/url\(['"]?(.+?)['"]?\)/).map {|src| make_absolute(base_uri, src[0]) }
|
78
|
+
css.scan(/url\(\s*['"]?(.+?)['"]?\s*\)/).map {|src| make_absolute(base_uri, src[0]) }
|
79
79
|
end
|
80
80
|
|
81
81
|
def add_urls(urls, info = {})
|
@@ -184,6 +184,7 @@ module Staticizer
|
|
184
184
|
add_urls(extract_links(doc, url), {:type_hint => "link"})
|
185
185
|
add_urls(extract_scripts(doc, url), {:type_hint => "script"})
|
186
186
|
add_urls(extract_images(doc, url), {:type_hint => "image"})
|
187
|
+
add_urls(extract_css_urls(response.body, url), {:type_hint => "css_url"})
|
187
188
|
add_urls(extract_hrefs(doc, url), {:type_hint => "href"}) unless @opts[:single_page]
|
188
189
|
else
|
189
190
|
save_page(response, parsed_uri)
|
data/lib/staticizer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: staticizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Conor Hunt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
version: '0'
|
123
123
|
requirements: []
|
124
124
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.2.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: A tool to create a static version of a website for hosting on S3.
|