staticizer 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/staticizer/crawler.rb +2 -1
- data/lib/staticizer/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 281ee17c4f67f579faa5d1762f9677203696fd0b
|
4
|
+
data.tar.gz: f82db0909cd61819e3f19a43e17eb3ed552111f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bc5a3463838030e46d0353bd173154ff37b92ae36294d561b4e37270b2ee4dd252b4f6e7aff36cb428b0cc1c917b02fb2ec1abbc1ce7bba85679aed428af68cb
|
7
|
+
data.tar.gz: 3267e1e063ff7c17c6797c7072b35456fa105f0c9af0dfc905c42e9394f7faacb2615b2829019923a9c2de91bec90a499c64b24e28e43ff3ba542f860511ea27
|
data/README.md
CHANGED
@@ -49,8 +49,7 @@ Or install it yourself as:
|
|
49
49
|
|
50
50
|
## Command line usage
|
51
51
|
|
52
|
-
|
53
|
-
|
52
|
+
Staticizer can be used through the commandline tool or by requiring the library.
|
54
53
|
|
55
54
|
### Crawl a website and write to disk
|
56
55
|
|
@@ -125,11 +124,12 @@ This will only crawl urls in the domain squaremill.com
|
|
125
124
|
## Crawler Options
|
126
125
|
|
127
126
|
* :aws - Hash of connection options passed to aws/sdk gem
|
128
|
-
* :filter_url -
|
127
|
+
* :filter_url - lambda called to see if a discovered URL should be crawled, return the url (can be modified) to crawl, return nil otherwise
|
129
128
|
* :output_dir - if writing a site to disk the directory to write to, will be created if it does not exist
|
130
129
|
* :logger - A logger object responding to the usual Ruby Logger methods.
|
131
130
|
* :log_level - Log level - defaults to INFO.
|
132
|
-
|
131
|
+
* :valid_domains - Array of domains that should be crawled. Domains not in this list will be ignored.
|
132
|
+
* :process_body - lambda called to pre-process body of content before writing it out.
|
133
133
|
|
134
134
|
## Contributing
|
135
135
|
|
data/lib/staticizer/crawler.rb
CHANGED
@@ -75,7 +75,7 @@ module Staticizer
|
|
75
75
|
end
|
76
76
|
|
77
77
|
def extract_css_urls(css, base_uri)
|
78
|
-
css.scan(/url\(['"]?(.+?)['"]?\)/).map {|src| make_absolute(base_uri, src[0]) }
|
78
|
+
css.scan(/url\(\s*['"]?(.+?)['"]?\s*\)/).map {|src| make_absolute(base_uri, src[0]) }
|
79
79
|
end
|
80
80
|
|
81
81
|
def add_urls(urls, info = {})
|
@@ -184,6 +184,7 @@ module Staticizer
|
|
184
184
|
add_urls(extract_links(doc, url), {:type_hint => "link"})
|
185
185
|
add_urls(extract_scripts(doc, url), {:type_hint => "script"})
|
186
186
|
add_urls(extract_images(doc, url), {:type_hint => "image"})
|
187
|
+
add_urls(extract_css_urls(response.body, url), {:type_hint => "css_url"})
|
187
188
|
add_urls(extract_hrefs(doc, url), {:type_hint => "href"}) unless @opts[:single_page]
|
188
189
|
else
|
189
190
|
save_page(response, parsed_uri)
|
data/lib/staticizer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: staticizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Conor Hunt
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
version: '0'
|
123
123
|
requirements: []
|
124
124
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.2.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: A tool to create a static version of a website for hosting on S3.
|