sinew 1.0.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +49 -0
- data/.travis.yml +4 -0
- data/.vscode/extensions.json +3 -0
- data/.vscode/settings.json +15 -0
- data/Gemfile +1 -1
- data/README.md +153 -12
- data/Rakefile +13 -14
- data/bin/sinew +40 -20
- data/lib/sinew.rb +10 -6
- data/lib/sinew/cache.rb +79 -0
- data/lib/sinew/core_ext.rb +59 -0
- data/lib/sinew/dsl.rb +98 -0
- data/lib/sinew/main.rb +80 -149
- data/lib/sinew/nokogiri_ext.rb +10 -9
- data/lib/sinew/output.rb +126 -0
- data/lib/sinew/request.rb +148 -0
- data/lib/sinew/response.rb +75 -0
- data/lib/sinew/runtime_options.rb +26 -0
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +5 -3
- data/sinew.gemspec +24 -19
- data/test/test.html +40 -34
- data/test/test_cache.rb +69 -0
- data/test/test_helper.rb +113 -0
- data/test/test_main.rb +36 -91
- data/test/test_nokogiri_ext.rb +14 -15
- data/test/test_output.rb +73 -0
- data/test/test_requests.rb +135 -0
- data/test/test_utf8.rb +39 -0
- metadata +103 -48
- data/lib/sinew/curler.rb +0 -173
- data/lib/sinew/text_util.rb +0 -101
- data/lib/sinew/util.rb +0 -236
- data/test/helper.rb +0 -64
- data/test/test_curler.rb +0 -70
- data/test/test_text_util.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3f5a493286f0bfdff9cb26bf85e768750edc57f489aec31e049a3c634bdc9074
|
4
|
+
data.tar.gz: bda8c48fe0a1bccd4caf0c3304c248c218839f0d5547269df977dca56bca43c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9be078939e5e652c1775473322af55065d4ed0a541ba204b84381ba2d44c22b013e19993f500e02f08c9c686c0ee0f0af95cba9f644edd76058d0c1c05180c8
|
7
|
+
data.tar.gz: 9bd2bde0fa0802a0d3b8467322dae695591a6a8928bf86ac1c7fab11f9754a4b66741e41e2ca029f9f9d1c5307c7e750bd29c64231399866ca4acb4d42087e31
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
AllCops:
|
2
|
+
Exclude:
|
3
|
+
TargetRubyVersion: 2.3
|
4
|
+
|
5
|
+
# amd: customizations
|
6
|
+
Layout/SpaceInsideArrayLiteralBrackets:
|
7
|
+
EnforcedStyle: compact
|
8
|
+
Layout/CaseIndentation:
|
9
|
+
EnforcedStyle: end
|
10
|
+
Layout/EndAlignment:
|
11
|
+
EnforcedStyleAlignWith: variable
|
12
|
+
Style/CollectionMethods:
|
13
|
+
Enabled: true
|
14
|
+
PreferredMethods:
|
15
|
+
reduce: inject
|
16
|
+
Style/EmptyMethod:
|
17
|
+
Enabled: false
|
18
|
+
Style/TrailingCommaInArrayLiteral:
|
19
|
+
EnforcedStyleForMultiline: consistent_comma
|
20
|
+
Style/TrailingCommaInHashLiteral:
|
21
|
+
EnforcedStyleForMultiline: consistent_comma
|
22
|
+
|
23
|
+
# amd: these seem extreme
|
24
|
+
Lint/AssignmentInCondition: { Enabled: false } # I do this all the time
|
25
|
+
Lint/HandleExceptions: { Enabled: false } # blank rescues are useful
|
26
|
+
Naming/BinaryOperatorParameterName: { Enabled: false } # silly
|
27
|
+
Naming/HeredocDelimiterNaming: { Enabled: false } # silly
|
28
|
+
Naming/UncommunicativeMethodParamName: { Enabled: false } # silly
|
29
|
+
Performance/RegexpMatch: { Enabled: false } # =~ is fine
|
30
|
+
Performance/TimesMap: { Enabled: false } # silly
|
31
|
+
Style/ClassAndModuleChildren: { Enabled: false } # silly
|
32
|
+
Style/Documentation: { Enabled: false } # we don't need this
|
33
|
+
Style/DoubleNegation: { Enabled: false } # silly
|
34
|
+
Style/FormatStringToken: { Enabled: false } # we like printf here
|
35
|
+
Style/FrozenStringLiteralComment: { Enabled: false } # seems excessive
|
36
|
+
Style/GuardClause: { Enabled: false } # confusing
|
37
|
+
Style/IfUnlessModifier: { Enabled: false } # personally I hate unless
|
38
|
+
Style/NegatedIf: { Enabled: false } # these are fine
|
39
|
+
Style/Next: { Enabled: false } # these are fine
|
40
|
+
Style/NumericPredicate: { Enabled: false } # silly
|
41
|
+
Style/ParallelAssignment: { Enabled: false } # these are fine
|
42
|
+
Style/PerlBackrefs: { Enabled: false } # these are fine
|
43
|
+
Style/RaiseArgs: { Enabled: false } # silly
|
44
|
+
Style/RegexpLiteral: { Enabled: false } # these are fine
|
45
|
+
Style/StderrPuts: { Enabled: false } # this is awful
|
46
|
+
|
47
|
+
# amd: these Metric rules are annoying, disable
|
48
|
+
Metrics:
|
49
|
+
Enabled: false
|
data/.travis.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"editor.formatOnSave": true,
|
3
|
+
"editor.formatOnSaveTimeout": 1500,
|
4
|
+
"editor.tabSize": 2,
|
5
|
+
"editor.wordSeparators": "`~#$%^&*()-=+[{]}\\|;:'\",.<>/",
|
6
|
+
"files.associations": {
|
7
|
+
"*.sinew": "ruby"
|
8
|
+
},
|
9
|
+
"files.insertFinalNewline": true,
|
10
|
+
"files.trimTrailingWhitespace": true,
|
11
|
+
"ruby.format": "rubocop",
|
12
|
+
"ruby.lint": {
|
13
|
+
"rubocop": true
|
14
|
+
}
|
15
|
+
}
|
data/Gemfile
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
gemspec
|
data/README.md
CHANGED
@@ -2,15 +2,47 @@
|
|
2
2
|
|
3
3
|
Sinew collects structured data from web sites (screen scraping). It provides a Ruby DSL built for crawling, a robust caching system, and integration with [Nokogiri](http://nokogiri.org). Though small, this project is the culmination of years of effort based on crawling systems built at several different companies.
|
4
4
|
|
5
|
-
Sinew requires Ruby 1.9, [HTML Tidy](http://tidy.sourceforge.net) and [Curl](http://curl.haxx.se).
|
6
|
-
|
7
5
|
Sinew is distributed as a ruby gem:
|
8
6
|
|
9
7
|
```ruby
|
10
8
|
gem install sinew
|
11
9
|
```
|
12
10
|
|
13
|
-
|
11
|
+
or in your Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'sinew'
|
15
|
+
```
|
16
|
+
|
17
|
+
## Table of Contents
|
18
|
+
|
19
|
+
<!---
|
20
|
+
markdown-toc --no-firsth1 --maxdepth 1 readme.md
|
21
|
+
-->
|
22
|
+
|
23
|
+
* [Sinew 2 (May 2018)](#sinew-2-may-2018)
|
24
|
+
* [Quick Example](#quick-example)
|
25
|
+
* [How it Works](#how-it-works)
|
26
|
+
* [DSL Reference](#dsl-reference)
|
27
|
+
* [Hints](#hints)
|
28
|
+
* [Limitations](#limitations)
|
29
|
+
* [Changelog](#changelog)
|
30
|
+
|
31
|
+
## Sinew 2 (May 2018)
|
32
|
+
|
33
|
+
I am pleased to announce the release of Sinew 2.0, a complete rewrite of Sinew for the modern era. Enhancements include:
|
34
|
+
|
35
|
+
* Remove dependencies on active_support, curl and tidy. We use HTTParty now.
|
36
|
+
* Much easier to customize requests in `.sinew` files. For example, setting User-Agent or Bearer tokens.
|
37
|
+
* More operations like `post_json` or the generic `http`. These methods are thin wrappers around HTTParty.
|
38
|
+
* New end-of-run report.
|
39
|
+
* Tests, rubocop, vscode settings, travis, etc.
|
40
|
+
|
41
|
+
**Breaking change**
|
42
|
+
|
43
|
+
Sinew uses a new format for cached responses. Old Sinew 1 cache directories must be removed before running Sinew again. Sinew 2 might choke on Sinew 1 cache directories when reading `head/`. This is not tested or supported.
|
44
|
+
|
45
|
+
## Quick Example
|
14
46
|
|
15
47
|
Here's an example for collecting the links from httpbin.org:
|
16
48
|
|
@@ -29,18 +61,127 @@ noko.css("ul li a").each do |a|
|
|
29
61
|
end
|
30
62
|
```
|
31
63
|
|
32
|
-
If you paste this into a file called `
|
64
|
+
If you paste this into a file called `sample.sinew` and run `sinew sample.sinew`, it will create a `sample.csv` file containing the href and text for each link.
|
65
|
+
|
66
|
+
## How it Works
|
67
|
+
|
68
|
+
There are three main features provided by Sinew.
|
69
|
+
|
70
|
+
#### The Sinew DSL
|
71
|
+
|
72
|
+
Sinew uses recipe files to crawl web sites. Recipes have the `.sinew` extension, but they are plain old Ruby. The [Sinew DSL](#dsl-reference) makes crawling easy. Use `get` to make an HTTP GET:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
get "https://www.google.com/search?q=darwin"
|
76
|
+
get "https://www.google.com/search", q: "charles darwin"
|
77
|
+
```
|
78
|
+
|
79
|
+
Once you've done a `get`, you have access to the document in a few different formats. In general, it's easiest to use `noko` to automatically parse and interact with the results. If Nokogiri isn't appropriate, you can fall back to regular expressions run against `raw` or `html`. Use `json` if you are expecting a JSON response.
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
get "https://www.google.com/search?q=darwin"
|
83
|
+
|
84
|
+
# pull out the links with nokogiri
|
85
|
+
links = noko.css("a").map { |i| i[:href] }
|
86
|
+
puts links.inspect
|
87
|
+
|
88
|
+
# or, use a regex
|
89
|
+
links = html[/<a[^>]+href="([^"]+)/, 1]
|
90
|
+
puts links.inspect
|
91
|
+
```
|
92
|
+
|
93
|
+
#### CSV Output
|
94
|
+
|
95
|
+
Recipes output CSV files. To continue the example above:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
get "https://www.google.com/search?q=darwin"
|
99
|
+
noko.css("a").each do |i|
|
100
|
+
row = { }
|
101
|
+
row[:href] = i[:href]
|
102
|
+
row[:text] = i.text
|
103
|
+
csv_emit row
|
104
|
+
end
|
105
|
+
```
|
106
|
+
|
107
|
+
Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)` appends a row. The values of your hash are converted to strings:
|
108
|
+
|
109
|
+
1. Nokogiri nodes are converted to text
|
110
|
+
1. Arrays are joined with "|", so you can separate them later
|
111
|
+
1. HTML tags, entities and non-ascii chars are removed
|
112
|
+
1. Whitespace is squished
|
113
|
+
|
114
|
+
#### Caching
|
115
|
+
|
116
|
+
Requests are made using HTTParty, and all responses are cached on disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
|
117
|
+
|
118
|
+
The files in `~/.sinew` have nice names and are designed to be human readable. This helps when writing recipes. Sinew never deletes files from the cache - that's up to you!
|
119
|
+
|
120
|
+
Because all requests are cached, you can run Sinew repeatedly with confidence. Run it over and over again while you build up your recipe.
|
121
|
+
|
122
|
+
## DSL Reference
|
123
|
+
|
124
|
+
#### Making requests
|
125
|
+
|
126
|
+
* `get(url, query = {})` - fetch a url with HTTP GET. URL parameters can be added using `query`.
|
127
|
+
* `post(url, form = {})` - fetch a url with HTTP POST, using `form` as the POST body.
|
128
|
+
* `post_json(url, json = {})` - fetch a url with HTTP POST, using `json` as the POST body.
|
129
|
+
* `http(method, url, options = {})` - use this for more complex requests
|
130
|
+
|
131
|
+
#### Parsing the response
|
132
|
+
|
133
|
+
* `raw` - the raw response from the last request
|
134
|
+
* `html` - like `raw`, but with a handful of HTML-specific whitespace cleanups
|
135
|
+
* `noko` - a [Nokogiri](http://nokogiri.org) document built from the tidied HTML
|
136
|
+
* `json` - parse the response as JSON, with symbolized keys
|
137
|
+
* `url` - the url of the last request. If the request goes through a redirect, `url` will reflect the final url.
|
138
|
+
* `uri` - the URI of the last request. This is useful for resolving relative URLs.
|
139
|
+
|
140
|
+
#### Writing CSV
|
141
|
+
|
142
|
+
* `csv_header(keys)` - specify the columns for CSV output. If you don't call this, Sinew will use the keys from the first call to `csv_emit`.
|
143
|
+
* `csv_emit(hash)` - append a row to the CSV file
|
144
|
+
|
145
|
+
## Hints
|
146
|
+
|
147
|
+
Writing Sinew recipes is fun and easy. The builtin caching means you can iterate quickly, since you won't have to re-fetch the data. Here are some hints for writing idiomatic recipes:
|
148
|
+
|
149
|
+
* Sinew doesn't (yet) check robots.txt - please check it manually.
|
150
|
+
* Prefer Nokogiri over regular expressions wherever possible. Learn [CSS selectors](http://www.w3schools.com/cssref/css_selectors.asp).
|
151
|
+
* In Chrome, `$` in the console is your friend.
|
152
|
+
* Fall back to regular expressions if you're desperate. Depending on the site, use either `raw` or `html`. `html` is probably your best bet. `raw` is good for crawling JavaScript, but it's fragile if the site changes.
|
153
|
+
* Learn to love `String#[regexp]`, which is an obscure operator but incredibly handy for Sinew.
|
154
|
+
* Laziness is useful. Keep your CSS selectors and regular expressions simple, so maybe they'll work again the next time you need to crawl a site.
|
155
|
+
* Don't be afraid to mix CSS selectors, regular expressions, and Ruby:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
|
159
|
+
```
|
160
|
+
|
161
|
+
* Debug your recipes using plain old `puts`, or better yet use `ap` from [awesome_print](https://github.com/michaeldv/awesome_print).
|
162
|
+
* Run `sinew -v` to get a report on every `csv_emit`. Very handy.
|
163
|
+
* Add the CSV files to your git repo. That way you can version them and get diffs!
|
164
|
+
|
165
|
+
## Limitations
|
166
|
+
|
167
|
+
* Caching is based on URL, so use caution with cookies and other forms of authentication
|
168
|
+
* Almost no support for international (non-english) characters
|
169
|
+
|
170
|
+
## Changelog
|
171
|
+
|
172
|
+
#### 2.0.0 (May 2018)
|
173
|
+
|
174
|
+
* Complete rewrite. See above.
|
33
175
|
|
34
|
-
|
176
|
+
#### 1.0.3
|
35
177
|
|
36
|
-
|
178
|
+
* Friendlier message if curl or tidy are missing.
|
37
179
|
|
38
|
-
|
39
|
-
* Sinew runs responses through [HTML Tidy](http://tidy.sourceforge.net). This cleans up dirty HTML and makes it easier to parse in many cases, especially if you have to fallback to regular expressions instead of Nokogiri. Unfortunately, this is a common use case in my experience.
|
40
|
-
* Sinew outputs CSV files. It does exactly one thing and it does it well - Sinew crawls a site and outputs a CSV file. Mechanize is a more general toolkit.
|
180
|
+
#### 1.0.2
|
41
181
|
|
42
|
-
|
182
|
+
* Remove entity options from tidy, which didn't work on MacOS (thanks Rex!)
|
43
183
|
|
44
|
-
|
184
|
+
#### 1.0.1
|
45
185
|
|
46
|
-
|
186
|
+
* Trying to run on 1.8 produces a fatal error. Onward!
|
187
|
+
* Added first batch of unit tests
|
data/Rakefile
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require "rake"
|
4
|
-
require "rake/testtask"
|
1
|
+
require 'bundler'
|
2
|
+
require 'bundler/setup'
|
5
3
|
|
6
|
-
|
7
|
-
require
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'sinew/version'
|
8
7
|
|
9
8
|
#
|
10
9
|
# gem
|
11
10
|
#
|
12
11
|
|
13
|
-
task :
|
12
|
+
task gem: :build
|
14
13
|
task :build do
|
15
|
-
system
|
14
|
+
system 'gem build --quiet sinew.gemspec'
|
16
15
|
end
|
17
16
|
|
18
|
-
task :
|
17
|
+
task install: :build do
|
19
18
|
system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
|
20
19
|
end
|
21
20
|
|
22
|
-
task :
|
21
|
+
task release: :build do
|
23
22
|
system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
|
24
|
-
system
|
23
|
+
system 'git push --tags'
|
25
24
|
system "gem push sinew-#{Sinew::VERSION}.gem"
|
26
25
|
end
|
27
26
|
|
@@ -29,11 +28,11 @@ end
|
|
29
28
|
# minitest
|
30
29
|
#
|
31
30
|
|
32
|
-
Rake::TestTask.new(:test) do |
|
33
|
-
|
31
|
+
Rake::TestTask.new(:test) do |t|
|
32
|
+
t.warning = false
|
34
33
|
end
|
35
34
|
|
36
|
-
task :
|
35
|
+
task default: :test
|
37
36
|
|
38
37
|
# to test:
|
39
38
|
# block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
|
data/bin/sinew
CHANGED
@@ -1,29 +1,49 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
require "trollop"
|
3
|
+
$LOAD_PATH.unshift("#{__dir__}/../lib")
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
require 'sinew'
|
6
|
+
require 'slop'
|
7
|
+
|
8
|
+
#
|
9
|
+
# options
|
10
|
+
#
|
11
|
+
|
12
|
+
options = Slop.parse do |o|
|
13
|
+
o.banner = 'Usage: sinew [options] <gub.sinew>'
|
14
|
+
o.bool '-v', '--verbose', 'dump every row'
|
15
|
+
o.bool '--version', 'show version'
|
16
|
+
o.bool '-q', '--quiet', 'suppress some output'
|
17
|
+
o.string '--cache', 'Set the cache directory (defaults to ~/.sinew)', default: "#{ENV['HOME']}/.sinew"
|
18
|
+
o.on '--help' do
|
19
|
+
puts o
|
20
|
+
exit
|
14
21
|
end
|
15
22
|
end
|
16
23
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
opt :cache, "Set the cache directory (defaults to ~/.sinew)"
|
21
|
-
opt :verbose, "Dump every row"
|
22
|
-
opt :quiet, "Be quiet"
|
24
|
+
if options[:version]
|
25
|
+
puts Sinew::VERSION
|
26
|
+
exit
|
23
27
|
end
|
24
|
-
Trollop.die "need a .sinew file to run against" if ARGV.blank?
|
25
28
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
+
#
|
30
|
+
# recipe
|
31
|
+
#
|
32
|
+
|
33
|
+
recipe = options.arguments.first
|
34
|
+
if !recipe
|
35
|
+
Scripto.fatal('need a .sinew file to run against')
|
29
36
|
end
|
37
|
+
if !File.exist?(recipe)
|
38
|
+
Scripto.fatal("#{recipe} not found")
|
39
|
+
end
|
40
|
+
if options.arguments.length > 1
|
41
|
+
Scripto.fatal('can only run on one .sinew file')
|
42
|
+
end
|
43
|
+
options = options.to_h.merge(recipe: recipe)
|
44
|
+
|
45
|
+
#
|
46
|
+
# main
|
47
|
+
#
|
48
|
+
|
49
|
+
Sinew::Main.new(options).run
|
data/lib/sinew.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require_relative 'sinew/cache'
|
2
|
+
require_relative 'sinew/core_ext'
|
3
|
+
require_relative 'sinew/dsl'
|
4
|
+
require_relative 'sinew/main'
|
5
|
+
require_relative 'sinew/nokogiri_ext'
|
6
|
+
require_relative 'sinew/output'
|
7
|
+
require_relative 'sinew/request'
|
8
|
+
require_relative 'sinew/response'
|
9
|
+
require_relative 'sinew/runtime_options'
|
10
|
+
require_relative 'sinew/version'
|
data/lib/sinew/cache.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
#
|
5
|
+
# This class handles the caching of http responses on disk.
|
6
|
+
#
|
7
|
+
|
8
|
+
module Sinew
|
9
|
+
class Cache
|
10
|
+
attr_reader :sinew
|
11
|
+
|
12
|
+
def initialize(sinew)
|
13
|
+
@sinew = sinew
|
14
|
+
end
|
15
|
+
|
16
|
+
def get(request)
|
17
|
+
body = read_if_exist(body_path(request))
|
18
|
+
return nil if !body
|
19
|
+
|
20
|
+
head = read_if_exist(head_path(request))
|
21
|
+
Response.from_cache(request, body, head)
|
22
|
+
end
|
23
|
+
|
24
|
+
def set(response)
|
25
|
+
body_path = body_path(response.request)
|
26
|
+
head_path = head_path(response.request)
|
27
|
+
|
28
|
+
FileUtils.mkdir_p(File.dirname(body_path))
|
29
|
+
FileUtils.mkdir_p(File.dirname(head_path))
|
30
|
+
|
31
|
+
# write body, and head if necessary
|
32
|
+
atomic_write(body_path, response.body)
|
33
|
+
if head_necessary?(response)
|
34
|
+
head = JSON.pretty_generate(response.head_as_json)
|
35
|
+
atomic_write(head_path, head)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def root_dir
|
40
|
+
sinew.options[:cache]
|
41
|
+
end
|
42
|
+
protected :root_dir
|
43
|
+
|
44
|
+
def head_necessary?(response)
|
45
|
+
response.error? || response.redirected?
|
46
|
+
end
|
47
|
+
protected :head_necessary?
|
48
|
+
|
49
|
+
def body_path(request)
|
50
|
+
"#{root_dir}/#{request.cache_key}"
|
51
|
+
end
|
52
|
+
protected :body_path
|
53
|
+
|
54
|
+
def head_path(request)
|
55
|
+
body_path = body_path(request)
|
56
|
+
dir, base = File.dirname(body_path), File.basename(body_path)
|
57
|
+
"#{dir}/head/#{base}"
|
58
|
+
end
|
59
|
+
protected :head_path
|
60
|
+
|
61
|
+
def read_if_exist(path)
|
62
|
+
if File.exist?(path)
|
63
|
+
IO.read(path, mode: 'r:UTF-8')
|
64
|
+
end
|
65
|
+
end
|
66
|
+
protected :read_if_exist
|
67
|
+
|
68
|
+
def atomic_write(path, data)
|
69
|
+
tmp = Tempfile.new('sinew', encoding: 'UTF-8')
|
70
|
+
tmp.write(data)
|
71
|
+
tmp.close
|
72
|
+
FileUtils.chmod(0o644, tmp.path)
|
73
|
+
FileUtils.mv(tmp.path, path)
|
74
|
+
ensure
|
75
|
+
FileUtils.rm(tmp.path, force: true)
|
76
|
+
end
|
77
|
+
protected :atomic_write
|
78
|
+
end
|
79
|
+
end
|