sinew 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +49 -0
- data/.travis.yml +4 -0
- data/.vscode/extensions.json +3 -0
- data/.vscode/settings.json +15 -0
- data/Gemfile +1 -1
- data/README.md +153 -12
- data/Rakefile +13 -14
- data/bin/sinew +40 -20
- data/lib/sinew.rb +10 -6
- data/lib/sinew/cache.rb +79 -0
- data/lib/sinew/core_ext.rb +59 -0
- data/lib/sinew/dsl.rb +98 -0
- data/lib/sinew/main.rb +80 -149
- data/lib/sinew/nokogiri_ext.rb +10 -9
- data/lib/sinew/output.rb +126 -0
- data/lib/sinew/request.rb +148 -0
- data/lib/sinew/response.rb +75 -0
- data/lib/sinew/runtime_options.rb +26 -0
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +5 -3
- data/sinew.gemspec +24 -19
- data/test/test.html +40 -34
- data/test/test_cache.rb +69 -0
- data/test/test_helper.rb +113 -0
- data/test/test_main.rb +36 -91
- data/test/test_nokogiri_ext.rb +14 -15
- data/test/test_output.rb +73 -0
- data/test/test_requests.rb +135 -0
- data/test/test_utf8.rb +39 -0
- metadata +103 -48
- data/lib/sinew/curler.rb +0 -173
- data/lib/sinew/text_util.rb +0 -101
- data/lib/sinew/util.rb +0 -236
- data/test/helper.rb +0 -64
- data/test/test_curler.rb +0 -70
- data/test/test_text_util.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 3f5a493286f0bfdff9cb26bf85e768750edc57f489aec31e049a3c634bdc9074
|
4
|
+
data.tar.gz: bda8c48fe0a1bccd4caf0c3304c248c218839f0d5547269df977dca56bca43c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9be078939e5e652c1775473322af55065d4ed0a541ba204b84381ba2d44c22b013e19993f500e02f08c9c686c0ee0f0af95cba9f644edd76058d0c1c05180c8
|
7
|
+
data.tar.gz: 9bd2bde0fa0802a0d3b8467322dae695591a6a8928bf86ac1c7fab11f9754a4b66741e41e2ca029f9f9d1c5307c7e750bd29c64231399866ca4acb4d42087e31
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
AllCops:
|
2
|
+
Exclude:
|
3
|
+
TargetRubyVersion: 2.3
|
4
|
+
|
5
|
+
# amd: customizations
|
6
|
+
Layout/SpaceInsideArrayLiteralBrackets:
|
7
|
+
EnforcedStyle: compact
|
8
|
+
Layout/CaseIndentation:
|
9
|
+
EnforcedStyle: end
|
10
|
+
Layout/EndAlignment:
|
11
|
+
EnforcedStyleAlignWith: variable
|
12
|
+
Style/CollectionMethods:
|
13
|
+
Enabled: true
|
14
|
+
PreferredMethods:
|
15
|
+
reduce: inject
|
16
|
+
Style/EmptyMethod:
|
17
|
+
Enabled: false
|
18
|
+
Style/TrailingCommaInArrayLiteral:
|
19
|
+
EnforcedStyleForMultiline: consistent_comma
|
20
|
+
Style/TrailingCommaInHashLiteral:
|
21
|
+
EnforcedStyleForMultiline: consistent_comma
|
22
|
+
|
23
|
+
# amd: these seem extreme
|
24
|
+
Lint/AssignmentInCondition: { Enabled: false } # I do this all the time
|
25
|
+
Lint/HandleExceptions: { Enabled: false } # blank rescues are useful
|
26
|
+
Naming/BinaryOperatorParameterName: { Enabled: false } # silly
|
27
|
+
Naming/HeredocDelimiterNaming: { Enabled: false } # silly
|
28
|
+
Naming/UncommunicativeMethodParamName: { Enabled: false } # silly
|
29
|
+
Performance/RegexpMatch: { Enabled: false } # =~ is fine
|
30
|
+
Performance/TimesMap: { Enabled: false } # silly
|
31
|
+
Style/ClassAndModuleChildren: { Enabled: false } # silly
|
32
|
+
Style/Documentation: { Enabled: false } # we don't need this
|
33
|
+
Style/DoubleNegation: { Enabled: false } # silly
|
34
|
+
Style/FormatStringToken: { Enabled: false } # we like printf here
|
35
|
+
Style/FrozenStringLiteralComment: { Enabled: false } # seems excessive
|
36
|
+
Style/GuardClause: { Enabled: false } # confusing
|
37
|
+
Style/IfUnlessModifier: { Enabled: false } # personally I hate unless
|
38
|
+
Style/NegatedIf: { Enabled: false } # these are fine
|
39
|
+
Style/Next: { Enabled: false } # these are fine
|
40
|
+
Style/NumericPredicate: { Enabled: false } # silly
|
41
|
+
Style/ParallelAssignment: { Enabled: false } # these are fine
|
42
|
+
Style/PerlBackrefs: { Enabled: false } # these are fine
|
43
|
+
Style/RaiseArgs: { Enabled: false } # silly
|
44
|
+
Style/RegexpLiteral: { Enabled: false } # these are fine
|
45
|
+
Style/StderrPuts: { Enabled: false } # this is awful
|
46
|
+
|
47
|
+
# amd: these Metric rules are annoying, disable
|
48
|
+
Metrics:
|
49
|
+
Enabled: false
|
data/.travis.yml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"editor.formatOnSave": true,
|
3
|
+
"editor.formatOnSaveTimeout": 1500,
|
4
|
+
"editor.tabSize": 2,
|
5
|
+
"editor.wordSeparators": "`~#$%^&*()-=+[{]}\\|;:'\",.<>/",
|
6
|
+
"files.associations": {
|
7
|
+
"*.sinew": "ruby"
|
8
|
+
},
|
9
|
+
"files.insertFinalNewline": true,
|
10
|
+
"files.trimTrailingWhitespace": true,
|
11
|
+
"ruby.format": "rubocop",
|
12
|
+
"ruby.lint": {
|
13
|
+
"rubocop": true
|
14
|
+
}
|
15
|
+
}
|
data/Gemfile
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
source
|
1
|
+
source 'http://rubygems.org'
|
2
2
|
gemspec
|
data/README.md
CHANGED
@@ -2,15 +2,47 @@
|
|
2
2
|
|
3
3
|
Sinew collects structured data from web sites (screen scraping). It provides a Ruby DSL built for crawling, a robust caching system, and integration with [Nokogiri](http://nokogiri.org). Though small, this project is the culmination of years of effort based on crawling systems built at several different companies.
|
4
4
|
|
5
|
-
Sinew requires Ruby 1.9, [HTML Tidy](http://tidy.sourceforge.net) and [Curl](http://curl.haxx.se).
|
6
|
-
|
7
5
|
Sinew is distributed as a ruby gem:
|
8
6
|
|
9
7
|
```ruby
|
10
8
|
gem install sinew
|
11
9
|
```
|
12
10
|
|
13
|
-
|
11
|
+
or in your Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'sinew'
|
15
|
+
```
|
16
|
+
|
17
|
+
## Table of Contents
|
18
|
+
|
19
|
+
<!---
|
20
|
+
markdown-toc --no-firsth1 --maxdepth 1 readme.md
|
21
|
+
-->
|
22
|
+
|
23
|
+
* [Sinew 2 (May 2018)](#sinew-2-may-2018)
|
24
|
+
* [Quick Example](#quick-example)
|
25
|
+
* [How it Works](#how-it-works)
|
26
|
+
* [DSL Reference](#dsl-reference)
|
27
|
+
* [Hints](#hints)
|
28
|
+
* [Limitations](#limitations)
|
29
|
+
* [Changelog](#changelog)
|
30
|
+
|
31
|
+
## Sinew 2 (May 2018)
|
32
|
+
|
33
|
+
I am pleased to announce the release of Sinew 2.0, a complete rewrite of Sinew for the modern era. Enhancements include:
|
34
|
+
|
35
|
+
* Remove dependencies on active_support, curl and tidy. We use HTTParty now.
|
36
|
+
* Much easier to customize requests in `.sinew` files. For example, setting User-Agent or Bearer tokens.
|
37
|
+
* More operations like `post_json` or the generic `http`. These methods are thin wrappers around HTTParty.
|
38
|
+
* New end-of-run report.
|
39
|
+
* Tests, rubocop, vscode settings, travis, etc.
|
40
|
+
|
41
|
+
**Breaking change**
|
42
|
+
|
43
|
+
Sinew uses a new format for cached responses. Old Sinew 1 cache directories must be removed before running Sinew again. Sinew 2 might choke on Sinew 1 cache directories when reading `head/`. This is not tested or supported.
|
44
|
+
|
45
|
+
## Quick Example
|
14
46
|
|
15
47
|
Here's an example for collecting the links from httpbin.org:
|
16
48
|
|
@@ -29,18 +61,127 @@ noko.css("ul li a").each do |a|
|
|
29
61
|
end
|
30
62
|
```
|
31
63
|
|
32
|
-
If you paste this into a file called `
|
64
|
+
If you paste this into a file called `sample.sinew` and run `sinew sample.sinew`, it will create a `sample.csv` file containing the href and text for each link.
|
65
|
+
|
66
|
+
## How it Works
|
67
|
+
|
68
|
+
There are three main features provided by Sinew.
|
69
|
+
|
70
|
+
#### The Sinew DSL
|
71
|
+
|
72
|
+
Sinew uses recipe files to crawl web sites. Recipes have the `.sinew` extension, but they are plain old Ruby. The [Sinew DSL](#dsl-reference) makes crawling easy. Use `get` to make an HTTP GET:
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
get "https://www.google.com/search?q=darwin"
|
76
|
+
get "https://www.google.com/search", q: "charles darwin"
|
77
|
+
```
|
78
|
+
|
79
|
+
Once you've done a `get`, you have access to the document in a few different formats. In general, it's easiest to use `noko` to automatically parse and interact with the results. If Nokogiri isn't appropriate, you can fall back to regular expressions run against `raw` or `html`. Use `json` if you are expecting a JSON response.
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
get "https://www.google.com/search?q=darwin"
|
83
|
+
|
84
|
+
# pull out the links with nokogiri
|
85
|
+
links = noko.css("a").map { |i| i[:href] }
|
86
|
+
puts links.inspect
|
87
|
+
|
88
|
+
# or, use a regex
|
89
|
+
links = html[/<a[^>]+href="([^"]+)/, 1]
|
90
|
+
puts links.inspect
|
91
|
+
```
|
92
|
+
|
93
|
+
#### CSV Output
|
94
|
+
|
95
|
+
Recipes output CSV files. To continue the example above:
|
96
|
+
|
97
|
+
```ruby
|
98
|
+
get "https://www.google.com/search?q=darwin"
|
99
|
+
noko.css("a").each do |i|
|
100
|
+
row = { }
|
101
|
+
row[:href] = i[:href]
|
102
|
+
row[:text] = i.text
|
103
|
+
csv_emit row
|
104
|
+
end
|
105
|
+
```
|
106
|
+
|
107
|
+
Sinew creates a CSV file with the same name as the recipe, and `csv_emit(hash)` appends a row. The values of your hash are converted to strings:
|
108
|
+
|
109
|
+
1. Nokogiri nodes are converted to text
|
110
|
+
1. Arrays are joined with "|", so you can separate them later
|
111
|
+
1. HTML tags, entities and non-ascii chars are removed
|
112
|
+
1. Whitespace is squished
|
113
|
+
|
114
|
+
#### Caching
|
115
|
+
|
116
|
+
Requests are made using HTTParty, and all responses are cached on disk in `~/.sinew`. Error responses are cached as well. Each URL will be hit exactly once, and requests are rate limited to one per second. Sinew tries to be polite.
|
117
|
+
|
118
|
+
The files in `~/.sinew` have nice names and are designed to be human readable. This helps when writing recipes. Sinew never deletes files from the cache - that's up to you!
|
119
|
+
|
120
|
+
Because all requests are cached, you can run Sinew repeatedly with confidence. Run it over and over again while you build up your recipe.
|
121
|
+
|
122
|
+
## DSL Reference
|
123
|
+
|
124
|
+
#### Making requests
|
125
|
+
|
126
|
+
* `get(url, query = {})` - fetch a url with HTTP GET. URL parameters can be added using `query`.
|
127
|
+
* `post(url, form = {})` - fetch a url with HTTP POST, using `form` as the POST body.
|
128
|
+
* `post_json(url, json = {})` - fetch a url with HTTP POST, using `json` as the POST body.
|
129
|
+
* `http(method, url, options = {})` - use this for more complex requests
|
130
|
+
|
131
|
+
#### Parsing the response
|
132
|
+
|
133
|
+
* `raw` - the raw response from the last request
|
134
|
+
* `html` - like `raw`, but with a handful of HTML-specific whitespace cleanups
|
135
|
+
* `noko` - a [Nokogiri](http://nokogiri.org) document built from the tidied HTML
|
136
|
+
* `json` - parse the response as JSON, with symbolized keys
|
137
|
+
* `url` - the url of the last request. If the request goes through a redirect, `url` will reflect the final url.
|
138
|
+
* `uri` - the URI of the last request. This is useful for resolving relative URLs.
|
139
|
+
|
140
|
+
#### Writing CSV
|
141
|
+
|
142
|
+
* `csv_header(keys)` - specify the columns for CSV output. If you don't call this, Sinew will use the keys from the first call to `csv_emit`.
|
143
|
+
* `csv_emit(hash)` - append a row to the CSV file
|
144
|
+
|
145
|
+
## Hints
|
146
|
+
|
147
|
+
Writing Sinew recipes is fun and easy. The builtin caching means you can iterate quickly, since you won't have to re-fetch the data. Here are some hints for writing idiomatic recipes:
|
148
|
+
|
149
|
+
* Sinew doesn't (yet) check robots.txt - please check it manually.
|
150
|
+
* Prefer Nokogiri over regular expressions wherever possible. Learn [CSS selectors](http://www.w3schools.com/cssref/css_selectors.asp).
|
151
|
+
* In Chrome, `$` in the console is your friend.
|
152
|
+
* Fall back to regular expressions if you're desperate. Depending on the site, use either `raw` or `html`. `html` is probably your best bet. `raw` is good for crawling Javascript, but it's fragile if the site changes.
|
153
|
+
* Learn to love `String#[regexp]`, which is an obscure operator but incredibly handy for Sinew.
|
154
|
+
* Laziness is useful. Keep your CSS selectors and regular expressions simple, so maybe they'll work again the next time you need to crawl a site.
|
155
|
+
* Don't be afraid to mix CSS selectors, regular expressions, and Ruby:
|
156
|
+
|
157
|
+
```ruby
|
158
|
+
noko.css("table")[4].css("td").select { |i| i[:width].to_i > 80 }.map(&:text)
|
159
|
+
```
|
160
|
+
|
161
|
+
* Debug your recipes using plain old `puts`, or better yet use `ap` from [awesome_print](https://github.com/michaeldv/awesome_print).
|
162
|
+
* Run `sinew -v` to get a report on every `csv_emit`. Very handy.
|
163
|
+
* Add the CSV files to your git repo. That way you can version them and get diffs!
|
164
|
+
|
165
|
+
## Limitations
|
166
|
+
|
167
|
+
* Caching is based on URL, so use caution with cookies and other forms of authentication
|
168
|
+
* Almost no support for international (non-english) characters
|
169
|
+
|
170
|
+
## Changelog
|
171
|
+
|
172
|
+
#### 2.0.0 (May 2018)
|
173
|
+
|
174
|
+
* Complete rewrite. See above.
|
33
175
|
|
34
|
-
|
176
|
+
#### 1.0.3
|
35
177
|
|
36
|
-
|
178
|
+
* Friendlier message if curl or tidy are missing.
|
37
179
|
|
38
|
-
|
39
|
-
* Sinew runs responses through [HTML Tidy](http://tidy.sourceforge.net). This cleans up dirty HTML and makes it easier to parse in many cases, especially if you have to fallback to regular expressions instead of Nokogiri. Unfortunately, this is a common use case in my experience.
|
40
|
-
* Sinew outputs CSV files. It does exactly one thing and it does it well - Sinew crawls a site and outputs a CSV file. Mechanize is a more general toolkit.
|
180
|
+
#### 1.0.2
|
41
181
|
|
42
|
-
|
182
|
+
* Remove entity options from tidy, which didn't work on MacOS (thanks Rex!)
|
43
183
|
|
44
|
-
|
184
|
+
#### 1.0.1
|
45
185
|
|
46
|
-
|
186
|
+
* Trying to run on 1.8 produces a fatal error. Onward!
|
187
|
+
* Added first batch of unit tests
|
data/Rakefile
CHANGED
@@ -1,27 +1,26 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require "rake"
|
4
|
-
require "rake/testtask"
|
1
|
+
require 'bundler'
|
2
|
+
require 'bundler/setup'
|
5
3
|
|
6
|
-
|
7
|
-
require
|
4
|
+
require 'rake'
|
5
|
+
require 'rake/testtask'
|
6
|
+
require 'sinew/version'
|
8
7
|
|
9
8
|
#
|
10
9
|
# gem
|
11
10
|
#
|
12
11
|
|
13
|
-
task :
|
12
|
+
task gem: :build
|
14
13
|
task :build do
|
15
|
-
system
|
14
|
+
system 'gem build --quiet sinew.gemspec'
|
16
15
|
end
|
17
16
|
|
18
|
-
task :
|
17
|
+
task install: :build do
|
19
18
|
system "sudo gem install --quiet sinew-#{Sinew::VERSION}.gem"
|
20
19
|
end
|
21
20
|
|
22
|
-
task :
|
21
|
+
task release: :build do
|
23
22
|
system "git tag -a #{Sinew::VERSION} -m 'Tagging #{Sinew::VERSION}'"
|
24
|
-
system
|
23
|
+
system 'git push --tags'
|
25
24
|
system "gem push sinew-#{Sinew::VERSION}.gem"
|
26
25
|
end
|
27
26
|
|
@@ -29,11 +28,11 @@ end
|
|
29
28
|
# minitest
|
30
29
|
#
|
31
30
|
|
32
|
-
Rake::TestTask.new(:test) do |
|
33
|
-
|
31
|
+
Rake::TestTask.new(:test) do |t|
|
32
|
+
t.warning = false
|
34
33
|
end
|
35
34
|
|
36
|
-
task :
|
35
|
+
task default: :test
|
37
36
|
|
38
37
|
# to test:
|
39
38
|
# block ; rake install && rm -rf ~/.sinew/www.amazon.com && /usr/local/bin/sinew sample.sinew
|
data/bin/sinew
CHANGED
@@ -1,29 +1,49 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
require "trollop"
|
3
|
+
$LOAD_PATH.unshift("#{__dir__}/../lib")
|
5
4
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
5
|
+
require 'sinew'
|
6
|
+
require 'slop'
|
7
|
+
|
8
|
+
#
|
9
|
+
# options
|
10
|
+
#
|
11
|
+
|
12
|
+
options = Slop.parse do |o|
|
13
|
+
o.banner = 'Usage: sinew [options] <gub.sinew>'
|
14
|
+
o.bool '-v', '--verbose', 'dump every row'
|
15
|
+
o.bool '--version', 'show version'
|
16
|
+
o.bool '-q', '--quiet', 'suppress some output'
|
17
|
+
o.string '--cache', 'Set the cache directory (defaults to ~/.sinew)', default: "#{ENV['HOME']}/.sinew"
|
18
|
+
o.on '--help' do
|
19
|
+
puts o
|
20
|
+
exit
|
14
21
|
end
|
15
22
|
end
|
16
23
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
opt :cache, "Set the cache directory (defaults to ~/.sinew)"
|
21
|
-
opt :verbose, "Dump every row"
|
22
|
-
opt :quiet, "Be quiet"
|
24
|
+
if options[:version]
|
25
|
+
puts Sinew::VERSION
|
26
|
+
exit
|
23
27
|
end
|
24
|
-
Trollop.die "need a .sinew file to run against" if ARGV.blank?
|
25
28
|
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
+
#
|
30
|
+
# recipe
|
31
|
+
#
|
32
|
+
|
33
|
+
recipe = options.arguments.first
|
34
|
+
if !recipe
|
35
|
+
Scripto.fatal('need a .sinew file to run against')
|
29
36
|
end
|
37
|
+
if !File.exist?(recipe)
|
38
|
+
Scripto.fatal("#{recipe} not found")
|
39
|
+
end
|
40
|
+
if options.arguments.length > 1
|
41
|
+
Scripto.fatal('can only run on one .sinew file')
|
42
|
+
end
|
43
|
+
options = options.to_h.merge(recipe: recipe)
|
44
|
+
|
45
|
+
#
|
46
|
+
# main
|
47
|
+
#
|
48
|
+
|
49
|
+
Sinew::Main.new(options).run
|
data/lib/sinew.rb
CHANGED
@@ -1,6 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
require_relative 'sinew/cache'
|
2
|
+
require_relative 'sinew/core_ext'
|
3
|
+
require_relative 'sinew/dsl'
|
4
|
+
require_relative 'sinew/main'
|
5
|
+
require_relative 'sinew/nokogiri_ext'
|
6
|
+
require_relative 'sinew/output'
|
7
|
+
require_relative 'sinew/request'
|
8
|
+
require_relative 'sinew/response'
|
9
|
+
require_relative 'sinew/runtime_options'
|
10
|
+
require_relative 'sinew/version'
|
data/lib/sinew/cache.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
#
|
5
|
+
# This class handles the caching of http responses on disk.
|
6
|
+
#
|
7
|
+
|
8
|
+
module Sinew
|
9
|
+
class Cache
|
10
|
+
attr_reader :sinew
|
11
|
+
|
12
|
+
def initialize(sinew)
|
13
|
+
@sinew = sinew
|
14
|
+
end
|
15
|
+
|
16
|
+
def get(request)
|
17
|
+
body = read_if_exist(body_path(request))
|
18
|
+
return nil if !body
|
19
|
+
|
20
|
+
head = read_if_exist(head_path(request))
|
21
|
+
Response.from_cache(request, body, head)
|
22
|
+
end
|
23
|
+
|
24
|
+
def set(response)
|
25
|
+
body_path = body_path(response.request)
|
26
|
+
head_path = head_path(response.request)
|
27
|
+
|
28
|
+
FileUtils.mkdir_p(File.dirname(body_path))
|
29
|
+
FileUtils.mkdir_p(File.dirname(head_path))
|
30
|
+
|
31
|
+
# write body, and head if necessary
|
32
|
+
atomic_write(body_path, response.body)
|
33
|
+
if head_necessary?(response)
|
34
|
+
head = JSON.pretty_generate(response.head_as_json)
|
35
|
+
atomic_write(head_path, head)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def root_dir
|
40
|
+
sinew.options[:cache]
|
41
|
+
end
|
42
|
+
protected :root_dir
|
43
|
+
|
44
|
+
def head_necessary?(response)
|
45
|
+
response.error? || response.redirected?
|
46
|
+
end
|
47
|
+
protected :head_necessary?
|
48
|
+
|
49
|
+
def body_path(request)
|
50
|
+
"#{root_dir}/#{request.cache_key}"
|
51
|
+
end
|
52
|
+
protected :body_path
|
53
|
+
|
54
|
+
def head_path(request)
|
55
|
+
body_path = body_path(request)
|
56
|
+
dir, base = File.dirname(body_path), File.basename(body_path)
|
57
|
+
"#{dir}/head/#{base}"
|
58
|
+
end
|
59
|
+
protected :head_path
|
60
|
+
|
61
|
+
def read_if_exist(path)
|
62
|
+
if File.exist?(path)
|
63
|
+
IO.read(path, mode: 'r:UTF-8')
|
64
|
+
end
|
65
|
+
end
|
66
|
+
protected :read_if_exist
|
67
|
+
|
68
|
+
def atomic_write(path, data)
|
69
|
+
tmp = Tempfile.new('sinew', encoding: 'UTF-8')
|
70
|
+
tmp.write(data)
|
71
|
+
tmp.close
|
72
|
+
FileUtils.chmod(0o644, tmp.path)
|
73
|
+
FileUtils.mv(tmp.path, path)
|
74
|
+
ensure
|
75
|
+
FileUtils.rm(tmp.path, force: true)
|
76
|
+
end
|
77
|
+
protected :atomic_write
|
78
|
+
end
|
79
|
+
end
|