sinew 1.0.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +49 -0
- data/.travis.yml +4 -0
- data/.vscode/extensions.json +3 -0
- data/.vscode/settings.json +15 -0
- data/Gemfile +1 -1
- data/README.md +153 -12
- data/Rakefile +13 -14
- data/bin/sinew +40 -20
- data/lib/sinew.rb +10 -6
- data/lib/sinew/cache.rb +79 -0
- data/lib/sinew/core_ext.rb +59 -0
- data/lib/sinew/dsl.rb +98 -0
- data/lib/sinew/main.rb +80 -149
- data/lib/sinew/nokogiri_ext.rb +10 -9
- data/lib/sinew/output.rb +126 -0
- data/lib/sinew/request.rb +148 -0
- data/lib/sinew/response.rb +75 -0
- data/lib/sinew/runtime_options.rb +26 -0
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +5 -3
- data/sinew.gemspec +24 -19
- data/test/test.html +40 -34
- data/test/test_cache.rb +69 -0
- data/test/test_helper.rb +113 -0
- data/test/test_main.rb +36 -91
- data/test/test_nokogiri_ext.rb +14 -15
- data/test/test_output.rb +73 -0
- data/test/test_requests.rb +135 -0
- data/test/test_utf8.rb +39 -0
- metadata +103 -48
- data/lib/sinew/curler.rb +0 -173
- data/lib/sinew/text_util.rb +0 -101
- data/lib/sinew/util.rb +0 -236
- data/test/helper.rb +0 -64
- data/test/test_curler.rb +0 -70
- data/test/test_text_util.rb +0 -23
data/lib/sinew/text_util.rb
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
require "active_support/core_ext"
|
2
|
-
require "set"
|
3
|
-
|
4
|
-
module Sinew
  # Text helpers for scraped HTML: normalizing markup through the external
  # `tidy` program, stripping tags/attributes, and escaping or unescaping
  # XML/HTML entities.
  #
  # All methods are callable directly on the module (TextUtil.untag(...)).
  module TextUtil
    extend self

    # Tags whose attributes are left untouched by html_clean_from_tidy.
    ATTRS_KEEP = Set.new(%w(a img iframe)).freeze

    # Command line switches handed to `tidy`. A nil value means the switch
    # takes no argument.
    TIDY_OPTIONS = {
      "-asxml" => nil,
      "-bare" => nil,
      "-quiet" => nil,
      "-utf8" => nil,
      "-wrap" => 0,
      "--doctype" => "omit",
      "--hide-comments" => "yes",
      "--force-output" => "yes",
      "-f" => "/dev/null",
    }.freeze

    # The five predefined XML entities, keyed by the raw character.
    XML_ENTITIES = {
      "&" => "&amp;",
      "<" => "&lt;",
      ">" => "&gt;",
      "'" => "&apos;",
      '"' => "&quot;",
    }.freeze

    # Same table, keyed by entity ("&amp;" => "&").
    XML_ENTITIES_INV = XML_ENTITIES.invert.freeze

    # Entities that unent knows how to turn into plain ASCII text.
    COMMON_ENTITIES_INV = XML_ENTITIES_INV.merge(
      "&frac12;" => "1/2",
      "&frac14;" => "1/4",
      "&frac34;" => "3/4",
      "&ldquo;" => '"',
      "&lsquo;" => "'",
      "&mdash;" => "-",
      "&nbsp;" => " ",
      "&ndash;" => "-",
      "&rdquo;" => '"',
      "&rsquo;" => "'",
      "&tilde;" => "~",
      # NOTE(review): the four numeric keys below were reconstructed from an
      # HTML-decoded rendering of this file; confirm the exact code points
      # against the original gem source.
      "&#34;" => '"',
      "&#39;" => "'",
      "&#160;" => " ",
      "&#10;" => "\n"
    ).freeze

    #
    # tidy/clean
    #

    # Pipe +s+ through the external `tidy` program, then strip the markup
    # we never want (html attributes, meta/link/style/script tags,
    # processing instructions) and collapse whitespace.
    #
    # Returns the cleaned string. Raises RuntimeError ("could not run
    # tidy") when tidy exits with a status above 2.
    def html_tidy(s)
      # run tidy
      args = TIDY_OPTIONS.map { |k, v| "#{k} #{v}" }.join(" ")
      s = IO.popen("tidy #{args}", "rb+") do |f|
        f.write(s)
        f.close_write
        f.read
      end

      # tidy documents exit status 1 for warnings and 2 for errors; with
      # --force-output it still emits markup in those cases, so only
      # higher statuses are treated as failure. exitstatus is nil when
      # the process did not exit normally, which (as before) is not
      # treated as an error here.
      exit_code = $?.exitstatus
      raise "could not run tidy" if exit_code && exit_code > 2

      # now kill some tags
      s.sub!(/<html\b[^>]+>/, "<html>")
      s.gsub!(/<\/?(meta|link)\b[^>]*>/m, "")
      s.gsub!(/<(style|script)\b[^>]*(\/>|>.*?<\/\1\b>)/m, "")
      s.gsub!(/<\?[^>]*>/m, "")
      # String#squish! is provided by ActiveSupport
      s.squish!

      # kill whitespace around tags
      s.gsub!(/ ?<([^>]+)> ?/, "<\\1>")

      s
    end

    # Run html_tidy on +s+, then strip most attributes.
    def html_clean(s)
      html_clean_from_tidy(html_tidy(s))
    end

    # Remove the attributes from every tag in +s+ except the tags listed
    # in ATTRS_KEEP. A trailing "/" on self-closing tags is preserved.
    # Returns a new string; +s+ is not modified.
    def html_clean_from_tidy(s)
      s.gsub(/<([^\s>]+)[^>]*?(\/)?>/) do |tag|
        ATTRS_KEEP.include?($1) ? tag : "<#{$1}#{$2}>"
      end
    end

    #
    # untag/unent
    #

    # Replace & < > ' " in +s+ with their XML entities.
    def xml_escape(s)
      s.gsub(/[&<>'"]/) { |i| XML_ENTITIES[i] }
    end

    # Replace the five predefined XML entities in +s+ with the characters
    # they stand for.
    def xml_unescape(s)
      s.gsub(/&(amp|lt|gt|apos|quot);/) { |i| XML_ENTITIES_INV[i] }
    end

    # Replace every tag in +s+ with a single space.
    def untag(s)
      s.gsub(/<[^>]+>/, " ")
    end

    # Replace the entities listed in COMMON_ENTITIES_INV with plain text.
    # Anything else that looks like an entity is removed, because the
    # lookup returns nil and gsub substitutes an empty string.
    def unent(s)
      s.gsub(/&#?[a-z0-9]{2,};/) { |i| COMMON_ENTITIES_INV[i] }
    end
  end
end
|
data/lib/sinew/util.rb
DELETED
@@ -1,236 +0,0 @@
|
|
1
|
-
require "digest/md5"
|
2
|
-
require "etc"
|
3
|
-
require "fileutils"
|
4
|
-
|
5
|
-
module Sinew
  # Helper module for executing commands and printing stuff
  # out.
  #
  # The general idea is to only print commands that are actually
  # interesting. For example, mkdir_if_necessary won't print anything
  # if the directory already exists. That way we can scan output and
  # see what changes were made without getting lost in repetitive
  # commands that had no actual effect.
  module Util
    # Raised by run when a command exits unsuccessfully.
    class RunError < StandardError; end

    extend self

    # ANSI escape sequences used by banner.
    RESET = "\e[0m"
    RED = "\e[1;37;41m"
    GREEN = "\e[1;37;42m"
    YELLOW = "\e[1;37;43m"
    BLUE = "\e[1;37;44m"
    MAGENTA = "\e[1;37;45m"
    CYAN = "\e[1;37;46m"

    #
    # running commands
    #

    # Make all commands echo before running.
    def run_verbose!
      @run_verbose = true
    end

    # Run a command, raise an error upon failure. Output goes to
    # $stdout/$stderr.
    #
    # With +args+ the command is executed directly with that argument
    # list; without it +command+ is handed to Kernel#system as a single
    # string. Returns nil. Raises RuntimeError if the command was
    # interrupted by SIGINT, RunError for any other failure.
    def run(command, args = nil)
      if args
        args = args.map(&:to_s)
        line = "#{command} #{args.join(" ")}"
        vputs line
        system(command, *args)
      else
        line = command
        vputs line
        system(command)
      end

      status = $?
      # success? is nil (falsy) when the process was killed by a signal
      return if status.success?

      raise "#{line} interrupted" if status.termsig == Signal.list["INT"]
      raise RunError, "#{line} failed : #{status.to_i / 256}"
    end

    # Like mkdir -p. Optionally, set the owner and mode.
    def mkdir(dir, owner = nil, mode = nil)
      FileUtils.mkdir_p(dir, :verbose => verbose?)
      chmod(dir, mode) if mode
      chown(dir, owner) if owner
    end

    # mkdir only if the directory doesn't already exist. Optionally,
    # set the owner and mode.
    def mkdir_if_necessary(dir, owner = nil, mode = nil)
      # the symlink? check also catches dangling symlinks, for which
      # File.exist? is false
      return if File.exist?(dir) || File.symlink?(dir)
      mkdir(dir, owner, mode)
    end

    # rm a dir and recreate it.
    def rm_and_mkdir(dir)
      raise "don't do this" if dir == ""
      # NOTE(review): dir is interpolated into a shell command unquoted, so
      # paths containing spaces or shell metacharacters will misbehave.
      run "rm -rf #{dir} && mkdir -p #{dir}"
    end

    # Are two files different?
    def different?(a, b)
      !FileUtils.compare_file(a, b)
    end

    # Copy file or dir from src to dst. Optionally, set the mode and
    # owner of dst.
    def cp(src, dst, owner = nil, mode = nil)
      FileUtils.cp_r(src, dst, :preserve => true, :verbose => verbose?)
      chown(dst, owner) if owner && !File.symlink?(dst)
      chmod(dst, mode) if mode
    end

    # Copy file or dir from src to dst, but create the dst directory
    # first if necessary. Optionally, set the mode and owner of dst.
    def cp_with_mkdir(src, dst, owner = nil, mode = nil)
      mkdir_if_necessary(File.dirname(dst))
      cp(src, dst, owner, mode)
    end

    # Copy file or dir from src to dst, but ONLY if dst doesn't exist
    # or has different contents than src. Optionally, set the mode and
    # owner of dst. Returns true if a copy was made, nil otherwise.
    def cp_if_necessary(src, dst, owner = nil, mode = nil)
      return unless !File.exist?(dst) || different?(src, dst)
      cp(src, dst, owner, mode)
      true
    end

    # Move src to dst. Because this uses FileUtils, it works even if
    # dst is on a different partition.
    def mv(src, dst)
      FileUtils.mv(src, dst, :verbose => verbose?)
    end

    # Move src to dst, but create the dst directory first if
    # necessary.
    def mv_with_mkdir(src, dst)
      mkdir_if_necessary(File.dirname(dst))
      mv(src, dst)
    end

    # Chown file to be owned by user. Nothing is run if the file
    # already belongs to that user.
    def chown(file, user)
      user = user.to_s
      # who is the current owner? (uid lookups are cached per user name)
      @uids ||= {}
      @uids[user] ||= Etc.getpwnam(user).uid
      uid = @uids[user]
      return if File.stat(file).uid == uid

      # NOTE(review): this also sets the group to a group named like the
      # user, and a file name containing a single quote breaks the quoting.
      run "chown #{user}:#{user} '#{file}'"
    end

    # Chmod file to a new mode. Nothing is done if the file already has
    # exactly those permission bits.
    def chmod(file, mode)
      # File::Stat#mode also carries the file type bits, so mask them off
      # before comparing against a permission mode such as 0644.
      return if (File.stat(file).mode & 07777) == mode
      FileUtils.chmod(mode, file, :verbose => verbose?)
    end

    # rm a file
    def rm(file)
      FileUtils.rm(file, :force => true, :verbose => verbose?)
    end

    # rm a file, but only if it exists. Returns true if the file was
    # removed, nil otherwise.
    def rm_if_necessary(file)
      return unless File.exist?(file)
      rm(file)
      true
    end

    # Create a symlink from src to dst.
    def ln(src, dst)
      FileUtils.ln_sf(src, dst, :verbose => verbose?)
    end

    # Create a symlink from src to dst, but only if it hasn't already
    # been created. Returns true if a link was created, nil otherwise.
    def ln_if_necessary(src, dst)
      if File.symlink?(dst)
        return if File.readlink(dst) == src
        # dst is a symlink pointing somewhere else
        rm(dst)
      end
      ln(src, dst)
      true
    end

    # Touch a file
    def touch(file)
      FileUtils.touch(file)
    end

    # A nice printout in green.
    def banner(s, color = GREEN)
      s = "#{s} ".ljust(72, " ")
      $stderr.write "#{color}[#{Time.new.strftime('%H:%M:%S')}] #{s}#{RESET}\n"
      $stderr.flush
    end

    # Print a warning in yellow.
    def warning(msg)
      banner("Warning: #{msg}", YELLOW)
    end

    # Print a fatal error in red, then exit.
    def fatal(msg)
      banner(msg, RED)
      exit(1)
    end

    # Generate +len+ characters of random alphanumeric text.
    def random_text(len)
      chars = ("A".."Z").to_a + ("a".."z").to_a + ("0".."9").to_a
      # rand(n) returns 0...n, so pass the full length to be able to
      # reach the last character
      (1..len).map { chars[rand(chars.length)] }.join("")
    end

    # Convert a string into something that could be a path segment.
    #
    # A leading slash is dropped, "..", "?", "/" and "&" become commas,
    # every other character outside A-Za-z0-9_.,=- is percent-encoded
    # byte by byte, and the result is downcased. An empty result becomes
    # "_root_".
    def pathify(s)
      s = s.gsub(/^\//, "")
      s = s.gsub("..", ",")
      s = s.gsub(/[?\/&]/, ",")
      s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
        # encode every byte so multibyte characters don't collide
        i.bytes.map { |b| format("%%%02x", b) }.join
      end
      s = "_root_" if s.empty?
      s.downcase
    end

    # checksum some text
    def md5(s)
      Digest::MD5.hexdigest(s.to_s)
    end

    private

    # Returns true if verbosity is turned on.
    def verbose?
      @run_verbose ||= nil
    end

    # Print s to $stderr, but only in verbose mode.
    def vputs(s)
      $stderr.puts s if verbose?
    end
  end
end
|
data/test/helper.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require "active_support/core_ext"
|
2
|
-
require "test/unit"
|
3
|
-
require "sinew"
|
4
|
-
|
5
|
-
module Sinew
  # Shared base class for the test suite: fixture constants plus helpers
  # that build callables standing in for the real curl invocation.
  class TestCase < Test::Unit::TestCase
    # Scratch directory used by the tests.
    TMP = "/tmp/_test_sinew"
    # Absolute path to the HTML fixture sitting next to this file.
    HTML_FILE = File.expand_path("#{File.dirname(__FILE__)}/test.html")
    # Fixture contents, read once at load time.
    HTML = File.read(HTML_FILE)

    #
    # for mocking curl
    #

    # Callable that fakes a successful fetch returning the HTML fixture.
    def mock_curl_200
      proc { |_cmd, args| mock_curl(args, HTML, "HTTP/1.1 200 OK") }
    end

    # Callable that fakes a redirect to http://www.gub.com with an empty body.
    def mock_curl_302
      proc do |_cmd, args|
        mock_curl(args, "", "HTTP/1.1 302 Moved Temporarily\r\nLocation: http://www.gub.com")
      end
    end

    # Callable that fakes a failed command by raising Util::RunError.
    def mock_curl_500
      proc { |_cmd, _args| raise Util::RunError, "curl error" }
    end

    # Write +body+ and +head+ to the paths that follow the "--output" and
    # "--dump-header" entries in +args+.
    def mock_curl(args, body, head)
      body_path = args[args.index("--output") + 1]
      head_path = args[args.index("--dump-header") + 1]
      File.write(body_path, body)
      File.write(head_path, "#{head}\r\n\r\n")
    end
  end
end
|
39
|
-
|
40
|
-
#
|
41
|
-
# from MiniTest, but not in the gem yet
|
42
|
-
#
|
43
|
-
|
44
|
-
class Object
  # Temporarily replace the singleton method +name+ on this object for the
  # duration of the block.
  #
  # While the block runs, calling +name+ returns +val_or_callable+, or, if
  # it responds to #call, the result of calling it with the arguments (and
  # block) given to +name+. The original method is restored afterwards,
  # even if the block raises. Returns the value of the block.
  #
  # Raises NameError if +name+ is not currently defined on the object.
  def stub(name, val_or_callable, &block)
    new_name = "__minitest_stub__#{name}"
    metaclass = singleton_class

    # Install the stub before entering the begin/ensure below: if the
    # setup itself fails there is nothing to restore, and running the
    # restore code would raise a second error that hides the real one.
    metaclass.send :alias_method, new_name, name
    metaclass.send :define_method, name do |*args, &blk|
      if val_or_callable.respond_to? :call
        val_or_callable.call(*args, &blk)
      else
        val_or_callable
      end
    end

    begin
      yield
    ensure
      # put the original back and drop the temporary alias
      metaclass.send :undef_method, name
      metaclass.send :alias_method, name, new_name
      metaclass.send :undef_method, new_name
    end
  end
end
|
data/test/test_curler.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
module Sinew
  # Tests for Curler, with Util.run stubbed out so nothing is fetched.
  class TestCurler < TestCase
    def setup
      # create TMP dir
      FileUtils.rm_rf(TMP) if File.exist?(TMP)
      FileUtils.mkdir_p(TMP)

      # curler, pointed at TMP
      @curler = Curler.new(dir: TMP, verbose: false)
    end

    #
    # tests
    #

    # A successful fetch yields a path whose contents are the fixture.
    def test_200
      Util.stub(:run, mock_curl_200) do
        path = @curler.get("http://www.example.com")
        assert_equal(HTML, File.read(path))
      end
    end

    # A failing command surfaces as Curler::Error.
    def test_500
      assert_raises(Curler::Error) do
        Util.stub(:run, mock_curl_500) do
          @curler.get("http://www.example.com")
        end
      end
    end

    def test_cached
      Util.stub(:run, mock_curl_200) do
        assert_equal(HTML, File.read(@curler.get("http://www.example.com")))
      end
      # the file is cached, so this shouldn't produce an error
      Util.stub(:run, mock_curl_500) do
        @curler.get("http://www.example.com")
      end
    end

    # After a redirect, the curler reports the redirect target as its url.
    def test_302
      Util.stub(:run, mock_curl_302) do
        @curler.get("http://www.example.com")
        assert_equal("http://www.gub.com", @curler.url)
      end
    end

    # Two different urls in a row should make the curler call sleep.
    def test_rate_limit
      slept = false

      # change Kernel#sleep to not really sleep!
      Kernel.send(:alias_method, :old_sleep, :sleep)
      begin
        Kernel.send(:define_method, :sleep) do |_duration|
          slept = true
        end

        Util.stub(:run, mock_curl_200) do
          @curler.get("http://www.example.com/1")
          @curler.get("http://www.example.com/2")
        end
        assert(slept)
      ensure
        # restore old Kernel#sleep, even when an assertion or the curler
        # raises, so later tests don't run with the patched version
        Kernel.send(:alias_method, :sleep, :old_sleep)
        Kernel.send(:undef_method, :old_sleep)
      end
    end
  end
end
|