sinew 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +49 -0
- data/.travis.yml +4 -0
- data/.vscode/extensions.json +3 -0
- data/.vscode/settings.json +15 -0
- data/Gemfile +1 -1
- data/README.md +153 -12
- data/Rakefile +13 -14
- data/bin/sinew +40 -20
- data/lib/sinew.rb +10 -6
- data/lib/sinew/cache.rb +79 -0
- data/lib/sinew/core_ext.rb +59 -0
- data/lib/sinew/dsl.rb +98 -0
- data/lib/sinew/main.rb +80 -149
- data/lib/sinew/nokogiri_ext.rb +10 -9
- data/lib/sinew/output.rb +126 -0
- data/lib/sinew/request.rb +148 -0
- data/lib/sinew/response.rb +75 -0
- data/lib/sinew/runtime_options.rb +26 -0
- data/lib/sinew/version.rb +1 -1
- data/sample.sinew +5 -3
- data/sinew.gemspec +24 -19
- data/test/test.html +40 -34
- data/test/test_cache.rb +69 -0
- data/test/test_helper.rb +113 -0
- data/test/test_main.rb +36 -91
- data/test/test_nokogiri_ext.rb +14 -15
- data/test/test_output.rb +73 -0
- data/test/test_requests.rb +135 -0
- data/test/test_utf8.rb +39 -0
- metadata +103 -48
- data/lib/sinew/curler.rb +0 -173
- data/lib/sinew/text_util.rb +0 -101
- data/lib/sinew/util.rb +0 -236
- data/test/helper.rb +0 -64
- data/test/test_curler.rb +0 -70
- data/test/test_text_util.rb +0 -23
data/lib/sinew/text_util.rb
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
require "active_support/core_ext"
|
2
|
-
require "set"
|
3
|
-
|
4
|
-
module Sinew
  # Text helpers for scrubbing HTML: piping it through the external `tidy`
  # binary, stripping tags and attributes, and escaping/unescaping entities.
  # The module extends itself, so every method can be called directly
  # (e.g. Sinew::TextUtil.untag(html)).
  module TextUtil
    extend self

    # Tags whose attributes html_clean_from_tidy leaves untouched.
    ATTRS_KEEP = Set.new(%w[a img iframe]).freeze

    # Command line switches passed to `tidy`. A nil value means a bare flag.
    TIDY_OPTIONS = {
      "-asxml" => nil,
      "-bare" => nil,
      "-quiet" => nil,
      "-utf8" => nil,
      "-wrap" => 0,
      "--doctype" => "omit",
      "--hide-comments" => "yes",
      "--force-output" => "yes",
      "-f" => "/dev/null",
    }.freeze

    # Raw character => XML entity, and the reverse lookup.
    XML_ENTITIES = {
      "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "'" => "&apos;", '"' => "&quot;"
    }.freeze
    XML_ENTITIES_INV = XML_ENTITIES.invert.freeze

    # Entity => plain ASCII replacement used by unent.
    COMMON_ENTITIES_INV = XML_ENTITIES_INV.merge(
      "&frac12;" => "1/2",
      "&frac14;" => "1/4",
      "&frac34;" => "3/4",
      "&ldquo;" => '"',
      "&lsquo;" => "'",
      "&mdash;" => "-",
      "&nbsp;" => " ",
      "&ndash;" => "-",
      "&rdquo;" => '"',
      "&rsquo;" => "'",
      "&tilde;" => "~",
      "&#34;" => '"',
      "&#39;" => "'",
      "&#160;" => " ",
      # NOTE(review): the original key for this entry was lost (it had been
      # decoded into a literal line break); both numeric line-break entities
      # are mapped here - confirm against the released gem.
      "&#10;" => "\n",
      "&#8232;" => "\n"
    ).freeze

    #
    # tidy/clean
    #

    # Run +s+ through the external `tidy` program, then strip the <html>
    # attributes, meta/link/style/script elements and processing
    # instructions, and collapse whitespace. Returns the cleaned string.
    # Raises RuntimeError ("could not run tidy") when tidy exits with a
    # status above 2.
    def html_tidy(s)
      # run tidy
      args = TIDY_OPTIONS.map { |k, v| "#{k} #{v}" }.join(" ")
      s = IO.popen("tidy #{args}", "rb+") do |f|
        f.write(s)
        f.close_write
        f.read
      end
      # exitstatus is nil when tidy was killed by a signal; to_i keeps the
      # old `$? >> 8` result (0) for that case.
      raise "could not run tidy" if $?.exitstatus.to_i > 2

      # now kill some tags
      s.sub!(/<html\b[^>]+>/, "<html>")
      s.gsub!(/<\/?(meta|link)\b[^>]*>/m, "")
      s.gsub!(/<(style|script)\b[^>]*(\/>|>.*?<\/\1\b>)/m, "")
      s.gsub!(/<\?[^>]*>/m, "")
      s.squish! # String#squish! comes from active_support/core_ext

      # kill whitespace around tags
      s.gsub!(/ ?<([^>]+)> ?/, "<\\1>")

      s
    end

    # Tidy +s+ and then strip most attributes from the result.
    def html_clean(s)
      html_clean_from_tidy(html_tidy(s))
    end

    # Remove the attributes from every tag except those in ATTRS_KEEP.
    # Returns a new string; +s+ itself is not modified.
    def html_clean_from_tidy(s)
      s.gsub(/<([^\s>]+)[^>]*?(\/)?>/) do |tag|
        ATTRS_KEEP.include?($1) ? tag : "<#{$1}#{$2}>"
      end
    end

    #
    # untag/unent
    #

    # Replace & < > ' " with their XML entities.
    def xml_escape(s)
      s.gsub(/[&<>'"]/) { |i| XML_ENTITIES[i] }
    end

    # Replace the five XML entities with the characters they stand for.
    def xml_unescape(s)
      s.gsub(/&(amp|lt|gt|apos|quot);/) { |i| XML_ENTITIES_INV[i] }
    end

    # Replace every tag with a single space.
    def untag(s)
      s.gsub(/<[^>]+>/, " ")
    end

    # Replace entities listed in COMMON_ENTITIES_INV with plain text. Any
    # other lowercase/numeric entity has no table entry, so the block yields
    # nil and the entity is removed from the output.
    def unent(s)
      s.gsub(/&#?[a-z0-9]{2,};/) { |i| COMMON_ENTITIES_INV[i] }
    end
  end
end
|
data/lib/sinew/util.rb
DELETED
@@ -1,236 +0,0 @@
|
|
1
|
-
require "digest/md5"
|
2
|
-
require "etc"
|
3
|
-
require "fileutils"
|
4
|
-
|
5
|
-
module Sinew
  # Helper module for executing commands and printing stuff
  # out.
  #
  # The general idea is to only print commands that are actually
  # interesting. For example, mkdir_if_necessary won't print anything
  # if the directory already exists. That way we can scan output and
  # see what changes were made without getting lost in repetitive
  # commands that had no actual effect.
  #
  # The module extends itself, so all public methods are callable as
  # Sinew::Util.some_method.
  module Util
    # Raised by run when a command exits with a non-zero status.
    class RunError < StandardError; end

    extend self

    # ANSI escape sequences used by banner.
    RESET   = "\e[0m"
    RED     = "\e[1;37;41m"
    GREEN   = "\e[1;37;42m"
    YELLOW  = "\e[1;37;43m"
    BLUE    = "\e[1;37;44m"
    MAGENTA = "\e[1;37;45m"
    CYAN    = "\e[1;37;46m"

    #
    # running commands
    #

    # Make all commands echo before running.
    def run_verbose!
      @run_verbose = true
    end

    # Run a command, raise an error upon failure. Output goes to
    # $stdout/$stderr.
    #
    # With +args+ (an array) the command is executed directly with those
    # arguments; without it, +command+ is handed to system as a single
    # string. Raises RunError on a non-zero exit, or a RuntimeError when the
    # command was terminated by SIGINT. Returns nil on success.
    def run(command, args = nil)
      if args
        args = args.map(&:to_s)
        line = "#{command} #{args.join(" ")}"
        vputs line
        system(command, *args)
      else
        line = command
        vputs line
        system(command)
      end

      status = $?
      return if status.success?

      raise "#{line} interrupted" if status.termsig == Signal.list["INT"]

      # exitstatus is nil for a signalled process; to_i reports that as 0,
      # matching the previous `$?.to_i / 256` arithmetic.
      raise RunError, "#{line} failed : #{status.exitstatus.to_i}"
    end

    # Like mkdir -p. Optionally, set the owner and mode.
    def mkdir(dir, owner = nil, mode = nil)
      FileUtils.mkdir_p(dir, verbose: verbose?)
      chmod(dir, mode) if mode
      chown(dir, owner) if owner
    end

    # mkdir only if the directory doesn't already exist. Optionally,
    # set the owner and mode.
    def mkdir_if_necessary(dir, owner = nil, mode = nil)
      mkdir(dir, owner, mode) unless File.exist?(dir) || File.symlink?(dir)
    end

    # rm a dir and recreate it.
    def rm_and_mkdir(dir)
      raise "don't do this" if dir == ""

      # argv form: the path is never interpreted by a shell, so spaces or
      # metacharacters in dir cannot change what gets deleted.
      run("rm", ["-rf", dir])
      run("mkdir", ["-p", dir])
    end

    # Are two files different?
    def different?(a, b)
      !FileUtils.compare_file(a, b)
    end

    # Copy file or dir from src to dst. Optionally, set the mode and
    # owner of dst.
    def cp(src, dst, owner = nil, mode = nil)
      FileUtils.cp_r(src, dst, preserve: true, verbose: verbose?)
      chown(dst, owner) if owner && !File.symlink?(dst)
      chmod(dst, mode) if mode
    end

    # Copy file or dir from src to dst, but create the dst directory
    # first if necessary. Optionally, set the mode and owner of dst.
    def cp_with_mkdir(src, dst, owner = nil, mode = nil)
      mkdir_if_necessary(File.dirname(dst))
      cp(src, dst, owner, mode)
    end

    # Copy file or dir from src to dst, but ONLY if dst doesn't exist
    # or has different contents than src. Optionally, set the mode and
    # owner of dst. Returns true if a copy was made, nil otherwise.
    def cp_if_necessary(src, dst, owner = nil, mode = nil)
      return unless !File.exist?(dst) || different?(src, dst)

      cp(src, dst, owner, mode)
      true
    end

    # Move src to dst. Because this uses FileUtils, it works even if
    # dst is on a different partition.
    def mv(src, dst)
      FileUtils.mv(src, dst, verbose: verbose?)
    end

    # Move src to dst, but create the dst directory first if
    # necessary.
    def mv_with_mkdir(src, dst)
      mkdir_if_necessary(File.dirname(dst))
      mv(src, dst)
    end

    # Chown file to be owned by user. The group is set to the same name as
    # the user. Nothing is run when the file already has that owner.
    def chown(file, user)
      user = user.to_s
      # who is the current owner? (uid lookups are cached per user name)
      @uids ||= {}
      @uids[user] ||= Etc.getpwnam(user).uid
      uid = @uids[user]
      run("chown", ["#{user}:#{user}", file]) if File.stat(file).uid != uid
    end

    # Chmod file to a new mode, but only when its permission bits differ.
    def chmod(file, mode)
      # stat.mode also carries the file-type bits; mask them off so the
      # comparison is against permission bits only.
      current = File.stat(file).mode & 0o7777
      FileUtils.chmod(mode, file, verbose: verbose?) if current != mode
    end

    # rm a file
    def rm(file)
      FileUtils.rm(file, force: true, verbose: verbose?)
    end

    # rm a file, but only if it exists. Returns true if the file was
    # removed, nil otherwise.
    def rm_if_necessary(file)
      return unless File.exist?(file)

      rm(file)
      true
    end

    # Create a symlink from src to dst.
    def ln(src, dst)
      FileUtils.ln_sf(src, dst, verbose: verbose?)
    end

    # Create a symlink from src to dst, but only if it hasn't already
    # been created. Returns true if the link was (re)created, nil otherwise.
    def ln_if_necessary(src, dst)
      if File.symlink?(dst)
        return if File.readlink(dst) == src

        # points somewhere else - replace it
        rm(dst)
      end
      ln(src, dst)
      true
    end

    # Touch a file
    def touch(file)
      FileUtils.touch(file)
    end

    # A nice printout in green.
    def banner(s, color = GREEN)
      s = "#{s} ".ljust(72, " ")
      $stderr.write "#{color}[#{Time.new.strftime('%H:%M:%S')}] #{s}#{RESET}\n"
      $stderr.flush
    end

    # Print a warning in yellow.
    def warning(msg)
      banner("Warning: #{msg}", YELLOW)
    end

    # Print a fatal error in red, then exit.
    def fatal(msg)
      banner(msg, RED)
      exit(1)
    end

    # Generate some random text: +len+ characters drawn from A-Z, a-z, 0-9.
    def random_text(len)
      chars = ("A".."Z").to_a + ("a".."z").to_a + ("0".."9").to_a
      # rand(n) already excludes n, so use the full length; subtracting one
      # would make the last character ("9") unreachable.
      (1..len).map { chars[rand(chars.length)] }.join("")
    end

    # Convert a string into something that could be a path segment
    def pathify(s)
      s = s.gsub(/^\//, "")
      s = s.gsub("..", ",")
      s = s.gsub(/[?\/&]/, ",")
      s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
        # NOTE: "H2" hex-encodes only the first byte of the character.
        hex = i.unpack("H2").first
        "%#{hex}"
      end
      s = "_root_" if s.empty?
      s.downcase
    end

    # checksum some text
    def md5(s)
      Digest::MD5.hexdigest(s.to_s)
    end

    private

    # Returns true if verbosity is turned on.
    def verbose?
      @run_verbose ||= nil
    end

    # Print s to $stderr when verbosity is turned on.
    def vputs(s)
      $stderr.puts s if verbose?
    end
  end
end
|
data/test/helper.rb
DELETED
@@ -1,64 +0,0 @@
|
|
1
|
-
require "active_support/core_ext"
|
2
|
-
require "test/unit"
|
3
|
-
require "sinew"
|
4
|
-
|
5
|
-
module Sinew
  # Base class for the Sinew tests. Provides the scratch directory, the HTML
  # fixture, and procs that can stand in for Util.run so that no real curl
  # process is started.
  class TestCase < Test::Unit::TestCase
    TMP = "/tmp/_test_sinew"
    HTML_FILE = File.expand_path("test.html", File.dirname(__FILE__))
    HTML = File.read(HTML_FILE)

    #
    # for mocking curl
    #

    # Stand-in that writes the fixture HTML with a 200 response header.
    def mock_curl_200
      proc { |_cmd, args| mock_curl(args, HTML, "HTTP/1.1 200 OK") }
    end

    # Stand-in that writes an empty body with a 302 redirect header.
    def mock_curl_302
      proc do |_cmd, args|
        mock_curl(args, "", "HTTP/1.1 302 Moved Temporarily\r\nLocation: http://www.gub.com")
      end
    end

    # Stand-in that fails the way a broken curl invocation would.
    def mock_curl_500
      proc { |_cmd, _args| raise Util::RunError, "curl error" }
    end

    # Write +body+ and +head+ to the files named after the --output and
    # --dump-header switches found in +args+.
    def mock_curl(args, body, head)
      body_path = args[args.index("--output") + 1]
      File.write(body_path, body)
      head_path = args[args.index("--dump-header") + 1]
      File.write(head_path, "#{head}\r\n\r\n")
    end
  end
end
|
39
|
-
|
40
|
-
#
|
41
|
-
# from MiniTest, but not in the gem yet
|
42
|
-
#
|
43
|
-
|
44
|
-
class Object
  # Temporarily replace the singleton method +name+ on this object for the
  # duration of the given block, then restore the original.
  #
  # While stubbed, calling +name+ returns +val_or_callable+, or - when it
  # responds to #call - the result of calling it with the same arguments
  # (and block, if any) the caller passed. Returns the value of the block.
  # Raises NameError if the object has no method called +name+.
  def stub(name, val_or_callable, &block)
    new_name = "__minitest_stub__#{name}"

    metaclass = class << self; self; end
    metaclass.send :alias_method, new_name, name
    # From here on there is something to undo; before this point the ensure
    # clause must not touch the object, or it would raise a second error
    # that hides the real one.
    aliased = true

    metaclass.send :define_method, name do |*args, &blk|
      if val_or_callable.respond_to? :call
        val_or_callable.call(*args, &blk)
      else
        val_or_callable
      end
    end

    yield
  ensure
    if aliased
      metaclass.send :undef_method, name
      metaclass.send :alias_method, name, new_name
      metaclass.send :undef_method, new_name
    end
  end
end
|
data/test/test_curler.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
module Sinew
  # Tests for Curler. Util.run is stubbed in every test, so no real curl
  # process is started.
  class TestCurler < TestCase
    def setup
      # create TMP dir (rm_rf is a no-op when the directory is missing)
      FileUtils.rm_rf(TMP)
      FileUtils.mkdir_p(TMP)

      # curler, pointed at TMP
      @curler = Curler.new(dir: TMP, verbose: false)
    end

    #
    # tests
    #

    # A 200 response yields a file containing the fetched body.
    def test_200
      Util.stub(:run, mock_curl_200) do
        path = @curler.get("http://www.example.com")
        assert_equal(HTML, File.read(path))
      end
    end

    # A failing curl run is reported as Curler::Error.
    def test_500
      assert_raises(Curler::Error) do
        Util.stub(:run, mock_curl_500) do
          @curler.get("http://www.example.com")
        end
      end
    end

    def test_cached
      Util.stub(:run, mock_curl_200) do
        assert_equal(HTML, File.read(@curler.get("http://www.example.com")))
      end
      # the file is cached, so this shouldn't produce an error
      Util.stub(:run, mock_curl_500) do
        @curler.get("http://www.example.com")
      end
    end

    # After a 302 the curler reports the redirect target as its url.
    def test_302
      Util.stub(:run, mock_curl_302) do
        @curler.get("http://www.example.com")
        assert_equal("http://www.gub.com", @curler.url)
      end
    end

    # Two consecutive fetches should make the curler call sleep.
    def test_rate_limit
      slept = false

      # change Kernel#sleep to not really sleep!
      Kernel.send(:alias_method, :old_sleep, :sleep)
      begin
        Kernel.send(:define_method, :sleep) do |_x|
          slept = true
        end

        Util.stub(:run, mock_curl_200) do
          @curler.get("http://www.example.com/1")
          @curler.get("http://www.example.com/2")
        end
        assert(slept)
      ensure
        # restore old Kernel#sleep, even when the test fails, so the fake
        # sleep cannot leak into later tests
        Kernel.send(:alias_method, :sleep, :old_sleep)
        Kernel.send(:undef_method, :old_sleep)
      end
    end
  end
end
|