sinew 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,101 +0,0 @@
1
- require "active_support/core_ext"
2
- require "set"
3
-
4
module Sinew
  # Text helpers for cleaning up HTML (via the external `tidy` program)
  # and for escaping/unescaping XML and HTML entities.
  module TextUtil
    extend self

    # Tags whose attributes survive html_clean_from_tidy.
    ATTRS_KEEP = Set.new %w(a img iframe)

    # Command line switches handed to tidy. A nil value means the
    # switch takes no argument.
    TIDY_OPTIONS = {
      "-asxml" => nil,
      "-bare" => nil,
      "-quiet" => nil,
      "-utf8" => nil,
      "-wrap" => 0,
      "--doctype" => "omit",
      "--hide-comments" => "yes",
      "--force-output" => "yes",
      "-f" => "/dev/null",
    }

    XML_ENTITIES = { "&"=>"&amp;", "<"=>"&lt;", ">"=>"&gt;", "'"=>"&apos;", '"'=>"&quot;" }
    XML_ENTITIES_INV = XML_ENTITIES.invert
    COMMON_ENTITIES_INV = XML_ENTITIES_INV.merge(
      "&frac12;" => "1/2",
      "&frac14;" => "1/4",
      "&frac34;" => "3/4",
      "&ldquo;" => '"',
      "&lsquo;" => "'",
      "&mdash;" => "-",
      "&nbsp;" => " ",
      "&ndash;" => "-",
      "&rdquo;" => '"',
      "&rsquo;" => "'",
      "&tilde;" => "~",
      "&#34;" => '"',
      "&#39;" => "'",
      "&#160;" => " ",
      "&#8232;" => "\n"
    )

    #
    # tidy/clean
    #

    # Pipe s through tidy, then strip meta/link/style/script tags and
    # processing instructions and collapse whitespace. Returns the
    # cleaned string. Raises RuntimeError if tidy exits with a status
    # above 2. Relies on String#squish! from ActiveSupport.
    def html_tidy(s)
      # run tidy
      args = TIDY_OPTIONS.map { |k, v| "#{k} #{v}" }.join(" ")
      s = IO.popen("tidy #{args}", "rb+") do |f|
        f.write(s)
        f.close_write
        f.read
      end
      # exit statuses 0-2 are accepted; anything higher is a failure.
      # exitstatus replaces the deprecated Process::Status#>> and is nil
      # when the process did not exit normally (to_i maps that to 0,
      # matching the old shift-based check).
      raise "could not run tidy" if $?.exitstatus.to_i > 2

      # now kill some tags
      s.sub!(/<html\b[^>]+>/, "<html>")
      s.gsub!(/<\/?(meta|link)\b[^>]*>/m, "")
      s.gsub!(/<(style|script)\b[^>]*(\/>|>.*?<\/\1\b>)/m, "")
      s.gsub!(/<\?[^>]*>/m, "")
      s.squish!

      # kill whitespace around tags
      s.gsub!(/ ?<([^>]+)> ?/, "<\\1>")

      s
    end

    # Tidy s, then strip most attributes.
    def html_clean(s)
      html_clean_from_tidy(html_tidy(s))
    end

    # Strip the attributes from every tag in s except the tags listed in
    # ATTRS_KEEP. Works on a copy; s itself is not modified.
    def html_clean_from_tidy(s)
      s = s.dup
      s.gsub!(/<([^\s>]+)[^>]*?(\/)?>/) do |tag|
        name = Regexp.last_match(1)
        self_closing = Regexp.last_match(2)
        ATTRS_KEEP.include?(name) ? tag : "<#{name}#{self_closing}>"
      end
      s
    end

    #
    # untag/unent
    #

    # Replace the five XML special characters with their entities.
    def xml_escape(s)
      s.gsub(/[&<>'"]/) { |i| XML_ENTITIES[i] }
    end

    # Inverse of xml_escape: decode only the five XML entities.
    def xml_unescape(s)
      s.gsub(/&(amp|lt|gt|apos|quot);/) { |i| XML_ENTITIES_INV[i] }
    end

    # Replace every tag with a single space.
    def untag(s)
      s.gsub(/<[^>]+>/, " ")
    end

    # Decode the entities listed in COMMON_ENTITIES_INV. Any other
    # lowercase/numeric entity that matches the pattern is not in the
    # table and is therefore replaced with the empty string.
    def unent(s)
      s.gsub(/&#?[a-z0-9]{2,};/) { |i| COMMON_ENTITIES_INV[i] }
    end
  end
end
@@ -1,236 +0,0 @@
1
- require "digest/md5"
2
- require "etc"
3
- require "fileutils"
4
-
5
module Sinew
  # Helper module for executing commands and printing stuff
  # out.
  #
  # The general idea is to only print commands that are actually
  # interesting. For example, mkdir_if_necessary won't print anything
  # if the directory already exists. That way we can scan output and
  # see what changes were made without getting lost in repetitive
  # commands that had no actual effect.
  module Util
    # Raised by run when a command exits unsuccessfully.
    class RunError < StandardError; end

    extend self

    # ANSI escape sequences used by banner.
    RESET   = "\e[0m"
    RED     = "\e[1;37;41m"
    GREEN   = "\e[1;37;42m"
    YELLOW  = "\e[1;37;43m"
    BLUE    = "\e[1;37;44m"
    MAGENTA = "\e[1;37;45m"
    CYAN    = "\e[1;37;46m"

    #
    # running commands
    #

    # Make all commands echo before running.
    def run_verbose!
      @run_verbose = true
    end

    # Run a command, raise an error upon failure. Output goes to
    # $stdout/$stderr.
    #
    # With args, command and args are passed to system as separate
    # words; without args, command is passed to system as a single
    # string. Raises RuntimeError if the command was terminated by
    # SIGINT, RunError for any other unsuccessful status. Returns nil.
    def run(command, args = nil)
      if args
        args = args.map(&:to_s)
        line = "#{command} #{args.join(" ")}"
        vputs line
        system(command, *args)
      else
        line = command
        vputs line
        system(command)
      end

      status = $?
      return if status.success?

      raise "#{line} interrupted" if status.termsig == Signal.list["INT"]
      # exitstatus is nil when the process was killed by a signal
      raise RunError, "#{line} failed : #{status.exitstatus.to_i}"
    end

    # Like mkdir -p. Optionally, set the owner and mode.
    def mkdir(dir, owner = nil, mode = nil)
      FileUtils.mkdir_p(dir, verbose: verbose?)
      chmod(dir, mode) if mode
      chown(dir, owner) if owner
    end

    # mkdir only if the directory doesn't already exist. Optionally,
    # set the owner and mode.
    def mkdir_if_necessary(dir, owner = nil, mode = nil)
      # a symlink (even a dangling one) counts as "already there"
      return if File.exist?(dir) || File.symlink?(dir)
      mkdir(dir, owner, mode)
    end

    # rm a dir and recreate it. Raises RuntimeError for a blank dir.
    def rm_and_mkdir(dir)
      raise "don't do this" if dir.to_s.strip.empty?
      # argv form: no shell is involved, so spaces or shell
      # metacharacters in dir cannot change what gets removed
      run("rm", ["-rf", dir])
      run("mkdir", ["-p", dir])
    end

    # Are two files different?
    def different?(a, b)
      !FileUtils.compare_file(a, b)
    end

    # Copy file or dir from src to dst. Optionally, set the mode and
    # owner of dst. The owner is not changed when dst is a symlink.
    def cp(src, dst, owner = nil, mode = nil)
      FileUtils.cp_r(src, dst, preserve: true, verbose: verbose?)
      chown(dst, owner) if owner && !File.symlink?(dst)
      chmod(dst, mode) if mode
    end

    # Copy file or dir from src to dst, but create the dst directory
    # first if necessary. Optionally, set the mode and owner of dst.
    def cp_with_mkdir(src, dst, owner = nil, mode = nil)
      mkdir_if_necessary(File.dirname(dst))
      cp(src, dst, owner, mode)
    end

    # Copy file or dir from src to dst, but ONLY if dst doesn't exist
    # or has different contents than src. Optionally, set the mode and
    # owner of dst. Returns true if a copy happened, nil otherwise.
    def cp_if_necessary(src, dst, owner = nil, mode = nil)
      return unless !File.exist?(dst) || different?(src, dst)
      cp(src, dst, owner, mode)
      true
    end

    # Move src to dst. Because this uses FileUtils, it works even if
    # dst is on a different partition.
    def mv(src, dst)
      FileUtils.mv(src, dst, verbose: verbose?)
    end

    # Move src to dst, but create the dst directory first if
    # necessary.
    def mv_with_mkdir(src, dst)
      mkdir_if_necessary(File.dirname(dst))
      mv(src, dst)
    end

    # Chown file to be owned by user (user and group are both set to
    # user). Does nothing if the file already has that uid.
    def chown(file, user)
      user = user.to_s
      # cache uid lookups per user name
      @uids ||= {}
      @uids[user] ||= Etc.getpwnam(user).uid
      return if File.stat(file).uid == @uids[user]
      # argv form so quotes or spaces in file need no shell escaping
      run("chown", ["#{user}:#{user}", file])
    end

    # Chmod file to a new mode. Does nothing if the permission bits
    # already match.
    def chmod(file, mode)
      # stat.mode also carries the file type bits, so mask down to the
      # permission bits before comparing
      current = File.stat(file).mode & 0o7777
      FileUtils.chmod(mode, file, verbose: verbose?) if current != mode
    end

    # rm a file
    def rm(file)
      FileUtils.rm(file, force: true, verbose: verbose?)
    end

    # rm a file, but only if it exists. Returns true if removed.
    def rm_if_necessary(file)
      return unless File.exist?(file)
      rm(file)
      true
    end

    # Create a symlink from src to dst.
    def ln(src, dst)
      FileUtils.ln_sf(src, dst, verbose: verbose?)
    end

    # Create a symlink from src to dst, but only if it hasn't already
    # been created. Returns true if the link was (re)created.
    def ln_if_necessary(src, dst)
      link_needed =
        if !File.symlink?(dst)
          true
        elsif File.readlink(dst) != src
          # points somewhere else - remove the stale link first
          rm(dst)
          true
        else
          false
        end
      return unless link_needed
      ln(src, dst)
      true
    end

    # Touch a file
    def touch(file)
      FileUtils.touch(file)
    end

    # A nice printout in green (or the given color), written to
    # $stderr with a timestamp.
    def banner(s, color = GREEN)
      s = "#{s} ".ljust(72, " ")
      $stderr.write "#{color}[#{Time.new.strftime('%H:%M:%S')}] #{s}#{RESET}\n"
      $stderr.flush
    end

    # Print a warning in yellow.
    def warning(msg)
      banner("Warning: #{msg}", YELLOW)
    end

    # Print a fatal error in red, then exit.
    def fatal(msg)
      banner(msg, RED)
      exit(1)
    end

    # Generate some random text of length len, drawn from A-Z, a-z and
    # 0-9.
    def random_text(len)
      chars = ("A".."Z").to_a + ("a".."z").to_a + ("0".."9").to_a
      # sample picks uniformly from the whole alphabet (the previous
      # rand(length - 1) could never produce the last character)
      (1..len).map { chars.sample }.join("")
    end

    # Convert a string into something that could be a path segment.
    # A leading slash is dropped, ".." and the characters ? / & become
    # commas, any other character outside A-Za-z0-9_.,=- is replaced by
    # % plus the hex of its first byte, and the result is downcased.
    # An empty result becomes "_root_".
    def pathify(s)
      s = s.gsub(/^\//, "")
      s = s.gsub("..", ",")
      s = s.gsub(/[?\/&]/, ",")
      s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |i|
        hex = i.unpack("H2").first
        "%#{hex}"
      end
      s = "_root_" if s.empty?
      s = s.downcase
      s
    end

    # checksum some text
    def md5(s)
      Digest::MD5.hexdigest(s.to_s)
    end

    private

    # Returns true if verbosity is turned on.
    def verbose?
      @run_verbose ||= nil
    end

    # Print s to $stderr, but only in verbose mode.
    def vputs(s)
      $stderr.puts s if verbose?
    end
  end
end
@@ -1,64 +0,0 @@
1
- require "active_support/core_ext"
2
- require "test/unit"
3
- require "sinew"
4
-
5
module Sinew
  # Base class for the Sinew tests. Holds the fixture constants and
  # builds callables that stand in for the curl command runner.
  class TestCase < Test::Unit::TestCase
    TMP = "/tmp/_test_sinew"
    HTML_FILE = File.expand_path("#{File.dirname(__FILE__)}/test.html")
    HTML = File.read(HTML_FILE)

    #
    # for mocking curl
    #

    # Callable that writes the HTML fixture with a 200 status line.
    def mock_curl_200
      proc { |_cmd, args| mock_curl(args, HTML, "HTTP/1.1 200 OK") }
    end

    # Callable that writes an empty body with a 302 and a Location header.
    def mock_curl_302
      proc do |_cmd, args|
        mock_curl(args, "", "HTTP/1.1 302 Moved Temporarily\r\nLocation: http://www.gub.com")
      end
    end

    # Callable that always fails with Util::RunError.
    def mock_curl_500
      proc { |_cmd, _args| raise Util::RunError, "curl error" }
    end

    # Write body to the path following "--output" in args, then write
    # head (plus a blank line) to the path following "--dump-header".
    def mock_curl(args, body, head)
      output_at = args.index("--output") + 1
      File.write(args[output_at], body)

      header_at = args.index("--dump-header") + 1
      File.write(args[header_at], "#{head}\r\n\r\n")
    end
  end
end
39
-
40
- #
41
- # from MiniTest, but not in the gem yet
42
- #
43
-
44
class Object
  # Replace the method +name+ on this object's singleton class for the
  # duration of the given block, then restore the previous definition.
  # While stubbed, calls return val_or_callable itself, or the result of
  # val_or_callable.call(*args) when it responds to :call.
  # Returns the value of the block.
  def stub(name, val_or_callable, &block)
    backup = "__minitest_stub__#{name}"
    eigen = singleton_class

    # keep the real method reachable under the backup name
    eigen.send(:alias_method, backup, name)
    eigen.send(:define_method, name) do |*args|
      val_or_callable.respond_to?(:call) ? val_or_callable.call(*args) : val_or_callable
    end

    yield
  ensure
    # drop the stub, put the original back, and remove the backup alias
    eigen.send(:undef_method, name)
    eigen.send(:alias_method, name, backup)
    eigen.send(:undef_method, backup)
  end
end
@@ -1,70 +0,0 @@
1
- require "helper"
2
-
3
module Sinew
  # Tests for Curler, with Util.run stubbed out so no real curl
  # command is executed.
  class TestCurler < TestCase
    def setup
      # start from an empty TMP dir; rm_rf is a no-op when TMP is absent,
      # so no existence check (File.exists? is gone in Ruby 3.2) is needed
      FileUtils.rm_rf(TMP)
      FileUtils.mkdir_p(TMP)

      # curler, pointed at TMP
      @curler = Curler.new(dir: TMP, verbose: false)
    end

    #
    # tests
    #

    def test_200
      Util.stub(:run, mock_curl_200) do
        path = @curler.get("http://www.example.com")
        assert_equal(HTML, File.read(path))
      end
    end

    def test_500
      assert_raises(Curler::Error) do
        Util.stub(:run, mock_curl_500) do
          @curler.get("http://www.example.com")
        end
      end
    end

    def test_cached
      Util.stub(:run, mock_curl_200) do
        assert_equal(HTML, File.read(@curler.get("http://www.example.com")))
      end
      # the file is cached, so this shouldn't produce an error
      Util.stub(:run, mock_curl_500) do
        @curler.get("http://www.example.com")
      end
    end

    def test_302
      Util.stub(:run, mock_curl_302) do
        @curler.get("http://www.example.com")
        assert_equal("http://www.gub.com", @curler.url)
      end
    end

    def test_rate_limit
      slept = false

      # change Kernel#sleep to not really sleep!
      Kernel.send(:alias_method, :old_sleep, :sleep)
      begin
        Kernel.send(:define_method, :sleep) do |_seconds|
          slept = true
        end

        Util.stub(:run, mock_curl_200) do
          @curler.get("http://www.example.com/1")
          @curler.get("http://www.example.com/2")
        end
        assert(slept)
      ensure
        # restore old Kernel#sleep even when the assertion above fails,
        # so a failure here cannot leak the patch into other tests
        Kernel.send(:alias_method, :sleep, :old_sleep)
        Kernel.send(:undef_method, :old_sleep)
      end
    end
  end
end