sinew 1.0.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,101 +0,0 @@
1
- require "active_support/core_ext"
2
- require "set"
3
-
4
- module Sinew
5
- module TextUtil
6
- extend self
7
-
8
# Tags whose attributes survive html_clean_from_tidy.
ATTRS_KEEP = Set.new(%w[a img iframe]).freeze

# Command-line switches handed to the external `tidy` program. A nil value
# means the switch takes no argument.
TIDY_OPTIONS = {
  "-asxml" => nil,
  "-bare" => nil,
  "-quiet" => nil,
  "-utf8" => nil,
  "-wrap" => 0,
  "--doctype" => "omit",
  "--hide-comments" => "yes",
  "--force-output" => "yes",
  "-f" => "/dev/null",
}.freeze

# The five predefined XML entities, keyed by raw character.
XML_ENTITIES = { "&"=>"&amp;", "<"=>"&lt;", ">"=>"&gt;", "'"=>"&apos;", '"'=>"&quot;" }.freeze

# Reverse lookup: entity text => raw character.
XML_ENTITIES_INV = XML_ENTITIES.invert.freeze

# XML entities plus a handful of frequently seen HTML entities, each mapped
# to a plain-ASCII stand-in. Used by unent.
COMMON_ENTITIES_INV = XML_ENTITIES_INV.merge(
  "&frac12;" => "1/2",
  "&frac14;" => "1/4",
  "&frac34;" => "3/4",
  "&ldquo;" => '"',
  "&lsquo;" => "'",
  "&mdash;" => "-",
  "&nbsp;" => " ",
  "&ndash;" => "-",
  "&rdquo;" => '"',
  "&rsquo;" => "'",
  "&tilde;" => "~",
  "&#34;" => '"',
  "&#39;" => "'",
  "&#160;" => " ",
  "&#8232;" => "\n"
).freeze
40
-
41
- #
42
- # tidy/clean
43
- #
44
-
45
# Pipe +s+ through the external `tidy` program (switches come from
# TIDY_OPTIONS), then strip tags we never want and collapse whitespace.
#
# @param s [String] raw HTML
# @return [String] tidied markup
# @raise [RuntimeError] if tidy did not exit normally or exited above 2
def html_tidy(s)
  # run tidy
  args = TIDY_OPTIONS.map { |k, v| "#{k} #{v}" }.join(" ")
  s = IO.popen("tidy #{args}", "rb+") do |f|
    f.write(s)
    f.close_write
    f.read
  end
  # Exit codes up to 2 are tolerated, as before. A child that did not exit
  # on its own (e.g. killed by a signal) has no exit status and is now
  # treated as a failure instead of being mistaken for exit code 0.
  status = $?
  raise "could not run tidy" unless status.exited? && status.exitstatus <= 2

  # now kill some tags
  s.sub!(/<html\b[^>]+>/, "<html>")
  s.gsub!(/<\/?(meta|link)\b[^>]*>/m, "")
  s.gsub!(/<(style|script)\b[^>]*(\/>|>.*?<\/\1\b>)/m, "")
  s.gsub!(/<\?[^>]*>/m, "")
  s.squish!

  # kill whitespace around tags
  s.gsub!(/ ?<([^>]+)> ?/, "<\\1>")

  s
end
67
-
68
# Tidy +s+ with html_tidy, then strip attributes with html_clean_from_tidy.
#
# @param s [String] raw HTML
# @return [String] tidied markup with most attributes removed
def html_clean(s)
  tidied = html_tidy(s)
  html_clean_from_tidy(tidied)
end
71
-
72
# Drop the attributes from every tag except those named in ATTRS_KEEP.
# A trailing "/" on a self-closing tag is preserved. The argument is not
# modified; a new string is returned.
#
# @param s [String] markup, normally the output of html_tidy
# @return [String]
def html_clean_from_tidy(s)
  s.gsub(/<([^\s>]+)[^>]*?(\/)?>/) do |tag|
    name = Regexp.last_match(1)
    slash = Regexp.last_match(2)
    if ATTRS_KEEP.include?(name)
      tag
    else
      "<#{name}#{slash}>"
    end
  end
end
80
-
81
- #
82
- # untag/unent
83
- #
84
-
85
# Replace the characters & < > ' " with their entries in XML_ENTITIES.
#
# @param s [String]
# @return [String] a new, escaped string
def xml_escape(s)
  s.gsub(/[&<>'"]/, XML_ENTITIES)
end
88
-
89
# Turn &amp; &lt; &gt; &apos; &quot; back into raw characters using
# XML_ENTITIES_INV. The string is scanned once, so "&amp;amp;" becomes
# "&amp;" rather than "&".
#
# @param s [String]
# @return [String] a new, unescaped string
def xml_unescape(s)
  s.gsub(/&(amp|lt|gt|apos|quot);/, XML_ENTITIES_INV)
end
92
-
93
# Replace every <...> tag in +s+ with a single space.
#
# @param s [String]
# @return [String] a new string without tags
def untag(s)
  tag = /<[^>]+>/
  s.gsub(tag) { " " }
end
96
-
97
# Replace entities listed in COMMON_ENTITIES_INV with their plain-text
# stand-ins.
#
# Entities that are not in the table are left untouched. Previously the
# table lookup returned nil for them, so gsub silently deleted them from
# the text (e.g. "&copy;" or "&#8217;" vanished).
#
# @param s [String]
# @return [String] a new string
def unent(s)
  s.gsub(/&#?[a-z0-9]{2,};/) { |entity| COMMON_ENTITIES_INV.fetch(entity, entity) }
end
100
- end
101
- end
@@ -1,236 +0,0 @@
1
- require "digest/md5"
2
- require "etc"
3
- require "fileutils"
4
-
5
- module Sinew
6
- # Helper module for executing commands and printing stuff
7
- # out.
8
- #
9
- # The general idea is to only print commands that are actually
10
- # interesting. For example, mkdir_if_necessary won't print anything
11
- # if the directory already exists. That way we can scan output and
12
- # see what changes were made without getting lost in repetitive
13
- # commands that had no actual effect.
14
- module Util
15
- class RunError < StandardError ; end
16
-
17
- extend self
18
-
19
# ANSI escape sequences used by banner: RESET clears attributes, the
# others select bold white text on the named background colour.
RESET = "\e[0m".freeze
RED = "\e[1;37;41m".freeze
GREEN = "\e[1;37;42m".freeze
YELLOW = "\e[1;37;43m".freeze
BLUE = "\e[1;37;44m".freeze
MAGENTA = "\e[1;37;45m".freeze
CYAN = "\e[1;37;46m".freeze
26
-
27
- #
28
- # running commands
29
- #
30
-
31
- # Make all commands echo before running.
32
# Switch on echoing: sets the flag that verbose? reports.
#
# @return [true]
def run_verbose!
  @run_verbose = true
end
35
-
36
- # Run a command, raise an error upon failure. Output goes to
37
- # $stdout/$stderr.
38
# Run a command and raise if it does not succeed. Output goes to
# $stdout/$stderr. The command line is echoed through vputs first.
#
# @param command [String] program name, or a whole command line when
#   +args+ is nil
# @param args [Array, nil] arguments; each is converted with to_s and
#   passed to Kernel#system separately from +command+
# @return [nil] on success
# @raise [RuntimeError] if the child was terminated by SIGINT
# @raise [RunError] on any other failure; the message carries the exit
#   status, or the signal number when the child was killed by a signal
def run(command, args = nil)
  if args
    args = args.map(&:to_s)
    line = "#{command} #{args.join(" ")}"
    vputs line
    system(command, *args)
  else
    line = command
    vputs line
    system(command)
  end

  status = $?
  return if status.success?

  raise "#{line} interrupted" if status.termsig == Signal.list["INT"]

  # exitstatus is nil when the child was killed by a signal; the old
  # `to_i / 256` arithmetic reported that misleadingly as "failed : 0".
  detail = status.exitstatus || "signal #{status.termsig}"
  raise RunError, "#{line} failed : #{detail}"
end
57
-
58
- # Like mkdir -p. Optionally, set the owner and mode.
59
# Create +dir+ together with any missing parents (FileUtils.mkdir_p),
# then apply +mode+ via chmod and +owner+ via chown when they are given.
#
# @param dir [String] directory path
# @param owner [Object, nil] forwarded to chown when truthy
# @param mode [Object, nil] forwarded to chmod when truthy
def mkdir(dir, owner = nil, mode = nil)
  FileUtils.mkdir_p(dir, verbose: verbose?)
  chmod(dir, mode) if mode
  chown(dir, owner) if owner
end
64
-
65
- # mkdir only if the directory doesn't already exist. Optionally,
66
- # set the owner and mode.
67
# Call mkdir unless something already exists at +dir+. A symlink at that
# path counts as existing even when its target is missing.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# @param dir [String] directory path
# @param owner [Object, nil] forwarded to mkdir
# @param mode [Object, nil] forwarded to mkdir
# @return [nil] when nothing had to be done, otherwise whatever mkdir returns
def mkdir_if_necessary(dir, owner = nil, mode = nil)
  return if File.exist?(dir) || File.symlink?(dir)

  mkdir(dir, owner, mode)
end
70
-
71
- # rm a dir and recreate it.
72
# Remove +dir+ recursively and recreate it empty.
#
# The path is handed to rm/mkdir as a separate argument instead of being
# interpolated into a shell command line, so spaces or shell
# metacharacters in +dir+ cannot change what gets deleted. "--" stops a
# path that begins with "-" from being read as an option.
#
# @param dir [String] directory path
# @raise [RuntimeError] if +dir+ is nil, empty or blank
# @raise [RunError] if rm or mkdir fails (raised by run)
def rm_and_mkdir(dir)
  raise "don't do this" if dir.nil? || dir.to_s.strip.empty?

  run("rm", ["-rf", "--", dir])
  run("mkdir", ["-p", "--", dir])
end
76
-
77
- # Are two files different?
78
# True when FileUtils.compare_file reports that +a+ and +b+ differ.
#
# @param a [String] path of the first file
# @param b [String] path of the second file
# @return [Boolean]
def different?(a, b)
  same = FileUtils.compare_file(a, b)
  !same
end
81
-
82
- # Copy file or dir from src to dst. Optionally, set the mode and
83
- # owner of dst.
84
# Recursively copy +src+ to +dst+ with FileUtils.cp_r (preserve: true),
# then optionally adjust the owner and mode of +dst+. The chown step is
# skipped when +dst+ is a symlink.
#
# @param src [String] source path
# @param dst [String] destination path
# @param owner [Object, nil] forwarded to chown when truthy
# @param mode [Object, nil] forwarded to chmod when truthy
def cp(src, dst, owner = nil, mode = nil)
  FileUtils.cp_r(src, dst, preserve: true, verbose: verbose?)
  chown(dst, owner) if owner && !File.symlink?(dst)
  chmod(dst, mode) if mode
end
93
-
94
- # Copy file or dir from src to dst, but create the dst directory
95
- # first if necessary. Optionally, set the mode and owner of dst.
96
# Make sure the parent directory of +dst+ exists (mkdir_if_necessary),
# then delegate to cp.
#
# @param src [String] source path
# @param dst [String] destination path
# @param owner [Object, nil] forwarded to cp
# @param mode [Object, nil] forwarded to cp
def cp_with_mkdir(src, dst, owner = nil, mode = nil)
  parent = File.dirname(dst)
  mkdir_if_necessary(parent)
  cp(src, dst, owner, mode)
end
100
-
101
- # Copy file or dir from src to dst, but ONLY if dst doesn't exist
102
- # or has different contents than src. Optionally, set the mode and
103
- # owner of dst.
104
# Copy +src+ to +dst+ only when +dst+ is missing or its contents differ
# from +src+ (as judged by different?).
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# @param src [String] source path
# @param dst [String] destination path
# @param owner [Object, nil] forwarded to cp
# @param mode [Object, nil] forwarded to cp
# @return [true, nil] true when a copy was made, nil otherwise
def cp_if_necessary(src, dst, owner = nil, mode = nil)
  return if File.exist?(dst) && !different?(src, dst)

  cp(src, dst, owner, mode)
  true
end
110
-
111
- # Move src to dst. Because this uses FileUtils, it works even if
112
- # dst is on a different partition.
113
# Move +src+ to +dst+ through FileUtils.mv, echoing when verbose? is set.
#
# @param src [String] source path
# @param dst [String] destination path
def mv(src, dst)
  FileUtils.mv(src, dst, verbose: verbose?)
end
116
-
117
- # Move src to dst, but create the dst directory first if
118
- # necessary.
119
# Make sure the parent directory of +dst+ exists (mkdir_if_necessary),
# then delegate to mv.
#
# @param src [String] source path
# @param dst [String] destination path
def mv_with_mkdir(src, dst)
  parent = File.dirname(dst)
  mkdir_if_necessary(parent)
  mv(src, dst)
end
123
-
124
- # Chown file to be owned by user.
125
# Give +file+ to +user+ (as both owner and group, "user:user") unless the
# file's uid already matches. uid lookups are cached in @uids.
#
# The path is passed to chown as its own argument rather than inside a
# single-quoted shell string, so names containing quotes or shell
# metacharacters are handled correctly.
#
# @param file [String] path to change
# @param user [String, Symbol] account name; converted with to_s
# @raise [ArgumentError] from Etc.getpwnam if the user is unknown
# @raise [RunError] if the chown command fails (raised by run)
def chown(file, user)
  user = user.to_s
  @uids ||= {}
  @uids[user] ||= Etc.getpwnam(user).uid
  return if File.stat(file).uid == @uids[user]

  run("chown", ["#{user}:#{user}", file])
end
135
-
136
- # Chmod file to a new mode.
137
# Set the permissions of +file+ to +mode+, skipping the call when the
# file already has exactly those permission bits.
#
# File::Stat#mode also carries the file-type bits (0100644 for a regular
# file with mode 0644), so it has to be masked before comparing.
# Without the mask the comparison never matched and chmod ran every time.
#
# @param file [String] path to change
# @param mode [Integer, String] anything FileUtils.chmod accepts; a
#   non-integer mode never equals the current bits, so it is always applied
def chmod(file, mode)
  current = File.stat(file).mode & 07777
  return if current == mode

  FileUtils.chmod(mode, file, verbose: verbose?)
end
142
-
143
- # rm a file
144
# Delete +file+ with FileUtils.rm using force: true, echoing when
# verbose? is set.
#
# @param file [String] path to delete
def rm(file)
  FileUtils.rm(file, force: true, verbose: verbose?)
end
147
-
148
- # rm a file, but only if it exists.
149
# Delete +file+ via rm, but only when it exists.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
#
# @param file [String] path to delete
# @return [true, nil] true when the file was removed, nil otherwise
def rm_if_necessary(file)
  return unless File.exist?(file)

  rm(file)
  true
end
155
-
156
- # Create a symlink from src to dst.
157
# Create a symlink at +dst+ pointing to +src+ with FileUtils.ln_sf,
# echoing when verbose? is set.
#
# @param src [String] link target
# @param dst [String] path of the link to create
def ln(src, dst)
  FileUtils.ln_sf(src, dst, verbose: verbose?)
end
160
-
161
- # Create a symlink from src to dst, but only if it hasn't already
162
- # been created.
163
# Make +dst+ a symlink to +src+ unless it already is one.
#
# * +dst+ is a symlink to +src+        -> nothing happens
# * +dst+ is a symlink to another path -> it is removed via rm, then linked
# * +dst+ is not a symlink             -> linked via ln
#
# @param src [String] link target
# @param dst [String] path of the link
# @return [true, nil] true when a link was created, nil otherwise
def ln_if_necessary(src, dst)
  if File.symlink?(dst)
    return if File.readlink(dst) == src

    rm(dst)
  end

  ln(src, dst)
  true
end
176
-
177
- # Touch a file
178
# Delegate to FileUtils.touch for +file+.
#
# @param file [String] path to touch
def touch(file)
  FileUtils.touch(file)
end
181
-
182
- # A nice printout in green.
183
# Write a coloured, timestamped line to $stderr and flush it. The message
# gets one trailing space and is padded with spaces to at least 72
# characters; longer messages are not truncated.
#
# @param s [#to_s] message text
# @param color [String] escape sequence written before the line
def banner(s, color = GREEN)
  padded = "#{s} ".ljust(72, " ")
  stamp = Time.new.strftime('%H:%M:%S')
  $stderr.write "#{color}[#{stamp}] #{padded}#{RESET}\n"
  $stderr.flush
end
188
-
189
- # Print a warning in yellow.
190
# Show +msg+ through banner, prefixed with "Warning: " and coloured YELLOW.
#
# @param msg [#to_s] warning text
def warning(msg)
  text = "Warning: #{msg}"
  banner(text, YELLOW)
end
193
-
194
- # Print a fatal error in red, then exit.
195
# Show +msg+ through banner in RED, then terminate with exit status 1.
#
# @param msg [#to_s] error text
def fatal(msg)
  banner(msg, RED)
  exit(1)
end
199
-
200
- # Generate some random text
201
# Build a random string of +len+ characters drawn from A-Z, a-z and 0-9.
#
# Every character of the alphabet can now be chosen. The previous
# `rand(chars.length - 1)` index stopped one short, so "9" never appeared.
#
# @param len [Integer] desired length; zero or less yields ""
# @return [String]
def random_text(len)
  chars = ("A".."Z").to_a + ("a".."z").to_a + ("0".."9").to_a
  (1..len).map { chars.sample }.join
end
205
-
206
- # Convert a string into something that could be a path segment
207
# Convert a string into something that can serve as one path segment.
#
# Steps, in order: drop one leading "/", turn ".." and the characters
# ? / & into ",", percent-encode everything outside A-Za-z0-9_.,=-,
# substitute "_root_" for an empty result, and downcase.
#
# Every byte of a multi-byte character is encoded ("é" -> "%c3%a9").
# Encoding only the first byte, as before, mapped different characters
# that share a lead byte onto the same segment. The leading slash is
# matched with \A so that only the start of the string is affected, not
# the start of every line.
#
# @param s [String]
# @return [String] lower-case path segment
def pathify(s)
  s = s.sub(%r{\A/}, "")
  s = s.gsub("..", ",")
  s = s.gsub(%r{[?/&]}, ",")
  s = s.gsub(/[^A-Za-z0-9_.,=-]/) do |char|
    char.bytes.map { |byte| format("%%%02x", byte) }.join
  end
  s = "_root_" if s.empty?
  s.downcase
end
219
-
220
- # checksum some text
221
# Hex MD5 digest of +s+ after converting it with to_s.
#
# @param s [#to_s] value to checksum
# @return [String] 32 lowercase hex characters
def md5(s)
  text = s.to_s
  Digest::MD5.hexdigest(text)
end
224
-
225
- private
226
-
227
- # Returns true if verbosity is turned on.
228
# Report the echo flag set by run_verbose!. A falsy flag is normalised to
# nil, so the result is either the truthy flag value or nil.
def verbose?
  @run_verbose = nil unless @run_verbose
  @run_verbose
end
231
-
232
# Print +s+ to $stderr, but only while verbose? is truthy.
#
# @param s [#to_s] text to print
def vputs(s)
  return unless verbose?

  $stderr.puts s
end
235
- end
236
- end
@@ -1,64 +0,0 @@
1
- require "active_support/core_ext"
2
- require "test/unit"
3
- require "sinew"
4
-
5
- module Sinew
6
# Base class for the gem's tests. It provides the fixture constants and
# procs that stand in for the curl invocation: each proc takes the
# (cmd, args) pair and fakes curl's effect.
class TestCase < Test::Unit::TestCase
  TMP = "/tmp/_test_sinew"
  HTML_FILE = File.expand_path("#{File.dirname(__FILE__)}/test.html")
  HTML = File.read(HTML_FILE)

  #
  # for mocking curl
  #

  # Proc that fakes a 200 response whose body is the HTML fixture.
  def mock_curl_200
    proc { |_cmd, args| mock_curl(args, HTML, "HTTP/1.1 200 OK") }
  end

  # Proc that fakes a 302 response with an empty body and a Location header.
  def mock_curl_302
    proc do |_cmd, args|
      mock_curl(args, "", "HTTP/1.1 302 Moved Temporarily\r\nLocation: http://www.gub.com")
    end
  end

  # Proc that fakes a failed run by raising Util::RunError.
  def mock_curl_500
    proc { |_cmd, _args| raise Util::RunError, "curl error" }
  end

  # Write +body+ to the path that follows "--output" in +args+, and +head+
  # (plus a blank line) to the path that follows "--dump-header".
  def mock_curl(args, body, head)
    body_path = args[args.index("--output") + 1]
    File.write(body_path, body)
    head_path = args[args.index("--dump-header") + 1]
    File.write(head_path, "#{head}\r\n\r\n")
  end
end
38
- end
39
-
40
- #
41
- # from MiniTest, but not in the gem yet
42
- #
43
-
44
class Object
  # Temporarily replace the method +name+ on this one object while the
  # given block runs, then put the original back (even if the block raises).
  #
  # While stubbed, calling +name+ returns +val_or_callable+, or the result
  # of calling it with the call's arguments if it responds to #call.
  #
  # Returns the value of the block.
  def stub name, val_or_callable, &block
    backup = "__minitest_stub__#{name}"
    metaclass = singleton_class

    metaclass.send :alias_method, backup, name
    metaclass.send :define_method, name do |*args|
      next val_or_callable unless val_or_callable.respond_to? :call

      val_or_callable.call(*args)
    end

    yield
  ensure
    # Remove the stub, restore the original under its own name, and drop
    # the temporary alias.
    metaclass.send :undef_method, name
    metaclass.send :alias_method, name, backup
    metaclass.send :undef_method, backup
  end
end
@@ -1,70 +0,0 @@
1
- require "helper"
2
-
3
- module Sinew
4
# Tests for Curler, with Util.run stubbed so that curl is never invoked.
class TestCurler < TestCase
  # Start every test with an empty TMP directory and a Curler using it.
  def setup
    # create TMP dir (File.exists? was removed in Ruby 3.2)
    FileUtils.rm_rf(TMP) if File.exist?(TMP)
    FileUtils.mkdir_p(TMP)

    # curler, pointed at TMP
    @curler = Curler.new(dir: TMP, verbose: false)
  end

  #
  # tests
  #

  # A 200 response: the path returned by get holds the fixture body.
  def test_200
    Util.stub(:run, mock_curl_200) do
      path = @curler.get("http://www.example.com")
      assert_equal(HTML, File.read(path))
    end
  end

  # A failing run is expected to surface as Curler::Error.
  def test_500
    assert_raises(Curler::Error) do
      Util.stub(:run, mock_curl_500) do
        @curler.get("http://www.example.com")
      end
    end
  end

  def test_cached
    Util.stub(:run, mock_curl_200) do
      assert_equal(HTML, File.read(@curler.get("http://www.example.com")))
    end
    # the file is cached, so this shouldn't produce an error
    Util.stub(:run, mock_curl_500) do
      @curler.get("http://www.example.com")
    end
  end

  # After a 302, the curler's url is expected to be the Location value.
  def test_302
    Util.stub(:run, mock_curl_302) do
      @curler.get("http://www.example.com")
      assert_equal("http://www.gub.com", @curler.url)
    end
  end

  # Two gets in a row are expected to trigger a sleep.
  def test_rate_limit
    slept = false

    # change Kernel#sleep to not really sleep!
    Kernel.send(:alias_method, :old_sleep, :sleep)
    Kernel.send(:define_method, :sleep) do |_seconds|
      slept = true
    end

    begin
      Util.stub(:run, mock_curl_200) do
        @curler.get("http://www.example.com/1")
        @curler.get("http://www.example.com/2")
      end
      assert(slept)
    ensure
      # restore old Kernel#sleep, even when a get or the assertion fails,
      # so a failure here cannot leave sleep stubbed for later tests
      Kernel.send(:alias_method, :sleep, :old_sleep)
      Kernel.send(:undef_method, :old_sleep)
    end
  end
end
70
- end