wunderbar 0.8.13 → 0.8.14

Sign up to get free protection for your applications and to get access to all the features.
data/tools/web2script.rb DELETED
@@ -1,217 +0,0 @@
1
- require 'net/http'
2
- require 'rubygems'
3
- require 'nokogiri'
4
- require 'optparse'
5
-
6
- # Convert a webpage to a Wunderbar script
7
-
8
# Parse command-line switches. parse! removes the recognised switches from
# ARGV, so whatever remains afterwards is treated as the list of URLs.
OptionParser.new do |opts|
  opts.banner = "#{File.basename(__FILE__)} [-o output] [-w width] URLs..."

  # redirect everything written via puts to the named file
  opts.on '-o', '--output FILE', 'Send Output to FILE' do |file|
    $stdout = File.open(file, 'w')
  end

  # maximum line width used when wrapping text and attributes
  opts.on '-w', '--width WIDTH', Integer, 'Set line width' do |width|
    $width = width
  end

  # threshold (in output lines) above which blocks get blank lines around them
  opts.on '-g', '--group lines', Integer,
          'Insert blank lines around blocks larger than this value' do |group|
    $group = group
  end

  # only offered on Rubies whose strings know about encodings
  if ''.respond_to? 'encoding'
    # plain flag: it takes no argument, so no Integer coercion is declared
    opts.on '-a', '--ascii', 'Escape non-ASCII characters' do
      $ascii = true
    end
  end
end.parse!
26
-
27
# Method to "enquote" a string
class String
  # Returns the string as a double-quoted Ruby literal (String#inspect).
  #
  # When the global $ascii is set, every character outside \x20-\x7f that
  # inspect left untouched is replaced by a Unicode escape: "\uXXXX" for
  # code points up to U+FFFF and "\u{X...}" above that, because the
  # four-digit form cannot represent more than four hex digits.
  def enquote
    return inspect unless $ascii

    inspect.gsub(/[^\x20-\x7f]/) do |char|
      codepoint = char.ord
      if codepoint > 0xFFFF
        format('\u{%x}', codepoint)
      else
        format('\u%04x', codepoint)
      end
    end
  end
end
37
-
38
# queue of lines to be output
$q = []

# Appends +line+ to the global output queue $q and returns the queue.
def q(line)
  $q << line
end
43
-
44
# Queues +line+ via q, wrapping it at whitespace when the global $width is
# set and the line is longer than that width.
#
# line - the text to emit; callers pass "<prefix> <quoted string literal>".
#        The argument itself is never modified.
# join - text inserted at each break; the part before its "\n" ends the
#        current line and the part after it starts the continuation line.
#
# A break is only taken when it falls after the first double quote of the
# line and leaves text for the next line. Otherwise the remainder is emitted
# unwrapped, which keeps an unbreakable word from looping forever.
def flow_text(line, join)
  if $width
    chunk = [$width - 4, 1].max
    pattern = /(.{1,#{chunk}})(\s+|\Z)/

    while line.length > $width
      match = pattern.match(line)

      # a break in front of the opening quote would split the code prefix
      # instead of the string literal, so look for the next break after it
      opening = line.index('"')
      if match && opening && match.end(1) <= opening
        match = pattern.match(line, opening + 1)
      end

      # nothing left to carry over to a continuation line
      break if match.nil? || match.post_match.empty?

      wrapped = "#{match.pre_match}#{match[1]} #{join}#{match.post_match}"
      unless wrapped.include?("\n")
        line = wrapped
        break
      end

      remainder = wrapped[/\n(.*)/, 1]
      # give up rather than loop when wrapping does not shorten the line
      break if remainder.length >= line.length

      q wrapped.split("\n").first
      line = remainder
    end
  end

  q line
end
53
-
54
# Queues an element line followed by its attributes, separated by commas.
#
# line       - text emitted so far (element name, possibly a first argument)
# attributes - attribute strings to append; a comma is added before each
# indent     - indentation of the element; a continuation line starts with
#              this indent plus one extra space
#
# When the global $width is set and adding the next attribute would push
# the line past $width - 1, the pending text (ending in a comma) is queued
# and the attribute starts a continuation line.
def flow_attrs(line, attributes, indent)
  pending = attributes.inject(line) do |current, attribute|
    current = "#{current},"
    if $width && current.length + attribute.length > $width - 1
      q current
      current = "#{indent} "
    end
    current + attribute
  end

  q pending
end
65
-
66
# Helper for code: prepares a block of literal text (pre/script/style).
# Drops one leading newline and all trailing whitespace, then replaces the
# indentation shared by every non-blank line with "#{indent} ".
# Returns [reindented_text, new_line_prefix].
def reindent_block(text, indent)
  body = text.sub(/\A\n/, '').sub(/\s+\Z/, '')
  # each match is the leading spaces plus one character, so '^'.ljust(n)
  # yields a pattern of '^' followed by exactly the shared indentation
  unindent = body.scan(/^ *\S/).map(&:length).min || 1
  prefix = "#{indent} "
  [body.gsub(Regexp.new('^'.ljust(unindent)), prefix), prefix]
end

# Queues (via q, flow_text and flow_attrs) the Wunderbar script lines for
# +element+ and, recursively, for its children.
#
# element - a parsed node; the caller passes Nokogiri::HTML(doc).root
# indent  - leading whitespace for the lines generated for this element
#
# Reads the globals $namespaced, $uri, $group and $q.
# NOTE(review): the one-space paddings inside the string literals below are
# copied as they appear in the diff listing, which may have collapsed
# repeated spaces - confirm against the released file.
def code(element, indent='')
  # restore namespaces that Nokogiri::HTML dropped
  element_name = element.name
  if $namespaced[element.name]
    element_name = $namespaced[element.name]
    element_name += ',' unless element.attributes.empty?
  end

  attributes = []
  element.attributes.keys.each do |key|
    value = element[key]

    # resolve relative links
    if %w(a img link).include?(element.name) && %w(href src).include?(key)
      begin
        value = ($uri + value).to_s
      rescue StandardError
        # best effort: keep the attribute as written when it cannot be
        # resolved, so value is never nil further down
      end
    end

    if key =~ /^\w+$/
      if key == 'id' && value =~ /^\w+$/
        element_name += ".#{value}!"
      elsif key == 'class' && value =~ /^\w+$/
        element_name += ".#{value}"
      elsif key == 'xmlns' && %w(html svg mathml).include?(element.name)
        # drop xmlns attributes from these elements
      elsif key == 'type' && element.name == 'style' && value == 'text/css'
        # drop type attributes from style elements
      elsif key == 'type' && element.name == 'script' && value == 'text/javascript'
        # drop type attributes from script elements
      elsif RUBY_VERSION =~ /^1\.8/
        attributes << " :#{key} => #{value.enquote}"
      else
        attributes << " #{key}: #{value.enquote}"
      end
    else
      attributes << " #{key.enquote} => #{value.enquote}"
    end
  end

  line = "#{indent}_#{element_name}#{attributes.join(',')}"
  line.sub!(/^_/, 'W_.') if element_name == 'html' && indent.empty?

  # text inserted by flow_text wherever a long string literal is split
  continuation = "\" +\n #{indent}\""

  if element.children.empty?
    # the first attribute follows the name directly (no comma); shift keeps
    # the attributes in document order like the other branches
    flow_attrs "#{indent}_#{element_name}#{attributes.shift}", attributes, indent

  # element has children
  elsif element.children.any? { |child| child.element? }
    # do any of the text nodes need special processing to preserve spacing?
    flatten = false
    space = true
    if element.children.any? { |child| child.text? && !child.text.strip.empty? }
      element.children.each do |child|
        next unless child.text? || child.element?
        next if child.text == ''
        # two adjacent pieces with no whitespace between them
        flatten = true if !space && child.text !~ /\A\s/
        space = (child.text =~ /\s\Z/)
      end
    end
    line.sub!(/(\w)( |\.|$)/, '\1!\2') if flatten

    q "#{line} do"

    start = $q.length
    blank = false
    first = true

    # recursively process children
    element.children.each do |child|
      if child.text?
        text = child.text.gsub(/\s+/, ' ')
        text = text.strip unless flatten
        next if text.empty?
        flow_text "#{indent} _ #{text.enquote}", continuation
      elsif child.comment?
        flow_text "#{indent} _.comment #{child.text.strip.enquote}", continuation
      else
        code(child, indent + ' ')
      end

      # insert a blank line if either this or the previous block was large
      if $group && start + $group < $q.length
        $q[start].sub!(/^(\s+_\w+)([ .])/, '\1_\2')
        $q.insert(start, '') unless first
        blank = true
      else
        $q.insert(start, '') if blank
        blank = false
      end
      start = $q.length
      first = false
    end
    q indent + "end"

  elsif element.name == 'pre' && element.text.include?("\n")
    data, prefix = reindent_block(element.text, indent)

    # the heredoc body starts on the very next line, so the attributes must
    # all stay on the opening line and are never wrapped
    q "#{indent}_pre <<-EOD.gsub(/^\\s{#{prefix.length}}/,'')" +
      attributes.map { |attribute| ",#{attribute}" }.join
    data.split("\n").each { |text_line| q text_line }
    q "#{indent}EOD"

  # element has text but no attributes or children
  elsif attributes.empty?
    if %w(script style).include?(element.name) && element.text.include?("\n")
      script, = reindent_block(element.text, indent)

      q "#{line} %{"
      script.split("\n").each { |text_line| q text_line }
      q "#{indent}}"
    else
      flow_text "#{line} #{element.text.enquote}", continuation
    end

  # element has text and attributes but no children
  else
    flow_attrs "#{indent}_#{element_name} #{element.text.enquote}",
               attributes, indent
  end
end
196
-
197
# fetch and convert each web page
ARGV.each do |arg|
  $uri = URI.parse arg
  doc = Net::HTTP.get($uri)

  # map local names to "prefix :name" for every closing tag written as
  # </prefix:name>, then forget names that also occur without a prefix
  pairs = doc.scan(/<\/(\w+):(\w+)>/).uniq
  $namespaced = Hash[pairs.map { |prefix, name| [name, "#{prefix} :#{name}"] }]
  $namespaced.delete_if { |name, _| doc =~ /<#{name}[ >]/ }

  root = Nokogiri::HTML(doc).root
  if root
    code root
  else
    # nothing to convert; report it instead of failing inside code
    warn "#{arg}: no document root found"
  end
end

# headers
if ''.respond_to? 'encoding'
  # the magic comment is only emitted when the generated script needs it
  puts '# encoding: utf-8' if $q.any? { |line| line.match(/[^\x20-\x7f]/) }
else
  puts "require 'rubygems'"
end

puts "require 'wunderbar'\n\n"

# main output
puts $q.join("\n")