wunderbar 0.8.13 → 0.8.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/tools/web2script.rb DELETED
@@ -1,217 +0,0 @@
1
- require 'net/http'
2
- require 'rubygems'
3
- require 'nokogiri'
4
- require 'optparse'
5
-
6
- # Convert a webpage to a Wunderbar script
7
-
8
# Parse command-line switches; whatever remains in ARGV afterwards is treated
# as the list of URLs to convert.
#
#   -o FILE   redirect all output to FILE (replaces $stdout)
#   -w WIDTH  wrap generated lines at WIDTH columns (sets $width)
#   -g LINES  surround blocks longer than LINES with blank lines (sets $group)
#   -a        escape non-ASCII characters (sets $ascii); only offered when
#             String responds to `encoding`
OptionParser.new do |opts|
  opts.banner = "#{File.basename(__FILE__)} [-o output] [-w width] URLs..."

  opts.on('-o', '--output FILE', 'Send Output to FILE') do |file|
    $stdout = File.open(file, 'w')
  end

  opts.on('-w', '--width WIDTH', Integer, 'Set line width') do |width|
    $width = width
  end

  opts.on('-g', '--group lines', Integer,
          'Insert blank lines around blocks larger than this value') do |group|
    $group = group
  end

  if ''.respond_to?('encoding')
    # A plain flag: it takes no argument, so no Integer coercion is declared.
    opts.on('-a', '--ascii', 'Escape non-ASCII characters') do
      $ascii = true
    end
  end
end.parse!
26
-
27
# Method to "enquote" a string
class String
  # Returns the string as a double-quoted Ruby literal (via #inspect).
  #
  # When $ascii is set, every character of the inspected form outside
  # \x20-\x7f is replaced by a Unicode escape so the result is pure ASCII.
  # Code points up to U+FFFF use the four-digit \uXXXX form; larger code
  # points use the braced \u{...} form, because "\u" followed by five hex
  # digits would be read back as a four-digit escape plus a literal digit.
  def enquote
    return inspect unless $ascii

    inspect.gsub(/[^\x20-\x7f]/) do |c|
      codepoint = c.ord
      if codepoint > 0xffff
        format("\\u{%x}", codepoint)
      else
        format('\u%04x', codepoint)
      end
    end
  end
end
37
-
38
# Generated script lines accumulate here, in order, until they are printed.
$q = []

# Append +line+ to the end of the output queue.
def q(line)
  $q.push(line)
end
43
-
44
# Queue +line+, wrapping it when it is longer than $width.
#
# line - the text to emit; the caller's string is left unmodified.
# join - text inserted at each break point (after a single space). A piece
#        is only split off when the result contains a newline; everything
#        before the newline is queued and the remainder is processed again.
#
# Breaks are made at whitespace (or end of string) within the first
# $width - 4 characters. With no $width set, the line is queued as is.
def flow_text(line, join)
  if $width
    # $width does not change while wrapping, so build the pattern once.
    break_point = /(.{1,#{$width - 4}})(\s+|\Z)/

    while line.length > $width
      # Non-destructive sub: callers keep ownership of the string they passed.
      line = line.sub(break_point, "\\1 #{join}")
      break unless line.include?("\n")

      q line.split("\n").first
      line = line[/\n(.*)/, 1]
    end
  end

  q line
end
53
-
54
# Queue +line+ followed by each entry of +attributes+, comma separated.
#
# When $width is set and appending the next attribute would make the current
# line longer than $width - 1, the current line (ending in a comma) is queued
# and a continuation line starting with "#{indent} " is begun.
def flow_attrs(line, attributes, indent)
  limit = $width && $width - 1

  last_line = attributes.inject(line) do |current, attribute|
    current = "#{current},"
    if limit && (current + attribute).length > limit
      q current
      current = "#{indent} "
    end
    current + attribute
  end

  q last_line
end
65
-
66
# Strip the leading newline and trailing whitespace from +text+, then replace
# the indentation shared by all of its non-blank lines with +margin+.
def reindent_block(text, margin)
  body = text.sub(/\A\n/, '').sub(/\s+\Z/, '')

  # Width of the shallowest "spaces + first non-space character" prefix; the
  # pattern below is therefore '^' followed by that many spaces minus one.
  unindent = body.scan(/^ *\S/).map(&:length).min || 1
  body.gsub(Regexp.new('^'.ljust(unindent)), margin)
end

# Queue the Wunderbar script lines for +element+ and, recursively, for its
# children.
#
# element - a node responding to name, attributes, [], children, text,
#           element?, text? and comment? (called below with the root of a
#           Nokogiri::HTML document).
# indent  - whitespace prefix for every line generated at this level.
#
# Reads $namespaced, $uri, $group and (through flow_text / flow_attrs) $width;
# appends to $q.
def code(element, indent='')
  # restore namespaces that Nokogiri::HTML dropped
  element_name = element.name
  if $namespaced[element.name]
    element_name = $namespaced[element.name]
    element_name += ',' unless element.attributes.empty?
  end

  attributes = []
  element.attributes.keys.each do |key|
    value = element[key]

    # resolve relative links
    if %w(a img link).include?(element.name) && %w(href src).include?(key)
      begin
        value = ($uri + value).to_s
      rescue StandardError
        # The link could not be resolved against $uri; keep the attribute
        # value exactly as written instead of turning it into nil, which
        # would make value.enquote below raise.
      end
    end

    if key =~ /^\w+$/
      if key == 'id' && value =~ /^\w+$/
        element_name += ".#{value}!"
      elsif key == 'class' && value =~ /^\w+$/
        element_name += ".#{value}"
      elsif key == 'xmlns' && %w(html svg mathml).include?(element.name)
        # drop xmlns attributes from these elements
      elsif key == 'type' && element.name == 'style' && value == 'text/css'
        # drop type attributes from style elements
      elsif key == 'type' && element.name == 'script' && value == 'text/javascript'
        # drop type attributes from script elements
      elsif RUBY_VERSION =~ /^1\.8/
        attributes << " :#{key} => #{value.enquote}"
      else
        attributes << " #{key}: #{value.enquote}"
      end
    else
      attributes << " #{key.enquote} => #{value.enquote}"
    end
  end

  line = "#{indent}_#{element_name}#{attributes.join(',')}"
  line.sub!(/^_/, 'W_.') if element_name == 'html' && indent.empty?

  if element.children.empty?
    # NOTE(review): pop puts the *last* attribute on the first line, ahead of
    # the others; shift would keep source order -- confirm intent.
    flow_attrs "#{indent}_#{element_name}#{attributes.pop}", attributes, indent

  # element has children
  elsif element.children.any? { |child| child.element? }
    # do any of the text nodes need special processing to preserve spacing?
    flatten = false
    space = true
    if element.children.any? { |child| child.text? && !child.text.strip.empty? }
      element.children.each do |child|
        next unless child.text? || child.element?
        next if child.text == ''

        # two adjacent nodes with no whitespace between them
        flatten = true if !space && child.text !~ /\A\s/
        space = (child.text =~ /\s\Z/)
      end
    end
    line.sub!(/(\w)( |\.|$)/, '\1!\2') if flatten

    q "#{line} do"

    start = $q.length
    blank = false
    first = true

    # recursively process children
    element.children.each do |child|
      if child.text?
        text = child.text.gsub(/\s+/, ' ')
        text = text.strip unless flatten
        next if text.empty?
        flow_text "#{indent} _ #{text.enquote}", "\" +\n #{indent}\""
      elsif child.comment?
        flow_text "#{indent} _.comment #{child.text.strip.enquote}",
                  "\" +\n #{indent}\""
      else
        code(child, indent + ' ')
      end

      # insert a blank line if either this or the previous block was large
      if $group && start + $group < $q.length
        $q[start].sub!(/^(\s+_\w+)([ .])/, '\1_\2')
        $q.insert(start, '') unless first
        blank = true
      else
        $q.insert(start, '') if blank
        blank = false
      end
      start = $q.length
      first = false
    end
    q indent + "end"

  elsif element.name == 'pre' && element.text.include?("\n")
    after = "#{indent} "
    data = reindent_block(element.text, after)

    # the generated heredoc strips the margin added above
    flow_attrs "#{indent}_pre <<-EOD.gsub(/^\\s{#{after.length}}/,'')",
               attributes, indent
    data.split("\n").each { |row| q row }
    q "#{indent}EOD"

  # element has text but no attributes or children
  elsif attributes.empty?
    if %w(script style).include?(element.name) && element.text.include?("\n")
      script = reindent_block(element.text, "#{indent} ")

      q "#{line} %{"
      script.split("\n").each { |row| q row }
      q "#{indent}}"
    else
      flow_text "#{line} #{element.text.enquote}", "\" +\n #{indent}\""
    end

  # element has text and attributes but no children
  else
    flow_attrs "#{indent}_#{element_name} #{element.text.enquote}",
               attributes, indent
  end
end
196
-
197
# Download every URL named on the command line and queue its conversion.
ARGV.each do |arg|
  $uri = URI.parse(arg)
  doc = Net::HTTP.get($uri)

  # Map each local name seen in a prefixed closing tag (</prefix:name>) to
  # the "prefix :name" form used when emitting that element.
  closing_tags = doc.scan(/<\/(\w+):(\w+)>/).uniq
  pairs = closing_tags.map { |prefix, local| [local, "#{prefix} :#{local}"] }
  $namespaced = Hash[pairs]

  # Discard names that also occur as unprefixed opening tags in the markup.
  $namespaced.delete_if { |name, _value| doc =~ /<#{name}[ >]/ }

  code(Nokogiri::HTML(doc).root)
end
206
-
207
# Script preamble: on Rubies whose strings know about encodings, emit a magic
# encoding comment when any queued line contains a character outside
# \x20-\x7f; otherwise emit a rubygems require.
if ''.respond_to?('encoding')
  non_ascii = $q.any? { |line| line.match(/[^\x20-\x7f]/) }
  puts '# encoding: utf-8' if non_ascii
else
  puts "require 'rubygems'"
end

puts "require 'wunderbar'\n\n"

# Finally, print every queued line of the generated script.
puts $q.join("\n")