hotcell 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +15 -0
- data/Guardfile +24 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +17 -0
- data/hotcell.gemspec +22 -0
- data/lib/hotcell/.DS_Store +0 -0
- data/lib/hotcell/config.rb +31 -0
- data/lib/hotcell/context.rb +36 -0
- data/lib/hotcell/errors.rb +43 -0
- data/lib/hotcell/extensions.rb +42 -0
- data/lib/hotcell/lexer.rb +783 -0
- data/lib/hotcell/lexer.rl +299 -0
- data/lib/hotcell/manipulator.rb +31 -0
- data/lib/hotcell/node/arrayer.rb +7 -0
- data/lib/hotcell/node/assigner.rb +11 -0
- data/lib/hotcell/node/block.rb +58 -0
- data/lib/hotcell/node/calculator.rb +35 -0
- data/lib/hotcell/node/command.rb +41 -0
- data/lib/hotcell/node/hasher.rb +7 -0
- data/lib/hotcell/node/joiner.rb +7 -0
- data/lib/hotcell/node/sequencer.rb +7 -0
- data/lib/hotcell/node/summoner.rb +11 -0
- data/lib/hotcell/node/tag.rb +26 -0
- data/lib/hotcell/node.rb +55 -0
- data/lib/hotcell/parser.rb +1186 -0
- data/lib/hotcell/parser.y +231 -0
- data/lib/hotcell/scope.rb +57 -0
- data/lib/hotcell/template.rb +29 -0
- data/lib/hotcell/version.rb +3 -0
- data/lib/hotcell.rb +19 -0
- data/misc/rage.rl +1999 -0
- data/misc/unicode2ragel.rb +305 -0
- data/spec/data/dstrings +8 -0
- data/spec/data/sstrings +6 -0
- data/spec/lib/hotcell/config_spec.rb +57 -0
- data/spec/lib/hotcell/context_spec.rb +53 -0
- data/spec/lib/hotcell/lexer_spec.rb +340 -0
- data/spec/lib/hotcell/manipulator_spec.rb +64 -0
- data/spec/lib/hotcell/node/block_spec.rb +188 -0
- data/spec/lib/hotcell/node/command_spec.rb +71 -0
- data/spec/lib/hotcell/parser_spec.rb +382 -0
- data/spec/lib/hotcell/scope_spec.rb +160 -0
- data/spec/lib/hotcell/template_spec.rb +41 -0
- data/spec/lib/hotcell_spec.rb +8 -0
- data/spec/spec_helper.rb +44 -0
- metadata +139 -0
@@ -0,0 +1,305 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This script uses the unicode spec to generate a Ragel state machine
|
4
|
+
# that recognizes unicode alphanumeric characters. It generates 5
|
5
|
+
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
|
6
|
+
# Currently supported encodings are UTF-8 [default] and UCS-4.
|
7
|
+
#
|
8
|
+
# Usage: unicode2ragel.rb [options]
|
9
|
+
# -e, --encoding [ucs4 | utf8] Data encoding
|
10
|
+
# -h, --help Show this message
|
11
|
+
#
|
12
|
+
# This script was originally written as part of the Ferret search
|
13
|
+
# engine library.
|
14
|
+
#
|
15
|
+
# Author: Rakan El-Khalil <rakan@well.com>
|
16
|
+
|
17
|
+
require 'optparse'
|
18
|
+
require 'open-uri'
|
19
|
+
|
20
|
+
# Encodings the generator can emit, the Ragel alphtype each one requires,
# and the Unicode chart the character classes are derived from.
ENCODINGS = [ :utf8, :ucs4 ].freeze
ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }.freeze
CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt".freeze

###
# Display vars & default option

TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8

###
# Option parsing

cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    # The bracketed placeholder makes the argument optional for OptionParser,
    # so a bare "-e" hands nil to this block; keep the current encoding then
    # instead of failing with NoMethodError on nil.
    @encoding = o.downcase.to_sym if o
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  # stdout carries the generated Ragel source, so diagnostics go to stderr
  # and the process reports failure to the caller.
  warn "Invalid encoding: #{@encoding}"
  warn cli_opts.to_s
  exit 1
end
|
50
|
+
|
51
|
+
##
|
52
|
+
# Downloads the document at url and yields every alpha line's hex
|
53
|
+
# range and description.
|
54
|
+
|
55
|
+
# Reads the property chart at +url+ and yields once per data line that
# assigns +property+.
#
# url      - HTTP(S) URL or local path of a DerivedCoreProperties-style file.
# property - property name exactly as written in the chart ("Alphabetic").
#
# Yields a Range of integer codepoints (single codepoints become a
# one-element range) and the text following the last '#' on the line,
# stripped of surrounding whitespace.
def each_alpha( url, property )
  # Built once; the property name is matched literally.
  wanted = /; #{Regexp.escape(property)} #/

  # Kernel#open stopped opening URLs in Ruby 3.0. URI.open (from open-uri)
  # handles them and still falls back to ordinary files.
  URI.open( url ) do |file|
    file.each_line do |line|
      next if line.start_with?('#')
      next unless line =~ wanted

      range, description = line.split(';')
      range = range.strip
      # Non-bang calls: the bang variants return nil when nothing changes,
      # which makes chaining them unsafe.
      description = description.gsub(/.*#/, '').strip

      first, last = range.split('..')
      last ||= first

      yield(first.hex..last.hex, description)
    end
  end
end
|
74
|
+
|
75
|
+
###
|
76
|
+
# Formats to hex at minimum width
|
77
|
+
|
78
|
+
# Renders +n+ as upper-case hexadecimal, left-padded with a single "0" when
# needed so the result always has an even number of digits (whole bytes).
def to_hex( n )
  digits = "%0X" % n
  digits.length.odd? ? "0#{digits}" : digits
end
|
83
|
+
|
84
|
+
###
|
85
|
+
# UCS4 is just a straight hex conversion of the unicode codepoint.
|
86
|
+
|
87
|
+
# Formats a codepoint range as a single UCS-4 Ragel expression.
#
# Returns a one-element Array: "0xNN" when the range holds one codepoint,
# otherwise "0xNN..0xMM".
def to_ucs4( range )
  low  = range.begin
  high = range.end
  return [ "0x" + to_hex(low) ] if low == high

  [ "0x" + to_hex(low) + "..0x" + to_hex(high) ]
end
|
92
|
+
|
93
|
+
##
|
94
|
+
# 0x00 - 0x7f -> 0zzzzzzz[7]
|
95
|
+
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
|
96
|
+
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
|
97
|
+
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
|
98
|
+
|
99
|
+
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
|
100
|
+
|
101
|
+
# Encodes codepoint +n+ as UTF-8 and returns the bytes as one hex string
# (via to_hex), e.g. 0xE9 -> "C3A9". Codepoints above 0x10ffff encode as
# the value 0.
def to_utf8_enc( n )
  bytes =
    if n <= 0x7f
      [ n ]
    elsif n <= 0x7ff
      [ 0xc0 | (n >> 6),
        0x80 | (n & 0x3f) ]
    elsif n <= 0xffff
      [ 0xe0 | (n >> 12),
        0x80 | ((n >> 6) & 0x3f),
        0x80 | (n & 0x3f) ]
    elsif n <= 0x10ffff
      [ 0xf0 | (n >> 18),
        0x80 | ((n >> 12) & 0x3f),
        0x80 | ((n >> 6) & 0x3f),
        0x80 | (n & 0x3f) ]
    else
      [ 0 ]
    end

  # Pack the bytes big-endian into one integer before formatting.
  packed = bytes.inject(0) { |acc, byte| (acc << 8) | byte }
  to_hex(packed)
end
|
124
|
+
|
125
|
+
# Decodes the hex string of a packed UTF-8 sequence (as produced by
# to_utf8_enc, e.g. "E4B880") back into its integer codepoint.
#
# The branch is selected from the packed numeric value alone, so a lone
# non-ASCII byte such as "BF" goes through the two-byte branch and yields
# its low six bits. Values above 0xf7ffffff decode to 0.
def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    # The lead nibble sits above two 6-bit groups, so it shifts by 12.
    # The previous shift of 10 overlapped the middle group.
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end
|
148
|
+
|
149
|
+
###
|
150
|
+
# Given a range, splits it up into ranges that can be continuously
|
151
|
+
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
|
152
|
+
# This is not strictly needed since the current [5.1] unicode standard
|
153
|
+
# doesn't have ranges that straddle utf8 boundaries. This is included
|
154
|
+
# for completeness as there is no telling if that will ever change.
|
155
|
+
|
156
|
+
# Splits +range+ (integer codepoints) at the UTF-8 length boundaries listed
# in UTF8_BOUNDARIES, so every returned sub-range encodes to a single byte
# length. Example: 0x00..0xff => [0x00..0x7f, 0x80..0xff].
#
# Returns an Array of Ranges. Any part of +range+ above the last boundary
# is dropped, so a range entirely above it yields [].
def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    next if range.begin > max
    return ranges << range if range.end <= max

    # Parentheses are required: `<<` binds tighter than `..`, so the
    # unparenthesized form pushed an Integer and then raised while building
    # an Array..Integer range.
    ranges << (range.begin..max)
    range = (max + 1)..range.end
  end
  ranges
end
|
168
|
+
|
169
|
+
# Expands a pair of equally long hex byte strings (+start+..+stop+) into
# Ragel byte-sequence expressions, one string per alternative, each byte
# term followed by a space.
#
# Works one leading byte at a time:
#   * equal leading bytes become a literal prefix and the tails recurse;
#   * differing leading bytes on the last byte become "0xAA..0xBB ";
#   * otherwise the span is cut into up to three parts, e.g.
#     0x123456..0x56789A =>
#       0x123456 .. 0x12FFFF
#       0x130000 .. 0x55FFFF
#       0x560000 .. 0x56789A
#
# Returns [""] when +start+ holds less than one full byte.
def build_range( start, stop )
  byte_count = start.size / 2
  return [""] if byte_count < 1

  trailing = byte_count - 1
  lead_lo  = start[0, 2]
  lead_hi  = stop[0, 2]

  # Shared leading byte: emit it once and recurse on what follows.
  if lead_lo == lead_hi
    tails = build_range(start[2..-1], stop[2..-1])
    return tails.map { |tail| "0x#{lead_lo} " + tail }
  end

  # Different leading bytes and nothing after them: a plain byte range.
  return ["0x#{lead_lo}..0x#{lead_hi} "] if trailing.zero?

  # First part: from start up to the end of its leading byte's block.
  pieces = []
  pieces.concat(build_range(start, lead_lo + "FF" * trailing))

  # Middle part, only when whole leading-byte blocks lie in between.
  unless lead_lo.hex + 1 == lead_hi.hex
    upper = lead_hi == "FF" ? "FF" : to_hex(lead_hi.hex - 1)
    pieces << "0x#{to_hex(lead_lo.hex + 1)}..0x#{upper} " + "0x00..0xFF " * trailing
  end

  # Last part; skipped for an "FF" leading byte because the middle part
  # already runs through 0xFF.
  pieces.concat(build_range(lead_hi + "00" * trailing, stop)) unless lead_hi == "FF"
  pieces
end
|
217
|
+
|
218
|
+
# Converts a codepoint range into UTF-8 Ragel byte expressions: the range
# is first split by encoded length (utf8_ranges), then each piece is
# expanded with build_range on its encoded endpoints.
#
# Always returns an Array (empty when utf8_ranges yields nothing).
def to_utf8( range )
  # flat_map instead of map + flatten!: flatten! returns nil when there is
  # nothing to flatten, which handed callers nil for an empty split.
  utf8_ranges( range ).flat_map do |r|
    build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
  end
end
|
223
|
+
|
224
|
+
##
|
225
|
+
# Perform a 3-way comparison of the number of codepoints advertised by
|
226
|
+
# the unicode spec for the given range, the originally parsed range,
|
227
|
+
# and the resulting utf8 encoded range.
|
228
|
+
|
229
|
+
# Counts how many byte sequences one generated expression matches: the
# product of the widths of every "0xAA..0xBB" term in +code+ (terms without
# a range count as 1).
#
# Range endpoints are read with from_utf8_enc when @encoding is :utf8 and
# as plain hex otherwise.
def count_codepoints( code )
  utf8 = (@encoding == :utf8)

  widths = code.split(' ').map do |term|
    bounds = /0x(.+)\.\.0x(.+)/.match(term)
    if bounds.nil?
      1
    elsif utf8
      from_utf8_enc(bounds[2]) - from_utf8_enc(bounds[1]) + 1
    else
      bounds[2].hex - bounds[1].hex + 1
    end
  end

  widths.inject(1) { |product, width| product * width }
end
|
242
|
+
|
243
|
+
# Three-way consistency check for one chart entry. Compares
#   * the count advertised in +desc+ as "[N]" (1 when absent),
#   * the size of the parsed codepoint +range+, and
#   * the total number of sequences matched by the generated +codes+.
#
# Returns true only when all three agree.
def is_valid?( range, desc, codes )
  advertised = (desc =~ /\[(\d+)\]/) ? $1.to_i : 1
  spanned    = range.end - range.begin + 1
  encoded    = codes.map { |code| count_codepoints(code) }.inject(0, :+)

  encoded == advertised && encoded == spanned
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# Generate the state machine to stdout
|
254
|
+
|
255
|
+
# Prints one Ragel character-class definition named +name+ to stdout,
# built from every chart entry (via each_alpha on CHART_URL) carrying
# +property+.
#
# Each entry becomes one or more alternatives; the first alternative of an
# entry carries the entry's description as a trailing comment, shortened
# with "..." to fit the line width.
#
# Raises RuntimeError when an entry's generated codes fail is_valid?.
def generate_machine( name, property )
  separator = " "
  puts " #{name} = "

  each_alpha( CHART_URL, property ) do |range, desc|
    codes = @encoding == :ucs4 ? to_ucs4(range) : to_utf8(range)

    unless is_valid?(range, desc, codes)
      raise "Invalid encoding of range #{range}: #{codes.inspect}"
    end

    # Code column is at least RANGE_WIDTH wide; any extra width is taken
    # from the space left for the description.
    column = [codes.map { |code| code.size }.max, RANGE_WIDTH].max
    room   = TOTAL_WIDTH - RANGE_WIDTH - 11 - (column - RANGE_WIDTH)
    label  = desc.size > room ? desc[0..room - 4] + "..." : desc

    codes.each_with_index do |code, idx|
      note = idx.zero? ? label : ""
      puts " #{separator} #{code.ljust(column)} ##{note}"
      # Every alternative after the very first is joined with "|".
      separator = "|"
    end
  end

  puts " ;"
  puts ""
end
|
285
|
+
|
286
|
+
# Emit the Ragel file on stdout: a comment header, the machine preamble,
# one generated character class per Unicode property, then the classes
# derived from them. The header lists every class the file defines
# (ulower and uupper were previously missing from that list).
puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{CHART_URL}
#
# It defines ualpha, ulower, uupper, udigit, ualnum.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.

%%{
machine WChar;
EOF
generate_machine( :ualpha, "Alphabetic" )
generate_machine( :ulower, "Lowercase" )
generate_machine( :uupper, "Uppercase" )
puts <<EOF
udigit = '0'..'9';
ualnum = ualpha | udigit;
}%%
EOF
|
data/spec/data/dstrings
ADDED
data/spec/data/sstrings
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Covers Hotcell::Config#register_command and the three registries it
# fills: commands, blocks and subcommands.
describe Hotcell::Config do
  # The instance is built through +send+; presumably Config's constructor
  # is not public (confirm against lib/hotcell/config.rb).
  subject { Hotcell::Config.send(:new) }

  let(:command_class) { Class.new(Hotcell::Command) }
  let(:block_class) { Class.new(Hotcell::Block) { subcommands :else, :elsif } }
  let(:misc_class) { Class.new }

  # A fresh config starts with every registry empty.
  it { subject.blocks.should == {} }
  it { subject.subcommands.should == {} }
  it { subject.commands.should == {} }

  describe '#register_command' do
    # Registering a plain command only touches +commands+.
    context do
      before { subject.register_command(:for, command_class) }

      it { subject.blocks.should == {} }
      it { subject.subcommands.should == {} }
      it { subject.commands.should == { 'for' => command_class } }
    end

    # Registering a block also registers the block's subcommands.
    context do
      before { subject.register_command('for', block_class) }

      it { subject.blocks.should == { 'for' => block_class } }
      it { subject.subcommands.should == { 'else' => block_class, 'elsif' => block_class } }
      it { subject.commands.should == {} }
    end

    # Several registrations accumulate; names are stored as strings.
    context do
      before do
        subject.register_command('for', block_class)
        subject.register_command(:forloop, block_class)
        subject.register_command(:include, command_class)
      end

      it { subject.blocks.should == { 'for' => block_class, 'forloop' => block_class } }
      it { subject.commands.should == { 'include' => command_class } }
    end

    context 'errors' do
      # A class that is neither a command nor a block is rejected.
      context do
        it { expect { subject.register_command(:for, misc_class) }.to raise_error }
      end

      # A name already taken by a block cannot be reused for a command.
      context do
        before { subject.register_command('for', block_class) }

        it { expect { subject.register_command(:for, command_class) }.to raise_error }
      end

      # A name already taken by a command cannot be reused for a block.
      context do
        before { subject.register_command(:for, command_class) }

        it { expect { subject.register_command('for', block_class) }.to raise_error }
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Covers Hotcell::Context: how constructor options populate the scope,
# how #safe reports or re-raises errors, and #manipulator_invoke lookups.
describe Hotcell::Context do
  describe '#initialize' do
    its('scope.scope') { should == [{}] }

    # :scope entries are kept as given; :variables and unrecognised options
    # end up under string keys; :rescuer and :reraise stay out of the scope.
    context do
      subject do
        described_class.new(
          scope: { foo: 42, 'bar' => 'baz' },
          variables: { baz: 'moo' },
          boo: 'goo',
          'taz' => 'man',
          rescuer: ->{},
          reraise: true
        )
      end

      its('scope.scope') { should == [{foo: 42, 'bar' => 'baz', 'baz' => 'moo', 'boo' => 'goo', 'taz' => 'man'}] }
    end

    # :variables keys become strings, :environment keys become symbols.
    context do
      subject do
        described_class.new(variables: { foo: 42, 'bar' => 'baz' }, environment: { 'baz' => 'moo' })
      end

      its('scope.scope') { should == [{'foo' => 42, 'bar' => 'baz', baz: 'moo'}] }
    end
  end

  describe '#safe' do
    # Without an error the block's value is returned, whatever the default.
    it { subject.safe { 3 }.should == 3 }
    it { subject.safe(5) { 3 }.should == 3 }
    it { subject.safe(nil) { 3 }.should == 3 }
    # With an error: the rescuer's message when no default is given,
    # otherwise the supplied default.
    it { subject.safe { 3 * 'foo' }.should =~ /TypeError/ }
    it { subject.safe(nil) { 3 * 'foo' }.should == nil }
    it { subject.safe(5) { 3 * 'foo' }.should == 5 }

    context 'reraise' do
      subject { described_class.new(reraise: true) }

      it { subject.safe { 'foo' }.should == 'foo' }
      it { subject.safe('bar') { 'foo' }.should == 'foo' }
      it { expect { subject.safe { 3 * 'foo' } }.to raise_error TypeError }
      it { expect { subject.safe('bar') { 3 * 'foo' } }.to raise_error TypeError }
    end

    context 'custom rescuer' do
      subject { described_class.new(rescuer: ->(e){ "Rescued from: #{e.class}" }) }

      it { subject.safe { 3 * 'foo' }.should =~ /Rescued from: TypeError/ }
    end
  end

  describe '#manipulator_invoke' do
    subject do
      described_class.new(variables: { foo: 42, 'bar' => 'baz' }, environment: { 'baz' => 'moo' })
    end

    it { subject.manipulator_invoke('foo').should == 42 }
    it { subject.manipulator_invoke('moo').should be_nil }
  end
end
|