hotcell 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +15 -0
- data/Guardfile +24 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +17 -0
- data/hotcell.gemspec +22 -0
- data/lib/hotcell/.DS_Store +0 -0
- data/lib/hotcell/config.rb +31 -0
- data/lib/hotcell/context.rb +36 -0
- data/lib/hotcell/errors.rb +43 -0
- data/lib/hotcell/extensions.rb +42 -0
- data/lib/hotcell/lexer.rb +783 -0
- data/lib/hotcell/lexer.rl +299 -0
- data/lib/hotcell/manipulator.rb +31 -0
- data/lib/hotcell/node/arrayer.rb +7 -0
- data/lib/hotcell/node/assigner.rb +11 -0
- data/lib/hotcell/node/block.rb +58 -0
- data/lib/hotcell/node/calculator.rb +35 -0
- data/lib/hotcell/node/command.rb +41 -0
- data/lib/hotcell/node/hasher.rb +7 -0
- data/lib/hotcell/node/joiner.rb +7 -0
- data/lib/hotcell/node/sequencer.rb +7 -0
- data/lib/hotcell/node/summoner.rb +11 -0
- data/lib/hotcell/node/tag.rb +26 -0
- data/lib/hotcell/node.rb +55 -0
- data/lib/hotcell/parser.rb +1186 -0
- data/lib/hotcell/parser.y +231 -0
- data/lib/hotcell/scope.rb +57 -0
- data/lib/hotcell/template.rb +29 -0
- data/lib/hotcell/version.rb +3 -0
- data/lib/hotcell.rb +19 -0
- data/misc/rage.rl +1999 -0
- data/misc/unicode2ragel.rb +305 -0
- data/spec/data/dstrings +8 -0
- data/spec/data/sstrings +6 -0
- data/spec/lib/hotcell/config_spec.rb +57 -0
- data/spec/lib/hotcell/context_spec.rb +53 -0
- data/spec/lib/hotcell/lexer_spec.rb +340 -0
- data/spec/lib/hotcell/manipulator_spec.rb +64 -0
- data/spec/lib/hotcell/node/block_spec.rb +188 -0
- data/spec/lib/hotcell/node/command_spec.rb +71 -0
- data/spec/lib/hotcell/parser_spec.rb +382 -0
- data/spec/lib/hotcell/scope_spec.rb +160 -0
- data/spec/lib/hotcell/template_spec.rb +41 -0
- data/spec/lib/hotcell_spec.rb +8 -0
- data/spec/spec_helper.rb +44 -0
- metadata +139 -0
@@ -0,0 +1,305 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This script uses the unicode spec to generate a Ragel state machine
|
4
|
+
# that recognizes unicode alphanumeric characters. It generates 5
|
5
|
+
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
|
6
|
+
# Currently supported encodings are UTF-8 [default] and UCS-4.
|
7
|
+
#
|
8
|
+
# Usage: unicode2ragel.rb [options]
|
9
|
+
# -e, --encoding [ucs4 | utf8] Data encoding
|
10
|
+
# -h, --help Show this message
|
11
|
+
#
|
12
|
+
# This script was originally written as part of the Ferret search
|
13
|
+
# engine library.
|
14
|
+
#
|
15
|
+
# Author: Rakan El-Khalil <rakan@well.com>
|
16
|
+
|
17
|
+
require 'optparse'
|
18
|
+
require 'open-uri'
|
19
|
+
|
20
|
+
# Encodings the generator knows how to emit.
ENCODINGS = [ :utf8, :ucs4 ].freeze
# Ragel alphtype reported in the generated header for each encoding.
ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }.freeze
# Unicode data file the character classes are derived from.
CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"

###
# Display vars & default option

TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8

###
# Option parsing

cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    # The bracketed argument is optional, so a bare "-e" hands us nil;
    # to_s turns that into an empty name that fails the check below
    # instead of raising NoMethodError here.
    @encoding = o.to_s.downcase.to_sym
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  # stdout carries the generated Ragel source, so diagnostics go to stderr
  # and the process reports failure through its exit status.
  $stderr.puts "Invalid encoding: #{@encoding}"
  $stderr.puts cli_opts
  exit 1
end
|
50
|
+
|
51
|
+
##
|
52
|
+
# Downloads the document at url and yields every alpha line's hex
|
53
|
+
# range and description.
|
54
|
+
|
55
|
+
# Reads the document at url line by line and yields one
# (code point range, description) pair for every data line that carries
# the given property.
#
# url      - location handed to URI.open (open-uri).
# property - property name as it appears between "; " and " #" on a data
#            line; it is interpolated into a regular expression.
#
# Yields an Integer Range (a single code point becomes a one-element
# range) and the text following the last "#" on the line, stripped.
#
# URI.open is used because plain Kernel#open stopped handling URLs in
# Ruby 3.0; it needs Ruby 2.5 or newer.
def each_alpha( url, property )
  URI.open( url ) do |file|
    file.each_line do |line|
      next if line =~ /^#/
      next if line !~ /; #{property} #/

      range, description = line.split(/;/)
      range = range.strip
      # Non-bang calls: the bang variants return nil when nothing changes,
      # which makes them unsafe to chain.
      description = description.gsub(/.*#/, '').strip

      start, stop = range.split('..')
      stop ||= start

      yield(start.hex..stop.hex, description)
    end
  end
end
|
74
|
+
|
75
|
+
###
|
76
|
+
# Formats to hex at minimum width
|
77
|
+
|
78
|
+
# Renders n as upper-case hex, left-padded with one "0" when needed so
# the result always has an even number of characters (whole bytes).
def to_hex( n )
  digits = format("%0X", n)
  digits.length.odd? ? "0#{digits}" : digits
end
|
83
|
+
|
84
|
+
###
|
85
|
+
# UCS4 is just a straight hex conversion of the unicode codepoint.
|
86
|
+
|
87
|
+
# Formats a code point range as a one-element array holding a Ragel
# literal: "0xAA" for a single code point, "0xAA..0xBB" otherwise.
def to_ucs4( range )
  first = range.begin
  last = range.end
  return [ "0x#{to_hex(first)}" ] if first == last

  [ "0x#{to_hex(first)}..0x#{to_hex(last)}" ]
end
|
92
|
+
|
93
|
+
##
|
94
|
+
# 0x00 - 0x7f -> 0zzzzzzz[7]
|
95
|
+
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
|
96
|
+
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
|
97
|
+
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
|
98
|
+
|
99
|
+
# Highest code point representable with 1, 2, 3 and 4 UTF-8 bytes.
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]

# Encodes code point n as UTF-8 and returns the encoded bytes as one
# upper-case hex string (two digits per byte). Values above 0x10ffff
# produce "00".
def to_utf8_enc( n )
  # Continuation byte: 10xxxxxx carrying the six bits found at `shift`.
  trail = lambda { |shift| 0x80 | ((n >> shift) & 0x3f) }

  bytes =
    if n <= 0x7f
      [ n ]
    elsif n <= 0x7ff
      [ 0xc0 | (n >> 6), trail.call(0) ]
    elsif n <= 0xffff
      [ 0xe0 | (n >> 12), trail.call(6), trail.call(0) ]
    elsif n <= 0x10ffff
      [ 0xf0 | (n >> 18), trail.call(12), trail.call(6), trail.call(0) ]
    else
      [ 0 ]
    end

  # Pack the bytes big-endian into a single integer before formatting.
  packed = bytes.inject(0) { |acc, byte| (acc << 8) | byte }
  to_hex(packed)
end
|
124
|
+
|
125
|
+
# Decodes a UTF-8 byte sequence, given as a hex string with two digits
# per byte (e.g. "E282AC"), back into its integer code point.
#
# The branch is picked from the packed integer value, so a lone lead or
# continuation byte such as "BF" falls into the two-byte branch and
# yields just its low six bits; count_codepoints depends on this when it
# measures per-byte ranges.
#
# Returns the Integer code point, or 0 for values above 0xf7ffffff.
def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    # x holds the top four bits and sits above two six-bit groups,
    # so it is shifted by 12.
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end
|
148
|
+
|
149
|
+
###
|
150
|
+
# Given a range, splits it up into ranges that can be continuously
|
151
|
+
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
|
152
|
+
# This is not strictly needed since the current [5.1] unicode standard
|
153
|
+
# doesn't have ranges that straddle utf8 boundaries. This is included
|
154
|
+
# for completeness as there is no telling if that will ever change.
|
155
|
+
|
156
|
+
# Splits range into consecutive sub-ranges that each stay within one
# UTF-8 encoded length, cutting at the values in UTF8_BOUNDARIES.
# Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
#
# Returns an Array of Ranges; any part of the input above the last
# boundary is left out.
def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    next if range.begin > max
    return ranges << range if range.end <= max

    # Parenthesised on purpose: << binds tighter than .., so without the
    # parentheses the Integer would be pushed and the Array used as a
    # range endpoint.
    ranges << (range.begin..max)
    range = (max + 1)..range.end
  end
  ranges
end
|
168
|
+
|
169
|
+
# Turns a pair of equally long hex byte strings (as produced by
# to_utf8_enc) into Ragel byte-sequence expressions covering everything
# from start to stop.
#
# Returns an Array of Strings; every emitted byte or byte range is
# followed by a single space. Empty input yields [""].
def build_range( start, stop )
  byte_count = start.size / 2
  return [""] if byte_count < 1

  remaining = byte_count - 1
  lo = start[0..1]
  hi = stop[0..1]

  # Same leading byte: emit it once and recurse on the remaining bytes.
  if lo == hi
    tails = build_range(start[2..-1], stop[2..-1])
    return tails.map { |tail| "0x#{lo} " + tail }
  end

  # Different leading byte and nothing after it: a plain byte range.
  return ["0x#{lo}..0x#{hi} "] if remaining.zero?

  # Different leading byte with trailing bytes. E.g. 0x123456..0x56789A
  # is covered by:
  #   0x123456 .. 0x12FFFF
  #   0x130000 .. 0x55FFFF
  #   0x560000 .. 0x56789A
  pieces = [ build_range(start, lo + "FF" * remaining) ]

  # The middle piece exists only when the leading bytes are not adjacent.
  unless lo.hex + 1 == hi.hex
    upper = hi == "FF" ? "FF" : to_hex(hi.hex - 1)
    pieces << "0x#{to_hex(lo.hex + 1)}..0x#{upper} " + "0x00..0xFF " * remaining
  end

  # With a leading byte of FF the middle piece already reaches the end.
  pieces << build_range(hi + "00" * remaining, stop) if hi != "FF"
  pieces.flatten
end
|
217
|
+
|
218
|
+
# Converts a code point range into the list of Ragel byte-sequence
# expressions matching its UTF-8 encoding: the range is first cut at
# UTF-8 length boundaries, then each piece is expanded by build_range.
#
# Returns a flat Array of Strings. flat_map is used so an input with no
# encodable piece gives [] (a chained flatten! would give nil).
def to_utf8( range )
  utf8_ranges( range ).flat_map do |r|
    build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
  end
end
|
223
|
+
|
224
|
+
##
|
225
|
+
# Perform a 3-way comparison of the number of codepoints advertised by
|
226
|
+
# the unicode spec for the given range, the originally parsed range,
|
227
|
+
# and the resulting utf8 encoded range.
|
228
|
+
|
229
|
+
# Counts how many code points one generated expression matches: the
# product of the widths of all "0xAA..0xBB" ranges among its
# space-separated tokens. Tokens that are not ranges count as 1.
#
# In utf8 mode the range bounds are passed through from_utf8_enc before
# subtracting; otherwise they are read as plain hex.
def count_codepoints( code )
  total = 1
  code.split(' ').each do |token|
    bounds = /0x(.+)\.\.0x(.+)/.match(token)
    next unless bounds

    low = bounds[1]
    high = bounds[2]
    width =
      if @encoding == :utf8
        from_utf8_enc(high) - from_utf8_enc(low) + 1
      else
        high.hex - low.hex + 1
      end
    total *= width
  end
  total
end
|
242
|
+
|
243
|
+
# Cross-checks three code point counts for one chart entry: the "[N]"
# figure in the description (1 when absent), the size of the parsed
# range, and the total matched by the generated expressions.
#
# Returns true only when all three agree.
def is_valid?( range, desc, codes )
  advertised = (desc =~ /\[(\d+)\]/) ? $1.to_i : 1
  parsed = range.end - range.begin + 1

  encoded = 0
  codes.each { |code| encoded += count_codepoints(code) }

  encoded == advertised && encoded == parsed
end
|
251
|
+
|
252
|
+
##
|
253
|
+
# Generate the state machine to stdout
|
254
|
+
|
255
|
+
# Prints one Ragel definition called name to stdout: one alternative per
# encoded range of every chart entry carrying property, each followed by
# the entry's description as a trailing comment.
#
# Raises RuntimeError when the generated expressions for an entry do not
# match the code point counts checked by is_valid?.
def generate_machine( name, property )
  separator = " "
  puts " #{name} = "

  each_alpha( CHART_URL, property ) do |range, desc|
    codes = if @encoding == :ucs4 then to_ucs4(range) else to_utf8(range) end

    unless is_valid?(range, desc, codes)
      raise "Invalid encoding of range #{range}: #{codes.inspect}"
    end

    # The code column is at least RANGE_WIDTH wide and grows to fit the
    # longest expression.
    widest = codes.map { |c| c.size }.max
    column = widest < RANGE_WIDTH ? RANGE_WIDTH : widest

    # Whatever the code column takes beyond RANGE_WIDTH is taken away
    # from the description so the line stays within TOTAL_WIDTH.
    room = TOTAL_WIDTH - RANGE_WIDTH - 11
    room -= (column - RANGE_WIDTH) if column > RANGE_WIDTH

    label = desc.size > room ? desc[0..(room - 4)] + "..." : desc

    codes.each_with_index do |encoded, idx|
      # Only the first line of an entry carries the description.
      comment = idx.zero? ? label : ""
      padded = "%-#{column}s" % encoded
      puts " #{separator} #{padded} ##{comment}"
      separator = "|"
    end
  end

  puts " ;"
  puts ""
end
|
285
|
+
|
286
|
+
# Emit the Ragel file: a header comment naming the source chart and the
# required alphtype, then the three generated character classes, then
# the fixed udigit / ualnum definitions that close the machine.
puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{CHART_URL}
#
# It defines ualpha, ulower, uupper, udigit, ualnum.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.

%%{
machine WChar;
EOF
generate_machine( :ualpha, "Alphabetic" )
generate_machine( :ulower, "Lowercase" )
generate_machine( :uupper, "Uppercase" )
puts <<EOF
udigit = '0'..'9';
ualnum = ualpha | udigit;
}%%
EOF
|
data/spec/data/dstrings
ADDED
data/spec/data/sstrings
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Hotcell::Config: registering command and block classes under
# a name and reading them back via #commands, #blocks and #subcommands.
describe Hotcell::Config do
  # Built through send(:new); presumably the constructor is not public —
  # confirm against config.rb.
  subject { Hotcell::Config.send(:new) }

  let(:command_class) { Class.new(Hotcell::Command) }
  let(:block_class) { Class.new(Hotcell::Block) { subcommands :else, :elsif } }
  let(:misc_class) { Class.new }

  # A fresh config starts with empty registries.
  specify { subject.blocks.should == {} }
  specify { subject.subcommands.should == {} }
  specify { subject.commands.should == {} }

  describe '#register_command' do
    # A Command subclass is stored under the stringified name.
    context do
      before { subject.register_command :for, command_class }

      specify { subject.blocks.should == {} }
      specify { subject.subcommands.should == {} }
      specify { subject.commands.should == { 'for' => command_class } }
    end

    # A Block subclass is stored as a block and its subcommands are indexed.
    context do
      before { subject.register_command 'for', block_class }

      specify { subject.blocks.should == { 'for' => block_class } }
      specify { subject.subcommands.should == { 'else' => block_class, 'elsif' => block_class } }
      specify { subject.commands.should == {} }
    end

    # Several registrations of both kinds side by side.
    context do
      before do
        subject.register_command 'for', block_class
        subject.register_command :forloop, block_class
        subject.register_command :include, command_class
      end

      specify { subject.blocks.should == { 'for' => block_class, 'forloop' => block_class } }
      specify { subject.commands.should == { 'include' => command_class } }
    end

    context 'errors' do
      # A class that is neither a Command nor a Block is rejected.
      context do
        specify { expect { subject.register_command :for, misc_class }.to raise_error }
      end

      # A name already taken by a block cannot be reused for a command.
      context do
        before { subject.register_command 'for', block_class }

        specify { expect { subject.register_command :for, command_class }.to raise_error }
      end

      # A name already taken by a command cannot be reused for a block.
      context do
        before { subject.register_command :for, command_class }

        specify { expect { subject.register_command 'for', block_class }.to raise_error }
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for Hotcell::Context: how constructor options end up in the
# scope, how #safe handles errors, and variable lookup through
# #manipulator_invoke.
describe Hotcell::Context do
  describe '#initialize' do
    its('scope.scope') { should == [{}] }

    # :scope, :variables and unrecognised keys all land in the scope;
    # :rescuer and :reraise do not appear in it.
    context do
      subject do
        described_class.new(
          scope: { foo: 42, 'bar' => 'baz' },
          variables: { baz: 'moo' },
          boo: 'goo',
          'taz' => 'man',
          rescuer: ->{},
          reraise: true
        )
      end

      its('scope.scope') { should == [{foo: 42, 'bar' => 'baz', 'baz' => 'moo', 'boo' => 'goo', 'taz' => 'man'}] }
    end

    # :variables keys come out as strings, :environment keys as symbols.
    context do
      subject do
        described_class.new(variables: { foo: 42, 'bar' => 'baz' }, environment: { 'baz' => 'moo' })
      end

      its('scope.scope') { should == [{'foo' => 42, 'bar' => 'baz', baz: 'moo'}] }
    end
  end

  describe '#safe' do
    # Without an error the block value is returned, fallback or not.
    specify { subject.safe { 3 }.should == 3 }
    specify { subject.safe(5) { 3 }.should == 3 }
    specify { subject.safe(nil) { 3 }.should == 3 }

    # On error: a message naming the error without a fallback, otherwise
    # the fallback itself (nil included).
    specify { subject.safe { 3 * 'foo' }.should =~ /TypeError/ }
    specify { subject.safe(nil) { 3 * 'foo' }.should == nil }
    specify { subject.safe(5) { 3 * 'foo' }.should == 5 }

    context 'reraise' do
      subject { described_class.new(reraise: true) }

      specify { subject.safe { 'foo' }.should == 'foo' }
      specify { subject.safe('bar') { 'foo' }.should == 'foo' }
      specify { expect { subject.safe { 3 * 'foo' } }.to raise_error TypeError }
      specify { expect { subject.safe('bar') { 3 * 'foo' } }.to raise_error TypeError }
    end

    context 'custom rescuer' do
      subject { described_class.new(rescuer: ->(e){ "Rescued from: #{e.class}" }) }

      specify { subject.safe { 3 * 'foo' }.should =~ /Rescued from: TypeError/ }
    end
  end

  describe '#manipulator_invoke' do
    subject do
      described_class.new(variables: { foo: 42, 'bar' => 'baz' }, environment: { 'baz' => 'moo' })
    end

    specify { subject.manipulator_invoke('foo').should == 42 }
    specify { subject.manipulator_invoke('moo').should be_nil }
  end
end
|