hotcell 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -1
- data/.rspec +1 -0
- data/.rvmrc +1 -1
- data/.travis.yml +7 -0
- data/Gemfile +4 -1
- data/README.md +361 -2
- data/Rakefile +28 -6
- data/ext/lexerc/extconf.rb +3 -0
- data/ext/lexerc/lexerc.c +618 -0
- data/ext/lexerc/lexerc.h +20 -0
- data/ext/lexerc/lexerc.rl +167 -0
- data/hotcell.gemspec +8 -7
- data/lib/hotcell/commands/case.rb +59 -0
- data/lib/hotcell/commands/cycle.rb +38 -0
- data/lib/hotcell/commands/for.rb +70 -0
- data/lib/hotcell/commands/if.rb +51 -0
- data/lib/hotcell/commands/include.rb +21 -0
- data/lib/hotcell/commands/scope.rb +13 -0
- data/lib/hotcell/commands/unless.rb +23 -0
- data/lib/hotcell/commands.rb +13 -0
- data/lib/hotcell/config.rb +33 -6
- data/lib/hotcell/context.rb +40 -7
- data/lib/hotcell/errors.rb +37 -28
- data/lib/hotcell/extensions.rb +4 -0
- data/lib/hotcell/lexer.rb +19 -635
- data/lib/hotcell/lexerr.rb +572 -0
- data/lib/hotcell/lexerr.rl +137 -0
- data/lib/hotcell/node/assigner.rb +1 -5
- data/lib/hotcell/node/block.rb +17 -40
- data/lib/hotcell/node/command.rb +29 -22
- data/lib/hotcell/node/hasher.rb +1 -1
- data/lib/hotcell/node/summoner.rb +2 -6
- data/lib/hotcell/node/tag.rb +10 -7
- data/lib/hotcell/node.rb +12 -1
- data/lib/hotcell/parser.rb +474 -408
- data/lib/hotcell/parser.y +175 -117
- data/lib/hotcell/resolver.rb +44 -0
- data/lib/hotcell/source.rb +35 -0
- data/lib/hotcell/template.rb +15 -6
- data/lib/hotcell/version.rb +1 -1
- data/lib/hotcell.rb +15 -10
- data/spec/data/templates/simple.hc +1 -0
- data/spec/lib/hotcell/commands/case_spec.rb +39 -0
- data/spec/lib/hotcell/commands/cycle_spec.rb +29 -0
- data/spec/lib/hotcell/commands/for_spec.rb +65 -0
- data/spec/lib/hotcell/commands/if_spec.rb +35 -0
- data/spec/lib/hotcell/commands/include_spec.rb +39 -0
- data/spec/lib/hotcell/commands/scope_spec.rb +16 -0
- data/spec/lib/hotcell/commands/unless_spec.rb +23 -0
- data/spec/lib/hotcell/config_spec.rb +35 -10
- data/spec/lib/hotcell/context_spec.rb +58 -18
- data/spec/lib/hotcell/lexer_spec.rb +37 -28
- data/spec/lib/hotcell/node/block_spec.rb +28 -56
- data/spec/lib/hotcell/node/command_spec.rb +7 -31
- data/spec/lib/hotcell/node/tag_spec.rb +16 -0
- data/spec/lib/hotcell/parser_spec.rb +152 -123
- data/spec/lib/hotcell/resolver_spec.rb +28 -0
- data/spec/lib/hotcell/source_spec.rb +41 -0
- data/spec/lib/hotcell/template_spec.rb +47 -4
- data/spec/lib/hotcell_spec.rb +2 -1
- data/spec/spec_helper.rb +6 -2
- metadata +54 -24
- data/lib/hotcell/.DS_Store +0 -0
- data/lib/hotcell/lexer.rl +0 -299
- data/misc/rage.rl +0 -1999
- data/misc/unicode2ragel.rb +0 -305
data/misc/unicode2ragel.rb
DELETED
@@ -1,305 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
#
|
3
|
-
# This script uses the unicode spec to generate a Ragel state machine
|
4
|
-
# that recognizes unicode alphanumeric characters. It generates 5
|
5
|
-
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
|
6
|
-
# Currently supported encodings are UTF-8 [default] and UCS-4.
|
7
|
-
#
|
8
|
-
# Usage: unicode2ragel.rb [options]
|
9
|
-
# -e, --encoding [ucs4 | utf8] Data encoding
|
10
|
-
# -h, --help Show this message
|
11
|
-
#
|
12
|
-
# This script was originally written as part of the Ferret search
|
13
|
-
# engine library.
|
14
|
-
#
|
15
|
-
# Author: Rakan El-Khalil <rakan@well.com>
|
16
|
-
|
17
|
-
require 'optparse'
|
18
|
-
require 'open-uri'
|
19
|
-
|
20
|
-
# Encodings this script knows how to emit ranges for.
ENCODINGS = [ :utf8, :ucs4 ]
# Ragel alphtype that goes with each encoding (printed in the generated header).
ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }
# Unicode 5.1.0 derived-properties chart the character classes are built from.
CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"

###
# Display vars & default option

TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8

###
# Option parsing

cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    # The bracketed argument is optional, so a bare `-e` hands us nil.
    # Leave @encoding nil in that case; the membership check below then
    # reports it as invalid instead of crashing on nil.downcase.
    @encoding = o ? o.downcase.to_sym : nil
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  puts "Invalid encoding: #{@encoding}"
  puts cli_opts
  exit
end
|
50
|
-
|
51
|
-
##
|
52
|
-
# Downloads the document at url and yields every alpha line's hex
|
53
|
-
# range and description.
|
54
|
-
|
55
|
-
##
# Reads the property chart at +url+ and, for every non-comment line tagged
# "; <property> #", yields the codepoint range (a Range of Integers parsed
# from the hex column) and the text that follows the last '#'.
#
# +url+ is opened through open-uri when it parses to a URI type that can be
# opened (http, https, ...); anything else is treated as a local file path.
# The bare Kernel#open the script used before stopped accepting URLs in
# Ruby 3.0.
#
# +property+ is matched literally (it is escaped before being placed in the
# line pattern).
def each_alpha( url, property )
  wanted = /; #{Regexp.escape(property.to_s)} #/

  scan = lambda do |file|
    file.each_line do |line|
      next if line =~ /^#/
      next if line !~ wanted

      range, description = line.split(/;/)
      range = range.strip
      # Non-bang calls: the bang variants return nil when nothing changes,
      # which makes chaining them fragile.
      description = description.gsub(/.*#/, '').strip

      # A single codepoint has no "..": use it as both ends of the range.
      start, stop = range.split('..')
      stop ||= start

      yield((start.hex..stop.hex), description)
    end
  end

  remote = begin
    URI.parse(url)
  rescue URI::InvalidURIError
    nil
  end

  if remote.respond_to?(:open)
    remote.open(&scan)
  else
    File.open(url, &scan)
  end
end
|
74
|
-
|
75
|
-
###
|
76
|
-
# Formats to hex at minimum width
|
77
|
-
|
78
|
-
# Renders +n+ as upper-case hexadecimal, left-padded with a single "0" when
# needed so the result always has an even number of digits (whole bytes).
def to_hex( n )
  digits = format("%0X", n)
  digits.length.odd? ? "0#{digits}" : digits
end
|
83
|
-
|
84
|
-
###
|
85
|
-
# UCS4 is just a straight hex conversion of the unicode codepoint.
|
86
|
-
|
87
|
-
# Formats a codepoint range for the UCS-4 alphabet: "0xAA" for a single
# codepoint, "0xAA..0xBB" otherwise. Returns a one-element array.
def to_ucs4( range )
  first = range.begin
  last = range.end
  text = "0x#{to_hex(first)}"
  text += "..0x#{to_hex(last)}" unless first == last
  [ text ]
end
|
92
|
-
|
93
|
-
##
|
94
|
-
# 0x00 - 0x7f -> 0zzzzzzz[7]
|
95
|
-
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
|
96
|
-
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
|
97
|
-
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
|
98
|
-
|
99
|
-
# Largest codepoint encodable in 1, 2, 3 and 4 UTF-8 bytes respectively.
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]

# Encodes codepoint +n+ as UTF-8 and returns the encoded bytes as one hex
# string (e.g. 0xE9 -> "C3A9"). Values above 0x10ffff produce "00".
def to_utf8_enc( n )
  bytes =
    if n <= 0x7f
      [n]
    elsif n <= 0x7ff
      [0xc0 | (n >> 6), 0x80 | (n & 0x3f)]
    elsif n <= 0xffff
      [0xe0 | (n >> 12), 0x80 | ((n >> 6) & 0x3f), 0x80 | (n & 0x3f)]
    elsif n <= 0x10ffff
      [0xf0 | (n >> 18), 0x80 | ((n >> 12) & 0x3f),
       0x80 | ((n >> 6) & 0x3f), 0x80 | (n & 0x3f)]
    else
      [0]
    end

  # Pack the bytes big-endian into one integer before formatting.
  to_hex(bytes.inject(0) { |packed, byte| (packed << 8) | byte })
end
|
124
|
-
|
125
|
-
# Decodes +n+, a hex string holding UTF-8 encoded bytes (as produced by
# to_utf8_enc), back to the codepoint it represents.
#
# The branch is chosen by the numeric size of the hex value, so a lone byte
# above 0x7f lands in the two-byte branch and yields its low six bits.
# Values above 0xf7ffffff return 0.
def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    # Lead byte bits sit above two 6-bit continuation payloads: shift by 12
    # (the previous shift of 10 overlapped them with the y bits).
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end
|
148
|
-
|
149
|
-
###
|
150
|
-
# Given a range, splits it up into ranges that can be continuously
|
151
|
-
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
|
152
|
-
# This is not strictly needed since the current [5.1] unicode standard
|
153
|
-
# doesn't have ranges that straddle utf8 boundaries. This is included
|
154
|
-
# for completeness as there is no telling if that will ever change.
|
155
|
-
|
156
|
-
# Splits +range+ (a Range of codepoints) at the UTF8_BOUNDARIES so that each
# returned sub-range is encoded with a single UTF-8 byte length.
# Codepoints above the last boundary are dropped.
def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    next if range.begin > max
    return ranges << range if range.end <= max

    # Parentheses are required: `ranges << a .. b` parses as
    # `(ranges << a) .. b`, which raises instead of appending a Range.
    ranges << (range.begin..max)
    range = (max + 1)..range.end
  end
  ranges
end
|
168
|
-
|
169
|
-
# Turns a pair of equal-length hex byte strings (+start+..+stop+) into a list
# of Ragel byte-sequence expressions, each a space-terminated run of "0xNN" or
# "0xNN..0xMM" terms, working one leading byte at a time.
def build_range( start, stop )
  byte_count = start.size / 2
  return [""] if byte_count < 1

  trailing = byte_count - 1
  head = start[0..1]
  tail = stop[0..1]

  # Shared leading byte: emit it once and recurse on the remaining bytes.
  if head == tail
    rest = build_range(start[2..-1], stop[2..-1])
    return rest.map { |suffix| "0x#{head} " + suffix }
  end

  # Differing bytes with nothing after them: a plain byte range.
  return ["0x#{head}..0x#{tail} "] if trailing.zero?

  # Differing leading bytes with more bytes to follow. For example
  # 0x123456..0x56789A is covered by
  #   0x123456 .. 0x12FFFF
  #   0x130000 .. 0x55FFFF
  #   0x560000 .. 0x56789A
  pieces = []
  pieces.concat(build_range(start, head + "FF" * trailing))

  # The middle run is only needed when the leading bytes are not adjacent.
  unless head.hex + 1 == tail.hex
    upper = tail == "FF" ? "FF" : to_hex(tail.hex - 1)
    pieces << ("0x#{to_hex(head.hex + 1)}..0x#{upper} " + "0x00..0xFF " * trailing)
  end

  # When the last leading byte is FF the middle run already extends to it.
  pieces.concat(build_range(tail + "00" * trailing, stop)) unless tail == "FF"
  pieces
end
|
217
|
-
|
218
|
-
# Converts a codepoint range into the list of Ragel byte-sequence expressions
# that match its UTF-8 encoding: the range is split at the UTF-8 length
# boundaries and each piece is expanded with build_range.
#
# Always returns an Array. The previous `map { ... }.flatten!` returned nil
# when utf8_ranges produced no pieces, because flatten! returns nil when it
# changes nothing.
def to_utf8( range )
  utf8_ranges( range ).flat_map do |r|
    build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
  end
end
|
223
|
-
|
224
|
-
##
|
225
|
-
# Perform a 3-way comparison of the number of codepoints advertised by
|
226
|
-
# the unicode spec for the given range, the originally parsed range,
|
227
|
-
# and the resulting utf8 encoded range.
|
228
|
-
|
229
|
-
# Counts how many byte sequences one generated expression matches: the
# product of the sizes of every "0xAA..0xBB" term in +code+ (terms without
# a ".." count as one). Under :utf8 the term bounds are decoded with
# from_utf8_enc; otherwise they are read as plain hex.
def count_codepoints( code )
  code.split(' ').inject(1) do |product, term|
    bounds = term.match(/0x(.+)\.\.0x(.+)/)
    next product unless bounds

    low = bounds[1]
    high = bounds[2]
    width =
      if @encoding == :utf8
        from_utf8_enc(high) - from_utf8_enc(low) + 1
      else
        high.hex - low.hex + 1
      end
    product * width
  end
end
|
242
|
-
|
243
|
-
# Cross-checks three counts for one chart line: the "[N]" count given in
# +desc+ (1 when absent), the size of the parsed +range+, and the number of
# sequences matched by the generated +codes+. True only when all three agree.
def is_valid?( range, desc, codes )
  advertised = (desc[/\[(\d+)\]/, 1] || 1).to_i
  span = range.end - range.begin + 1
  encoded = codes.map { |code| count_codepoints(code) }.inject(0) { |total, c| total + c }

  encoded == advertised && encoded == span
end
|
251
|
-
|
252
|
-
##
|
253
|
-
# Generate the state machine to stdout
|
254
|
-
|
255
|
-
# Prints the Ragel definition "<name> = ... ;" to stdout: one alternative per
# encoded sequence of every chart range carrying +property+, each followed by
# a "#" comment. The chart description is shown on the first line of a range
# only and is shortened with "..." when it would not fit in TOTAL_WIDTH.
# Raises if the generated codes fail the is_valid? cross-check.
def generate_machine( name, property )
  separator = " "
  puts " #{name} = "
  each_alpha( CHART_URL, property ) do |range, desc|
    codes = @encoding == :ucs4 ? to_ucs4(range) : to_utf8(range)

    unless is_valid?(range, desc, codes)
      raise "Invalid encoding of range #{range}: #{codes.inspect}"
    end

    # Column is at least RANGE_WIDTH wide, wider if any code needs it.
    widest = codes.map { |code| code.size }.max
    column = widest < RANGE_WIDTH ? RANGE_WIDTH : widest

    # Whatever the code column uses beyond RANGE_WIDTH comes out of the
    # room left for the description.
    room = TOTAL_WIDTH - column - 11
    desc = desc[0..(room - 4)] + "..." if desc.size > room

    codes.each_with_index do |code, idx|
      desc = "" unless idx.zero?
      padded = "%-#{column}s" % code
      puts " #{separator} #{padded} ##{desc}"
      # Every alternative after the very first is joined with "|".
      separator = "|"
    end
  end
  puts " ;"
  puts ""
end
|
285
|
-
|
286
|
-
# Emit the generated Ragel file: a header comment, the opening of the WChar
# machine, the three chart-derived classes, then the two fixed definitions.
# The header lists every class that is actually emitted below (it previously
# omitted ulower and uupper).
puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{CHART_URL}
#
# It defines ualpha, ulower, uupper, udigit, ualnum.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.

%%{
machine WChar;
EOF
generate_machine( :ualpha, "Alphabetic" )
generate_machine( :ulower, "Lowercase" )
generate_machine( :uupper, "Uppercase" )
puts <<EOF
udigit = '0'..'9';
ualnum = ualpha | udigit;
}%%
EOF
|