hotcell 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +4 -1
  3. data/.rspec +1 -0
  4. data/.rvmrc +1 -1
  5. data/.travis.yml +7 -0
  6. data/Gemfile +4 -1
  7. data/README.md +361 -2
  8. data/Rakefile +28 -6
  9. data/ext/lexerc/extconf.rb +3 -0
  10. data/ext/lexerc/lexerc.c +618 -0
  11. data/ext/lexerc/lexerc.h +20 -0
  12. data/ext/lexerc/lexerc.rl +167 -0
  13. data/hotcell.gemspec +8 -7
  14. data/lib/hotcell/commands/case.rb +59 -0
  15. data/lib/hotcell/commands/cycle.rb +38 -0
  16. data/lib/hotcell/commands/for.rb +70 -0
  17. data/lib/hotcell/commands/if.rb +51 -0
  18. data/lib/hotcell/commands/include.rb +21 -0
  19. data/lib/hotcell/commands/scope.rb +13 -0
  20. data/lib/hotcell/commands/unless.rb +23 -0
  21. data/lib/hotcell/commands.rb +13 -0
  22. data/lib/hotcell/config.rb +33 -6
  23. data/lib/hotcell/context.rb +40 -7
  24. data/lib/hotcell/errors.rb +37 -28
  25. data/lib/hotcell/extensions.rb +4 -0
  26. data/lib/hotcell/lexer.rb +19 -635
  27. data/lib/hotcell/lexerr.rb +572 -0
  28. data/lib/hotcell/lexerr.rl +137 -0
  29. data/lib/hotcell/node/assigner.rb +1 -5
  30. data/lib/hotcell/node/block.rb +17 -40
  31. data/lib/hotcell/node/command.rb +29 -22
  32. data/lib/hotcell/node/hasher.rb +1 -1
  33. data/lib/hotcell/node/summoner.rb +2 -6
  34. data/lib/hotcell/node/tag.rb +10 -7
  35. data/lib/hotcell/node.rb +12 -1
  36. data/lib/hotcell/parser.rb +474 -408
  37. data/lib/hotcell/parser.y +175 -117
  38. data/lib/hotcell/resolver.rb +44 -0
  39. data/lib/hotcell/source.rb +35 -0
  40. data/lib/hotcell/template.rb +15 -6
  41. data/lib/hotcell/version.rb +1 -1
  42. data/lib/hotcell.rb +15 -10
  43. data/spec/data/templates/simple.hc +1 -0
  44. data/spec/lib/hotcell/commands/case_spec.rb +39 -0
  45. data/spec/lib/hotcell/commands/cycle_spec.rb +29 -0
  46. data/spec/lib/hotcell/commands/for_spec.rb +65 -0
  47. data/spec/lib/hotcell/commands/if_spec.rb +35 -0
  48. data/spec/lib/hotcell/commands/include_spec.rb +39 -0
  49. data/spec/lib/hotcell/commands/scope_spec.rb +16 -0
  50. data/spec/lib/hotcell/commands/unless_spec.rb +23 -0
  51. data/spec/lib/hotcell/config_spec.rb +35 -10
  52. data/spec/lib/hotcell/context_spec.rb +58 -18
  53. data/spec/lib/hotcell/lexer_spec.rb +37 -28
  54. data/spec/lib/hotcell/node/block_spec.rb +28 -56
  55. data/spec/lib/hotcell/node/command_spec.rb +7 -31
  56. data/spec/lib/hotcell/node/tag_spec.rb +16 -0
  57. data/spec/lib/hotcell/parser_spec.rb +152 -123
  58. data/spec/lib/hotcell/resolver_spec.rb +28 -0
  59. data/spec/lib/hotcell/source_spec.rb +41 -0
  60. data/spec/lib/hotcell/template_spec.rb +47 -4
  61. data/spec/lib/hotcell_spec.rb +2 -1
  62. data/spec/spec_helper.rb +6 -2
  63. metadata +54 -24
  64. data/lib/hotcell/.DS_Store +0 -0
  65. data/lib/hotcell/lexer.rl +0 -299
  66. data/misc/rage.rl +0 -1999
  67. data/misc/unicode2ragel.rb +0 -305
@@ -1,305 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # This script uses the unicode spec to generate a Ragel state machine
4
- # that recognizes unicode alphanumeric characters. It generates 5
5
- # character classes: uupper, ulower, ualpha, udigit, and ualnum.
6
- # Currently supported encodings are UTF-8 [default] and UCS-4.
7
- #
8
- # Usage: unicode2ragel.rb [options]
9
- # -e, --encoding [ucs4 | utf8] Data encoding
10
- # -h, --help Show this message
11
- #
12
- # This script was originally written as part of the Ferret search
13
- # engine library.
14
- #
15
- # Author: Rakan El-Khalil <rakan@well.com>
16
-
17
- require 'optparse'
18
- require 'open-uri'
19
-
20
- ENCODINGS = [ :utf8, :ucs4 ]
21
- ALPHTYPES = { :utf8 => "unsigned char", :ucs4 => "unsigned int" }
22
- CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
23
-
24
- ###
25
- # Display vars & default option
26
-
27
- TOTAL_WIDTH = 80
28
- RANGE_WIDTH = 23
29
- @encoding = :utf8
30
-
31
- ###
32
- # Option parsing
33
-
34
- cli_opts = OptionParser.new do |opts|
35
- opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
36
- @encoding = o.downcase.to_sym
37
- end
38
- opts.on("-h", "--help", "Show this message") do
39
- puts opts
40
- exit
41
- end
42
- end
43
-
44
- cli_opts.parse(ARGV)
45
- unless ENCODINGS.member? @encoding
46
- puts "Invalid encoding: #{@encoding}"
47
- puts cli_opts
48
- exit
49
- end
50
-
51
- ##
52
- # Downloads the document at url and yields every alpha line's hex
53
- # range and description.
54
-
55
- def each_alpha( url, property )
56
- open( url ) do |file|
57
- file.each_line do |line|
58
- next if line =~ /^#/;
59
- next if line !~ /; #{property} #/;
60
-
61
- range, description = line.split(/;/)
62
- range.strip!
63
- description.gsub!(/.*#/, '').strip!
64
-
65
- if range =~ /\.\./
66
- start, stop = range.split '..'
67
- else start = stop = range
68
- end
69
-
70
- yield start.hex .. stop.hex, description
71
- end
72
- end
73
- end
74
-
75
- ###
76
- # Formats to hex at minimum width
77
-
78
- def to_hex( n )
79
- r = "%0X" % n
80
- r = "0#{r}" unless (r.length % 2).zero?
81
- r
82
- end
83
-
84
- ###
85
- # UCS4 is just a straight hex conversion of the unicode codepoint.
86
-
87
- def to_ucs4( range )
88
- rangestr = "0x" + to_hex(range.begin)
89
- rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
90
- [ rangestr ]
91
- end
92
-
93
- ##
94
- # 0x00 - 0x7f -> 0zzzzzzz[7]
95
- # 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
96
- # 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
97
- # 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
98
-
99
- UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
100
-
101
- def to_utf8_enc( n )
102
- r = 0
103
- if n <= 0x7f
104
- r = n
105
- elsif n <= 0x7ff
106
- y = 0xc0 | (n >> 6)
107
- z = 0x80 | (n & 0x3f)
108
- r = y << 8 | z
109
- elsif n <= 0xffff
110
- x = 0xe0 | (n >> 12)
111
- y = 0x80 | (n >> 6) & 0x3f
112
- z = 0x80 | n & 0x3f
113
- r = x << 16 | y << 8 | z
114
- elsif n <= 0x10ffff
115
- w = 0xf0 | (n >> 18)
116
- x = 0x80 | (n >> 12) & 0x3f
117
- y = 0x80 | (n >> 6) & 0x3f
118
- z = 0x80 | n & 0x3f
119
- r = w << 24 | x << 16 | y << 8 | z
120
- end
121
-
122
- to_hex(r)
123
- end
124
-
125
- def from_utf8_enc( n )
126
- n = n.hex
127
- r = 0
128
- if n <= 0x7f
129
- r = n
130
- elsif n <= 0xdfff
131
- y = (n >> 8) & 0x1f
132
- z = n & 0x3f
133
- r = y << 6 | z
134
- elsif n <= 0xefffff
135
- x = (n >> 16) & 0x0f
136
- y = (n >> 8) & 0x3f
137
- z = n & 0x3f
138
- r = x << 10 | y << 6 | z
139
- elsif n <= 0xf7ffffff
140
- w = (n >> 24) & 0x07
141
- x = (n >> 16) & 0x3f
142
- y = (n >> 8) & 0x3f
143
- z = n & 0x3f
144
- r = w << 18 | x << 12 | y << 6 | z
145
- end
146
- r
147
- end
148
-
149
- ###
150
- # Given a range, splits it up into ranges that can be continuously
151
- # encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
152
- # This is not strictly needed since the current [5.1] unicode standard
153
- # doesn't have ranges that straddle utf8 boundaries. This is included
154
- # for completeness as there is no telling if that will ever change.
155
-
156
- def utf8_ranges( range )
157
- ranges = []
158
- UTF8_BOUNDARIES.each do |max|
159
- if range.begin <= max
160
- return ranges << range if range.end <= max
161
-
162
- ranges << range.begin .. max
163
- range = (max + 1) .. range.end
164
- end
165
- end
166
- ranges
167
- end
168
-
169
- def build_range( start, stop )
170
- size = start.size/2
171
- left = size - 1
172
- return [""] if size < 1
173
-
174
- a = start[0..1]
175
- b = stop[0..1]
176
-
177
- ###
178
- # Shared prefix
179
-
180
- if a == b
181
- return build_range(start[2..-1], stop[2..-1]).map do |elt|
182
- "0x#{a} " + elt
183
- end
184
- end
185
-
186
- ###
187
- # Unshared prefix, end of run
188
-
189
- return ["0x#{a}..0x#{b} "] if left.zero?
190
-
191
- ###
192
- # Unshared prefix, not end of run
193
- # Range can be 0x123456..0x56789A
194
- # Which is equivalent to:
195
- # 0x123456 .. 0x12FFFF
196
- # 0x130000 .. 0x55FFFF
197
- # 0x560000 .. 0x56789A
198
-
199
- ret = []
200
- ret << build_range(start, a + "FF" * left)
201
-
202
- ###
203
- # Only generate middle range if need be.
204
-
205
- if a.hex+1 != b.hex
206
- max = to_hex(b.hex - 1)
207
- max = "FF" if b == "FF"
208
- ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
209
- end
210
-
211
- ###
212
- # Don't generate last range if it is covered by first range
213
-
214
- ret << build_range(b + "00" * left, stop) unless b == "FF"
215
- ret.flatten!
216
- end
217
-
218
- def to_utf8( range )
219
- utf8_ranges( range ).map do |r|
220
- build_range to_utf8_enc(r.begin), to_utf8_enc(r.end)
221
- end.flatten!
222
- end
223
-
224
- ##
225
- # Perform a 3-way comparison of the number of codepoints advertised by
226
- # the unicode spec for the given range, the originally parsed range,
227
- # and the resulting utf8 encoded range.
228
-
229
- def count_codepoints( code )
230
- code.split(' ').inject(1) do |acc, elt|
231
- if elt =~ /0x(.+)\.\.0x(.+)/
232
- if @encoding == :utf8
233
- acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
234
- else
235
- acc * ($2.hex - $1.hex + 1)
236
- end
237
- else
238
- acc
239
- end
240
- end
241
- end
242
-
243
- def is_valid?( range, desc, codes )
244
- spec_count = 1
245
- spec_count = $1.to_i if desc =~ /\[(\d+)\]/
246
- range_count = range.end - range.begin + 1
247
-
248
- sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
249
- sum == spec_count and sum == range_count
250
- end
251
-
252
- ##
253
- # Generate the state machine to stdout
254
-
255
- def generate_machine( name, property )
256
- pipe = " "
257
- puts " #{name} = "
258
- each_alpha( CHART_URL, property ) do |range, desc|
259
-
260
- codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
261
-
262
- raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
263
- is_valid? range, desc, codes
264
-
265
- range_width = codes.map { |a| a.size }.max
266
- range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
267
-
268
- desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
269
- desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
270
-
271
- if desc.size > desc_width
272
- desc = desc[0..desc_width - 4] + "..."
273
- end
274
-
275
- codes.each_with_index do |r, idx|
276
- desc = "" unless idx.zero?
277
- code = "%-#{range_width}s" % r
278
- puts " #{pipe} #{code} ##{desc}"
279
- pipe = "|"
280
- end
281
- end
282
- puts " ;"
283
- puts ""
284
- end
285
-
286
- puts <<EOF
287
- # The following Ragel file was autogenerated with #{$0}
288
- # from: #{CHART_URL}
289
- #
290
- # It defines ualpha, udigit, ualnum.
291
- #
292
- # To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
293
- # and that your input is in #{@encoding}.
294
-
295
- %%{
296
- machine WChar;
297
- EOF
298
- generate_machine( :ualpha, "Alphabetic" )
299
- generate_machine( :ulower, "Lowercase" )
300
- generate_machine( :uupper, "Uppercase" )
301
- puts <<EOF
302
- udigit = '0'..'9';
303
- ualnum = ualpha | udigit;
304
- }%%
305
- EOF